params <-
list(report_date = structure(20584, class = "Date"))

## ----simulate-data, message=FALSE, warning=FALSE------------------------------
library(Seurat)
library(XYomics)
library(dplyr)

sim.seurat <- readRDS(
  system.file("extdata", "sc_toy_seurat.rds", package = "XYomics")
)

## ----preprocessing, message=FALSE, warning=FALSE, results='hide'--------------
# Precomputed during data generation

# sim.seurat <- NormalizeData(sim.seurat)
# sim.seurat <- FindVariableFeatures(sim.seurat)
# sim.seurat <- ScaleData(sim.seurat)
# sim.seurat <- RunPCA(sim.seurat)
# sim.seurat <- FindNeighbors(sim.seurat)
# sim.seurat <- FindClusters(sim.seurat)
# sim.seurat <- RunUMAP(sim.seurat)
DimPlot(sim.seurat)

## ----annotate-data, warning=FALSE---------------------------------------------
table(sim.seurat$cell_type)
table(sim.seurat$sex)
table(sim.seurat$status)


## ----sex-stratified-analysis, eval=FALSE--------------------------------------
# # Run for all cell types
# # if you want to have the female and male degs
# 
# results_degs <- sex_stratified_analysis_sc(sim.seurat)
# # The differential expression method can be specified using the `test.use` parameter.
# # Available options include "MAST", "bimod", "poisson", "LR", and any other
# # method supported by Seurat::FindMarkers().
# #
# # The `min_samples` parameter defines the minimum number of cells required
# # to perform the analysis. The default value is 3.
# 
# #
# # Internal QC results generated by XYomics can be accessed via:
# # results_degs$validation
# #
# # This QC output includes assessment of sex and phenotype balance, per-group cell
# # counts, detection of under-represented groups below the minimum cell threshold,
# # and overall design imbalance warnings, as well as dataset sparsity metrics.
# 
# # if you want the categorized DEGs by cell type more lenient than the default thesholds as it is simulated data
# 
# results <- sex_stratified_pipeline_sc(sim.seurat, min_abs_logfc = 0.1,  alpha = 0.1)
# # By default, `target_cell_type_flag = "all"`, meaning that all cell types are plotted.
# # You can also run on one or multiple cell types, for example:
# # target_cell_type_flag = "cell type 1", "cell type 2"
# 

## ----load-stratified-results--------------------------------------------------
results_degs <- readRDS(
  system.file("extdata", "sc_results_degs_small.rds", package = "XYomics")
)

results <- readRDS(
  system.file("extdata", "sc_results_small.rds", package = "XYomics")
)

head(results$`cell type 1`)

## ----sex-interaction-analysis, eval=FALSE-------------------------------------
# # Example for one cell type (e.g., "cell type 1")
# 
# interaction_results <- sex_interaction_pipeline_sc(sim.seurat)
# # By default, `target_cell_type_flag = "all"`, meaning that all cell types are plotted.
# # You can also specify one or multiple cell types, for example:
# # target_cell_type_flag = c("cell type 1", "cell type 2")
# #
# # The `min_samples` parameter defines the minimum number of cells required
# # to perform the interaction analysis. The default value is 20.

## ----load-interaction-results-------------------------------------------------
interaction_results <- readRDS(
  system.file("extdata", "sc_interaction_results_small.rds", package = "XYomics")
)

head(interaction_results$`cell type 1`$summary_stats)# signficant sex-modulated DEGs: interaction_results$`cell type 1`$sig_results 

## ----dot-plots, message=FALSE, warning=FALSE, fig.width=10, fig.height=8------
# Get top gene from each category for 'cell type 1'
top_male_gene <- results$`cell type 1` %>% filter(DEG_Type == "male-specific") %>% top_n(1, -Male_FDR) %>% pull(Gene_Symbols)
top_female_gene <- results$`cell type 1`  %>% filter(DEG_Type == "female-specific") %>% top_n(1, -Female_FDR) %>% pull(Gene_Symbols)
top_dimorphic_gene <- results$`cell type 1`  %>% filter(DEG_Type == "sex-dimorphic") %>% top_n(1, -Male_FDR) %>% pull(Gene_Symbols)
top_neutral_gene <- results$`cell type 1`  %>% filter(DEG_Type == "sex-neutral") %>% top_n(1, -Male_FDR) %>% pull(Gene_Symbols)

top_genes <- c(top_male_gene, top_female_gene, top_dimorphic_gene, top_neutral_gene)

generate_dotplot_sc(sim.seurat, top_genes, target_cell_type_flag = "cell type 1")
# By default, `target_cell_type_flag = "all"`, meaning that all cell types are plotted.
# You can also specify one or multiple cell types, for example:
# target_cell_type_flag = c("cell type 1", "cell type 2")

## ----pathway-analysis, eval=FALSE---------------------------------------------
# # Run for all cell types
# pathway_category <- lapply(
#   results,
#   function(cell) categorized_enrich(cell, enrichment_db = "GO")
# )
# # The `enrichment_db` parameter can be set to "GO", "KEGG", "REACTOME".
# # If the user wants to use a custom database, a TERM2GENE data frame must be
# # provided via the `custom_db` parameter.
# #
# # Gene identifiers can be either any Gene identifier type supported by OrgDb
# #' (e.g., "SYMBOL", "ENTREZID", "ENSEMBL", "UNIPROT")
# # using the `gene_type` parameter.
# #
# # The `return_df` parameter controls the output format:
# # if set to TRUE, results are returned as data frames;
# # by default (FALSE), enrichResult objects are returned.
# # Run for one specific cell type
# # pathway_category_one <- categorized_enrich(results$`cell type 1`, return_df =T, enrichment_db = "GO")

## ----load-pathway-results-----------------------------------------------------
pathway_category <- readRDS(
  system.file("extdata", "sc_pathway_single_small.rds", package = "XYomics")
)

pathway_category_one <- readRDS(
  system.file("extdata", "sc_pathway_single_small_df.rds", package = "XYomics")
)

head(pathway_category_one$`female-specific`)

## ----celltype1_plot ,  results='asis', fig.keep='all', fig.width=15, fig.height=7, message=FALSE----
# visualize via dotplot
plot <- plot_enrichment_dotplots(pathway_category)
print(plot)

## ----build-network, message=FALSE, warning=FALSE------------------------------
# Fetch STRING network (can be replaced with a custom network)
# g <- get_string_network(organism = "9606", score_threshold = 900)

# Load a pre-existing network from a file
g <- readRDS(system.file("extdata", "string_example_network.rds", package = "XYomics")) # You can also download the hormonal network "hormonal_network_metacore.rds" instead of "string_example_network.rds".


## ----network-analysis, eval=FALSE---------------------------------------------
# 
# ### Run for all cell type
# network_results <- ppi_pipeline(results, g)
# 
# # By default, `target_cell_type_flag = "all"`, meaning that all cell types are plotted.
# # You can also specify one or multiple cell types, for example:
# # target_cell_type_flag = c("cell type 1", "cell type 2")
# 
# # Example for one specific cell type
# 
# 
# network_results_one <- ppi_pipeline(results,g , target_cell_type_flag = "cell type 2")

## ----load-network-results-----------------------------------------------------
network_results <- readRDS(
  system.file("extdata", "sc_network_results_small.rds", package = "XYomics")
)

## ----visualize-network, warning=FALSE, message=FALSE--------------------------
female_network <- network_results$`cell type 2`$female_network

#Generate network visualization
plot_network(female_network, "female-specific", results$`cell type 2`, "Cell type 2", show_barplot = T) 

#Generate all_plots for all categories and all cell types

all_plots <- plot_network_pipeline(network_results, results)


## ----report, eval=FALSE-------------------------------------------------------
# # This command generates a comprehensive HTML report for single-cell dataset
# generate_cat_report(results, pathway_category, network_results)
# 
# 

