└── R_version ├── LICENSE ├── .gitignore ├── CITATION.cff ├── Dockerfile ├── README.md ├── renv.lock ├── docs ├── user_guide.md ├── architecture.md ├── publications.md └── tutorials │ └── wgcna_tutorial.Rmd ├── environment.yml ├── singularity.def ├── paper ├── manuscript.tex └── references.bib ├── notebooks ├── 01_data_exploration.ipynb ├── 02_qc_and_normalization.ipynb ├── 03_network_construction.ipynb └── 04_module_detection_and_viz.ipynb ├── .gitattributes ├── .RData ├── tests ├── testthat.R └── testthat │ ├── test_soft_threshold.R │ ├── test_module_detection.R │ └── test_preprocess.R ├── config ├── logging.yaml ├── cluster.yaml └── default.yaml ├── .Rhistory ├── Rpackage ├── inst │ └── shiny │ │ ├── app.R │ │ ├── ui.R │ │ └── server.R ├── NAMESPACE ├── DESCRIPTION └── R │ ├── network_construction.R │ ├── module_detection.R │ ├── soft_threshold.R │ ├── data_preprocessing.R │ └── dendrogram_generation.R ├── ok.R ├── scripts ├── deploy.sh ├── run_cluster.sh └── run_local.sh ├── pipeline ├── scripts │ ├── preprocess.R │ ├── build_tom.R │ ├── pick_soft_threshold.R │ └── detect_modules.R └── Snakefile ├── .github └── workflows │ └── ci.yml └── src ├── utils.R └── viz_helpers.R /R_version/LICENSE: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/CITATION.cff: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/Dockerfile: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/README.md: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/renv.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/docs/user_guide.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/environment.yml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/singularity.def: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/docs/architecture.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/docs/publications.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/paper/manuscript.tex: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/paper/references.bib: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/docs/tutorials/wgcna_tutorial.Rmd: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/notebooks/01_data_exploration.ipynb: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/notebooks/02_qc_and_normalization.ipynb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/notebooks/03_network_construction.ipynb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/notebooks/04_module_detection_and_viz.ipynb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R_version/.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /R_version/.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxprogrammer007/MyWGCNAResearchProject/main/R_version/.RData -------------------------------------------------------------------------------- /R_version/tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(MyWGCNAResearchProject) 3 | 4 | test_check("MyWGCNAResearchProject") 5 | -------------------------------------------------------------------------------- /R_version/config/logging.yaml: -------------------------------------------------------------------------------- 1 | # Logging configuration (used by src/utils.R:initLogging) 2 | logfile: "results/logs/pipeline.log" 3 | level: "INFO" # DEBUG | INFO | WARN | ERROR 4 | 
-------------------------------------------------------------------------------- /R_version/.Rhistory: -------------------------------------------------------------------------------- 1 | version() 2 | version 3 | gc() 4 | snakemake --cores 4 \ 5 | library(MyWGCNAResearchProject) 6 | shiny::runApp(system.file("shiny", package="MyWGCNAResearchProject")) 7 | shiny::runApp(system.file("shiny", package="MyWGCNAResearchProject")) 8 | source("~/MyWGCNAResearchProject/ok.R") 9 | source('MyWGCNAResearchProject/ok.R') 10 | gc() 11 | -------------------------------------------------------------------------------- /R_version/config/cluster.yaml: -------------------------------------------------------------------------------- 1 | # Default settings for all Snakemake rules when running on an HPC scheduler 2 | __default__: 3 | account: my_project 4 | time: "02:00:00" 5 | ntasks: 1 6 | mem: "4G" 7 | 8 | # Override for heavier steps 9 | detect_modules: 10 | account: my_project 11 | time: "04:00:00" 12 | ntasks: 1 13 | mem: "16G" 14 | -------------------------------------------------------------------------------- /R_version/Rpackage/inst/shiny/app.R: -------------------------------------------------------------------------------- 1 | # app.R — entry point 2 | library(shiny) 3 | library(MyWGCNAResearchProject) 4 | 5 | # Source UI and server 6 | source(system.file("shiny/ui.R", package = "MyWGCNAResearchProject"), local = TRUE) 7 | source(system.file("shiny/server.R", package = "MyWGCNAResearchProject"), local = TRUE) 8 | 9 | # Launch 10 | shinyApp(ui = ui, server = server) 11 | -------------------------------------------------------------------------------- /R_version/ok.R: -------------------------------------------------------------------------------- 1 | # ok.R — development entrypoint 2 | 3 | # 1. Set your project root 4 | setwd("~/MyWGCNAResearchProject") 5 | 6 | # 2. 
Load devtools so we can source the package code 7 | if (!requireNamespace("devtools", quietly=TRUE)) install.packages("devtools") 8 | devtools::load_all("Rpackage") # sources everything under Rpackage/R/ 9 | 10 | # 3. Launch the Shiny app 11 | shiny::runApp("Rpackage/inst/shiny", launch.browser = TRUE) 12 | -------------------------------------------------------------------------------- /R_version/scripts/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # scripts/deploy.sh 3 | # ----------------- 4 | # Usage: ./scripts/deploy.sh [tag] 5 | # Example: ./scripts/deploy.sh v0.1.0 6 | 7 | TAG=${1:-latest} 8 | IMAGE_NAME=mydockerhubusername/mywgcna:${TAG} 9 | 10 | echo "📦 Building Docker image: $IMAGE_NAME" 11 | docker build -t $IMAGE_NAME . 12 | 13 | echo "🔑 Pushing to Docker Hub" 14 | docker push $IMAGE_NAME 15 | 16 | echo "✅ Deployment image pushed: $IMAGE_NAME" 17 | -------------------------------------------------------------------------------- /R_version/Rpackage/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Export your core functions 2 | export( 3 | loadExpressionData, 4 | filterGenesByVariance, 5 | detectOutlierSamples, 6 | normalizeExpression, 7 | pickSoftPower, 8 | plotSoftPower, 9 | constructNetwork, 10 | detectModules, 11 | mergeModules, 12 | generateRobustDendrogram, 13 | plotInteractiveDendrogram 14 | ) 15 | 16 | # Import from other packages 17 | import( 18 | WGCNA, 19 | dynamicTreeCut, 20 | flashClust, 21 | dendextend, 22 | plotly, 23 | ggdendro, 24 | DESeq2, 25 | stats, 26 | utils 27 | ) 28 | -------------------------------------------------------------------------------- /R_version/tests/testthat/test_soft_threshold.R: -------------------------------------------------------------------------------- 1 | context("Soft-threshold power selection") 2 | 3 | test_that("pickSoftPower returns valid power and fitIndices", { 4 | # simulate 
small expr: 10 samples × 50 genes 5 | set.seed(1) 6 | dat <- matrix(rnorm(500), nrow = 10, ncol = 50) 7 | rownames(dat) <- paste0("S",1:10) 8 | colnames(dat) <- paste0("G",1:50) 9 | res <- pickSoftPower(dat, powers = 1:5, networkType = "unsigned", corType = "pearson", R2cut = 0) 10 | expect_true(is.list(res)) 11 | expect_true("power" %in% names(res)) 12 | expect_true(res$power %in% 1:5) 13 | expect_true(is.matrix(res$fitIndices)) 14 | expect_equal(ncol(res$fitIndices) >= 5, TRUE) 15 | }) 16 | -------------------------------------------------------------------------------- /R_version/scripts/run_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # scripts/run_cluster.sh 3 | # ---------------------- 4 | # Usage: ./scripts/run_cluster.sh 5 | # Submits Snakemake jobs to the cluster (via sbatch). 6 | 7 | # Number of parallel jobs 8 | JOBS=100 9 | 10 | echo "🐍 Submitting Snakemake workflow to cluster (up to $JOBS jobs)..." 11 | snakemake --jobs $JOBS \ 12 | --cluster-config config/cluster.yaml \ 13 | --cluster "sbatch -A {cluster.account} -t {cluster.time} -n {cluster.ntasks} --mem={cluster.mem}" \ 14 | --latency-wait 60 \ 15 | --rerun-incomplete \ 16 | --snakefile pipeline/Snakefile \ 17 | --configfile config/default.yaml \ 18 | --directory . 19 | -------------------------------------------------------------------------------- /R_version/Rpackage/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: MyWGCNAResearchProject 2 | Type: Package 3 | Title: Robust WGCNA Analysis & Shiny App 4 | Version: 0.1.0 5 | Authors@R: 6 | person("Your", "Name", email = "you@example.com", role = c("aut", "cre")) 7 | Description: Provides functions for robust WGCNA network construction, module detection, 8 | dendrogram stability assessment, and an interactive Shiny interface. 
Depends:
    R (>= 4.1),
    WGCNA,
    dynamicTreeCut,
    flashClust,
    dendextend,
    plotly,
    ggdendro
Imports:
    stats,
    utils,
    DESeq2
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.1

--------------------------------------------------------------------------------
/R_version/Rpackage/R/network_construction.R:
--------------------------------------------------------------------------------
# network_construction.R
# =======================
# Build adjacency and TOM matrices

library(WGCNA)

#' Construct network adjacency and TOM dissimilarity
#'
#' Builds a soft-thresholded adjacency matrix from the expression data,
#' derives the topological overlap matrix (TOM) from that adjacency, and
#' returns both together with the TOM dissimilarity (`1 - TOM`).
#'
#' @param datExpr samples × genes matrix
#' @param power soft-thresholding power
#' @param networkType "unsigned" or "signed"; forwarded to `adjacency()` as
#'   its `type` argument
#' @param corType "pearson" or "bicor"; any other value raises an error
#' @return list(adjacency, TOM, dissTOM)
constructNetwork <- function(
  datExpr,
  power,
  networkType = "unsigned",
  corType = "pearson"
) {
  # Fail loudly on an unknown correlation type; previously anything that was
  # not "bicor" (including typos) silently fell back to Pearson.
  corType <- match.arg(corType, c("pearson", "bicor"))
  corFnc <- if (corType == "bicor") bicor else cor

  adj <- adjacency(
    datExpr,
    power = power,
    type = networkType,
    corFnc = corFnc
  )
  TOM <- TOMsimilarity(adj)

  list(adjacency = adj, TOM = TOM, dissTOM = 1 - TOM)
}

--------------------------------------------------------------------------------
/R_version/scripts/run_local.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# scripts/run_local.sh
# --------------------
# Usage: ./scripts/run_local.sh [pipeline|shiny]
# Default: shiny

MODE=${1:-shiny}

# Load R package in place and start Shiny
if [ "$MODE" = "shiny" ]; then
  echo "🚀 Launching Shiny app (development mode)..."
12 | Rscript -e " \ 13 | if (!requireNamespace('devtools',quietly=TRUE)) install.packages('devtools'); \ 14 | devtools::load_all('Rpackage'); \ 15 | shiny::runApp('Rpackage/inst/shiny', launch.browser=TRUE) \ 16 | " 17 | exit $? 18 | fi 19 | 20 | # Run the Snakemake pipeline locally 21 | if [ "$MODE" = "pipeline" ]; then 22 | echo "🐍 Running Snakemake pipeline..." 23 | snakemake --cores 4 \ 24 | --snakefile pipeline/Snakefile \ 25 | --configfile config/default.yaml \ 26 | --directory . 27 | exit $? 28 | fi 29 | 30 | echo "Unknown mode: $MODE. Use 'pipeline' or 'shiny'." 31 | exit 1 32 | -------------------------------------------------------------------------------- /R_version/config/default.yaml: -------------------------------------------------------------------------------- 1 | # Path to raw expression data (CSV with genes×samples) 2 | expr_file: "data/raw/example_expression.csv" 3 | 4 | # Preprocessing 5 | norm_method: "vst" # "vst" or "log2" 6 | topN: 10000 # number of highest-variance genes to keep 7 | 8 | # Soft‐threshold power scanning 9 | powers: [1,2,3,4,5,6,7,8,9,10,12,14,16,18,20] 10 | tom_type: "unsigned" # "unsigned" or "signed" 11 | cor_type: "pearson" # "pearson" or "bicor" 12 | 13 | # Module detection 14 | deep_split: 2 # dynamicTreeCut sensitivity (0–4) 15 | min_module_size: 30 # smallest module size 16 | merge_height: 0.25 # merge modules whose eigengenes correlate > (1–merge_height) 17 | 18 | # Outlier removal & stability 19 | outlier_z: 2.5 # Z-score cutoff for removing sample outliers 20 | bootstrap: false # whether to bootstrap for stability 21 | n_boot: 100 # bootstrap iterations if enabled 22 | -------------------------------------------------------------------------------- /R_version/Rpackage/inst/shiny/ui.R: -------------------------------------------------------------------------------- 1 | # ui.R 2 | library(shiny) 3 | library(plotly) 4 | 5 | ui <- fluidPage( 6 | titlePanel("Robust WGCNA Explorer"), 7 | sidebarLayout( 8 | sidebarPanel( 
9 | fileInput("exprFile", "Upload Expression CSV", accept = ".csv"), 10 | selectInput("normMethod", "Normalize using:", choices = c("log2", "vst")), 11 | sliderInput("topVar", "Top N variable genes:", min = 1000, max = 20000, value = 10000, step = 1000), 12 | actionButton("runBtn", "Run WGCNA"), 13 | hr(), 14 | numericInput("deepSplit", "Tree cut deepSplit (0–4):", value = 2, min = 0, max = 4), 15 | numericInput("minModSize", "Min module size:", value = 30, min = 5, max = 200), 16 | numericInput("mergeHeight", "Module merge height:", value = 0.25, min = 0, max = 1, step = 0.05) 17 | ), 18 | mainPanel( 19 | tabsetPanel( 20 | tabPanel("Dendrogram", plotOutput("dendPlot")), 21 | tabPanel("Interactive", plotlyOutput("dendPlotly")), 22 | tabPanel("Module Colors", uiOutput("modLegend")) 23 | ) 24 | ) 25 | ) 26 | ) 27 | -------------------------------------------------------------------------------- /R_version/pipeline/scripts/preprocess.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # pipeline/scripts/preprocess.R 4 | # ----------------------------- 5 | # Load & filter genes, then normalize expression 6 | suppressPackageStartupMessages({ 7 | library(optparse) 8 | library(MyWGCNAResearchProject) 9 | }) 10 | 11 | opt <- parse_args(OptionParser(option_list = list( 12 | make_option(c("-i", "--input"), type = "character", help = "Raw expression CSV"), 13 | make_option(c("-o", "--output"), type = "character", help = "Normalized RDS output"), 14 | make_option(c("-m", "--method"), type = "character", default = "vst", 15 | help = "Normalization: 'log2' or 'vst'"), 16 | make_option(c("-t", "--topN"), type = "integer", default = 10000, 17 | help = "Top N variable genes")) 18 | )) 19 | 20 | dir.create(dirname(opt$output), recursive = TRUE, showWarnings = FALSE) 21 | 22 | expr <- loadExpressionData(opt$input) 23 | expr_filt <- filterGenesByVariance(expr, topN = opt$topN) 24 | expr_norm <- 
normalizeExpression(expr_filt, method = opt$method) 25 | 26 | saveRDS(expr_norm, file = opt$output) -------------------------------------------------------------------------------- /R_version/tests/testthat/test_module_detection.R: -------------------------------------------------------------------------------- 1 | context("Module detection and merging") 2 | 3 | test_that("detectModules identifies at least one module", { 4 | # small toy TOM: block diagonal to force two modules 5 | tom <- diag(10) 6 | tom[1:5,1:5] <- tom[1:5,1:5] + 1 7 | tom[6:10,6:10] <- tom[6:10,6:10] + 1 8 | dissTOM <- 1 - tom 9 | res <- detectModules(dissTOM, deepSplit = 1, minModuleSize = 2) 10 | expect_true(is.list(res)) 11 | expect_true("moduleColors" %in% names(res)) 12 | uniqueColors <- unique(res$moduleColors) 13 | expect_true(length(uniqueColors) >= 2) 14 | }) 15 | 16 | test_that("mergeModules merges correctly when multiple modules", { 17 | # simulate expr for merging test: 6 samples × 6 genes 18 | mat <- matrix(rnorm(36), nrow = 6, ncol = 6) 19 | datExpr <- mat 20 | # create two modules: first 3 genes and last 3 genes 21 | colors <- c(rep("blue",3), rep("red",3)) 22 | merged <- mergeModules(datExpr, colors, cutHeight = 0.5) 23 | expect_true(is.list(merged)) 24 | expect_true("mergedColors" %in% names(merged)) 25 | expect_equal(length(merged$mergedColors), 6) 26 | }) 27 | -------------------------------------------------------------------------------- /R_version/pipeline/scripts/build_tom.R: -------------------------------------------------------------------------------- 1 | # pipeline/scripts/build_tom.R 2 | # ----------------------------- 3 | suppressPackageStartupMessages({ 4 | library(optparse) 5 | library(MyWGCNAResearchProject) 6 | }) 7 | 8 | opt <- parse_args(OptionParser(option_list = list( 9 | make_option(c("-e", "--expr"), type = "character", help = "Normalized RDS input"), 10 | make_option(c("-p", "--power"), type = "character", help = "Power RDS input"), 11 | 
make_option(c("-o", "--output"), type = "character", help = "TOM RDS output"), 12 | make_option(c("--networkType"), type = "character", default = "unsigned"), 13 | make_option(c("--corType"), type = "character", default = "pearson") 14 | ))) 15 | 16 | dir.create(dirname(opt$output), recursive = TRUE, showWarnings = FALSE) 17 | 18 | expr_norm <- readRDS(opt$expr) 19 | power <- readRDS(opt$power) 20 | datExpr <- t(expr_norm) 21 | 22 | res <- constructNetwork(datExpr, 23 | power = power, 24 | networkType = opt$networkType, 25 | corType = opt$corType) 26 | 27 | saveRDS(res$TOM, file = opt$output) 28 | -------------------------------------------------------------------------------- /R_version/.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: R-CMD-check 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v3 14 | 15 | - name: Setup R 16 | uses: r-lib/actions/setup-r@v2 17 | 18 | - name: Setup Pandoc 19 | uses: r-lib/actions/setup-pandoc@v2 20 | 21 | - name: Install system dependencies 22 | run: | 23 | sudo apt-get update 24 | sudo apt-get install -y libcurl4-openssl-dev libssl-dev libxml2-dev 25 | 26 | - name: Install R package dependencies 27 | run: | 28 | Rscript -e 'install.packages(c("devtools","roxygen2","testthat"))' 29 | Rscript -e 'devtools::install_deps(dependencies = TRUE)' 30 | 31 | - name: Run tests 32 | run: | 33 | Rscript -e 'devtools::test()' 34 | 35 | - name: Check package 36 | run: | 37 | Rscript -e 'devtools::check()' 38 | 39 | - name: Lint code 40 | run: | 41 | Rscript -e 'if (!requireNamespace("lintr", quietly=TRUE)) install.packages("lintr"); lintr::lint_dir("Rpackage/R")' 42 | -------------------------------------------------------------------------------- /R_version/tests/testthat/test_preprocess.R: 
--------------------------------------------------------------------------------
context("Data preprocessing")

test_that("loadExpressionData reads a small CSV correctly", {
  # create temp file
  tmp <- tempfile(fileext = ".csv")
  write.csv(data.frame(Sample1 = 1:3, Sample2 = c(2,4,6)),
            row.names = c("GeneA","GeneB","GeneC"),
            file = tmp)
  expr <- loadExpressionData(tmp, sep = ",", row.names = 1)
  expect_true(is.matrix(expr))
  expect_equal(rownames(expr), c("GeneA","GeneB","GeneC"))
  expect_equal(colnames(expr), c("Sample1","Sample2"))
  file.remove(tmp)
})

test_that("filterGenesByVariance retains top N genes", {
  # Row i is i * (1, 2, 3, 4), so the row variance grows strictly with i.
  # The previous fixture, matrix(1:20, nrow = 5), gave every row the same
  # variance (each row was i + c(0, 5, 10, 15)), so the expected ordering
  # below could not hold.
  mat <- outer(1:5, 1:4)
  rownames(mat) <- paste0("G",1:5)
  filtered <- filterGenesByVariance(mat, topN = 3)
  expect_equal(nrow(filtered), 3)
  expect_equal(rownames(filtered), c("G5","G4","G3"))
})

test_that("normalizeExpression on log2 produces no NAs", {
  mat <- matrix(c(0,1,4,9), nrow = 2)
  norm <- normalizeExpression(mat, method = "log2")
  expect_false(any(is.na(norm)))
  expect_equal(norm[1,1], log2(0+1))
})


--------------------------------------------------------------------------------
/R_version/Rpackage/inst/shiny/server.R:
--------------------------------------------------------------------------------
# server.R
library(shiny)
library(MyWGCNAResearchProject)
library(plotly)

server <- function(input, output, session) {
  wgcnaRes <- eventReactive(input$runBtn, {
    req(input$exprFile)
    expr <- loadExpressionData(input$exprFile$datapath)
    expr <- filterGenesByVariance(expr, topN = input$topVar)
    norm <- normalizeExpression(expr, method = input$normMethod)
    generateRobustDendrogram(
      exprData = norm,
      deepSplit = input$deepSplit,
      minModuleSize = input$minModSize,
      mergeCutHeight = input$mergeHeight,
      bootstrap =
FALSE 18 | ) 19 | }) 20 | 21 | output$dendPlot <- renderPlot({ 22 | req(wgcnaRes()) 23 | # static plot is drawn inside generateRobustDendrogram() 24 | invisible() 25 | }) 26 | 27 | output$dendPlotly <- renderPlotly({ 28 | res <- wgcnaRes() 29 | plotInteractiveDendrogram(res$geneTree, res$stability) 30 | }) 31 | 32 | output$modLegend <- renderUI({ 33 | res <- wgcnaRes() 34 | cols <- unique(res$moduleColors) 35 | tags$div( 36 | lapply(cols, function(col) { 37 | tags$span(style = paste0("background:", col, ";padding:5px;margin:2px;display:inline-block;"), col) 38 | }) 39 | ) 40 | }) 41 | } 42 | -------------------------------------------------------------------------------- /R_version/pipeline/scripts/pick_soft_threshold.R: -------------------------------------------------------------------------------- 1 | # pipeline/scripts/pick_soft_threshold.R 2 | # -------------------------------------- 3 | suppressPackageStartupMessages({ 4 | library(optparse) 5 | library(MyWGCNAResearchProject) 6 | }) 7 | 8 | opt <- parse_args(OptionParser(option_list = list( 9 | make_option(c("-i", "--input"), type = "character", help = "Normalized RDS input"), 10 | make_option(c("-p", "--output-power"), type = "character", help = "Power RDS output"), 11 | make_option(c("-g", "--output-plot"), type = "character", help = "Plot PNG output"), 12 | make_option(c("--powers"), type = "character", default = "c(1:20)", 13 | help = "Candidate powers, e.g. 
'c(1,2,3)'"), 14 | make_option(c("--networkType"), type = "character", default = "unsigned"), 15 | make_option(c("--corType"), type = "character", default = "pearson") 16 | ))) 17 | 18 | dir.create(dirname(opt$`output-power`), recursive = TRUE, showWarnings = FALSE) 19 | 20 | dir.create(dirname(opt$`output-plot`), recursive = TRUE, showWarnings = FALSE) 21 | 22 | expr_norm <- readRDS(opt$input) 23 | datExpr <- t(expr_norm) 24 | 25 | powers <- eval(parse(text = opt$powers)) 26 | res <- pickSoftPower(datExpr, 27 | powers = powers, 28 | networkType = opt$networkType, 29 | corType = opt$corType) 30 | 31 | saveRDS(res$power, file = opt$`output-power`) 32 | 33 | png(filename = opt$`output-plot`, width = 800, height = 600) 34 | plotSoftPower(res$fitIndices) 35 | dev.off() -------------------------------------------------------------------------------- /R_version/Rpackage/R/module_detection.R: -------------------------------------------------------------------------------- 1 | # module_detection.R 2 | # ==================== 3 | # Dynamic tree cutting & module merging 4 | 5 | library(WGCNA) 6 | library(dynamicTreeCut) 7 | 8 | #' Detect modules from dissTOM 9 | #' 10 | #' @param dissTOM TOM dissimilarity matrix 11 | #' @param deepSplit sensitivity (0–4) 12 | #' @param minModuleSize minimum cluster size 13 | #' @param method clustering linkage ("average", etc.) 
#' @return list(geneTree, moduleColors)
#' @details `dissTOM` is coerced with `as.matrix()` before use, so a `dist`
#'   object is accepted as well as a square matrix. The gene tree is built
#'   with `flashClust()` and cut with `cutreeDynamic()`; the numeric module
#'   labels are converted to colour names with `labels2colors()`.
detectModules <- function(
  dissTOM,
  deepSplit = 2,
  minModuleSize = 30,
  method = "average"
) {
  # flashClust() wants a dist object while cutreeDynamic() is documented to
  # take the distance matrix itself; normalise once and derive both from it.
  distMat <- as.matrix(dissTOM)
  if (nrow(distMat) != ncol(distMat)) {
    stop("dissTOM must be a square dissimilarity matrix", call. = FALSE)
  }

  geneTree <- flashClust(as.dist(distMat), method = method)
  dynMods <- cutreeDynamic(
    dendro = geneTree,
    distM = distMat,
    deepSplit = deepSplit,
    pamRespectsDendro = FALSE,
    minClusterSize = minModuleSize
  )

  list(geneTree = geneTree, moduleColors = labels2colors(dynMods))
}

#' Merge similar modules based on eigengene clustering
#'
#' Thin wrapper around `mergeCloseModules()` that returns the merged colour
#' vector next to the complete merge result.
#'
#' @param datExpr samples × genes matrix
#' @param moduleColors vector of module assignments
#' @param cutHeight merge threshold (e.g. 0.25)
#' @return list(mergedColors, mergeInfo)
mergeModules <- function(
  datExpr,
  moduleColors,
  cutHeight = 0.25
) {
  # Named `merged` rather than `merge` so base::merge is not shadowed.
  merged <- mergeCloseModules(
    datExpr,
    moduleColors,
    cutHeight = cutHeight,
    verbose = 0
  )
  list(mergedColors = merged$colors, mergeInfo = merged)
}

--------------------------------------------------------------------------------
/R_version/src/utils.R:
--------------------------------------------------------------------------------
# src/utils.R
# =========================
# Utility functions for configuration management, logging, and directory setup

# Ensure required packages are installed
if (!requireNamespace("yaml", quietly=TRUE)) install.packages("yaml")
if (!requireNamespace("logging", quietly=TRUE)) install.packages("logging")

library(yaml)
library(logging)

#' Read YAML configuration file
#'
#' @param config_file Path to YAML file (default: "config/default.yaml")
#' @return List of configuration parameters
readConfig <- function(config_file = "config/default.yaml") {
  yaml::read_yaml(config_file)
}

#' Initialize logging to file and
console
#'
#' Configures the root logger with `basicConfig()` and attaches a file
#' handler that writes to `logfile`. The directory of `logfile` is created
#' first if it does not exist.
#'
#' @param logfile File path for log output (default: "results/logs/pipeline.log")
#' @param level Logging level as string (DEBUG, INFO, WARN, ERROR)
#' @return NULL
initLogging <- function(logfile = "results/logs/pipeline.log", level = "INFO") {
  # NOTE(review): getLevel() is not defined in this file; presumably it comes
  # from the attached logging package -- confirm it accepts a level name.
  lvl <- getLevel(level)

  # The file handler writes into this directory, which a fresh checkout
  # does not have yet.
  dir.create(dirname(logfile), recursive = TRUE, showWarnings = FALSE)

  # basicConfig() already installs a console handler on the root logger, so
  # only the file handler is added here; attaching writeToConsole a second
  # time made every record appear twice on the console.
  basicConfig(level = lvl)
  addHandler(writeToFile, file = logfile, level = lvl)

  loginfo("Logging initialized. File: %s", logfile)
  invisible(NULL)
}

#' Ensure directories exist (create if missing)
#'
#' Walks `dirs` in order; each path that does not exist yet is created
#' recursively and the creation is logged.
#'
#' @param dirs Character vector of directory paths
#' @return NULL
ensureDirs <- function(dirs) {
  for (path in dirs) {
    # Checked per iteration: an earlier recursive create may already have
    # produced this path.
    if (dir.exists(path)) {
      next
    }
    dir.create(path, recursive = TRUE)
    loginfo("Created directory: %s", path)
  }
}

--------------------------------------------------------------------------------
/R_version/Rpackage/R/soft_threshold.R:
--------------------------------------------------------------------------------
# soft_threshold.R
# =================
# Auto-pick and visualize soft-thresholding power for scale-free topology

library(WGCNA)

#' Pick soft-thresholding power
#'
#' Runs `pickSoftThreshold()` over the candidate powers with `R2cut` as the
#' scale-free fit cut-off and returns its power estimate. When no candidate
#' reaches the cut-off, the power with the best signed fit
#' (`-sign(slope) * SFT.R.sq`) is used instead and a message is emitted.
#'
#' @param datExpr samples × genes matrix
#' @param powers candidate integer vector
#' @param networkType "unsigned" or "signed"
#' @param corType "pearson" or "bicor"; any other value raises an error
#' @param R2cut minimum scale-free R²
#' @return list(power, fitIndices)
pickSoftPower <- function(
  datExpr,
  powers = 1:20,
  networkType = "unsigned",
  corType = "pearson",
  R2cut = 0.80
) {
  # Reject unknown correlation types instead of silently using Pearson.
  corType <- match.arg(corType, c("pearson", "bicor"))
  corFnc <- if (corType == "bicor") bicor else cor

  # R2cut is forwarded as RsquaredCut so the estimate honours the caller's
  # threshold; without it pickSoftThreshold() applies its own default and
  # returns NA for fits that do satisfy R2cut.
  sft <- pickSoftThreshold(
    datExpr,
    RsquaredCut = R2cut,
    powerVector = powers,
    networkType = networkType,
    corFnc = corFnc,
    verbose = 0
  )

  power <- sft$powerEstimate
  if (is.na(power)) {
    # fallback to highest R², counting only fits with a negative slope as
    # scale-free (same signed measure pickSoftThreshold() uses internally)
    fit <- sft$fitIndices
    signedR2 <- -sign(fit[, "slope"]) * fit[, "SFT.R.sq"]
    power <- powers[which.max(signedR2)]
    message("Fallback power = ", power)
  }

  list(power = power, fitIndices = sft$fitIndices)
}

#' Plot scale-free fit and mean connectivity
#'
#' Draws two panels side by side: column 2 of `fitIndices` against column 1
#' (scale-free R² by power) and column 5 against column 1 (mean connectivity
#' by power). The caller's `par()` layout is restored on exit.
#'
#' @param fitIndices from pickSoftThreshold()
plotSoftPower <- function(fitIndices) {
  # Restore whatever layout the caller had, also when plotting fails,
  # instead of forcing a 1 x 1 layout afterwards.
  oldPar <- par(mfrow = c(1, 2))
  on.exit(par(oldPar), add = TRUE)

  plot(fitIndices[, 1], fitIndices[, 2],
       xlab = "Soft Threshold (power)", ylab = "Scale Free R²",
       type = "b", main = "Scale Free Fit")
  plot(fitIndices[, 1], fitIndices[, 5],
       xlab = "Soft Threshold (power)", ylab = "Mean Connectivity",
       type = "b", main = "Mean Connectivity")
}

--------------------------------------------------------------------------------
/R_version/Rpackage/R/data_preprocessing.R:
--------------------------------------------------------------------------------
# data_preprocessing.R
# ====================
# Functions to load, QC-filter and normalize expression data

# dependencies
library(WGCNA)
if (!requireNamespace("DESeq2", quietly=TRUE)) {
  message("installing DESeq2 for VST normalization...")
  BiocManager::install("DESeq2")
}
library(DESeq2)

#' Load expression matrix (genes × samples)
#'
#' Reads the file with `read.csv()` and converts the result to a matrix.
#' Stops if the result is not numeric, e.g. because the file carries
#' annotation columns.
#'
#' @param file path to CSV/TSV
#' @param sep field separator (',' or '\t')
#' @param row.names column for gene IDs
#' @return numeric matrix
loadExpressionData <- function(file, sep = ",", row.names = 1) {
  df <- read.csv(file, sep = sep, row.names = row.names, check.names = FALSE)
  mat <- as.matrix(df)
  # as.matrix() yields a character matrix as soon as one column is text;
  # surface that here rather than in a later variance computation.
  if (!is.numeric(mat)) {
    stop("Expression file does not parse as a numeric matrix: ", file,
         call. = FALSE)
  }
  mat
}

#' Filter genes by variance
#'
#' Keeps the `topN` rows with the largest variance, ordered from highest to
#' lowest variance. If `topN` exceeds the number of rows, all rows are kept.
#'
#' @param exprData genes × samples matrix
#' @param topN number of highest-variance genes to keep
#' @return filtered matrix
filterGenesByVariance <- function(exprData, topN = 10000) {
  geneVar <- apply(exprData, 1, var, na.rm = TRUE)
  nKeep <- min(topN, length(geneVar))
  keep <- order(geneVar, decreasing = TRUE)[seq_len(nKeep)]
  exprData[keep, , drop = FALSE]
}

#' Detect & remove outlier samples by mean-expression Z-score
#'
#' Computes each sample's mean expression, standardises those means with
#' `scale()`, and drops samples whose absolute Z-score exceeds `zCut`. A
#' warning lists the removed samples, by row name or by row index when the
#' matrix has no row names.
#'
#' @param datExpr samples × genes matrix
#' @param zCut Z-score cutoff
#' @return cleaned datExpr
detectOutlierSamples <- function(datExpr, zCut = 2.5) {
  sampleMeans <- rowMeans(datExpr, na.rm = TRUE)
  z <- scale(sampleMeans)
  out <- which(abs(z) > zCut)
  if (length(out) > 0) {
    ids <- rownames(datExpr)
    # Without row names the warning used to list nothing; report indices.
    removed <- if (is.null(ids)) out else ids[out]
    warning("Removing outlier samples: ", paste(removed, collapse = ", "))
    datExpr <- datExpr[-out, , drop = FALSE]
  }
  datExpr
}

#' Normalize using log2 or variance-stabilizing transform (VST)
#'
#' `"log2"` returns `log2(exprData + 1)`. `"vst"` wraps the counts in a
#' DESeq2 data set with an intercept-only design and applies a blind
#' variance-stabilizing transformation.
#'
#' @param exprData raw counts genes × samples
#' @param method "log2" or "vst"
#' @return normalized matrix
normalizeExpression <- function(exprData, method = c("log2", "vst")) {
  method <- match.arg(method)
  if (method == "log2") {
    return(log2(exprData + 1))
  }

  dds <- DESeqDataSetFromMatrix(countData = exprData,
                                colData = DataFrame(row = colnames(exprData)),
                                design = ~ 1)
  # vst() fits on a subsample of `nsub` (default 1000) rows and stops when
  # the data has fewer; DESeq2 recommends calling
  # varianceStabilizingTransformation() directly in that case.
  transformed <- if (nrow(dds) < 1000L) {
    varianceStabilizingTransformation(dds, blind = TRUE)
  } else {
    vst(dds, blind = TRUE)
  }
  assay(transformed)
}

--------------------------------------------------------------------------------
/R_version/pipeline/scripts/detect_modules.R:
--------------------------------------------------------------------------------
# pipeline/scripts/detect_modules.R
# ---------------------------------
suppressPackageStartupMessages({
  library(optparse)
  library(MyWGCNAResearchProject)
})

opt <- parse_args(OptionParser(option_list = list(
  make_option(c("-x","--expr"), type = "character", help = "Normalized RDS input"),
  make_option(c("-t","--tom"), type = "character", help = "TOM RDS input"),
  make_option(c("-p","--out-plot"), type = "character", help = "Dendrogram PNG output"),
  make_option(c("-m","--out-modules"), type = "character",
help = "Modules TSV output"), 13 | make_option(c("--deepSplit"), type = "integer", default = 2), 14 | make_option(c("--minModuleSize"), type = "integer", default = 30), 15 | make_option(c("--mergeCutHeight"),type = "double", default = 0.25), 16 | make_option(c("--outlierZ"), type = "double", default = 2.5), 17 | make_option(c("--bootstrap"), type = "logical", default = FALSE), 18 | make_option(c("--nBoot"), type = "integer", default = 100) 19 | ))) 20 | 21 | dir.create(dirname(opt$`out-plot`), recursive = TRUE, showWarnings = FALSE) 22 | 23 | dir.create(dirname(opt$`out-modules`), recursive = TRUE, showWarnings = FALSE) 24 | 25 | expr_norm <- readRDS(opt$expr) 26 | tomMat <- readRDS(opt$tom) 27 | dissTOM <- 1 - tomMat 28 | 29 | mods <- detectModules(dissTOM, 30 | deepSplit = opt$deepSplit, 31 | minModuleSize = opt$minModuleSize) 32 | 33 | moduleColors <- mods$moduleColors 34 | geneTree <- mods$geneTree 35 | 36 | # Merge 37 | merged <- mergeModules(t(expr_norm), 38 | moduleColors, 39 | cutHeight = opt$mergeCutHeight) 40 | mergedColors <- merged$mergedColors 41 | 42 | # Write modules 43 | dfMods <- data.frame(Gene = names(mergedColors), 44 | Module = mergedColors, 45 | stringsAsFactors = FALSE) 46 | write.table(dfMods, 47 | file = opt$`out-modules`, 48 | sep = "\t", quote = FALSE, row.names = FALSE) 49 | 50 | # Plot dendrogram 51 | png(filename = opt$`out-plot`, width = 1000, height = 800) 52 | plotDendroAndColors(geneTree, 53 | mergedColors, 54 | groupLabels = c("Modules"), 55 | main = "Gene dendrogram and module colors") 56 | dev.off() 57 | -------------------------------------------------------------------------------- /R_version/src/viz_helpers.R: -------------------------------------------------------------------------------- 1 | # src/viz_helpers.R 2 | # ========================= 3 | # Visualization helpers for WGCNA outputs 4 | 5 | # Required packages 6 | if (!requireNamespace("ggplot2", quietly=TRUE)) install.packages("ggplot2") 7 | if 
(!requireNamespace("viridis", quietly=TRUE)) install.packages("viridis") 8 | if (!requireNamespace("pheatmap", quietly=TRUE)) install.packages("pheatmap") 9 | if (!requireNamespace("reshape2", quietly=TRUE)) install.packages("reshape2") 10 | 11 | library(ggplot2) 12 | library(viridis) 13 | library(pheatmap) 14 | library(reshape2) 15 | 16 | #' Theme for dendrogram plots 17 | #' 18 | #' @return ggplot2 theme object 19 | themeDendrogram <- function() { 20 | theme_minimal(base_size = 14) + 21 | theme( 22 | axis.title = element_blank(), 23 | axis.text = element_blank(), 24 | panel.grid = element_blank() 25 | ) 26 | } 27 | 28 | #' Plot TOM heatmap ordered by geneTree 29 | #' 30 | #' @param tom TOM similarity matrix 31 | #' @param geneTree hclust object 32 | #' @param moduleColors Vector of module colors corresponding to genes 33 | #' @param heatmap_file Optional path to save heatmap image 34 | #' @return pheatmap object 35 | plotTOMHeatmap <- function(tom, geneTree, moduleColors, heatmap_file = NULL) { 36 | ord <- geneTree$order 37 | mat_ord <- as.matrix(tom)[ord, ord] 38 | ann_row <- data.frame(Module = moduleColors[ord]) 39 | rownames(ann_row) <- rownames(mat_ord) 40 | 41 | ph <- pheatmap( 42 | mat_ord, 43 | color = viridis(100), 44 | cluster_rows = FALSE, 45 | cluster_cols = FALSE, 46 | annotation_row = ann_row, 47 | show_rownames = FALSE, 48 | show_colnames = FALSE 49 | ) 50 | if (!is.null(heatmap_file)) { 51 | ggsave(heatmap_file, plot = ph[[4]], width = 8, height = 6) 52 | loginfo("Saved TOM heatmap to %s", heatmap_file) 53 | } 54 | return(ph) 55 | } 56 | 57 | #' Plot module-trait correlation heatmap 58 | #' 59 | #' @param moduleTraitCor Matrix of correlations 60 | #' @param moduleTraitP Matrix of p-values (same dims as moduleTraitCor) 61 | #' @return ggplot2 object 62 | plotModuleTraitCorr <- function(moduleTraitCor, moduleTraitP = NULL) { 63 | df <- melt(moduleTraitCor) 64 | names(df) <- c("Module", "Trait", "Correlation") 65 | p <- ggplot(df, aes(x = Trait, y = 
Module, fill = Correlation)) + 66 | geom_tile() + 67 | scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) + 68 | theme_minimal() + 69 | labs(x = NULL, y = NULL) 70 | 71 | if (!is.null(moduleTraitP)) { 72 | df$p <- melt(moduleTraitP)$value 73 | p <- p + geom_text(aes(label = ifelse(p < 0.05, "*", "")), color = "black") 74 | } 75 | return(p) 76 | } 77 | -------------------------------------------------------------------------------- /R_version/pipeline/Snakefile: -------------------------------------------------------------------------------- 1 | # pipeline/Snakefile 2 | 3 | import os 4 | 5 | # 1) Load config 6 | configfile: "config/default.yaml" 7 | 8 | # 2) Define output directories (ensure they exist) 9 | INTER = "results/intermediate" 10 | PROC = "results/processed" 11 | FIGS = "results/figures" 12 | for d in [INTER, PROC, FIGS]: 13 | os.makedirs(d, exist_ok=True) 14 | 15 | # 3) Default final targets 16 | rule all: 17 | input: 18 | # normalized expression 19 | f"{INTER}/expr_norm.Rds", 20 | # chosen power & its plot 21 | f"{INTER}/power.Rds", 22 | f"{FIGS}/soft_threshold.png", 23 | # TOM matrix 24 | f"{INTER}/tom.Rds", 25 | # dendrogram & module‐color table 26 | f"{FIGS}/dendrogram.png", 27 | f"{PROC}/module_colors.tsv" 28 | 29 | # 4) Preprocessing: filter & normalize 30 | rule preprocess: 31 | input: 32 | raw = config["expr_file"] 33 | output: 34 | norm = f"{INTER}/expr_norm.Rds" 35 | params: 36 | method = config["norm_method"], 37 | topN = config["topN"] 38 | shell: 39 | """ 40 | Rscript pipeline/scripts/preprocess.R \ 41 | --input {input.raw} \ 42 | --output {output.norm} \ 43 | --method {params.method} \ 44 | --topN {params.topN} 45 | """ 46 | 47 | # 5) Soft‐threshold power selection 48 | rule pick_soft_threshold: 49 | input: 50 | norm = f"{INTER}/expr_norm.Rds" 51 | output: 52 | power = f"{INTER}/power.Rds", 53 | plot = f"{FIGS}/soft_threshold.png" 54 | params: 55 | powers = "c(" + ",".join(map(str, config["powers"])) + ")", 56 | 
networkType = config.get("tom_type", "unsigned"), 57 | corType = config.get("cor_type", "pearson") 58 | shell: 59 | """ 60 | Rscript pipeline/scripts/pick_soft_threshold.R \ 61 | --input {input.norm} \ 62 | --output-power {output.power} \ 63 | --output-plot {output.plot} \ 64 | --powers {params.powers} \ 65 | --networkType {params.networkType} \ 66 | --corType {params.corType} 67 | """ 68 | 69 | # 6) Build TOM 70 | rule build_tom: 71 | input: 72 | norm = f"{INTER}/expr_norm.Rds", 73 | power = f"{INTER}/power.Rds" 74 | output: 75 | tom = f"{INTER}/tom.Rds" 76 | params: 77 | networkType = config.get("tom_type", "unsigned"), 78 | corType = config.get("cor_type", "pearson") 79 | shell: 80 | """ 81 | Rscript pipeline/scripts/build_tom.R \ 82 | --expr {input.norm} \ 83 | --power {input.power} \ 84 | --output {output.tom} \ 85 | --networkType {params.networkType} \ 86 | --corType {params.corType} 87 | """ 88 | 89 | # 7) Detect modules & plot dendrogram 90 | rule detect_modules: 91 | input: 92 | tom = f"{INTER}/tom.Rds" 93 | output: 94 | dendrogram = f"{FIGS}/dendrogram.png", 95 | modules = f"{PROC}/module_colors.tsv" 96 | params: 97 | deepSplit = config["deep_split"], 98 | minModSize = config["min_module_size"], 99 | mergeCutHeight= config["merge_height"], 100 | outlierZ = config["outlier_z"], 101 | bootstrap = str(config["bootstrap"]).upper(), 102 | nBoot = config["n_boot"] 103 | shell: 104 | """ 105 | Rscript pipeline/scripts/detect_modules.R \ 106 | --tom {input.tom} \ 107 | --out-plot {output.dendrogram} \ 108 | --out-modules {output.modules} \ 109 | --deepSplit {params.deepSplit} \ 110 | --minModuleSize {params.minModSize} \ 111 | --mergeCutHeight {params.mergeCutHeight} \ 112 | --outlierZ {params.outlierZ} \ 113 | --bootstrap {params.bootstrap} \ 114 | --nBoot {params.nBoot} 115 | """ 116 | -------------------------------------------------------------------------------- /R_version/Rpackage/R/dendrogram_generation.R: 
--------------------------------------------------------------------------------
# dendrogram_generation.R
# =======================
# Robust functions for accurate and stable dendrogram generation in a WGCNA workflow

# Required packages
# Install if missing: BiocManager::install("WGCNA"), install.packages(c(
#   "flashClust","dynamicTreeCut","ggdendro","plotly","dendextend"))
library(WGCNA)
library(flashClust)
library(dynamicTreeCut)
library(ggdendro)
library(plotly)
library(dendextend)

# Enable multithreading
enableWGCNAThreads()

#' Generate a robust gene dendrogram based on TOM dissimilarity
#'
#' Performs QC, power selection, network construction, module detection,
#' and optional stability assessment via cophenetic correlation and bootstrapping.
#' The final dendrogram is also drawn on the current graphics device.
#'
#' @param exprData Numeric matrix (genes × samples)
#' @param power Soft-thresholding power (auto-picked if NULL)
#' @param powers Candidate powers vector
#' @param minModuleSize Minimum module size
#' @param mergeCutHeight Merge threshold for modules
#' @param tomType "unsigned" or "signed"
#' @param corType "pearson" or "bicor"
#' @param deepSplit Dynamic tree cut sensitivity (0–4)
#' @param outlierZCut Z-score cutoff for sample removal
#' @param bootstrap Logical: perform bootstrap stability
#' @param nBoot Number of bootstrap iterations
#' @return List with power, datExpr, geneTree, moduleColors, dissTOM,
#'   copheneticCorr, stability
#'
generateRobustDendrogram <- function(
  exprData,
  power          = NULL,
  powers         = 1:20,
  minModuleSize  = 30,
  mergeCutHeight = 0.25,
  tomType        = "unsigned",
  corType        = "pearson",
  deepSplit      = 2,
  outlierZCut    = 2.5,
  bootstrap      = FALSE,
  nBoot          = 100
) {
  # Resolved once so every adjacency/power call uses the same correlation.
  corFnc <- if (corType == "bicor") bicor else cor

  # 0. Transpose & QC
  datExpr <- t(exprData)  # samples × genes from here on
  # Samples missing in more than 10% of genes are dropped (this is what the
  # original row-wise filter did) ...
  sampleNaFrac <- rowMeans(is.na(datExpr))
  datExpr <- datExpr[sampleNaFrac <= 0.1, , drop = FALSE]
  # ... and genes missing in more than 10% of samples are dropped, which is
  # the documented intent that the row-wise filter never implemented.
  geneNaFrac <- colMeans(is.na(datExpr))
  datExpr <- datExpr[, geneNaFrac <= 0.1, drop = FALSE]

  # 1. Sample outlier detection
  sampleMeans <- rowMeans(datExpr, na.rm = TRUE)
  zSample <- as.numeric(scale(sampleMeans))
  # which() ignores NA scores and yields positions, so this also works for
  # matrices without (or with duplicated) sample names.
  outIdx <- which(abs(zSample) > outlierZCut)
  if (length(outIdx) > 0) {
    outliers <- if (is.null(rownames(datExpr))) outIdx else rownames(datExpr)[outIdx]
    warning("Removing outlier samples: ", paste(outliers, collapse=", "))
    datExpr <- datExpr[-outIdx, , drop = FALSE]
  }

  # 2. Soft-threshold power selection
  if (is.null(power)) {
    sft <- pickSoftThreshold(
      datExpr,
      powerVector = powers,
      networkType = tomType,
      corFnc      = corFnc,
      verbose     = 0
    )
    power <- sft$powerEstimate
    if (is.na(power)) {
      power <- powers[which.max(sft$fitIndices[,"SFT.R.sq"])]
      message("Fallback power chosen: ", power)
    }
  }

  # 3. Adjacency & TOM
  adjMat <- adjacency(
    datExpr,
    power  = power,
    type   = tomType,
    corFnc = corFnc
  )
  tomMat  <- TOMsimilarity(adjMat)
  dissTOM <- 1 - tomMat

  # 4. Gene clustering
  geneTree <- flashClust(as.dist(dissTOM), method="average")

  # 5. Dynamic tree cutting
  dynMods <- cutreeDynamic(
    dendro            = geneTree,
    distM             = dissTOM,
    deepSplit         = deepSplit,
    pamRespectsDendro = FALSE,
    minClusterSize    = minModuleSize
  )
  moduleColors <- labels2colors(dynMods)

  # 6. Module merging: only if modules beyond grey
  nonGrey <- setdiff(unique(moduleColors), "grey")
  if (length(nonGrey) == 0) {
    warning("Only grey module detected; skipping merge step.")
    mergedColors <- moduleColors
  } else {
    # The eigengene dissimilarity that used to be computed here was never
    # used; mergeCloseModules() derives what it needs itself.
    merged <- mergeCloseModules(
      datExpr,
      moduleColors,
      cutHeight = mergeCutHeight,
      verbose   = 0
    )
    mergedColors <- merged$colors
  }

  # 7. Cophenetic correlation
  copheCorr <- cor(
    cophenetic(geneTree),
    as.dist(dissTOM),
    use = "pairwise.complete.obs"
  )

  # 8. Bootstrap stability
  stability <- NULL
  if (bootstrap) {
    nSamples <- nrow(datExpr)
    stability <- replicate(nBoot, {
      idx   <- sample(seq_len(nSamples), nSamples, replace = TRUE)
      exprB <- datExpr[idx, , drop=FALSE]
      # Same network type and correlation as the reference tree; otherwise
      # the comparison mixes different network definitions.
      adjB  <- adjacency(exprB, power = power, type = tomType, corFnc = corFnc)
      dissB <- 1 - TOMsimilarity(adjB)
      treeB <- flashClust(as.dist(dissB), method="average")
      cor(cophenetic(treeB), cophenetic(geneTree), use="pairwise.complete.obs")
    })
  }

  # 9. Plot dendrogram
  plotDendroAndColors(
    geneTree,
    mergedColors,
    groupLabels = c("Modules"),
    main        = "Robust Gene Dendrogram"
  )

  # Return
  list(
    power          = power,
    datExpr        = datExpr,
    geneTree       = geneTree,
    moduleColors   = mergedColors,
    dissTOM        = dissTOM,
    copheneticCorr = copheCorr,
    stability      = stability
  )
}

#' Interactive dendrogram
#'
#' Converts the tree to segment data and renders it through ggplotly with an
#' optional "Stability" tooltip.
#'
#' @param geneTree hclust object
#' @param stability Optional numeric stability value(s). A single value, or
#'   one value per dendrogram segment, is shown as given; any other length
#'   (e.g. the per-iteration bootstrap vector) is summarised by its mean,
#'   because a per-segment aesthetic of mismatching length is an error.
#' @return plotly object
plotInteractiveDendrogram <- function(geneTree, stability=NULL) {
  d    <- dendro_data(as.dendrogram(geneTree))
  segs <- segment(d)

  if (is.null(stability)) {
    segLayer <- geom_segment(
      data = segs,
      aes(x=x, y=y, xend=xend, yend=yend)
    )
  } else {
    if (!length(stability) %in% c(1L, nrow(segs))) {
      stability <- mean(stability, na.rm = TRUE)
    }
    segs$tooltip <- paste0("Stability: ", round(stability, 3))
    segLayer <- geom_segment(
      data = segs,
      aes(x=x, y=y, xend=xend, yend=yend, text = tooltip)
    )
  }

  p <- ggplot() +
    segLayer +
    labs(title="Interactive Robust Gene Dendrogram") +
    theme_minimal()
  ggplotly(p, tooltip="text")
}

--------------------------------------------------------------------------------