└── R_version
    ├── LICENSE
    ├── .gitignore
    ├── CITATION.cff
    ├── Dockerfile
    ├── README.md
    ├── renv.lock
    ├── docs
        ├── user_guide.md
        ├── architecture.md
        ├── publications.md
        └── tutorials
        │   └── wgcna_tutorial.Rmd
    ├── environment.yml
    ├── singularity.def
    ├── paper
        ├── manuscript.tex
        └── references.bib
    ├── notebooks
        ├── 01_data_exploration.ipynb
        ├── 02_qc_and_normalization.ipynb
        ├── 03_network_construction.ipynb
        └── 04_module_detection_and_viz.ipynb
    ├── .gitattributes
    ├── .RData
    ├── tests
        ├── testthat.R
        └── testthat
        │   ├── test_soft_threshold.R
        │   ├── test_module_detection.R
        │   └── test_preprocess.R
    ├── config
        ├── logging.yaml
        ├── cluster.yaml
        └── default.yaml
    ├── .Rhistory
    ├── Rpackage
        ├── inst
        │   └── shiny
        │   │   ├── app.R
        │   │   ├── ui.R
        │   │   └── server.R
        ├── NAMESPACE
        ├── DESCRIPTION
        └── R
        │   ├── network_construction.R
        │   ├── module_detection.R
        │   ├── soft_threshold.R
        │   ├── data_preprocessing.R
        │   └── dendrogram_generation.R
    ├── ok.R
    ├── scripts
        ├── deploy.sh
        ├── run_cluster.sh
        └── run_local.sh
    ├── pipeline
        ├── scripts
        │   ├── preprocess.R
        │   ├── build_tom.R
        │   ├── pick_soft_threshold.R
        │   └── detect_modules.R
        └── Snakefile
    ├── .github
        └── workflows
        │   └── ci.yml
    └── src
        ├── utils.R
        └── viz_helpers.R


/R_version/LICENSE:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/.gitignore:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/CITATION.cff:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/Dockerfile:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/README.md:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/renv.lock:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/docs/user_guide.md:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/environment.yml:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/singularity.def:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/docs/architecture.md:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/docs/publications.md:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/paper/manuscript.tex:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/paper/references.bib:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/docs/tutorials/wgcna_tutorial.Rmd:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/notebooks/01_data_exploration.ipynb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/notebooks/02_qc_and_normalization.ipynb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/notebooks/03_network_construction.ipynb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/notebooks/04_module_detection_and_viz.ipynb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/R_version/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/R_version/.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maxprogrammer007/MyWGCNAResearchProject/main/R_version/.RData


--------------------------------------------------------------------------------
/R_version/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(MyWGCNAResearchProject)  # the package under test
3 | # Entry point for the test suite of the package named below.
4 | test_check("MyWGCNAResearchProject")
5 | 


--------------------------------------------------------------------------------
/R_version/config/logging.yaml:
--------------------------------------------------------------------------------
1 | # Logging configuration (used by src/utils.R:initLogging)
2 | logfile: "results/logs/pipeline.log"
3 | level:   "INFO"   # DEBUG | INFO | WARN | ERROR
4 | 


--------------------------------------------------------------------------------
/R_version/.Rhistory:
--------------------------------------------------------------------------------
 1 | version()
 2 | version
 3 | gc()
 4 | snakemake --cores 4 \
 5 | library(MyWGCNAResearchProject)
 6 | shiny::runApp(system.file("shiny", package="MyWGCNAResearchProject"))
 7 | shiny::runApp(system.file("shiny", package="MyWGCNAResearchProject"))
 8 | source("~/MyWGCNAResearchProject/ok.R")
 9 | source('MyWGCNAResearchProject/ok.R')
10 | gc()
11 | 


--------------------------------------------------------------------------------
/R_version/config/cluster.yaml:
--------------------------------------------------------------------------------
 1 | # Default settings for all Snakemake rules when running on an HPC scheduler
 2 | __default__:
 3 |   account: my_project   # {cluster.account} -> sbatch -A   (scripts/run_cluster.sh)
 4 |   time:    "02:00:00"   # {cluster.time}    -> sbatch -t
 5 |   ntasks:  1            # {cluster.ntasks}  -> sbatch -n
 6 |   mem:     "4G"         # {cluster.mem}     -> sbatch --mem
 7 | 
 8 | # Override for heavier steps
 9 | detect_modules:
10 |   account: my_project
11 |   time:    "04:00:00"   # twice the default time limit
12 |   ntasks:  1
13 |   mem:     "16G"        # four times the default memory
14 | 


--------------------------------------------------------------------------------
/R_version/Rpackage/inst/shiny/app.R:
--------------------------------------------------------------------------------
 1 | # app.R — entry point
 2 | library(shiny)
 3 | library(MyWGCNAResearchProject)
 4 | 
 5 | # Source UI and server; mustWork = TRUE turns a missing file into a clear error here
 6 | source(system.file("shiny/ui.R", package = "MyWGCNAResearchProject", mustWork = TRUE), local = TRUE)
 7 | source(system.file("shiny/server.R", package = "MyWGCNAResearchProject", mustWork = TRUE), local = TRUE)
 8 | 
 9 | # Launch with the `ui` and `server` objects defined by the two sourced files
10 | shinyApp(ui = ui, server = server)
11 | 


--------------------------------------------------------------------------------
/R_version/ok.R:
--------------------------------------------------------------------------------
 1 | # ok.R — development entrypoint
 2 | 
 3 | # 1. Set your project root (override the default with the MYWGCNA_ROOT env var)
 4 | setwd(Sys.getenv("MYWGCNA_ROOT", unset = "~/MyWGCNAResearchProject"))
 5 | 
 6 | # 2. Load devtools so we can source the package code
 7 | if (!requireNamespace("devtools", quietly=TRUE)) install.packages("devtools")
 8 | devtools::load_all("Rpackage")    # sources everything under Rpackage/R/
 9 | 
10 | # 3. Launch the Shiny app (path is relative to the project root set in step 1)
11 | shiny::runApp("Rpackage/inst/shiny", launch.browser = TRUE)
12 | 


--------------------------------------------------------------------------------
/R_version/scripts/deploy.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # scripts/deploy.sh
 3 | # -----------------
 4 | # Usage: ./scripts/deploy.sh [tag]
 5 | # Example: ./scripts/deploy.sh v0.1.0
 6 | 
 7 | # Abort on the first failing command, unset variable or failed pipeline stage,
 8 | # so a failed build is never followed by a push and a success message.
 9 | set -euo pipefail
10 | 
11 | # Image tag: first argument, or "latest" when none is given
12 | TAG=${1:-latest}
13 | IMAGE_NAME="mydockerhubusername/mywgcna:${TAG}"
14 | 
15 | echo "📦 Building Docker image: $IMAGE_NAME"
16 | docker build -t "$IMAGE_NAME" .
17 | 
18 | echo "🔑 Pushing to Docker Hub"
19 | docker push "$IMAGE_NAME"
20 | 
21 | echo "✅ Deployment image pushed: $IMAGE_NAME"
22 | 


--------------------------------------------------------------------------------
/R_version/Rpackage/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Public API: the functions this package makes available to its users
 2 | export(
 3 |   loadExpressionData,
 4 |   filterGenesByVariance,
 5 |   detectOutlierSamples,
 6 |   normalizeExpression,
 7 |   pickSoftPower,
 8 |   plotSoftPower,
 9 |   constructNetwork,
10 |   detectModules,
11 |   mergeModules,
12 |   generateRobustDendrogram,
13 |   plotInteractiveDendrogram
14 | )
15 | 
16 | # Whole-package imports: every export of the packages below is visible to the package code
17 | import(
18 |   WGCNA,
19 |   dynamicTreeCut,
20 |   flashClust,
21 |   dendextend,
22 |   plotly,
23 |   ggdendro,
24 |   DESeq2,
25 |   stats,
26 |   utils
27 | )
28 | 


--------------------------------------------------------------------------------
/R_version/tests/testthat/test_soft_threshold.R:
--------------------------------------------------------------------------------
 1 | context("Soft-threshold power selection")
 2 | 
 3 | test_that("pickSoftPower returns valid power and fitIndices", {
 4 |   # simulate small expr: 10 samples × 50 genes
 5 |   set.seed(1)
 6 |   dat <- matrix(rnorm(500), nrow = 10, ncol = 50)
 7 |   rownames(dat) <- paste0("S",1:10)
 8 |   colnames(dat) <- paste0("G",1:50)
 9 |   res <- pickSoftPower(dat, powers = 1:5, networkType = "unsigned", corType = "pearson", R2cut = 0)
10 |   expect_true(is.list(res))
11 |   expect_true("power" %in% names(res))
12 |   expect_true(res$power %in% 1:5)
13 |   # WGCNA::pickSoftThreshold() hands back fitIndices as a data frame, so a
14 |   # matrix-only check fails; accept either tabular type.
15 |   expect_true(is.data.frame(res$fitIndices) || is.matrix(res$fitIndices))
16 |   expect_gte(ncol(res$fitIndices), 5)
17 | })
18 | 


--------------------------------------------------------------------------------
/R_version/scripts/run_cluster.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # scripts/run_cluster.sh
 3 | # ----------------------
 4 | # Usage: ./scripts/run_cluster.sh
 5 | # Submits Snakemake jobs to the cluster (via sbatch).
 6 | # Env:   JOBS  maximum number of parallel jobs (default: 100)
 7 | 
 8 | # Stop on the first failing command or unset variable.
 9 | set -euo pipefail
10 | 
11 | # Number of parallel jobs; can be overridden from the environment
12 | JOBS=${JOBS:-100}
13 | 
14 | echo "🐍 Submitting Snakemake workflow to cluster (up to $JOBS jobs)..."
15 | # The {cluster.*} placeholders are filled from config/cluster.yaml.
16 | snakemake --jobs "$JOBS" \
17 |           --cluster-config config/cluster.yaml \
18 |           --cluster "sbatch -A {cluster.account} -t {cluster.time} -n {cluster.ntasks} --mem={cluster.mem}" \
19 |           --latency-wait 60 \
20 |           --rerun-incomplete \
21 |           --snakefile pipeline/Snakefile \
22 |           --configfile config/default.yaml \
23 |           --directory .
24 | 


--------------------------------------------------------------------------------
/R_version/Rpackage/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: MyWGCNAResearchProject
 2 | Type: Package
 3 | Title: Robust WGCNA Analysis & Shiny App
 4 | Version: 0.1.0
 5 | Authors@R: 
 6 |     person("Your", "Name", email = "you@example.com", role = c("aut", "cre"))
 7 | Description: Provides functions for robust WGCNA network construction, module detection,
 8 |     dendrogram stability assessment, and an interactive Shiny interface.
 9 | Depends: 
10 |     R (>= 4.1),
11 |     WGCNA,
12 |     dynamicTreeCut,
13 |     flashClust,
14 |     dendextend,
15 |     plotly,
16 |     ggdendro
17 | Imports:
18 |     stats,
19 |     utils,
20 |     DESeq2
21 | License: MIT + file LICENSE
22 | Encoding: UTF-8
23 | LazyData: true
24 | Roxygen: list(markdown = TRUE)
25 | RoxygenNote: 7.2.1
26 | 


--------------------------------------------------------------------------------
/R_version/Rpackage/R/network_construction.R:
--------------------------------------------------------------------------------
 1 | # network_construction.R
 2 | # =======================
 3 | # Build adjacency and TOM matrices
 4 | 
 5 | library(WGCNA)
 6 | 
 7 | #' Construct network adjacency and TOM dissimilarity
 8 | #'
 9 | #' @param datExpr samples × genes matrix
10 | #' @param power soft-thresholding power
11 | #' @param networkType "unsigned" or "signed"
12 | #' @param corType "pearson" or "bicor"; any other value is an error
13 | #' @return list(adjacency, TOM, dissTOM), where dissTOM is 1 - TOM
14 | constructNetwork <- function(
15 |     datExpr,
16 |     power,
17 |     networkType = "unsigned",
18 |     corType = "pearson"
19 | ) {
20 |   # Reject unknown correlation types: the plain `else` branch used to fall
21 |   # back to Pearson silently for any typo such as "biCor".
22 |   corType <- match.arg(corType, c("pearson", "bicor"))
23 |   corFnc <- if (corType == "bicor") bicor else cor
24 |   adj <- adjacency(datExpr, power = power, type = networkType,
25 |                    corFnc = corFnc)
26 |   TOM <- TOMsimilarity(adj)
27 |   # TOM is a similarity; the dissimilarity is returned alongside it
28 |   dissTOM <- 1 - TOM
29 |   list(adjacency = adj, TOM = TOM, dissTOM = dissTOM)
30 | }
31 | 


--------------------------------------------------------------------------------
/R_version/scripts/run_local.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # scripts/run_local.sh
 3 | # --------------------
 4 | # Usage: ./scripts/run_local.sh [pipeline|shiny]
 5 | # Default: shiny
 6 | 
 7 | MODE=${1:-shiny}
 8 | 
 9 | # Load R package in place and start Shiny
10 | if [ "$MODE" = "shiny" ]; then
11 |   echo "🚀 Launching Shiny app (development mode)..."
12 |   Rscript -e " \
13 |     if (!requireNamespace('devtools',quietly=TRUE)) install.packages('devtools'); \
14 |     devtools::load_all('Rpackage'); \
15 |     shiny::runApp('Rpackage/inst/shiny', launch.browser=TRUE) \
16 |   "
17 |   exit $?
18 | fi
19 | 
20 | # Run the Snakemake pipeline locally
21 | if [ "$MODE" = "pipeline" ]; then
22 |   echo "🐍 Running Snakemake pipeline..."
23 |   snakemake --cores 4 \
24 |             --snakefile pipeline/Snakefile \
25 |             --configfile config/default.yaml \
26 |             --directory .
27 |   exit $?
28 | fi
29 | 
30 | echo "Unknown mode: $MODE. Use 'pipeline' or 'shiny'."
31 | exit 1
32 | 


--------------------------------------------------------------------------------
/R_version/config/default.yaml:
--------------------------------------------------------------------------------
 1 | # Path to raw expression data (CSV with genes×samples)
 2 | expr_file: "data/raw/example_expression.csv"
 3 | 
 4 | # Preprocessing
 5 | norm_method: "vst"            # "vst" or "log2"
 6 | topN:        10000            # number of highest-variance genes to keep
 7 | 
 8 | # Soft‐threshold power scanning
 9 | powers:      [1,2,3,4,5,6,7,8,9,10,12,14,16,18,20]
10 | tom_type:    "unsigned"       # "unsigned" or "signed"
11 | cor_type:    "pearson"        # "pearson" or "bicor"
12 | 
13 | # Module detection
14 | deep_split:       2           # dynamicTreeCut sensitivity (0–4)
15 | min_module_size: 30           # smallest module size
16 | merge_height:     0.25        # merge modules whose eigengenes correlate > (1–merge_height)
17 | 
18 | # Outlier removal & stability
19 | outlier_z:      2.5          # Z-score cutoff for removing sample outliers
20 | bootstrap:      false        # whether to bootstrap for stability
21 | n_boot:         100          # bootstrap iterations if enabled
22 | 


--------------------------------------------------------------------------------
/R_version/Rpackage/inst/shiny/ui.R:
--------------------------------------------------------------------------------
 1 | # ui.R
 2 | library(shiny)
 3 | library(plotly)
 4 | 
 5 | ui <- fluidPage(  # page layout; app.R passes this object to shinyApp(ui = ui, ...)
 6 |   titlePanel("Robust WGCNA Explorer"),
 7 |   sidebarLayout(
 8 |     sidebarPanel(  # analysis settings; server.R reads each one as input$<id>
 9 |       fileInput("exprFile", "Upload Expression CSV", accept = ".csv"),  # server.R loads input$exprFile$datapath
10 |       selectInput("normMethod", "Normalize using:", choices = c("log2", "vst")),
11 |       sliderInput("topVar", "Top N variable genes:", min = 1000, max = 20000, value = 10000, step = 1000),
12 |       actionButton("runBtn", "Run WGCNA"),  # the eventReactive in server.R fires on this button
13 |       hr(),
14 |       numericInput("deepSplit", "Tree cut deepSplit (0–4):", value = 2, min = 0, max = 4),
15 |       numericInput("minModSize", "Min module size:", value = 30, min = 5, max = 200),
16 |       numericInput("mergeHeight", "Module merge height:", value = 0.25, min = 0, max = 1, step = 0.05)
17 |     ),
18 |     mainPanel(
19 |       tabsetPanel(  # one tab per output that server.R assigns (dendPlot, dendPlotly, modLegend)
20 |         tabPanel("Dendrogram", plotOutput("dendPlot")),
21 |         tabPanel("Interactive", plotlyOutput("dendPlotly")),
22 |         tabPanel("Module Colors", uiOutput("modLegend"))
23 |       )
24 |     )
25 |   )
26 | )
27 | 


--------------------------------------------------------------------------------
/R_version/pipeline/scripts/preprocess.R:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript
 2 | 
 3 | # pipeline/scripts/preprocess.R
 4 | # -----------------------------
 5 | # Load & filter genes, then normalize expression
 6 | suppressPackageStartupMessages({
 7 |   library(optparse)
 8 |   library(MyWGCNAResearchProject)
 9 | })
10 | 
11 | opt <- parse_args(OptionParser(option_list = list(
12 |   make_option(c("-i", "--input"), type = "character", help = "Raw expression CSV"),
13 |   make_option(c("-o", "--output"), type = "character", help = "Normalized RDS output"),
14 |   make_option(c("-m", "--method"), type = "character", default = "vst",
15 |               help = "Normalization: 'log2' or 'vst'"),
16 |   make_option(c("-t", "--topN"), type = "integer", default = 10000,
17 |               help = "Top N variable genes"))
18 | ))
19 | 
20 | # --input and --output have no defaults: stop with a readable message instead
21 | # of failing further down on a NULL path.
22 | required <- c("input", "output")
23 | absent <- required[vapply(required, function(key) is.null(opt[[key]]), logical(1))]
24 | if (length(absent) > 0) {
25 |   stop("Missing required option(s): ", paste0("--", absent, collapse = ", "), call. = FALSE)
26 | }
27 | 
28 | # Make sure the output directory exists before anything is written
29 | dir.create(dirname(opt$output), recursive = TRUE, showWarnings = FALSE)
30 | 
31 | # load -> keep the topN highest-variance genes -> normalize
32 | expr <- loadExpressionData(opt$input)
33 | expr_filt <- filterGenesByVariance(expr, topN = opt$topN)
34 | expr_norm <- normalizeExpression(expr_filt, method = opt$method)
35 | 
36 | saveRDS(expr_norm, file = opt$output)


--------------------------------------------------------------------------------
/R_version/tests/testthat/test_module_detection.R:
--------------------------------------------------------------------------------
 1 | context("Module detection and merging")
 2 | 
 3 | test_that("detectModules identifies at least one module", {
 4 |   # toy similarity matrix: two 5 × 5 blocks on the diagonal, to force two modules
 5 |   sim <- diag(10)
 6 |   for (block in list(1:5, 6:10)) {
 7 |     sim[block, block] <- sim[block, block] + 1
 8 |   }
 9 |   detected <- detectModules(1 - sim, deepSplit = 1, minModuleSize = 2)
10 |   expect_true(is.list(detected))
11 |   expect_true("moduleColors" %in% names(detected))
12 |   distinctColors <- unique(detected$moduleColors)
13 |   expect_true(length(distinctColors) >= 2)
14 | })
15 | 
16 | test_that("mergeModules merges correctly when multiple modules", {
17 |   # simulate expr for merging test: 6 samples × 6 genes
18 |   set.seed(1)  # fixed seed: the unseeded rnorm() made every run use different data
19 |   datExpr <- matrix(rnorm(36), nrow = 6, ncol = 6)
20 |   # create two modules: first 3 genes and last 3 genes
21 |   colors <- c(rep("blue",3), rep("red",3))
22 |   merged <- mergeModules(datExpr, colors, cutHeight = 0.5)
23 |   expect_true(is.list(merged))
24 |   expect_true("mergedColors" %in% names(merged))
25 |   expect_equal(length(merged$mergedColors), 6)
26 | })
27 | 


--------------------------------------------------------------------------------
/R_version/pipeline/scripts/build_tom.R:
--------------------------------------------------------------------------------
 1 | # pipeline/scripts/build_tom.R
 2 | # -----------------------------
 3 | # Build the TOM from normalized expression and a previously chosen power
 4 | suppressPackageStartupMessages({
 5 |   library(optparse)
 6 |   library(MyWGCNAResearchProject)
 7 | })
 8 | 
 9 | opt <- parse_args(OptionParser(option_list = list(
10 |   make_option(c("-e", "--expr"), type = "character", help = "Normalized RDS input"),
11 |   make_option(c("-p", "--power"), type = "character", help = "Power RDS input"),
12 |   make_option(c("-o", "--output"), type = "character", help = "TOM RDS output"),
13 |   make_option(c("--networkType"), type = "character", default = "unsigned"),
14 |   make_option(c("--corType"), type = "character", default = "pearson")
15 | )))
16 | 
17 | # The three path options have no defaults: stop with a readable message
18 | # instead of failing further down on a NULL path.
19 | required <- c("expr", "power", "output")
20 | absent <- required[vapply(required, function(key) is.null(opt[[key]]), logical(1))]
21 | if (length(absent) > 0) {
22 |   stop("Missing required option(s): ", paste0("--", absent, collapse = ", "), call. = FALSE)
23 | }
24 | 
25 | dir.create(dirname(opt$output), recursive = TRUE, showWarnings = FALSE)
26 | 
27 | expr_norm <- readRDS(opt$expr)
28 | power     <- readRDS(opt$power)
29 | # pick_soft_threshold.R saves a single power value; anything else means the
30 | # wrong file was passed as --power.
31 | stopifnot(is.numeric(power), length(power) == 1)
32 | datExpr   <- t(expr_norm)   # constructNetwork() documents a samples × genes matrix
33 | 
34 | res <- constructNetwork(datExpr,
35 |                         power       = power,
36 |                         networkType = opt$networkType,
37 |                         corType     = opt$corType)
38 | 
39 | # Only the TOM element of the result is persisted
40 | saveRDS(res$TOM, file = opt$output)
41 | 


--------------------------------------------------------------------------------
/R_version/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: R-CMD-check
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 |   pull_request:
 7 | 
 8 | jobs:
 9 |   build:
10 |     runs-on: ubuntu-latest
11 | 
12 |     steps:
13 |     - uses: actions/checkout@v3
14 | 
15 |     - name: Setup R
16 |       uses: r-lib/actions/setup-r@v2
17 | 
18 |     - name: Setup Pandoc
19 |       uses: r-lib/actions/setup-pandoc@v2
20 | 
21 |     - name: Install system dependencies
22 |       run: |
23 |         sudo apt-get update
24 |         sudo apt-get install -y libcurl4-openssl-dev libssl-dev libxml2-dev
25 | 
26 |     # The package (DESCRIPTION, R/) lives in Rpackage/ and the tests in tests/testthat/,
27 |     # so the devtools calls below name those paths explicitly, as the lint step already does.
28 |     - name: Install R package dependencies
29 |       run: |
30 |         Rscript -e 'install.packages(c("devtools","roxygen2","testthat"))'
31 |         Rscript -e 'devtools::install_deps("Rpackage", dependencies = TRUE)'
32 | 
33 |     - name: Run tests
34 |       run: |
35 |         Rscript -e 'devtools::load_all("Rpackage"); testthat::test_dir("tests/testthat", stop_on_failure = TRUE)'
36 | 
37 |     - name: Check package
38 |       run: |
39 |         Rscript -e 'devtools::check("Rpackage")'
40 | 
41 |     - name: Lint code
42 |       run: |
43 |         Rscript -e 'if (!requireNamespace("lintr", quietly=TRUE)) install.packages("lintr"); lintr::lint_dir("Rpackage/R")'
44 | 


--------------------------------------------------------------------------------
/R_version/tests/testthat/test_preprocess.R:
--------------------------------------------------------------------------------
 1 | context("Data preprocessing")
 2 | 
 3 | test_that("loadExpressionData reads a small CSV correctly", {
 4 |   # write a 3-gene × 2-sample table to a temporary CSV
 5 |   csvPath <- tempfile(fileext = ".csv")
 6 |   geneIds <- c("GeneA","GeneB","GeneC")
 7 |   toy <- data.frame(Sample1 = 1:3, Sample2 = c(2,4,6))
 8 |   write.csv(toy, file = csvPath, row.names = geneIds)
 9 |   loaded <- loadExpressionData(csvPath, sep = ",", row.names = 1)
10 |   expect_true(is.matrix(loaded))
11 |   expect_equal(rownames(loaded), geneIds)
12 |   expect_equal(colnames(loaded), c("Sample1","Sample2"))
13 |   file.remove(csvPath)
14 | })
15 | 
16 | test_that("filterGenesByVariance retains top N genes", {
17 |   mat <- outer(1:5, 1:4)   # row i is i * (1:4), so its variance grows with i
18 |   rownames(mat) <- paste0("G",1:5)
19 |   # variances are strictly increasing across rows (matrix(1:20, nrow = 5) gave every row the same variance)
20 |   filtered <- filterGenesByVariance(mat, topN = 3)
21 |   expect_equal(nrow(filtered), 3)
22 |   expect_equal(rownames(filtered), c("G5","G4","G3"))
23 | })
24 | 
25 | test_that("normalizeExpression on log2 produces no NAs", {
26 |   counts <- matrix(c(0,1,4,9), nrow = 2)   # entry [1,1] is 0
27 |   logged <- normalizeExpression(counts, method = "log2")
28 |   expect_false(anyNA(logged))
29 |   expect_equal(logged[1,1], log2(0+1))     # the zero entry is expected to map to log2(0 + 1)
30 | })
31 | 
32 | 


--------------------------------------------------------------------------------
/R_version/Rpackage/inst/shiny/server.R:
--------------------------------------------------------------------------------
 1 | # server.R
 2 | library(shiny)
 3 | library(MyWGCNAResearchProject)
 4 | library(plotly)
 5 | 
 6 | server <- function(input, output, session) {
 7 |   # Whole analysis: load -> variance filter -> normalize -> dendrogram.
 8 |   # eventReactive: recomputed only when the "Run WGCNA" button is pressed.
 9 |   wgcnaRes <- eventReactive(input$runBtn, {
10 |     req(input$exprFile)
11 |     expr <- loadExpressionData(input$exprFile$datapath)
12 |     expr <- filterGenesByVariance(expr, topN = input$topVar)
13 |     norm <- normalizeExpression(expr, method = input$normMethod)
14 |     generateRobustDendrogram(
15 |       exprData    = norm,
16 |       deepSplit   = input$deepSplit,
17 |       minModuleSize  = input$minModSize,
18 |       mergeCutHeight = input$mergeHeight,
19 |       bootstrap   = FALSE
20 |     )
21 |   })
22 |   
23 |   output$dendPlot <- renderPlot({
24 |     req(wgcnaRes())
25 |     # static plot is drawn inside generateRobustDendrogram()
26 |     # NOTE(review): that presumably reaches this plot only when this expression is the
27 |     # one that triggers wgcnaRes(); a cached result would draw nothing here — confirm,
28 |     # and consider redrawing from res$geneTree / res$moduleColors instead.
29 |     invisible()
30 |   })
31 |   
32 |   output$dendPlotly <- renderPlotly({
33 |     res <- wgcnaRes()
34 |     plotInteractiveDendrogram(res$geneTree, res$stability)
35 |   })
36 |   
37 |   # Module legend: one coloured chip per distinct module colour.
38 |   output$modLegend <- renderUI({
39 |     res <- wgcnaRes()
40 |     cols <- unique(res$moduleColors)
41 |     tags$div(
42 |       lapply(cols, function(col) {
43 |         # R colour names such as "grey60" are not valid CSS colours, so the chip
44 |         # background is given as hex; fall back to the raw value if R rejects it.
45 |         css <- tryCatch(
46 |           grDevices::rgb(t(grDevices::col2rgb(col)), maxColorValue = 255),
47 |           error = function(e) col
48 |         )
49 |         tags$span(style = paste0("background:", css, ";padding:5px;margin:2px;display:inline-block;"), col)
50 |       })
51 |     )
52 |   })
53 | }
54 | 


--------------------------------------------------------------------------------
/R_version/pipeline/scripts/pick_soft_threshold.R:
--------------------------------------------------------------------------------
 1 | # pipeline/scripts/pick_soft_threshold.R
 2 | # --------------------------------------
 3 | # Scan candidate powers, save the chosen one and a diagnostic plot
 4 | suppressPackageStartupMessages({
 5 |   library(optparse)
 6 |   library(MyWGCNAResearchProject)
 7 | })
 8 | 
 9 | opt <- parse_args(OptionParser(option_list = list(
10 |   make_option(c("-i", "--input"), type = "character", help = "Normalized RDS input"),
11 |   make_option(c("-p", "--output-power"), type = "character", help = "Power RDS output"),
12 |   make_option(c("-g", "--output-plot"), type = "character", help = "Plot PNG output"),
13 |   make_option(c("--powers"), type = "character", default = "c(1:20)",
14 |               help = "Candidate powers, e.g. 'c(1,2,3)'"),
15 |   make_option(c("--networkType"), type = "character", default = "unsigned"),
16 |   make_option(c("--corType"), type = "character", default = "pearson")
17 | )))
18 | 
19 | # The three path options have no defaults: stop with a readable message
20 | # instead of failing further down on a NULL path.
21 | required <- c("input", "output-power", "output-plot")
22 | absent <- required[vapply(required, function(key) is.null(opt[[key]]), logical(1))]
23 | if (length(absent) > 0) {
24 |   stop("Missing required option(s): ", paste0("--", absent, collapse = ", "), call. = FALSE)
25 | }
26 | 
27 | # Turn the --powers text into a numeric vector without eval(parse()), which
28 | # would run arbitrary R code supplied on the command line.
29 | # Accepted forms: "c(1,2,3)", "c(1:20)", "1:20", "1,2,3", "[1, 2, 3]".
30 | parse_powers <- function(spec) {
31 |   spec <- gsub("\\s", "", spec)
32 |   # drop one optional wrapper: c( ... ) or [ ... ]
33 |   spec <- sub("^(c\\(|\\[)(.*)(\\)|\\])$", "\\2", spec)
34 |   pieces <- strsplit(strsplit(spec, ",", fixed = TRUE)[[1]], ":", fixed = TRUE)
35 |   values <- unlist(lapply(pieces, function(piece) {
36 |     nums <- suppressWarnings(as.numeric(piece))
37 |     if (length(nums) == 0 || length(nums) > 2 || anyNA(nums)) {
38 |       stop("Cannot parse --powers value: ", spec, call. = FALSE)
39 |     }
40 |     # "a:b" is a range, a lone number is itself
41 |     if (length(nums) == 2) seq(nums[1], nums[2]) else nums
42 |   }))
43 |   if (length(values) == 0) {
44 |     stop("--powers must list at least one power", call. = FALSE)
45 |   }
46 |   values
47 | }
48 | 
49 | dir.create(dirname(opt$`output-power`), recursive = TRUE, showWarnings = FALSE)
50 | 
51 | dir.create(dirname(opt$`output-plot`),  recursive = TRUE, showWarnings = FALSE)
52 | 
53 | expr_norm <- readRDS(opt$input)
54 | datExpr <- t(expr_norm)   # pickSoftPower() documents a samples × genes matrix
55 | 
56 | powers <- parse_powers(opt$powers)
57 | res <- pickSoftPower(datExpr,
58 |                      powers = powers,
59 |                      networkType = opt$networkType,
60 |                      corType     = opt$corType)
61 | 
62 | saveRDS(res$power, file = opt$`output-power`)
63 | 
64 | png(filename = opt$`output-plot`, width = 800, height = 600)
65 | plotSoftPower(res$fitIndices)
66 | dev.off()


--------------------------------------------------------------------------------
/R_version/Rpackage/R/module_detection.R:
--------------------------------------------------------------------------------
 1 | # module_detection.R
 2 | # ====================
 3 | # Dynamic tree cutting & module merging
 4 | 
 5 | library(WGCNA)
 6 | library(dynamicTreeCut)
 7 | 
 8 | #' Cluster genes and cut the tree into modules
 9 | #'
10 | #' @param dissTOM TOM dissimilarity matrix
11 | #' @param deepSplit sensitivity (0–4)
12 | #' @param minModuleSize minimum cluster size
13 | #' @param method clustering linkage ("average", etc.)
14 | #' @return list(geneTree, moduleColors)
15 | detectModules <- function(
16 |     dissTOM,
17 |     deepSplit = 2,
18 |     minModuleSize = 30,
19 |     method = "average"
20 | ) {
21 |   # hierarchical clustering on the dissimilarities
22 |   tree <- flashClust(as.dist(dissTOM), method = method)
23 |   # dynamic tree cut gives one module label per gene
24 |   labels <- cutreeDynamic(
25 |     dendro = tree, distM = dissTOM, deepSplit = deepSplit,
26 |     pamRespectsDendro = FALSE, minClusterSize = minModuleSize
27 |   )
28 |   # labels are converted to colour names before being returned
29 |   colours <- labels2colors(labels)
30 |   list(geneTree = tree, moduleColors = colours)
31 | }
32 | 
33 | #' Merge modules whose eigengenes are close
34 | #'
35 | #' @param datExpr samples × genes matrix
36 | #' @param moduleColors vector of module assignments
37 | #' @param cutHeight merge threshold (e.g. 0.25)
38 | #' @return list(mergedColors, mergeInfo)
39 | mergeModules <- function(
40 |     datExpr,
41 |     moduleColors,
42 |     cutHeight = 0.25
43 | ) {
44 |   # mergeInfo carries the complete mergeCloseModules() result;
45 |   # mergedColors is its `colors` element, surfaced for convenience.
46 |   mergeResult <- mergeCloseModules(
47 |     datExpr, moduleColors,
48 |     cutHeight = cutHeight, verbose = 0
49 |   )
50 |   list(mergedColors = mergeResult$colors, mergeInfo = mergeResult)
51 | }
52 | 


--------------------------------------------------------------------------------
/R_version/src/utils.R:
--------------------------------------------------------------------------------
 1 | # src/utils.R
 2 | # =========================
 3 | # Utility functions for configuration management, logging, and directory setup
 4 | 
 5 | # Ensure required packages are installed
 6 | if (!requireNamespace("yaml", quietly=TRUE)) install.packages("yaml")
 7 | if (!requireNamespace("logging", quietly=TRUE)) install.packages("logging")
 8 | 
 9 | library(yaml)
10 | library(logging)
11 | 
12 | #' Load pipeline settings from a YAML file
13 | #'
14 | #' @param config_file Path to YAML file (default: "config/default.yaml")
15 | #' @return The parsed configuration, exactly as yaml::read_yaml() returns it
16 | readConfig <- function(config_file = "config/default.yaml") {
17 |   # the value of the last expression is the function's result
18 |   yaml::read_yaml(config_file)
19 | }
20 | 
21 | #' Initialize logging to file and console, creating the log directory if needed
22 | #' @param logfile File path for log output (default: "results/logs/pipeline.log")
23 | #' @param level Logging level as string (DEBUG, INFO, WARN, ERROR)
24 | #' @return NULL
25 | initLogging <- function(logfile = "results/logs/pipeline.log", level = "INFO") {
26 |   dir.create(dirname(logfile), recursive = TRUE, showWarnings = FALSE)  # the file handler needs it
27 |   basicConfig(level = getLevel(level))
28 |   addHandler(writeToFile, file = logfile, level = getLevel(level))
29 |   addHandler(writeToConsole, level = getLevel(level))
30 |   loginfo("Logging initialized. File: %s", logfile)
31 | }
32 | 
33 | #' Create any of the given directories that do not exist yet
34 | #'
35 | #' @param dirs Character vector of directory paths
36 | #' @return NULL
37 | ensureDirs <- function(dirs) {
38 |   for (path in dirs) {
39 |     # existing directories are left alone and produce no log entry
40 |     if (dir.exists(path)) next
41 |     dir.create(path, recursive = TRUE)
42 |     loginfo("Created directory: %s", path)
43 |   }
44 | }
45 | 


--------------------------------------------------------------------------------
/R_version/Rpackage/R/soft_threshold.R:
--------------------------------------------------------------------------------
 1 | # soft_threshold.R
 2 | # =================
 3 | # Auto-pick and visualize soft-thresholding power for scale-free topology
 4 | 
 5 | library(WGCNA)
 6 | 
 7 | #' Pick soft-thresholding power
 8 | #'
 9 | #' @param datExpr samples × genes matrix
10 | #' @param powers candidate integer vector
11 | #' @param networkType "unsigned" or "signed"
12 | #' @param corType "pearson" or "bicor"; any other value is an error
13 | #' @param R2cut minimum scale-free R² (forwarded to pickSoftThreshold as RsquaredCut)
14 | #' @return list(power, fitIndices)
15 | pickSoftPower <- function(
16 |     datExpr,
17 |     powers = 1:20,
18 |     networkType = "unsigned",
19 |     corType = "pearson",
20 |     R2cut = 0.80
21 | ) {
22 |   corType <- match.arg(corType, c("pearson", "bicor"))  # a typo used to mean Pearson silently
23 |   corFnc <- if (corType == "bicor") bicor else cor
24 |   # Forward R2cut: otherwise powerEstimate is chosen against WGCNA's own default cut-off.
25 |   sft <- pickSoftThreshold(
26 |     datExpr, powerVector = powers, networkType = networkType,
27 |     corFnc = corFnc, RsquaredCut = R2cut, verbose = 0
28 |   )
29 |   power <- sft$powerEstimate
30 |   if (is.na(power)) {
31 |     # No power reached R2cut: fall back to the best fit. R² is signed by the slope
32 |     # so that a fit with a positive slope (not scale-free) cannot win.
33 |     fit <- sft$fitIndices
34 |     power <- powers[which.max(-sign(fit[, "slope"]) * fit[, "SFT.R.sq"])]
35 |     message("Fallback power = ", power)
36 |   }
37 |   list(power = power, fitIndices = sft$fitIndices)
38 | }
39 | 
40 | #' Plot scale-free fit and mean connectivity side by side
41 | #'
42 | #' @param fitIndices from pickSoftThreshold()
43 | plotSoftPower <- function(fitIndices) {
44 |   # Restore the caller's panel layout on exit, even if a plot call fails.
45 |   old_par <- par(mfrow = c(1, 2))
46 |   on.exit(par(old_par), add = TRUE)
47 |   plot(fitIndices[, 1], fitIndices[, 2], type = "b", main = "Scale Free Fit",
48 |        xlab = "Soft Threshold (power)", ylab = "Scale Free R²")
49 |   plot(fitIndices[, 1], fitIndices[, 5], type = "b", main = "Mean Connectivity",
50 |        xlab = "Soft Threshold (power)", ylab = "Mean Connectivity")
51 |   invisible(NULL)
52 | }
53 | 


--------------------------------------------------------------------------------
/R_version/Rpackage/R/data_preprocessing.R:
--------------------------------------------------------------------------------
 1 | # data_preprocessing.R
 2 | # ====================
 3 | # Functions to load, QC-filter and normalize expression data
 4 | 
 5 | # dependencies
 6 | library(WGCNA)
 7 | if (!requireNamespace("DESeq2", quietly=TRUE)) {
 8 |   message("installing DESeq2 for VST normalization...")
 9 |   BiocManager::install("DESeq2")
10 | }
11 | library(DESeq2)
12 | 
13 | #' Read an expression table (genes × samples) into a matrix
14 | #'
15 | #' @param file path to CSV/TSV
16 | #' @param sep field separator (',' or '\t')
17 | #' @param row.names column for gene IDs
18 | #' @return matrix with the table's row and column names kept as-is
19 | loadExpressionData <- function(file, sep = ",", row.names = 1) {
20 |   # check.names = FALSE stops read.csv() from rewriting sample names.
21 |   as.matrix(read.csv(file, sep = sep, check.names = FALSE, row.names = row.names))
22 | }
23 | 
24 | #' Keep the most variable genes
25 | #'
26 | #' @param exprData genes × samples matrix
27 | #' @param topN number of highest-variance genes to keep
28 | #' @return matrix of the kept rows, ordered from highest to lowest variance
29 | filterGenesByVariance <- function(exprData, topN = 10000) {
30 |   geneVar <- apply(exprData, 1, function(g) var(g, na.rm = TRUE))
31 |   ranked <- order(geneVar, decreasing = TRUE)
32 |   exprData[ranked[seq_len(min(topN, length(ranked)))], , drop = FALSE]
33 | }
34 | 
35 | #' Drop samples whose mean expression is a Z-score outlier
36 | #'
37 | #' @param datExpr samples × genes matrix
38 | #' @param zCut Z-score cutoff
39 | #' @return datExpr without the flagged rows (unchanged if none are flagged)
40 | detectOutlierSamples <- function(datExpr, zCut = 2.5) {
41 |   # Standardize the per-sample means and flag those beyond the cutoff.
42 |   zScores <- scale(rowMeans(datExpr, na.rm = TRUE))
43 |   flagged <- which(abs(zScores) > zCut)
44 |   if (length(flagged) == 0) return(datExpr)
45 |   # Name the dropped samples in the warning before removing them.
46 |   flaggedNames <- paste(rownames(datExpr)[flagged], collapse = ", ")
47 |   warning("Removing outlier samples: ", flaggedNames)
48 |   datExpr[-flagged, , drop = FALSE]
49 | }
50 | 
51 | #' Normalize using log2 or variance-stabilizing transform (VST)
52 | #'
53 | #' @param exprData raw counts genes × samples
54 | #' @param method "log2" or "vst"
55 | #' @return normalized matrix
56 | normalizeExpression <- function(exprData, method = c("log2", "vst")) {
57 |   method <- match.arg(method)
58 |   if (method == "log2") return(log2(exprData + 1))
59 |   dds <- DESeqDataSetFromMatrix(countData = exprData,
60 |                                 colData = DataFrame(row = colnames(exprData)),
61 |                                 design = ~ 1)
62 |   # vst() subsamples 1000 genes by default and stops on smaller inputs, so
63 |   # fall back to the full transformation for small matrices.
64 |   transform <- if (nrow(exprData) < 1000) varianceStabilizingTransformation else vst
65 |   assay(transform(dds, blind = TRUE))
66 | }
67 | 


--------------------------------------------------------------------------------
/R_version/pipeline/scripts/detect_modules.R:
--------------------------------------------------------------------------------
 1 | # pipeline/scripts/detect_modules.R
 2 | # ---------------------------------
 3 | suppressPackageStartupMessages({
 4 |   library(optparse)
 5 |   library(MyWGCNAResearchProject)
 6 | })
 7 | 
 8 | opt <- parse_args(OptionParser(option_list = list(
 9 |   make_option(c("-x","--expr"), type = "character", help = "Normalized RDS input"),
10 |   make_option(c("-t","--tom"),  type = "character", help = "TOM RDS input"),
11 |   make_option(c("-p","--out-plot"),    type = "character", help = "Dendrogram PNG output"),
12 |   make_option(c("-m","--out-modules"), type = "character", help = "Modules TSV output"),
13 |   make_option(c("--deepSplit"),     type = "integer", default = 2),
14 |   make_option(c("--minModuleSize"), type = "integer", default = 30),
15 |   make_option(c("--mergeCutHeight"),type = "double",  default = 0.25),
16 |   make_option(c("--outlierZ"),      type = "double",  default = 2.5),
17 |   make_option(c("--bootstrap"),     type = "logical", default = FALSE),
18 |   make_option(c("--nBoot"),         type = "integer", default = 100)
19 | )))
20 | 
21 | # NOTE: --outlierZ, --bootstrap and --nBoot are parsed but not used below.
22 | # Stop early with a readable message when a required path option is missing.
23 | required <- c("expr", "tom", "out-plot", "out-modules")
24 | absent <- required[vapply(required, function(k) is.null(opt[[k]]), logical(1))]
25 | if (length(absent) > 0) {
26 |   stop("Missing required option(s): ", paste0("--", absent, collapse = ", "))
27 | }
28 | 
29 | dir.create(dirname(opt$`out-plot`),    recursive = TRUE, showWarnings = FALSE)
30 | dir.create(dirname(opt$`out-modules`), recursive = TRUE, showWarnings = FALSE)
31 | 
32 | expr_norm <- readRDS(opt$expr)
33 | dissTOM   <- 1 - readRDS(opt$tom)
34 | 
35 | mods <- detectModules(dissTOM,
36 |                       deepSplit     = opt$deepSplit,
37 |                       minModuleSize = opt$minModuleSize)
38 | 
39 | # Merge
40 | merged <- mergeModules(t(expr_norm), mods$moduleColors,
41 |                        cutHeight = opt$mergeCutHeight)
42 | mergedColors <- merged$mergedColors
43 | 
44 | # Write modules. If the colors come back unnamed, data.frame() would drop the
45 | # Gene column; expr_norm rows are presumably the genes — TODO confirm.
46 | geneIds <- names(mergedColors)
47 | if (is.null(geneIds)) geneIds <- rownames(expr_norm)
48 | dfMods <- data.frame(Gene = geneIds, Module = mergedColors, stringsAsFactors = FALSE)
49 | write.table(dfMods, file = opt$`out-modules`, sep = "\t", quote = FALSE, row.names = FALSE)
50 | 
51 | # Plot dendrogram
52 | png(filename = opt$`out-plot`, width = 1000, height = 800)
53 | plotDendroAndColors(mods$geneTree, mergedColors,
54 |                     groupLabels = c("Modules"),
55 |                     main = "Gene dendrogram and module colors")
56 | dev.off()
57 | 


--------------------------------------------------------------------------------
/R_version/src/viz_helpers.R:
--------------------------------------------------------------------------------
 1 | # src/viz_helpers.R
 2 | # =========================
 3 | # Visualization helpers for WGCNA outputs
 4 | 
 5 | # Required packages
 6 | if (!requireNamespace("ggplot2", quietly=TRUE)) install.packages("ggplot2")
 7 | if (!requireNamespace("viridis", quietly=TRUE)) install.packages("viridis")
 8 | if (!requireNamespace("pheatmap", quietly=TRUE)) install.packages("pheatmap")
 9 | if (!requireNamespace("reshape2", quietly=TRUE)) install.packages("reshape2")
10 | 
11 | library(ggplot2)
12 | library(viridis)
13 | library(pheatmap)
14 | library(reshape2)
15 | 
16 | #' Minimal theme for dendrogram plots
17 | #'
18 | #' @return ggplot2 theme object with axis titles, axis text and grid blanked
19 | themeDendrogram <- function() {
20 |   # Start from the minimal theme at base size 14, then blank out the axis
21 |   # titles, the axis text and the panel grid.
22 |   blank <- element_blank()
23 |   base_theme <- theme_minimal(base_size = 14)
24 |   overrides <- theme(axis.title = blank, axis.text = blank, panel.grid = blank)
25 |   base_theme + overrides
26 | }
27 | 
28 | #' Plot TOM heatmap ordered by geneTree
29 | #'
30 | #' @param tom TOM similarity matrix
31 | #' @param geneTree hclust object
32 | #' @param moduleColors Vector of module colors corresponding to genes
33 | #' @param heatmap_file Optional path to save heatmap image
34 | #' @return pheatmap object
35 | plotTOMHeatmap <- function(tom, geneTree, moduleColors, heatmap_file = NULL) {
36 |   ord <- geneTree$order
37 |   mat_ord <- as.matrix(tom)[ord, ord]
38 |   # pheatmap() matches annotation rows to the matrix by row name, so an
39 |   # unnamed TOM gets synthetic names to keep the module annotation aligned.
40 |   if (is.null(rownames(mat_ord))) rownames(mat_ord) <- paste0("gene", ord)
41 |   ann_row <- data.frame(Module = moduleColors[ord], row.names = rownames(mat_ord))
42 |   ph <- pheatmap(
43 |     mat_ord, color = viridis(100), annotation_row = ann_row,
44 |     cluster_rows = FALSE, cluster_cols = FALSE,
45 |     show_rownames = FALSE, show_colnames = FALSE
46 |   )
47 |   if (!is.null(heatmap_file)) {
48 |     ggsave(heatmap_file, plot = ph$gtable, width = 8, height = 6)
49 |     # loginfo() comes from the logging package, which this file never attaches.
50 |     if (exists("loginfo", mode = "function")) {
51 |       loginfo("Saved TOM heatmap to %s", heatmap_file)
52 |     } else message("Saved TOM heatmap to ", heatmap_file)
53 |   }
54 |   ph
55 | }
56 | 
57 | #' Plot module-trait correlation heatmap
58 | #'
59 | #' @param moduleTraitCor Matrix of correlations
60 | #' @param moduleTraitP Matrix of p-values (same dims as moduleTraitCor)
61 | #' @return ggplot2 object
62 | plotModuleTraitCorr <- function(moduleTraitCor, moduleTraitP = NULL) {
63 |   df <- melt(moduleTraitCor)
64 |   names(df) <- c("Module", "Trait", "Correlation")
65 |   # Add the star labels before ggplot() captures `df`, under a name != `p`.
66 |   if (!is.null(moduleTraitP)) {
67 |     pv <- melt(moduleTraitP)$value
68 |     df$Signif <- ifelse(!is.na(pv) & pv < 0.05, "*", "")
69 |   }
70 |   p <- ggplot(df, aes(x = Trait, y = Module, fill = Correlation)) +
71 |     geom_tile() +
72 |     scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
73 |     theme_minimal() + labs(x = NULL, y = NULL)
74 |   if (!is.null(moduleTraitP)) p <- p + geom_text(aes(label = Signif), color = "black")
75 |   p
76 | }
77 | 


--------------------------------------------------------------------------------
/R_version/pipeline/Snakefile:
--------------------------------------------------------------------------------
  1 | # pipeline/Snakefile
  2 | 
  3 | import os
  4 | 
  5 | # 1) Load config
  6 | configfile: "config/default.yaml"
  7 | 
  8 | # 2) Output directories; created at parse time so rules can write to them
  9 | INTER = "results/intermediate"
 10 | PROC = "results/processed"
 11 | FIGS = "results/figures"
 12 | for out_dir in (INTER, PROC, FIGS):
 13 |     os.makedirs(out_dir, exist_ok=True)
 14 | 
 15 | # 3) Default target: requesting `all` builds every final artifact listed
 16 | #    below.
 17 | rule all:
 18 |     input:
 19 |         # intermediates: normalized expression, chosen power, TOM matrix
 20 |         INTER + "/expr_norm.Rds",
 21 |         INTER + "/power.Rds",
 22 |         INTER + "/tom.Rds",
 23 |         # figures: soft-threshold diagnostics and gene dendrogram
 24 |         FIGS + "/soft_threshold.png",
 25 |         FIGS + "/dendrogram.png",
 26 |         # table of module colors
 27 |         PROC + "/module_colors.tsv"
 28 | 
 29 | # 4) Preprocessing: filter & normalize the raw expression matrix
 30 | rule preprocess:
 31 |     input:
 32 |         raw = config["expr_file"]
 33 |     output:
 34 |         norm = INTER + "/expr_norm.Rds"
 35 |     params:
 36 |         method = config["norm_method"],
 37 |         topN  = config["topN"]
 38 |     # Adjacent string literals are concatenated into a single command line,
 39 |     # so each fragment except the last ends with a space.
 40 |     shell:
 41 |         "Rscript pipeline/scripts/preprocess.R "
 42 |         "--input {input.raw} "
 43 |         "--output {output.norm} "
 44 |         "--method {params.method} "
 45 |         "--topN {params.topN}"
 46 | 
 47 | # 5) Soft‐threshold power selection
 48 | rule pick_soft_threshold:
 49 |     input:
 50 |         norm = f"{INTER}/expr_norm.Rds"
 51 |     output:
 52 |         power = f"{INTER}/power.Rds",
 53 |         plot  = f"{FIGS}/soft_threshold.png"
 54 |     params:
 55 |         # R vector literal, e.g. "c(1,2,3)"; quoted below (:q) because bare
 56 |         # parentheses are a syntax error in the shell.
 57 |         powers      = "c(" + ",".join(map(str, config["powers"])) + ")",
 58 |         networkType = config.get("tom_type", "unsigned"),
 59 |         corType     = config.get("cor_type", "pearson")
 60 |     shell:
 61 |         """
 62 |         Rscript pipeline/scripts/pick_soft_threshold.R \
 63 |           --input {input.norm} \
 64 |           --output-power {output.power} --output-plot {output.plot} \
 65 |           --powers {params.powers:q} \
 66 |           --networkType {params.networkType} --corType {params.corType}
 67 |         """
 68 | 
 69 | # 6) Build TOM from the normalized expression and the chosen power
 70 | rule build_tom:
 71 |     input:
 72 |         norm  = INTER + "/expr_norm.Rds",
 73 |         power = INTER + "/power.Rds"
 74 |     output:
 75 |         tom   = INTER + "/tom.Rds"
 76 |     params:
 77 |         networkType = config.get("tom_type", "unsigned"),
 78 |         corType     = config.get("cor_type", "pearson")
 79 |     # --power receives the path of the power Rds file, not a number.
 80 |     # Adjacent string literals are concatenated into a single command line.
 81 |     shell:
 82 |         "Rscript pipeline/scripts/build_tom.R "
 83 |         "--expr {input.norm} "
 84 |         "--power {input.power} "
 85 |         "--output {output.tom} "
 86 |         "--networkType {params.networkType} "
 87 |         "--corType {params.corType}"
 88 | 
 89 | # 7) Detect modules & plot dendrogram
 90 | rule detect_modules:
 91 |     input:
 92 |         # detect_modules.R reads --expr as well as --tom
 93 |         expr = f"{INTER}/expr_norm.Rds",
 94 |         tom  = f"{INTER}/tom.Rds"
 95 |     output:
 96 |         dendrogram = f"{FIGS}/dendrogram.png",
 97 |         modules    = f"{PROC}/module_colors.tsv"
 98 |     params:
 99 |         deepSplit     = config["deep_split"],
100 |         minModSize    = config["min_module_size"],
101 |         mergeCutHeight= config["merge_height"],
102 |         outlierZ      = config["outlier_z"],
103 |         bootstrap     = str(config["bootstrap"]).upper(),
104 |         nBoot         = config["n_boot"]
105 |     shell:
106 |         """
107 |         Rscript pipeline/scripts/detect_modules.R \
108 |           --expr {input.expr} --tom {input.tom} \
109 |           --out-plot {output.dendrogram} --out-modules {output.modules} \
110 |           --deepSplit {params.deepSplit} \
111 |           --minModuleSize {params.minModSize} \
112 |           --mergeCutHeight {params.mergeCutHeight} \
113 |           --outlierZ {params.outlierZ} \
114 |           --bootstrap {params.bootstrap} --nBoot {params.nBoot}
115 |         """
116 | 


--------------------------------------------------------------------------------
/R_version/Rpackage/R/dendrogram_generation.R:
--------------------------------------------------------------------------------
  1 | # dendrogram_generation.R
  2 | # =======================
  3 | # Robust functions for accurate and stable dendrogram generation in a WGCNA workflow
  4 | 
  5 | # Required packages
  6 | # Install if missing: BiocManager::install("WGCNA"), install.packages(c(
  7 | #   "flashClust","dynamicTreeCut","ggdendro","plotly","dendextend"))
  8 | library(WGCNA)
  9 | library(flashClust)
 10 | library(dynamicTreeCut)
 11 | library(ggdendro)
 12 | library(plotly)
 13 | library(dendextend)
 14 | 
 15 | # Enable multithreading
 16 | enableWGCNAThreads()
 17 | 
 18 | #' Generate a robust gene dendrogram based on TOM dissimilarity
 19 | #'
 20 | #' Performs QC, power selection, network construction, module detection,
 21 | #' and optional stability assessment via cophenetic correlation and bootstrapping.
 22 | #' QC drops samples with missing values in more than 10% of the genes and
 23 | #' samples whose mean expression has |Z| above `outlierZCut`.
 24 | #'
 25 | #' @param exprData Numeric matrix (genes × samples)
 26 | #' @param power Soft-thresholding power (auto-picked if NULL)
 27 | #' @param powers Candidate powers vector
 28 | #' @param minModuleSize Minimum module size
 29 | #' @param mergeCutHeight Merge threshold for modules
 30 | #' @param tomType "unsigned" or "signed"
 31 | #' @param corType "pearson" or "bicor"
 32 | #' @param deepSplit Dynamic tree cut sensitivity (0–4)
 33 | #' @param outlierZCut Z-score cutoff for sample removal
 34 | #' @param bootstrap Logical: perform bootstrap stability
 35 | #' @param nBoot Number of bootstrap iterations
 36 | #' @return List with power, datExpr, geneTree, moduleColors, dissTOM,
 37 | #'   copheneticCorr, stability
 38 | #'
 39 | generateRobustDendrogram <- function(
 40 |     exprData,
 41 |     power = NULL,
 42 |     powers = 1:20,
 43 |     minModuleSize = 30,
 44 |     mergeCutHeight = 0.25,
 45 |     tomType = "unsigned",
 46 |     corType = "pearson",
 47 |     deepSplit = 2,
 48 |     outlierZCut = 2.5,
 49 |     bootstrap = FALSE,
 50 |     nBoot = 100
 51 | ) {
 52 |   # Resolve the correlation function once so every step uses the same one.
 53 |   corFnc <- if (corType == "bicor") bicor else cor
 54 | 
 55 |   # 0. Transpose & QC: after t() the rows are samples, so this drops samples
 56 |   #    with missing values in more than 10% of the genes.
 57 |   datExpr <- t(exprData)
 58 |   naCounts <- rowSums(is.na(datExpr))
 59 |   datExpr <- datExpr[naCounts <= ncol(datExpr)*0.1, , drop = FALSE]
 60 | 
 61 |   # 1. Sample outlier detection, by position: matching on row names misses
 62 |   #    outliers in unnamed data and over-removes when names are duplicated.
 63 |   zSample <- as.numeric(scale(rowMeans(datExpr, na.rm = TRUE)))
 64 |   isOutlier <- !is.na(zSample) & abs(zSample) > outlierZCut
 65 |   if (any(isOutlier)) {
 66 |     outliers <- rownames(datExpr)[isOutlier]
 67 |     if (is.null(outliers)) outliers <- which(isOutlier)
 68 |     warning("Removing outlier samples: ", paste(outliers, collapse=", "))
 69 |     datExpr <- datExpr[!isOutlier, , drop = FALSE]
 70 |   }
 71 | 
 72 |   # 2. Soft-threshold power selection
 73 |   if (is.null(power)) {
 74 |     sft <- pickSoftThreshold(
 75 |       datExpr, powerVector = powers, networkType = tomType,
 76 |       corFnc = corFnc, verbose = 0
 77 |     )
 78 |     power <- sft$powerEstimate
 79 |     if (is.na(power)) {
 80 |       power <- powers[which.max(sft$fitIndices[,"SFT.R.sq"])]
 81 |       message("Fallback power chosen: ", power)
 82 |     }
 83 |   }
 84 | 
 85 |   # 3. Adjacency & TOM
 86 |   adjMat <- adjacency(datExpr, power = power, type = tomType, corFnc = corFnc)
 87 |   tomMat <- TOMsimilarity(adjMat)
 88 |   dissTOM <- 1 - tomMat
 89 | 
 90 |   # 4. Gene clustering
 91 |   geneTree <- flashClust(as.dist(dissTOM), method="average")
 92 | 
 93 |   # 5. Dynamic tree cutting
 94 |   dynMods <- cutreeDynamic(
 95 |     dendro = geneTree,
 96 |     distM = dissTOM,
 97 |     deepSplit = deepSplit,
 98 |     pamRespectsDendro = FALSE,
 99 |     minClusterSize = minModuleSize
100 |   )
101 |   moduleColors <- labels2colors(dynMods)
102 | 
103 |   # 6. Module merging: only if modules beyond grey
104 |   nonGrey <- setdiff(unique(moduleColors), "grey")
105 |   if (length(nonGrey) == 0) {
106 |     warning("Only grey module detected; skipping merge step.")
107 |     mergedColors <- moduleColors
108 |   } else {
109 |     # mergeCloseModules() works from the expression data and colors directly.
110 |     merged <- mergeCloseModules(
111 |       datExpr,
112 |       moduleColors,
113 |       cutHeight = mergeCutHeight,
114 |       verbose = 0
115 |     )
116 |     mergedColors <- merged$colors
117 |   }
118 | 
119 |   # 7. Cophenetic correlation
120 |   copheCorr <- cor(
121 |     cophenetic(geneTree),
122 |     as.dist(dissTOM),
123 |     use = "pairwise.complete.obs"
124 |   )
125 | 
126 |   # 8. Bootstrap stability: resample the samples, rebuild the tree with the
127 |   #    same network type and correlation, and compare it with geneTree.
128 |   stability <- NULL
129 |   if (bootstrap) {
130 |     stability <- replicate(nBoot, {
131 |       idx <- sample(seq_len(nrow(datExpr)), nrow(datExpr), replace = TRUE)
132 |       exprB <- datExpr[idx, , drop=FALSE]
133 |       adjB <- adjacency(exprB, power = power, type = tomType, corFnc = corFnc)
134 |       dissB <- 1 - TOMsimilarity(adjB)
135 |       treeB <- flashClust(as.dist(dissB), method="average")
136 |       cor(cophenetic(treeB), cophenetic(geneTree), use="pairwise.complete.obs")
137 |     })
138 |   }
139 | 
140 |   # 9. Plot dendrogram
141 |   plotDendroAndColors(
142 |     geneTree,
143 |     mergedColors,
144 |     groupLabels = c("Modules"),
145 |     main = "Robust Gene Dendrogram"
146 |   )
147 | 
148 |   # Return
149 |   list(
150 |     power = power,
151 |     datExpr = datExpr,
152 |     geneTree = geneTree,
153 |     moduleColors = mergedColors,
154 |     dissTOM = dissTOM,
155 |     copheneticCorr = copheCorr,
156 |     stability = stability
157 |   )
158 | }
159 | 
160 | # Interactive dendrogram. `stability` may hold one value per segment or any
161 | # other vector (e.g. bootstrap correlations), which is summarised by its mean.
162 | plotInteractiveDendrogram <- function(geneTree, stability=NULL) {
163 |   segs <- segment(dendro_data(as.dendrogram(geneTree)))
164 |   hover <- NULL
165 |   if (!is.null(stability)) {
166 |     # aes() needs length 1 or one value per segment; collapse anything else.
167 |     if (length(stability) != nrow(segs)) stability <- mean(stability, na.rm = TRUE)
168 |     hover <- paste0("Stability: ", round(stability, 3))
169 |   }
170 |   p <- ggplot() +
171 |     geom_segment(data = segs, aes(x=x, y=y, xend=xend, yend=yend, text = hover)) +
172 |     labs(title="Interactive Robust Gene Dendrogram") + theme_minimal()
173 |   ggplotly(p, tooltip="text")
174 | }
175 | 


--------------------------------------------------------------------------------