├── 3_min_GSEA_tutorial.Rmd ├── 5x_doublets ├── a.txt ├── analyze_doublets.ipynb └── doublet_methods.ipynb ├── AUCell_intro.Rmd ├── DE_results.csv ├── GO_in_R.Rmd ├── GO_in_python.ipynb ├── GSEA_in_python.ipynb ├── PyDeseq2_DE_tutorial.ipynb ├── RNA_Velocity_scvelo.ipynb ├── RNAseq_method_comparison ├── GTEX_240_sample_ids.csv ├── NOTES.txt ├── analysis.ipynb ├── increasing_depth.csv.gz ├── make_counts_for_polyester.ipynb └── read_generation.Rmd ├── Random_forest_single_cell_classification.ipynb ├── Scanpy_intro_pp_clustering_markers.ipynb ├── bitfam_random_forest.ipynb ├── celloracle_pseudotime_GRN.ipynb ├── convert_ensemble_ids.ipynb ├── count_table_for_deseq_example.csv ├── doublet_removal_SOLO_scVI.ipynb ├── h5ad_to_seurat.ipynb ├── high_quality_barplots.ipynb ├── high_quality_lineplots.ipynb ├── high_quality_volcano_plots.ipynb ├── hypergeometric_enrichment_test_p_value.ipynb ├── integration_comparison ├── harmony.Rmd ├── readme.txt ├── scanorama.ipynb ├── scvi.ipynb ├── seurat_cca.Rmd └── seurat_rpca.Rmd ├── monocle3_tutorial.Rmd ├── paCMAP_examples.ipynb ├── pseudobulk_pyDeseq2.ipynb ├── python_sequence_alignment.ipynb ├── salmon_to_deseq.Rmd ├── sars.ipynb ├── sc2024 ├── annotation_integration.ipynb ├── bad_mapping.ipynb ├── iterative_preprocessing.ipynb ├── preprocessing.ipynb └── readme.txt ├── scATAC_intro_R.Rmd ├── scATAC_intro_R.nb.html ├── scVI_tools_introduction.ipynb ├── scvi_label_transfer.ipynb ├── seqs.fasta ├── shifted_transformation.ipynb ├── simple_scanpy_integration.ipynb ├── simpleaf_alevin_fry_tutorial.txt ├── single_cell_analysis_complete_class.ipynb ├── single_cell_gene_co-expression.ipynb ├── single_r.Rmd ├── soupX ├── readme.txt ├── soupX_R_tutorial.Rmd └── soupX_python_test.ipynb ├── spatial_seq_intro.ipynb ├── test_significance_t_u_shapiro.ipynb └── tutorial_complex_Heatmap.Rmd /3_min_GSEA_tutorial.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "R Notebook" 3 | output: 
html_notebook 4 | --- 5 | 6 | ```{r} 7 | library(DESeq2) 8 | Counts <- read.delim("count_table.csv", header = TRUE, row.names = 1, sep = ",") 9 | Counts <- Counts[which(rowSums(Counts) > 0),] # keep genes with a non-zero total count 10 | condition <- factor(c("C","C","C","C", "S","S","S","S")) # one label per count-table column, in column order 11 | coldata <- data.frame(row.names = colnames(Counts), condition) 12 | dds <- DESeqDataSetFromMatrix(countData = Counts, colData = coldata, design = ~condition) 13 | dds <- DESeq(dds) 14 | res <- results(dds, contrast = c("condition", "S", "C")) # fold changes are S relative to C 15 | res <- na.omit(res) # drop rows containing NA values 16 | res <- res[res$baseMean > 50,] 17 | ``` 18 | 19 | ```{r} 20 | res 21 | ``` 22 | 23 | 24 | ```{r} 25 | # Install BiocManager and the Bioconductor packages only when they are missing. 26 | if (!requireNamespace("BiocManager", quietly = TRUE)) { 27 | install.packages("BiocManager") 28 | } 29 | 30 | # org.Hs.eg.db is the human annotation package; use org.Mm.eg.db for mouse 31 | bioc_pkgs <- c("org.Hs.eg.db", "clusterProfiler", "AnnotationDbi") 32 | 33 | # requireNamespace() checks availability without attaching the package 34 | is_installed <- vapply(bioc_pkgs, requireNamespace, logical(1), quietly = TRUE) 35 | 36 | missing_pkgs <- bioc_pkgs[!is_installed] 37 | 38 | if (length(missing_pkgs) > 0) { 39 | BiocManager::install(missing_pkgs) 40 | } 41 | ``` 42 | 43 | 44 | ```{r} 45 | library(org.Hs.eg.db) 46 | library(clusterProfiler) 47 | ``` 48 | 49 | 50 | 51 | 52 | ```{r} 53 | res <- res[order(-res$stat),] # rank genes by decreasing test statistic 54 | res 55 | ``` 56 | 57 | ```{r} 58 | gene_list <- res$stat 59 | names(gene_list) <- rownames(res) # gseGO matches genes by these names (ENSEMBL IDs here) 60 | gene_list 61 | ``` 62 | 63 | 64 | 65 | ```{r} 66 | gse <- gseGO(gene_list, 67 | ont = "BP", 68 | keyType = "ENSEMBL", 69 | OrgDb = "org.Hs.eg.db", 70 | eps = 1e-300) 71 | ``` 72 | 73 | ```{r} 74 | as.data.frame(gse) 75 | ``` 76 | 77 | 78 | ```{r} 79 | fit <- gseaplot(gse, geneSetID = 1) 80 | 81 | png("gsea.png", res = 250, width = 2000, height = 1300) 82 | print(fit) 83 | dev.off() 84 | 85 | fit 86 | ``` 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 
128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /5x_doublets/a.txt: -------------------------------------------------------------------------------- 1 | blank 2 | -------------------------------------------------------------------------------- /AUCell_intro.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "R Notebook" 3 | output: html_notebook 4 | --- 5 | 6 | 7 | ```{r} 8 | if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") 9 | 10 | # install AUCell only when it is not already available 11 | if (!requireNamespace("AUCell", quietly = TRUE)) BiocManager::install("AUCell") 12 | ``` 13 | 14 | ```{r} 15 | library(AUCell) 16 | library(Seurat) 17 | ``` 18 | 19 | ```{r} 20 | load("droplet_Lung_seurat_tiss.Robj") 21 | tiss <- UpdateSeuratObject(object = tiss) 22 | ``` 23 | 24 | ```{r} 25 | tiss[[]] 26 | ``` 27 | 28 | ```{r} 29 | DimPlot(object = tiss, group.by = "cell_ontology_class", label = TRUE) 30 | ``` 31 | 32 | ```{r} 33 | markers <- read.csv("PanglaoDB_markers_27_Mar_2020.tsv", sep = "\t") 34 | markers <- markers[markers$cell.type == "Endothelial cells" & markers$species != "Hs",] 35 | markers 36 | ``` 37 | ```{r} 38 | genes <- markers$official.gene.symbol 39 | 40 | # mouse-style symbol: first character unchanged, remaining characters lower-cased 41 | mousify <- function(a) { 42 | paste0(substr(a, 1, 1), tolower(substr(a, 2, nchar(a)))) 43 | } 44 | genes <- vapply(genes, mousify, character(1)) 45 | genes 46 | ``` 47 | 48 | ```{r} 49 | counts <- GetAssayData(object = tiss, slot = "counts") 50 | ``` 51 | 52 | ```{r} 53 | cell_rankings <- AUCell_buildRankings(counts) 54 | ``` 55 | 56 | ```{r} 57 | cells_AUC <- AUCell_calcAUC(genes, cell_rankings) 58 | ``` 59 | 60 | 61 | ```{r} 62 | cells_assignment <- AUCell_exploreThresholds(cells_AUC, plotHist = TRUE, assign=TRUE) 63 | ``` 64 | 65 | ```{r} 66 | cells_assignment$geneSet$assignment 67 | ``` 68 | 69 | 70 | ```{r} 71 | new_cells <- names(which(getAUC(cells_AUC)["geneSet",]>0.15)) 72 | ``` 73 | 74 | ```{r} 75 | tiss$is_ec <- 
ifelse(colnames(tiss) %in% new_cells, "EC", "non_EC") 76 | ``` 77 | 78 | ```{r} 79 | tiss[[]] 80 | ``` 81 | 82 | ```{r} 83 | DimPlot(object = tiss, group.by = "is_ec", label = TRUE) 84 | ``` 85 | 86 | ```{r} 87 | DimPlot(object = tiss, group.by = "cell_ontology_class", label = TRUE) 88 | ``` 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | -------------------------------------------------------------------------------- /GO_in_R.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "R Notebook" 3 | output: html_notebook 4 | --- 5 | 6 | ```{r} 7 | library(DESeq2) 8 | Counts <- read.delim("../count_table.csv", header = TRUE, row.names = 1, sep = ",") 9 | Counts <- Counts[which(rowSums(Counts) > 0),] 10 | condition <- factor(c("C","C","C","C", "S","S","S","S")) 11 | coldata <- data.frame(row.names = colnames(Counts), condition) 12 | dds <- DESeqDataSetFromMatrix(countData = Counts, colData = coldata, design = ~condition) 13 | dds <- DESeq(dds) 14 | res <- results(dds, contrast = c("condition", "S", "C")) 15 | sigs <- na.omit(res) 16 | sigs <- sigs[sigs$padj < 0.05 & sigs$baseMean > 50,] 17 | ``` 18 | 19 | ```{r} 20 | sigs 21 | ``` 22 | 23 | 24 | 25 | 26 | ```{r} 27 | # Install BiocManager and the Bioconductor packages only when they are missing. 28 | if (!requireNamespace("BiocManager", quietly = TRUE)) { 29 | install.packages("BiocManager") 30 | } 31 | 32 | bioc_pkgs <- c("clusterProfiler", "AnnotationDbi", "org.Hs.eg.db") 33 | 34 | # requireNamespace() checks availability without attaching the package 35 | is_installed <- vapply(bioc_pkgs, requireNamespace, logical(1), quietly = TRUE) 36 | 37 | missing_pkgs <- bioc_pkgs[!is_installed] 38 | if (length(missing_pkgs) > 0) { 39 | BiocManager::install(missing_pkgs) 40 | } 41 | ``` 42 | 43 | ```{r} 44 | library(clusterProfiler) 45 | library(org.Hs.eg.db) 46 | 
library(AnnotationDbi) 47 | ``` 48 | 49 | ```{r} 50 | genes_to_test <- rownames(sigs[sigs$log2FoldChange > 0.5,]) 51 | ``` 52 | 53 | ```{r} 54 | GO_results <- enrichGO(gene = genes_to_test, OrgDb = "org.Hs.eg.db", keyType = "ENSEMBL", ont = "BP") 55 | ``` 56 | 57 | ```{r} 58 | as.data.frame(GO_results) 59 | ``` 60 | 61 | ```{r} 62 | fit <- plot(barplot(GO_results, showCategory = 15)) 63 | 64 | png("out.png", res = 250, width = 1400, height = 1800) 65 | print(fit) 66 | dev.off() 67 | 68 | fit 69 | ``` 70 | 71 | ```{r} 72 | 73 | ``` 74 | 75 | ```{r} 76 | 77 | ``` 78 | 79 | 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /RNAseq_method_comparison/GTEX_240_sample_ids.csv: -------------------------------------------------------------------------------- 1 | GTEX-111CU-2026-SM-5GZZC 2 | GTEX-111VG-2226-SM-5N9DU 3 | GTEX-1128S-2326-SM-5GZZY 4 | GTEX-113IC-0226-SM-5HL5C 5 | GTEX-117YX-1226-SM-5H11S 6 | GTEX-11DXW-1226-SM-5H133 7 | GTEX-11DXX-1926-SM-5EGJK 8 | GTEX-11DXX-2326-SM-9YFKS 9 | GTEX-11DXZ-0726-SM-5N9C4 10 | GTEX-11DXZ-2626-SM-59882 11 | GTEX-11EQ9-1826-SM-5Q5AJ 12 | GTEX-11GS4-1826-SM-5HL4T 13 | GTEX-11GS4-3126-SM-5A5LH 14 | GTEX-11GSO-0626-SM-5A5LW 15 | GTEX-11GSP-2326-SM-5HL63 16 | GTEX-11LCK-0926-SM-5A5KA 17 | GTEX-11O72-0006-SM-5O9DB 18 | GTEX-11TTK-0426-SM-5EQLT 19 | GTEX-11TTK-2826-SM-5GU5K 20 | GTEX-11TUW-0006-SM-5LZW2 21 | GTEX-11TUW-0226-SM-5LU8X 22 | GTEX-11TUW-1826-SM-5BC5D 23 | GTEX-11UD1-0011-R5b-SM-5P9FP 24 | GTEX-11UD2-0005-SM-5NQ9B 25 | GTEX-11UD2-0226-SM-5EQKY 26 | GTEX-11WQK-1826-SM-5EQM2 27 | GTEX-11WQK-2926-SM-5EQKT 28 | GTEX-11ZTS-1426-SM-5EQMM 29 | GTEX-11ZU8-0126-SM-5EQ58 30 | GTEX-11ZVC-1426-SM-5EGGA 31 | GTEX-12126-0011-R10b-SM-5BC6T 32 | GTEX-1212Z-0008-SM-5TDBW 33 | GTEX-1269C-2826-SM-5EQ5O 34 | GTEX-12WSG-5016-SM-7SB84 35 | GTEX-12WSK-0008-SM-5YY9R 36 | GTEX-12WSM-0011-R10a-SM-5DUWV 37 | GTEX-12ZZY-1526-SM-5LZWF 38 | GTEX-12ZZZ-0008-SM-5YY9S 39 | GTEX-12ZZZ-0011-R5a-SM-5EQ4M 40 | 
GTEX-13111-1126-SM-5GCMZ 41 | GTEX-13113-5019-SM-7EPH2 42 | GTEX-1313W-0011-R10b-SM-5DUXA 43 | GTEX-1313W-0011-R4b-SM-5KLZV 44 | GTEX-1313W-0011-R5b-SM-5L3EP 45 | GTEX-1313W-0326-SM-5LZU5 46 | GTEX-131XG-2626-SM-5KM17 47 | GTEX-131XH-2626-SM-5GCNP 48 | GTEX-131XH-2926-SM-5LZU4 49 | GTEX-131XW-3226-SM-5LZUO 50 | GTEX-132Q8-0011-R6a-SM-5K7YN 51 | GTEX-1339X-2726-SM-5PNYU 52 | GTEX-1399S-2026-SM-5KM4B 53 | GTEX-1399T-0826-SM-5IFES 54 | GTEX-139D8-1126-SM-5LU8W 55 | GTEX-139T6-0626-SM-5IFF9 56 | GTEX-139TS-0008-SM-62LDG 57 | GTEX-139TU-0226-SM-5J1NM 58 | GTEX-139UW-2826-SM-5L3E7 59 | GTEX-13CF2-0011-R2a-SM-5L3DC 60 | GTEX-13CF2-0011-R5a-SM-5LZWS 61 | GTEX-13D11-0626-SM-5LZYY 62 | GTEX-13D11-0726-SM-5LZZB 63 | GTEX-13D11-2226-SM-5IFEO 64 | GTEX-13FH7-1326-SM-5IFG8 65 | GTEX-13FLV-0626-SM-5IFEY 66 | GTEX-13FTW-1526-SM-5LZXC 67 | GTEX-13FTW-2126-SM-5K7YG 68 | GTEX-13FTX-1126-SM-5N9EN 69 | GTEX-13FXS-0011-R2b-SM-5K7XX 70 | GTEX-13G51-0011-R6b-SM-5LZX4 71 | GTEX-13IVO-0726-SM-5LZYV 72 | GTEX-13JVG-3026-SM-5IJEV 73 | GTEX-13NYB-0226-SM-5N9G4 74 | GTEX-13NYB-2226-SM-5MR58 75 | GTEX-13NYB-2426-SM-5IFF4 76 | GTEX-13NYS-1926-SM-5IJCB 77 | GTEX-13NZ9-1026-SM-5MR5K 78 | GTEX-13NZ9-1226-SM-5MR3J 79 | GTEX-13O1R-1026-SM-5KM2L 80 | GTEX-13O1R-1326-SM-5IJF5 81 | GTEX-13O3O-0011-R2b-SM-5P9H1 82 | GTEX-13O3O-0011-R4b-SM-5KM3F 83 | GTEX-13O3P-0726-SM-5J2OM 84 | GTEX-13O3Q-2726-SM-5KM51 85 | GTEX-13O61-1126-SM-5L3FI 86 | GTEX-13OVG-1026-SM-5KLZ3 87 | GTEX-13OVH-3026-SM-5MR4N 88 | GTEX-13OVI-1026-SM-5L3EM 89 | GTEX-13OVJ-1426-SM-5K7Z2 90 | GTEX-13OVL-0126-SM-5L3GJ 91 | GTEX-13OW5-0011-R7a-SM-5O9C9 92 | GTEX-13OW5-1126-SM-5J1NR 93 | GTEX-13OW6-3026-SM-5J2MI 94 | GTEX-13OW7-0426-SM-5K7VK 95 | GTEX-13PLJ-1726-SM-5L3FT 96 | GTEX-13RTJ-0526-SM-62LDP 97 | GTEX-13SLX-2426-SM-664OM 98 | GTEX-13SLX-3226-SM-5YYA6 99 | GTEX-13U4I-0426-SM-5LU4W 100 | GTEX-13VXT-0726-SM-5SIAD 101 | GTEX-13VXT-1426-SM-5LU4B 102 | GTEX-13X6J-2126-SM-5TDCV 103 | GTEX-144GM-1926-SM-5LUAN 104 | GTEX-145LS-2926-SM-5O99G 105 
| GTEX-145ME-2026-SM-5SIA5 106 | GTEX-145MH-0011-R5a-SM-5P9JT 107 | GTEX-145MI-0011-R6a-SM-5PNZA 108 | GTEX-145MO-0626-SM-5NQAW 109 | GTEX-14753-2926-SM-5LU9J 110 | GTEX-1477Z-2226-SM-5QGPG 111 | GTEX-148VI-1026-SM-5TDDJ 112 | GTEX-148VJ-0006-SM-5NQB1 113 | GTEX-148VJ-2626-SM-5QGPI 114 | GTEX-14ABY-0011-R1a-SM-6EU2W 115 | GTEX-14AS3-0126-SM-5Q5F4 116 | GTEX-14ASI-0011-R4a-SM-69LQ4 117 | GTEX-14B4R-1426-SM-5Q5CG 118 | GTEX-14BIL-0006-SM-5N9F2 119 | GTEX-14BIN-0011-R10a-SM-5S2UA 120 | GTEX-14BIN-0626-SM-793DP 121 | GTEX-14BIN-2426-SM-5TDCF 122 | GTEX-14BIN-2626-SM-5YY8U 123 | GTEX-14C39-0526-SM-664OF 124 | GTEX-14C39-1826-SM-5ZZW4 125 | GTEX-14C5O-0526-SM-62LEI 126 | GTEX-14DAR-2026-SM-5S2O3 127 | GTEX-14E7W-1026-SM-62LEK 128 | GTEX-14E7W-1826-SM-69LQ1 129 | GTEX-14JFF-0005-SM-7P8RF 130 | GTEX-14JG6-2226-SM-6EU2G 131 | GTEX-14JIY-0011-R1a-SM-68713 132 | GTEX-14JIY-0011-R8a-SM-6AJAP 133 | GTEX-14LLW-0126-SM-6LLIO 134 | GTEX-14PII-1026-SM-5ZZVW 135 | GTEX-14PJ2-2126-SM-5YY96 136 | GTEX-14PJ3-0005-SM-9JGFT 137 | GTEX-14PJO-1826-SM-69LPR 138 | GTEX-14PK6-1326-SM-686ZE 139 | GTEX-14PKV-1426-SM-5YYB9 140 | GTEX-14PN3-1226-SM-69LOW 141 | GTEX-15DCD-0226-SM-6LPKC 142 | GTEX-15DYW-2626-SM-6LPK7 143 | GTEX-15DZA-0826-SM-6AJBF 144 | GTEX-15EO6-0011-R11b-SM-6M47M 145 | GTEX-15EO6-3126-SM-6LPIQ 146 | GTEX-15EOM-5016-SM-7P8SR 147 | GTEX-15G1A-2026-SM-7KUFG 148 | GTEX-169BO-0126-SM-79OLS 149 | GTEX-16MT8-1326-SM-6M47R 150 | GTEX-16NPV-2426-SM-6M482 151 | GTEX-16YQH-0006-SM-7P8OQ 152 | GTEX-16YQH-0426-SM-7KUL7 153 | GTEX-16Z82-0126-SM-7DHLF 154 | GTEX-17EVQ-0526-SM-7KFSK 155 | GTEX-17EVQ-2926-SM-7EWE3 156 | GTEX-17F96-1126-SM-7EWDF 157 | GTEX-17F9E-0526-SM-7DUFD 158 | GTEX-17F9Y-1226-SM-7EPGE 159 | GTEX-17GQL-0226-SM-7LTAK 160 | GTEX-17HG3-0011-R5a-SM-7DUEW 161 | GTEX-17HG3-2226-SM-7938L 162 | GTEX-17HGU-2426-SM-7EWDP 163 | GTEX-17JCI-0011-R10b-SM-718A2 164 | GTEX-17JCI-0726-SM-7EPH1 165 | GTEX-183FY-1926-SM-7KFRI 166 | GTEX-18465-1126-SM-7LG6E 167 | GTEX-18465-2726-SM-7LT9I 168 | 
GTEX-18A67-0526-SM-7LT9X 169 | GTEX-18A6Q-0011-R11a-SM-72D6H 170 | GTEX-18A6Q-0011-R1b-SM-731DI 171 | GTEX-18A6Q-0826-SM-7KFRD 172 | GTEX-18A7A-1426-SM-731AP 173 | GTEX-18A7B-0526-SM-7LG68 174 | GTEX-18D9A-1326-SM-7LT8P 175 | GTEX-18D9B-1926-SM-7KFSV 176 | GTEX-18D9B-2526-SM-718BN 177 | GTEX-18QFQ-1726-SM-731C9 178 | GTEX-1A3MV-1726-SM-72D68 179 | GTEX-1A3MX-0005-SM-7MGW7 180 | GTEX-1A3MX-0011-R1b-SM-7P8PH 181 | GTEX-1A3MX-0011-R3b-SM-79OOW 182 | GTEX-1A3MX-1926-SM-72D7F 183 | GTEX-1A8G6-0011-R6b-SM-7P8PE 184 | GTEX-1A8G6-0526-SM-7PC1E 185 | GTEX-1A8G6-1626-SM-7MGWO 186 | GTEX-1AMEY-1526-SM-73KYW 187 | GTEX-1AX9I-2326-SM-7PBXV 188 | GTEX-1AYCT-1026-SM-79ONR 189 | GTEX-1AYCT-2026-SM-793CJ 190 | GTEX-1B8KZ-0126-SM-7DHM5 191 | GTEX-1B8SG-0006-SM-7MKFA 192 | GTEX-1B8SG-1826-SM-731F3 193 | GTEX-1B933-2926-SM-731FO 194 | GTEX-1B996-0011-R5a-SM-7P8PN 195 | GTEX-1BAJH-0926-SM-79OO6 196 | GTEX-1BAJH-2526-SM-7IGOT 197 | GTEX-1C64N-0426-SM-7PC31 198 | GTEX-1C64O-0006-SM-7PC22 199 | GTEX-1C6VS-1226-SM-79OO2 200 | GTEX-1C6WA-0011-R6a-SM-7PBYP 201 | GTEX-1CAMQ-0426-SM-7IGPL 202 | GTEX-1CAMR-0526-SM-7P8RS 203 | GTEX-1CAMS-0326-SM-7PC35 204 | GTEX-1CB4E-0626-SM-7DHMW 205 | GTEX-1EH9U-0226-SM-7PBY8 206 | GTEX-1EH9U-3226-SM-7MKGR 207 | GTEX-1EKGG-0226-SM-9WYTH 208 | GTEX-1EMGI-2626-SM-7IGNR 209 | GTEX-1EU9M-0011-R9a-SM-9WG61 210 | GTEX-1EWIQ-3126-SM-7MXTI 211 | GTEX-1F52S-0011-R3a-SM-CKZNI 212 | GTEX-1F5PL-0826-SM-7MXU7 213 | GTEX-1F5PL-1626-SM-7MXTY 214 | GTEX-1F6IF-0526-SM-7MKHD 215 | GTEX-1F75A-0011-R11a-SM-AHZ35 216 | GTEX-1F75A-0726-SM-7RHHD 217 | GTEX-1F75B-0011-R2a-SM-ARL8I 218 | GTEX-1GF9U-2026-SM-7SB92 219 | GTEX-1GF9U-2126-SM-7SB7L 220 | GTEX-1GF9W-0011-R4a-SM-CE6RH 221 | GTEX-1GF9W-0011-R6b-SM-9QEIE 222 | GTEX-1GF9W-0126-SM-7PC12 223 | GTEX-1GF9X-1126-SM-7MKHC 224 | GTEX-1GMR3-1326-SM-7P8TB 225 | GTEX-1GMR3-2726-SM-7MKFB 226 | GTEX-1GMR8-0005-SM-ARZL2 227 | GTEX-1GN1U-2126-SM-AHZ4J 228 | GTEX-1GN1W-0526-SM-9MQJQ 229 | GTEX-1GN2E-0426-SM-9MQKX 230 | 
GTEX-1GN73-0005-SM-ACKVI 231 | GTEX-1GPI7-0326-SM-7MKH6 232 | GTEX-1GPI7-0926-SM-7PC3J 233 | GTEX-1GTWX-0426-SM-9MQM1 234 | GTEX-1GZ2Q-0226-SM-7P8TL 235 | GTEX-1GZ4H-0126-SM-9JGGI 236 | GTEX-1GZ4I-0011-R4a-SM-9QEI9 237 | GTEX-1H1CY-0011-R5a-SM-CM2SN 238 | GTEX-1H1CY-3026-SM-9OSW7 239 | GTEX-1H1DG-0011-R10b-SM-CE6S7 240 | GTEX-1H1ZS-0526-SM-9WG5L 241 | -------------------------------------------------------------------------------- /RNAseq_method_comparison/NOTES.txt: -------------------------------------------------------------------------------- 1 | 1) Transcript quant methods: 2 | Salmon 3 | kallisto 4 | STAR + RSEM 5 | STAR + StringTie 6 | 7 | 2) Gene count methods: 8 | Salmon 9 | kallisto 10 | STAR + RSEM 11 | STAR + StringTie 12 | STAR + HTSeq-count 13 | STAR + featureCounts 14 | 15 | 16 | 3) References 17 | gencode.v44.transcripts.fa.gz 18 | GRCh38.primary_assembly.genome.fa.gz 19 | gencode.v44.primary_assembly.annotation.gtf.gz 20 | GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_transcript_expected_count.gct.gz 21 | 22 | 4) Datasets 23 | 240 random gTEX samples simulated 24 | 25 | Increasing coverage of 5 closest GTEX samples to the total counts q75 value. 
simulated 26 | 1,5,10,15,20,25,30,40,50,60,70,80,90,100 % read depth, n = 5 27 | 28 | Samples simulated with polyester, paired, 101 bp, unstranded 29 | ('filtered_fasta.fa.gz' is gencode.v44.transcripts.fa.gz with no transcript < 100 bp) 30 | simulate_experiment_countmat('filtered_fasta.fa.gz', readmat = , outdir = <>, 31 | paired = TRUE, error_model = 'uniform', 32 | error_rate = 0.004, readlen = 101, gzip=TRUE) 33 | 34 | 35 | 36 | 37 | #####STAR 38 | STAR --runMode genomeGenerate --genomeDir STAR_index --genomeFastaFiles GRCh38.primary_assembly.genome.fa --sjdbGTFfile gencode.v44.primary_assembly.annotation.gtf --runThreadN 22 39 | 40 | STAR --runMode alignReads --genomeDir STAR_index --outSAMtype BAM SortedByCoordinate --readFilesIn "$file_r1" "$file_r2" --readFilesCommand gunzip -c --runThreadN 12 --outFileNamePrefix "gtex_STAR_out/${samp}" 41 | 42 | 43 | 44 | #####RSEM 45 | rsem-prepare-reference --gtf gencode.v44.primary_assembly.annotation.gtf --star -p 12 GRCh38.primary_assembly.genome.fa rsem_ref/rsem_ref 46 | 47 | rsem-calculate-expression --star --no-bam-output --paired-end --star-gzipped-read-file --no-qualities -p 18 "$file_r1" "$file_r2" rsem_ref/rsem_ref "gtex_rsem/${samp}" 48 | 49 | 50 | 51 | #####stringtie v2.2.1 52 | stringtie -G gencode.v44.primary_assembly.annotation.gtf -p 8 -e -o gtex_stringtie/${samp}.gtf -A gtex_stringtie/${samp}.genes "$file_bam" 53 | prepDE.py -l 101 -t wut_if_god_was.csv -i sample_sheet.txt 54 | 55 | not easy to use because of requiring star -> stringtie -> prepDE. output is in a weird gtf format. 
prepDE is not intuitive 56 | 57 | 58 | 59 | #####salmon v1.10.2 60 | salmon index -t trans_and_deocys.fa.gz -d decoys.txt -p 30 -i human_salmon_index --gencode 61 | 62 | salmon quant -i "$salmon_index" -l A -1 "$file_r1" -2 "$file_r2" -p 18 --validateMappings -o "random_salmon_out/${samp}" 63 | 64 | 65 | 66 | #####kallisto v0.50.0 67 | kallisto index -i kallisto_index gencode.v44.transcripts.fa.gz 68 | 69 | kallisto quant -i "$kallisto_index" -o "gtex_kallisto_out/${samp}" -t 22 "$file_r1" "$file_r2" 70 | 71 | 72 | 73 | #####htseq v2.0.3 74 | for file in *bam; do samtools index $file; done 75 | 76 | htseq-count -f bam -r pos --stranded no --max-reads-in-buffer 90000000 gtex_STAR_out/*bam gencode.v44.primary_assembly.annotation.gtf 77 | 78 | 79 | find gtex_STAR_out -name '*Aligned.sortedByCoord.out.bam' | parallel -j 12 'base=$(basename {} "Aligned.sortedByCoord.out.bam"); htseq-count -f bam -r pos --stranded no --max-reads-in-buffer 90000000 {} gencode.v44.primary_assembly.annotation.gtf > "htseq_gtex_counts/sample_${base}.txt"' 80 | 81 | 82 | 83 | #####featureCounts (subread v2.0.6) 84 | featureCounts -p --countReadPairs -a gencode.v44.primary_assembly.annotation.gtf -T 6 -o gtex_feature_counts.txt gtex_STAR_out/*bam 85 | -------------------------------------------------------------------------------- /RNAseq_method_comparison/increasing_depth.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mousepixels/sanbomics_scripts/521f16b9b4cedb736c28403a557462cffa35f06a/RNAseq_method_comparison/increasing_depth.csv.gz -------------------------------------------------------------------------------- /RNAseq_method_comparison/read_generation.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "R Notebook" 3 | output: html_notebook 4 | --- 5 | 6 | 7 | 8 | ```{r} 9 | library(polyester) 10 | library(Biostrings) 11 | library(readr) 12 | ``` 13 | 14 | 15 | 16 
| ```{r} 17 | # remove transcripts shorter than 100 bp 18 | fasta <- readDNAStringSet('gencode.v44.transcripts.fa.gz') 19 | fasta <- fasta[width(fasta) >= 100] 20 | writeXStringSet(fasta, 'filtered_fasta.fa.gz', compress = TRUE) 21 | ``` 22 | 23 | 24 | ```{r} 25 | fasta <- readDNAStringSet('filtered_fasta.fa.gz') 26 | ``` 27 | 28 | 29 | simulating gtex samples, I stopped at 240 instead of 500 because I enjoy life 30 | ```{r} 31 | df <- read_csv("gtex_500.csv") 32 | df$transcript_id <- NULL # drop the transcript_id column before converting to a matrix 33 | df <- as.matrix(df) 34 | 35 | 36 | simulate_experiment_countmat('filtered_fasta.fa.gz', readmat = df, outdir = 'test_reads', 37 | paired = TRUE, error_model = 'uniform', 38 | error_rate = 0.004, readlen = 101, gzip=TRUE) 39 | ``` 40 | 41 | 42 | 43 | simulating increasing depths counts 44 | ```{r} 45 | df <- read_csv('increasing_depth.csv') 46 | df <- as.matrix(df) 47 | 48 | 49 | 50 | simulate_experiment_countmat('filtered_fasta.fa.gz', readmat = df , outdir = 'increasing_depth', 51 | paired = TRUE, error_model = 'uniform', 52 | error_rate = 0.004, readlen = 101, gzip=TRUE) 53 | 54 | ``` 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /convert_ensemble_ids.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 26, 6 | "id": "d89a4260", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "####complete function, may need slight modification based on your gtf format####\n", 11 | "\n", 12 | "def get_ens_dict(file_path):\n", 13 | " with open(file_path) as f:\n", 14 | " gtf = list(f)\n", 15 | "\n", 16 | " gtf = [x for x in gtf if not x.startswith('#')]\n", 17 | " gtf = [x for x in gtf if 'gene_id \"' in x and 'gene_name \"' in x]\n", 18 | " if len(gtf) == 0:\n", 19 | " print('you need to change gene_id \" and gene_name \" formats')\n", 20 | " \n", 21 | " gtf = list(map(lambda x: 
(x.split('gene_id \"')[1].split('\"')[0], x.split('gene_name \"')[1].split('\"')[0]), gtf))\n", 22 | " gtf = dict(set(gtf))\n", 23 | " return gtf\n", 24 | "\n", 25 | "gtf_dict = get_ens_dict('Homo_sapiens.GRCh38.105.gtf') #replace with your file path" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "id": "771ccf9b", 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "count_table.csv Homo_sapiens.GRCh38.105.gtf\ttutorial.Rmd\t Untitled.ipynb\r\n", 39 | "deseq_results.csv tutorial.nb.html\t\tUntitled1.ipynb\r\n" 40 | ] 41 | } 42 | ], 43 | "source": [ 44 | "!ls" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "id": "df6faf97", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "#make sure to use the gtf you made your genome index with.. or find the same organism/version on ensemble\n", 55 | "#this gtf was taken straigh from ensemble" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 2, 61 | "id": "a4cfb217", 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "#!genome-build GRCh38.p13\r\n", 69 | "#!genome-version GRCh38\r\n", 70 | "#!genome-date 2013-12\r\n", 71 | "#!genome-build-accession GCA_000001405.28\r\n", 72 | "#!genebuild-last-updated 2021-08\r\n", 73 | "1\tensembl_havana\tgene\t1211340\t1214153\t.\t-\t.\tgene_id \"ENSG00000186827\"; gene_version \"11\"; gene_name \"TNFRSF4\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\";\r\n", 74 | "1\tensembl_havana\ttranscript\t1211340\t1214153\t.\t-\t.\tgene_id \"ENSG00000186827\"; gene_version \"11\"; transcript_id \"ENST00000379236\"; transcript_version \"4\"; gene_name \"TNFRSF4\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"TNFRSF4-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; 
ccds_id \"CCDS11\"; tag \"basic\"; transcript_support_level \"1 (assigned to previous version 3)\";\r\n", 75 | "1\tensembl_havana\texon\t1213983\t1214153\t.\t-\t.\tgene_id \"ENSG00000186827\"; gene_version \"11\"; transcript_id \"ENST00000379236\"; transcript_version \"4\"; exon_number \"1\"; gene_name \"TNFRSF4\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"TNFRSF4-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS11\"; exon_id \"ENSE00001832731\"; exon_version \"2\"; tag \"basic\"; transcript_support_level \"1 (assigned to previous version 3)\";\r\n" 76 | ] 77 | } 78 | ], 79 | "source": [ 80 | "!head -n 8 Homo_sapiens.GRCh38.105.gtf" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 3, 86 | "id": "da275a2e", 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "with open('Homo_sapiens.GRCh38.105.gtf') as f:\n", 91 | " gtf = list(f)\n", 92 | " \n", 93 | " \n", 94 | "gtf = [x for x in gtf if not x.startswith('#')]" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 4, 100 | "id": "5d4e89b6", 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "3236571" 107 | ] 108 | }, 109 | "execution_count": 4, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "len(gtf)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 1, 121 | "id": "3389cca8", 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "#gtf" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 6, 131 | "id": "7ec553e4", 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "gtf = [x for x in gtf if 'gene_id \"' in x and 'gene_name \"' in x]" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 7, 141 | "id": "930911ac", 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | 
"data": { 146 | "text/plain": [ 147 | "3080284" 148 | ] 149 | }, 150 | "execution_count": 7, 151 | "metadata": {}, 152 | "output_type": "execute_result" 153 | } 154 | ], 155 | "source": [ 156 | "len(gtf)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 8, 162 | "id": "44aba8e0", 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "data": { 167 | "text/plain": [ 168 | "'1\\tensembl_havana\\tgene\\t1211340\\t1214153\\t.\\t-\\t.\\tgene_id \"ENSG00000186827\"; gene_version \"11\"; gene_name \"TNFRSF4\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\";\\n'" 169 | ] 170 | }, 171 | "execution_count": 8, 172 | "metadata": {}, 173 | "output_type": "execute_result" 174 | } 175 | ], 176 | "source": [ 177 | "gtf[0]" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "id": "ec75cd40", 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "#your gtf might have slightly differnt format. make sure to change the split strings accordingly\n", 188 | "#e.g., some gtf files do not have any quotes" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 9, 194 | "id": "4d4a0b72", 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "gtf = list(map(lambda x: (x.split('gene_id \"')[1].split('\"')[0], x.split('gene_name \"')[1].split('\"')[0]), gtf))" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 10, 204 | "id": "7353a4f0", 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "data": { 209 | "text/plain": [ 210 | "[('ENSG00000186827', 'TNFRSF4'),\n", 211 | " ('ENSG00000186827', 'TNFRSF4'),\n", 212 | " ('ENSG00000186827', 'TNFRSF4'),\n", 213 | " ('ENSG00000186827', 'TNFRSF4'),\n", 214 | " ('ENSG00000186827', 'TNFRSF4')]" 215 | ] 216 | }, 217 | "execution_count": 10, 218 | "metadata": {}, 219 | "output_type": "execute_result" 220 | } 221 | ], 222 | "source": [ 223 | "gtf[0:5]" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 
228 | "execution_count": 12, 229 | "id": "bd41ea0a", 230 | "metadata": {}, 231 | "outputs": [ 232 | { 233 | "data": { 234 | "text/plain": [ 235 | "3080284" 236 | ] 237 | }, 238 | "execution_count": 12, 239 | "metadata": {}, 240 | "output_type": "execute_result" 241 | } 242 | ], 243 | "source": [ 244 | "len(gtf)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 13, 250 | "id": "70208ae3", 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [ 254 | "gtf = list(set(gtf))" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 14, 260 | "id": "21a7f773", 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "data": { 265 | "text/plain": [ 266 | "40839" 267 | ] 268 | }, 269 | "execution_count": 14, 270 | "metadata": {}, 271 | "output_type": "execute_result" 272 | } 273 | ], 274 | "source": [ 275 | "len(gtf)" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 3, 281 | "id": "854662be", 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "#gtf" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 16, 291 | "id": "f27e7754", 292 | "metadata": {}, 293 | "outputs": [ 294 | { 295 | "name": "stdout", 296 | "output_type": "stream", 297 | "text": [ 298 | "count_table.csv Homo_sapiens.GRCh38.105.gtf\ttutorial.Rmd\t Untitled.ipynb\r\n", 299 | "deseq_results.csv tutorial.nb.html\t\tUntitled1.ipynb\r\n" 300 | ] 301 | } 302 | ], 303 | "source": [ 304 | "!ls" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 17, 310 | "id": "d4a934dc", 311 | "metadata": {}, 312 | "outputs": [], 313 | "source": [ 314 | "import pandas as pd" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 19, 320 | "id": "00c7f321", 321 | "metadata": {}, 322 | "outputs": [ 323 | { 324 | "data": { 325 | "text/html": [ 326 | "
\n", 327 | "\n", 340 | "\n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | "
Unnamed: 0baseMeanlog2FoldChangelfcSEstatpvaluepadj
0ENSG00000160072320.844930-0.4859800.158346-3.0690990.0021470.027228
1ENSG00000069424231.9217121.1425180.2709894.2161000.0000250.000938
2ENSG000001167861218.6330250.3978000.1104443.6018400.0003160.006850
3ENSG0000011625416.7420052.7236290.7079773.8470570.0001200.003336
4ENSG00000204138316.084877-0.5353100.146291-3.6592090.0002530.005816
........................
1447ENSG000001591311000.997666-0.3776180.125794-3.0018840.0026830.031762
1448ENSG00000159259114.924454-1.7910300.474113-3.7776410.0001580.004068
1449ENSG0000016029876.770422-1.1898020.372401-3.1949510.0013990.019928
1450ENSG00000159055123.604477-0.9460330.322999-2.9289000.0034020.038006
1451ENSG000001592001182.5014991.5555950.4708043.3041290.0009530.015224
\n", 466 | "

1452 rows × 7 columns

\n", 467 | "
" 468 | ], 469 | "text/plain": [ 470 | " Unnamed: 0 baseMean log2FoldChange lfcSE stat \\\n", 471 | "0 ENSG00000160072 320.844930 -0.485980 0.158346 -3.069099 \n", 472 | "1 ENSG00000069424 231.921712 1.142518 0.270989 4.216100 \n", 473 | "2 ENSG00000116786 1218.633025 0.397800 0.110444 3.601840 \n", 474 | "3 ENSG00000116254 16.742005 2.723629 0.707977 3.847057 \n", 475 | "4 ENSG00000204138 316.084877 -0.535310 0.146291 -3.659209 \n", 476 | "... ... ... ... ... ... \n", 477 | "1447 ENSG00000159131 1000.997666 -0.377618 0.125794 -3.001884 \n", 478 | "1448 ENSG00000159259 114.924454 -1.791030 0.474113 -3.777641 \n", 479 | "1449 ENSG00000160298 76.770422 -1.189802 0.372401 -3.194951 \n", 480 | "1450 ENSG00000159055 123.604477 -0.946033 0.322999 -2.928900 \n", 481 | "1451 ENSG00000159200 1182.501499 1.555595 0.470804 3.304129 \n", 482 | "\n", 483 | " pvalue padj \n", 484 | "0 0.002147 0.027228 \n", 485 | "1 0.000025 0.000938 \n", 486 | "2 0.000316 0.006850 \n", 487 | "3 0.000120 0.003336 \n", 488 | "4 0.000253 0.005816 \n", 489 | "... ... ... 
\n", 490 | "1447 0.002683 0.031762 \n", 491 | "1448 0.000158 0.004068 \n", 492 | "1449 0.001399 0.019928 \n", 493 | "1450 0.003402 0.038006 \n", 494 | "1451 0.000953 0.015224 \n", 495 | "\n", 496 | "[1452 rows x 7 columns]" 497 | ] 498 | }, 499 | "execution_count": 19, 500 | "metadata": {}, 501 | "output_type": "execute_result" 502 | } 503 | ], 504 | "source": [ 505 | "df = pd.read_csv('deseq_results.csv')\n", 506 | "df" 507 | ] 508 | }, 509 | { 510 | "cell_type": "code", 511 | "execution_count": 20, 512 | "id": "a3fac37a", 513 | "metadata": {}, 514 | "outputs": [], 515 | "source": [ 516 | "gtf = dict(gtf)" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": 21, 522 | "id": "8d5f4049", 523 | "metadata": {}, 524 | "outputs": [], 525 | "source": [ 526 | "df['Gene Name'] = df['Unnamed: 0'].map(gtf)" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": 22, 532 | "id": "636d86cb", 533 | "metadata": {}, 534 | "outputs": [ 535 | { 536 | "data": { 537 | "text/html": [ 538 | "
\n", 539 | "\n", 552 | "\n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | "
Unnamed: 0baseMeanlog2FoldChangelfcSEstatpvaluepadjGene Name
0ENSG00000160072320.844930-0.4859800.158346-3.0690990.0021470.027228ATAD3B
1ENSG00000069424231.9217121.1425180.2709894.2161000.0000250.000938KCNAB2
2ENSG000001167861218.6330250.3978000.1104443.6018400.0003160.006850PLEKHM2
3ENSG0000011625416.7420052.7236290.7079773.8470570.0001200.003336CHD5
4ENSG00000204138316.084877-0.5353100.146291-3.6592090.0002530.005816PHACTR4
...........................
1447ENSG000001591311000.997666-0.3776180.125794-3.0018840.0026830.031762GART
1448ENSG00000159259114.924454-1.7910300.474113-3.7776410.0001580.004068CHAF1B
1449ENSG0000016029876.770422-1.1898020.372401-3.1949510.0013990.019928C21orf58
1450ENSG00000159055123.604477-0.9460330.322999-2.9289000.0034020.038006MIS18A
1451ENSG000001592001182.5014991.5555950.4708043.3041290.0009530.015224RCAN1
\n", 690 | "

1452 rows × 8 columns

\n", 691 | "
" 692 | ], 693 | "text/plain": [ 694 | " Unnamed: 0 baseMean log2FoldChange lfcSE stat \\\n", 695 | "0 ENSG00000160072 320.844930 -0.485980 0.158346 -3.069099 \n", 696 | "1 ENSG00000069424 231.921712 1.142518 0.270989 4.216100 \n", 697 | "2 ENSG00000116786 1218.633025 0.397800 0.110444 3.601840 \n", 698 | "3 ENSG00000116254 16.742005 2.723629 0.707977 3.847057 \n", 699 | "4 ENSG00000204138 316.084877 -0.535310 0.146291 -3.659209 \n", 700 | "... ... ... ... ... ... \n", 701 | "1447 ENSG00000159131 1000.997666 -0.377618 0.125794 -3.001884 \n", 702 | "1448 ENSG00000159259 114.924454 -1.791030 0.474113 -3.777641 \n", 703 | "1449 ENSG00000160298 76.770422 -1.189802 0.372401 -3.194951 \n", 704 | "1450 ENSG00000159055 123.604477 -0.946033 0.322999 -2.928900 \n", 705 | "1451 ENSG00000159200 1182.501499 1.555595 0.470804 3.304129 \n", 706 | "\n", 707 | " pvalue padj Gene Name \n", 708 | "0 0.002147 0.027228 ATAD3B \n", 709 | "1 0.000025 0.000938 KCNAB2 \n", 710 | "2 0.000316 0.006850 PLEKHM2 \n", 711 | "3 0.000120 0.003336 CHD5 \n", 712 | "4 0.000253 0.005816 PHACTR4 \n", 713 | "... ... ... ... 
\n", 714 | "1447 0.002683 0.031762 GART \n", 715 | "1448 0.000158 0.004068 CHAF1B \n", 716 | "1449 0.001399 0.019928 C21orf58 \n", 717 | "1450 0.003402 0.038006 MIS18A \n", 718 | "1451 0.000953 0.015224 RCAN1 \n", 719 | "\n", 720 | "[1452 rows x 8 columns]" 721 | ] 722 | }, 723 | "execution_count": 22, 724 | "metadata": {}, 725 | "output_type": "execute_result" 726 | } 727 | ], 728 | "source": [ 729 | "df" 730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": null, 735 | "id": "3394d1e1", 736 | "metadata": {}, 737 | "outputs": [], 738 | "source": [] 739 | }, 740 | { 741 | "cell_type": "code", 742 | "execution_count": null, 743 | "id": "283a35eb", 744 | "metadata": {}, 745 | "outputs": [], 746 | "source": [] 747 | }, 748 | { 749 | "cell_type": "code", 750 | "execution_count": null, 751 | "id": "a7a56a7e", 752 | "metadata": {}, 753 | "outputs": [], 754 | "source": [] 755 | }, 756 | { 757 | "cell_type": "code", 758 | "execution_count": null, 759 | "id": "675931c3", 760 | "metadata": {}, 761 | "outputs": [], 762 | "source": [] 763 | }, 764 | { 765 | "cell_type": "code", 766 | "execution_count": null, 767 | "id": "62108215", 768 | "metadata": {}, 769 | "outputs": [], 770 | "source": [] 771 | } 772 | ], 773 | "metadata": { 774 | "kernelspec": { 775 | "display_name": "Python 3 (ipykernel)", 776 | "language": "python", 777 | "name": "python3" 778 | }, 779 | "language_info": { 780 | "codemirror_mode": { 781 | "name": "ipython", 782 | "version": 3 783 | }, 784 | "file_extension": ".py", 785 | "mimetype": "text/x-python", 786 | "name": "python", 787 | "nbconvert_exporter": "python", 788 | "pygments_lexer": "ipython3", 789 | "version": "3.10.1" 790 | } 791 | }, 792 | "nbformat": 4, 793 | "nbformat_minor": 5 794 | } 795 | -------------------------------------------------------------------------------- /h5ad_to_seurat.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": 
"markdown", 5 | "id": "d94f83ba", 6 | "metadata": {}, 7 | "source": [ 8 | "My opinion: learn to use scanpy instead\n", 9 | "\n", 10 | "scanpy is great and integrates with machine learning very nicely, eg scVI tools, scArches, etc" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "id": "b1457418", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import scanpy as sc\n", 21 | "from scipy import io" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "id": "8cff35b4", 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "!mkdir matrix_files" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "id": "480f32c2", 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "adata = sc.read_h5ad('TS_Heart.h5ad')" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 5, 47 | "id": "4564bb37", 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "adata = adata.raw.to_adata() #only if adata has RAW saved and thats what you want!!" 
52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 14, 57 | "id": "eb07b27c", 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "with open('matrix_files/barcodes.tsv', 'w') as f:\n", 62 | " for item in adata.obs_names:\n", 63 | " f.write(item + '\\n')" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 15, 69 | "id": "0f64c503", 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "with open('matrix_files/features.tsv', 'w') as f:\n", 74 | " for item in ['\\t'.join([x,x,'Gene Expression']) for x in adata.var_names]:\n", 75 | " f.write(item + '\\n')" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 12, 81 | "id": "f375b043", 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "io.mmwrite('matrix_files/matrix', adata.X.T)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 16, 91 | "id": "fbee4699", 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "barcodes.tsv features.tsv matrix.mtx\r\n" 99 | ] 100 | } 101 | ], 102 | "source": [ 103 | "!ls matrix_files/" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 18, 109 | "id": "8417f9fe", 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "!gzip matrix_files/*" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 19, 119 | "id": "4891515a", 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "name": "stdout", 124 | "output_type": "stream", 125 | "text": [ 126 | "barcodes.tsv.gz features.tsv.gz matrix.mtx.gz\r\n" 127 | ] 128 | } 129 | ], 130 | "source": [ 131 | "!ls matrix_files/" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 22, 137 | "id": "0afd24f7", 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "adata.obs.to_csv('metadata.csv')" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 23, 147 |
"id": "9ec92fc7", 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "#example script\n", 152 | "#to run:\n", 153 | "#mkdir OUT_DIR\n", 154 | "#python name_of_this_script.py INPUT.h5ad OUT_DIR\n", 155 | "#gzip OUT_DIR/*\n", 156 | "import scanpy as sc\n", 157 | "from scipy import io\n", 158 | "import sys\n", 159 | "\n", 160 | "adata = sc.read_h5ad(sys.argv[1])\n", 161 | "out_dir = sys.argv[2]\n", 162 | "\n", 163 | "adata = adata.raw.to_adata() #only if adata has RAW saved and that's what you want!!\n", 164 | "\n", 165 | "with open(out_dir + '/barcodes.tsv', 'w') as f:\n", 166 | " for item in adata.obs_names:\n", 167 | " f.write(item + '\\n')\n", 168 | " \n", 169 | "with open(out_dir + '/features.tsv', 'w') as f:\n", 170 | " for item in ['\\t'.join([x,x,'Gene Expression']) for x in adata.var_names]:\n", 171 | " f.write(item + '\\n')\n", 172 | " \n", 173 | "io.mmwrite(out_dir +'/matrix', adata.X.T)\n", 174 | "\n", 175 | "adata.obs.to_csv(sys.argv[1] + '.metadata.csv')" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "id": "6bbf5690", 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [] 185 | } 186 | ], 187 | "metadata": { 188 | "kernelspec": { 189 | "display_name": "Python 3 (ipykernel)", 190 | "language": "python", 191 | "name": "python3" 192 | }, 193 | "language_info": { 194 | "codemirror_mode": { 195 | "name": "ipython", 196 | "version": 3 197 | }, 198 | "file_extension": ".py", 199 | "mimetype": "text/x-python", 200 | "name": "python", 201 | "nbconvert_exporter": "python", 202 | "pygments_lexer": "ipython3", 203 | "version": "3.9.12" 204 | } 205 | }, 206 | "nbformat": 4, 207 | "nbformat_minor": 5 208 | } 209 | -------------------------------------------------------------------------------- /high_quality_barplots.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "b173b621", 7 | "metadata": {}, 8 | "outputs":
[], 9 | "source": [ 10 | "import matplotlib.pyplot as plt\n", 11 | "import seaborn as sns" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 68, 17 | "id": "569d4896", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "a = [67, 56, 62, 70, 89]\n", 22 | "b = [42, 55, 61, 51, 62]\n", 23 | "\n", 24 | "vals = a + b\n", 25 | "groups = ['Gene_a']*5 + ['Control']*5" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 27, 31 | "id": "280b161c", 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/plain": [ 37 | "MannwhitneyuResult(statistic=22.5, pvalue=0.046532985074510584)" 38 | ] 39 | }, 40 | "execution_count": 27, 41 | "metadata": {}, 42 | "output_type": "execute_result" 43 | } 44 | ], 45 | "source": [ 46 | "from scipy import stats\n", 47 | "stats.mannwhitneyu(a,b)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 69, 53 | "id": "d37f53ce", 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAJgAAAEyCAYAAAAGMhkdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAawUlEQVR4nO2deZxcVZXHv50mIekOiAQQBIUEk4AEBXIGcRsQlWWQAQZkUVEkIQ4uQIwLoEhYRFARBEW2YABlERC3zCiOCwYU5JDEIQyfNDFhT0hYk04H0umu+ePcl65UqrtfJXXrvao638+nP139lqr7un7v3HPPPfedlkKhgOPEYkjWDXAaGxeYExUXmBMVF5gTFReYExUXmBMVF5gTFRdYDRCRmSLSlAFHF1gERGSoiOw/wP4dRWT3WrYpKzbLugENyqeBa0TkD8DZyUYR2Ro4E/g8MBd4bzbNqx0usDjcDLQBXwEeBFaE7YuAkWH/+dk0rba0+FxkPERkK+D3gIRNrwNHqupvM2tUjXGBRUBEhgGTsO5xJ8yCbRl+twE3Aheo6pOZNbJGuJMfh08DVwGPA/sBd4ftY4ArgE8At2bTtNriAovDDcCHVPVAVX0w2aiqL6rqNGAscEpmrash7uRHQFW7gT8MsP/pGjYnU9wHc6LiXaQTFReYE5Wm9MFEZF/gHGCLrNsSiZVYGOTvWTekKQWGiesjWTeiBhyedQOaVWCJ5XoVmJdhO2KwF/AGcmKdm1VgCfNU9YCsG1FNROTPQL+ZHLXGnXwnKi4wJyouMCcqLjAnKi4wJyq5GEWKyOXY8Hqeqp5Rg4+cV/K7kZhX8jtTcjHZnQyt99lnH6699tqsm+NUTkt/O7yLdKLiAnOi4gJzopILJz8m8+fPZ8aMGaxatWrQY9vb25k0aRITJkyoQcuag4YX2IwZM5g9e3ZF51x22WWRWtN8NLzAEss1cuRIxo0b1+9xHR0ddHZ2prJ0TnoaXmAJ48aNGzAEMmXKFObMmVPDFjUH7uQ7UXGBOVFxgTlRcYE5UXGBOVFxgTlRaZowRUdHB1OmTBlwv1N9Gl5g7e3tAHR2dqaKcyXHO9Wh4QU2adIkgNRzkZMnT47dpKai4QU2YcIEn1vMEHfynai4wJyouMCcqLjAnKi4wJyouMCcqLjAnKi4wJyouMCcqKSK5ItIO3AucCSwI9ADPAncBlysqj0iMhSrzfMprD7PMuBO4BxVXVn9pjv1QFoL9iPgy1gJlCeAl4EJwIVYyTqw8inTgZ2xsnXbAacDvxYRt5RNStov/l/D73tUdQ9MaIlV2llE9sEKPAGcrqq7AUeHv/fHLJ/ThKSd7J6NWaaDRORRrKjmFlixzW/RJy6Au8LvWcBrwHDgYODnpW8qIicBJ2GPbqqYSy+9lAULFlR83jPPPENXVxdtbW3stNNOFZ8/fvx4pk2bVvF5zUhagZ0CFIATgbeHbd3A/wIvAG8pOnYZgKr2isgLmD/21n7edxc24YnICxYs2KS1jJ2dnSxbtmyjz3cGJ63AzsDE9SBwBLAVcC8mvCGY01+Ofp8bFXgivM9e2LPdK2L8+PGVngL0reIebLV3tT+3GRlUYCLSBlwQ/rxLVZ8HnheRe4FjgQ8B1xWdsh2wJDj2o8K2suXrVHUmMHNjn+2+sd1Usop7sNXezqaTxslvo0+I/wLrSgbvGbatAoprUCfO/WGY/0XJfqeJGFRgqvoC8Jfw50dF5J9Y17Z72Hajqj5MX4ng74vIY/Q5+/cBv6hWg536Im2Y4kjgEqAD2B4YAShW+Pw74ZhPAecDTwG7Ys7/lcBhqtpbvSY79UQqJ19VXwbODD/9HdONRfvPrU7TnEbAI+xOVFxgTlRcYE5UXGBOVFxgTlRcYE5UXGBOVFxgTlRcYE5UXGBOVBr+8U0DsXDhwgGfepjgNYw2nqYW2IoVKyrKiPXnjFVOUwsMvIZRbJpeYF7DKC7u5DtRcYE5UXGBOVFpKB8sbflkL7p
QO3IrsI1Ztb1w4UJWrFhR0TlpK4AMdlwxvvK7j9wKbFNXbaclbQWQtMc565NbgSUMA7ZuGWyBuPFioUA3MBQYNcA5zxcKFMLrFgb+J6zFnpkw2HsCvFQosCZVS5uH3Ats65YWDhs6NNWxs7q7WVooMGqQc25as4bu8LoA614PxPYtLRw0SDuSz3f6yL3AYtICvCmFdRwK7N3aGr09jUhTC2wzSG0dnY3D42BOVFxgTlRcYE5UGtIHe6lQYFZ3/2PDtTVsS7PTUAJL3PU1kCpc4OY7Pg0lsL1bW6GnZ724VnehQCdQ/PyoJHi6FtazdEk4Ytsh2Ugv7Vwq1E8ad0MJbNshQzioRBz3dHfzYj/WrIcylq6nZ4P3qBUzZsxg9uzZFZ2T9zTuhhJYORL7VDzlVG5KKZnmSRPVj0ViudKmcS9btoypU6fm2uI1vMASiqecyk0p5WmaJ20a99KlSyvOOKm1xWsagTUiPT329Pg8L1xxgTUAeV644iN1JyqpLZiIjALOwSp97IgVw3oUK34118v5OeVIWy9yFFZGZldsdL8QG3DtHbbNxcr5fQILOT0OjMHK+e0lIgfGeJT5A2vX9huCSEj2v1gU3U+7rT9GtbSw32buXaQh7X/pQkxIzwIfUNXHAUSkFdi8TDm/H4jI4cCv6Cvnt0G1tU3lxUIh9civmw1jXmm3ZcFAaxKSdQJdXV3r/k7WC5SrJFfJuoJqrydIU6uoBatJBFZo9BYR2R0ruHAlcDVwaNEpNSvnN1gKM/TFvIpTo3uwSH4LkKQRVpIaneZzN4WOjo51I7+B6O21TqHceoFyleSyWFeQxoJtC2wdXr8fWB5+dgeuwr6jTMr5pemm7unu5umQg1/a8RVYfwoJ0qVGx6K9vR1gUGEltLa20tPTs16YolwluUqqy1W7klwagRUf8yLWVXZhRUrfDXweK8lXjqjl/NJQbn6yP7JOjZ40aRJA6sj8smXLKgq0ZlFdLo3AlmMJCsOAjmREKCIPYwLbBbi56PialfNLQ7n5ybwyYcKEiiLtU6dOZcGCBf12kaXbEgtZSwYVmKp2BwEcBIwTkZGYBds7HNKBleu7MPx9NPADclLOb3lvL3MrtGBZZVNUSjmL11932N7ezuTJk2vexrSjyK9jFmYU8E9MYLuEfeep6sMicitwAlbO73NYVwoZl/Ob29PD05WMCjPMpqiUchYvb8VW01Zbe0hEDsCs1H7YzX4vJq4/hcM+hcW/PklfOb87ga9nWc6vXDZFOfKQTVHKxuSH5Y3U0UJVfQArn9zf/lyX8xtsAW+esikSNiY/LG94ODrHVJoflsfHfLrA6oA8Z0sMRn14s07d4gJzouICc6LiPlhGpHmCY9osiOLjirelfSJjpVSScdE0AhtstfdLNQ5RVPIEx0qewljpObFpeIFVutq71nkUI9vbGTt6dNl9jy9aRGdXFyPb2hg7ZswG+1d1dbF0+XI6V62it7eXIWEGInndNnz4umOHtLay/bbb0t7WttFtfXzxYjorDIU0vMDynk0xdvRofnTxRWX3nXrm2cydPx/6mYFYunw5K1b2ZaMn+WHJ686QkJiw5267cem552x0W9e1pwIaXmD1lE1RStuIEQB0rlqV6ovdcost6O3p2cDqJZana/XqqO0tR8MLrJ45+fjjAPoVRnEX+s499mDSCcdz5Q0/Zu78+YwdM2adZdwYy1MtXGA5Zo/x4wbs0hLhjB0zZpO6vpjUZ9/h1A0uMCcqLjAnKi4wJyouMCcqPopsAB5fvJhTzzzbXi9atO73um2LF2fWNhdYHTNQILazq2uDbcnxtcQFVseUC8T2N3/ZNmIEk044vuZtdIHllMuuvY6ORdXr2rpWr+bKG3486HHjxoxm6pRTqva5LrCc0rFo8SZN75TrIrPABZZTxo0pn8IzGM8sWcLq1asZMWIEO+2wQ80+tz9cYDmlmt1UlngczImKC8yJigvMiYoLzImKO/l1zKMLOrjhttt
TpUK3jRjByccfxx7jB36EZrVxgdUxN9x2O/c/9FBF59Q689UFVscklmugpW/giz6cTWSgpW+Q7aIPd/KdqLjAnKi4wJyoVOyDicgdwDHhz7tU9Ziw3autORtQkQUTkU/TJ65SbgCmAztjNY22w6qt/ToUZXCakNRfvIjsClwB/A14pmRfabW13bCCDNBXbc1pQtLWi9wM+ClWO+rjwJ9KDsmk2ppjFC/66G9/VqT1wc4F3gV8QlUXi0jp/kyqrTU7lT59J5eLPsTUdBbwE1X9aT+H9VdCI/Nqa43MYE/fKSbPiz4mYDUhjxGRo8K25DF5R4pIJ/CdouNzVW2tkRns6Tt5oJLR3XCgPfwklqk1/P2bouMS5z4X1dacbElTzm8mMLN4m4g8gYUjiuNguay25mRLNSe7c1ltzcmWjRKYqu5SZluuq6052eARdicqLjAnKi4wJyouMCcqLjAnKi4wJyouMCcqLjAnKi4wJyouMCcqLjAnKi4wJyouMCcqLjAnKi4wJyouMCcqLjAnKi4wJyouMCcqLjAnKi4wJyouMCcqLjAnKi4wJyouMCcqLjAnKi4wJyouMCcqLjAnKi4wJyouMCcqLjAnKi4wJyouMCcqLjAnKi4wJyppKn1Mw555Px7YBliOFcQ6X1UfCcd4KT+nLGks2BeAD2CFsJ4AdsRK+v1NRHYJx3gpP6csab7864HRqvoWVR0PTAvb24GjvJSfMxBpKn1cWLLp3qLXr7ORpfzAy/k1AxvTfX0h/H4RuIN+SvlhlT6g/1J+0FfOzyutNSiVVLwdJiI3YY78CuBIVV3Oxpfyg75yfq+mbYdTX6QSmIhsA/wBOBFYAhygqveF3U8VHbpdOH7QUn5ghbZU9QBgXkWtduqGQQUmIrsDDwLvw4Swr6rOLTqkuFSfl/Jz1iNNMay7gTFFx99ZVFL5elW93kv5Of2RRmDDi15PKNmXWCcv5eeUJU2YYpcUx3gpP6csHmV3ouICc6LiAnOi4gJzouICc6LiAnOi4gJzouICc6LiAnOi4gJzouICc6LiAnOi4gJzouICc6LiAnOi4gJzouICc6LiAnOi4gJzouICc6LiAnOi4gJzouICc6LiAnOi4gJzouICc6LiAnOi4gJzouICc6LiAnOi4gJzouICc6LiAnOi4gJzouICc6KS5iHAFSEixwNfAXYHVgN/BM5U1YXV/iwn/1TVgonIJOBWYG+sYEMr9uz8+0Vk+2p+llMfVE1gIjIMuDj8eZeqjsGs2EqsAsjZ1fosp35oKRQKVXkjEXkvVngB4GOqemvYfg/wYaAjlAMsPuckrNrau4FhI0eOZNy4cQB0dHTQ2dlJCxH68UisBQpA8XX0R3J9Q4YMoW348AGPzQtdr71Gb2/vBtc3Z86ce4F5qnpG6TnV/O42qLoWeD78Lld1bRes2hoAnZ2dzJkzZ70DCkB3ddpXM8pdR3/09vbS2dUVuUXVpcz17d/fsdUU2MZUXXsCq7Y2EfPXXgJqMRjYCysh+CqNV4hrL7K5trKfVU2BbVB1reT1BlXXVHUmMLOKbUiFiPwZu+vmhWpvDUPerq2ao8iHsCKlEKquicibgf3CNq+61oRUzYKp6hoRORu4BjhaRBZhNSO3wIpjXTzQ+TVmJvBnrItuNGaSo2ur2igyQUQ+DnwJC1G8Rl+gtaOqH+TUBVUXmOMU43ORTlRcYE5UXGANgoi0iMh0EXl38nfWbQL3wRqCEA76LVby+pfAx1W1S0RaVDXTL7hpLJiIDCl3V+flTt9EXsAC2a8DBwJTAbIWFzSJBRORzVR1bXg9GtgSWK6qz2Xbsk1HRFpVtUdE9gR+AuyJxcA+o6q/F5EhqtqbVfsa3oKFf3AirrOAvwC/B34bsjkQkbr8P4QusCf8fgT4PjabsjNwpohsp6q9WVrpZrFgb8Ui3AeETc8DbwqvJ6rq3MQSZNC8igmCaSlnmUTkGuDTQA9wlapOq3X7iqnLO7cSRGQ8MAs
T1+3Y9NVPsHRuwmsSS5BFGysh3AiFYJnGiMhpInKyiBwcDjkfUGBz4AQROS6cl8m1NbzAgJeBXbG7+QQswXEadu2rgd1F5NLsmlcZiZUVkdOAR4DLgeuBX4nIecArwLnAUmB7YKqIvE1VC1m4Ag3dRSYOrohMADqxu/sTwCLsizkFmzPdDDhSVX+VVVvTElLTLwFOx3K+7gV2AvbBRPV9Vb1ERC4ATgPagN+o6lFZtLehLVjio6jqfGA34FhgOfAu4L+BN2JJjpDTzGwRKW3X9sBHMHEdqqpHAkcBz4R9h4jIG4DLgH9giZxP1qzBJTS0wErYCRPRtlhK0SPAY8BkYDtV/XmGbduAxGdS1bUisrmITBaRMVgYYlfMx3oiHPM08MVw6v7AGFV9CTgLOKRcrnytyOVdG4n/A+YD7wD+I2z7gar+BvriSVk1rpiSuN2bgF8DgnWLS7BQxEhgCtbtg/maKzHL9gqAqt5f04aXoe4smIi0lns9GKr6V+CrwNeAa4EJqvrr8D4tWYmr3OiuSFznACcDbwd+BtwI/BUT0ObAFBE5XUTeieXgbYHF+Z6tSeNTUFdOfrGVCYmNHwSGAb8D7lXVp4rv/qLzys7JhVFVIasplf6i7CIyHLgZSz1fAzyrqrsW7T8J+C6wdcmps7F5yGeiNbpC6kpgACKyBXAd5rAndGM+1XtV9fVSQZUI842q+nIeJoJDe0YD5wEnhRHvIcCj2FrSq7Ab6GHgOOCJRJDhBjsWGAesAm5T1e9mcAkDUjcCC13J5tid+1ngAWxS92zgUMyf/KaqnlNy3pCiL+XEcOzVqvqXGja/LCKyL+ZfbYtZrDdjVvlE4B7s2k7HJrE/Bfy82DoHF2Eo0K6qL5JD6sYHC9ZmG+AIbIncCeHvw4Fe4KfAI2FaCBEZGs7rFZE3i8gPMB/meOwLzQNPYz7Va1h87oNYd/+oqi7HRrv3YTfWlzFfDOjzG1X1tbyKC3IqsDKxn4SxwI6YoH6F3f3LsLm3z2DW6Ssisrmqdof3OhS4G7N6i4H9VfWuuFcwOMGyLsGmroZjC5RfBaap6rzgHz4OfDtsnwicGkaVuUjFSUPuushi30hEPokNvZ8IE9LbYbGf4ZjfNRv4GCa4GzGBnYUFGQtYF/N1LNj4Y2BKlqGI0gGIiGyD+VvdmJUaBdypqscWHTMS6yYvCJsOVdXf1a7Vm0ZuBFbiiG8D3AW8HxtFLQFOVNX7whTI1zBR/RJb8Pt5zH+5E/iCqj4vIjtg00FHASdkbbVKbpyPAs9hfuQ2WJd9GtZNDgfOUNUrRGQcdu1twAzgPlXN0/rSQcmFwEr++e/Bni92JdaljcISBJ/ERkybAzcBR5a8zbdV9cyS990TeEpVX416AQNQcuPsDNyBBU1fwSzwaar6pIh8EBtNvgfzyS7HRHe/qh4sIsNV9bUMLmGTyIXAYF1XcAtwCPYPvhv7B38RmIRZqJmqenIYPZ2I+WMAv1fVv4f3aQV68+CjlIxg34mNBM+gL9MBbGR4TDjmP7Fr3q3oba7IcqpnU8lMYGViVR/GHPdhmMN7tKreHfK5pmKZDy1Yd3d7aZByoCS8LAl+463Y4612wLJOb8As1IewLnGaql4mIu1YvOtYzFKfpaoPZNHualFzgSU5Sf1EsKcDX8CyHGZhKTQ9ofv4MnBQOPRNYRifnJeLoGk5Qt7W5ViGaSswTlUXisiB2Dzie7C5xUNU9eFwzgjgtbxeUyXUVGAl/sg7MAf8ZWCZqt4W7uBbsHSUlcD5qvq9cPxngYsw5/h9IVsgNww07SQivwD+HRuYXKSq3wjbP4tZ512xyfh35M0Cbyo1zaYoEtfX6Bt2E7ZNxJ7Acw7mg4wFThGRh1R1NvALLAB5bzg+N1ar5MbZG3gvdoM8rap/xPzIiZjP+G8i8qCqzsIGK3thAluCdYurN/yE+iW6BZO+ZVVDMB/qfCxWtRQT2b6Y87sCm+r5joh8HvgGNoT/O/BhVV1
Z9J4bTGjXijK+Y/EI+CIsYyPJkFiDhRyuDrnxt2Ixr18CX1XVxSKyD7BjktnRaEQTmIhshTmsWwLXqeorItKGpZPsAuyBreyZhd3Zc4HLVfXmIMY7sC70ElU9K0ojK6RkVDhCVVeH11sBP8Kudwm26GI0ttL6SeBrqnqLiFyPpd+8gi02OT0vVjgWUaaKQiDxr9g//RIsMwAsjrUPJrpvYc/13BHzrfYH9hORqeFLPAvYLw/iKsou7RWRHcQWV3xLRJKH347HRoRPAe9S1SOwUe+z2BrFY8JxZ2I+5FbAwkYXF1TZBwtziN+hLwNgBmaVHhVbrLAlFr3eD1vd8xjwOcyqXQicClwX5hI7wntmGn4o6QJPxMIMW2G+0mIR+QeW4z8qbFsRTv078D3gUuDDIjJWVR8PaTZr85BtWguqIrDQpQ0DpmPiehk4WVV/GfYfDHwTywxYiuUvtWPd4utYN3kw1rVcpaqvJ+8dvtzM7nS15V6bYXOa38B8qG8CVwOdqvqq9C0Ha8FiWNeH85Lnkz+GXTfJIKVZqEoXGazLDpjPtAaYXCSuH2IrePbBRoYdmNAK2ET1/Zi47gEOV9V/VKNNVUaAT2LXdqyqnqOqz2IjRbDr68G6+8+KyBSxxyidEvb/GVgldbCwt9pUs4s8GhPQMmAOgIgci+VtPYdN9RyK+SU3Y1/KQdiX9FtVvTGck6fFF4lTfyo2MLkfW+DaisW8esOIdkEIqP4QCztcXfQ2dwIXNlp8Ky3VFFg7oZIK8E5s9DQL87H+C1tN/XXMEjynqtOBK0RkmKqugXyJC9abbdgq/B4WuuziNiZWaRaWFTEReBt241yjqj+uQVNzSzVHkU/DutJCh4rIzqq6CiuM9Sq2kmclNpG9IDmpSFyZrewZCBFJ8rQAthGRw8P2lmC9usN843Ss+z8Oc/oPanZxQXWLYbUB/4ONEJ/DrNZ54fVh2PMSJmLBxi+FbM66QEROwdKXC9jysfNV9bGwb0esSzwMy5q9phnCD2mppsBasDnEn2FTHmBxoZVYUBUsLvbF4lFiPRASIG/CUom6sDy1u7AR5STMP/sd9tC3p/p5m6YkRiGG47ApoDFYF/wy5vhPV9XbwzG58rXSIPYAld9QvmrctcDn6u2aakGUqSKxh9K+A1tStRZ4QFVfDvvqTlwJQWRHYNm0T2EPTrlNVf+QZbtyTaFQqMnPxIkTh9Tqs2pwLcPC76FZtyXvP7lJma4nBsr9ctbHBeZEJZcLb53GwQXmRMUF5kTFBeZExQXmRMUF5kTFBeZExQXmROX/AbGgft7b5vaiAAAAAElFTkSuQmCC\n", 59 | "text/plain": [ 60 | "
" 61 | ] 62 | }, 63 | "metadata": { 64 | "needs_background": "light" 65 | }, 66 | "output_type": "display_data" 67 | } 68 | ], 69 | "source": [ 70 | "plt.figure(figsize = (2,4))\n", 71 | "\n", 72 | "ax = sns.barplot(x = groups, y = vals, capsize = 0.5, edgecolor = '0.2', lw = 2.5, errwidth = 2.5, \n", 73 | " palette = ['brown', 'mistyrose'], errcolor = '0.2')\n", 74 | "\n", 75 | "kwargs = {'edgecolor':'0.2', 'linewidth':2.5, 'fc': 'none'}\n", 76 | "\n", 77 | "ax = sns.swarmplot(x = groups, y = vals, marker = 's', s = 10, **kwargs)\n", 78 | "\n", 79 | "prev_lim = ax.get_ylim()[1]\n", 80 | "\n", 81 | "max_val = max(vals)\n", 82 | "plt.plot([0,0,1,1], [max_val+5, max_val+8, max_val+8, max_val+5], lw = 2.5, color = '0.2', \n", 83 | " clip_on = False)\n", 84 | "\n", 85 | "plt.text(x = 0.5, y = max_val+8, s = '*', ha = 'center', size = 20, weight = 'bold', color = '0.2')\n", 86 | "\n", 87 | "\n", 88 | "for axis in ['bottom', 'left']:\n", 89 | " ax.spines[axis].set_linewidth(2.5)\n", 90 | " ax.spines[axis].set_color('0.2')\n", 91 | " \n", 92 | "ax.spines['top'].set_visible(False)\n", 93 | "ax.spines['right'].set_visible(False)\n", 94 | "\n", 95 | "plt.xticks(size = 14, rotation = 35, rotation_mode = 'anchor', ha = 'right', weight = 'bold', color = '0.2')\n", 96 | "plt.yticks(size = 14, weight = 'bold', color = '0.2')\n", 97 | "\n", 98 | "ax.tick_params(width = 2.5, color = '0.2')\n", 99 | "\n", 100 | "plt.ylim(top = prev_lim)\n", 101 | "\n", 102 | "\n", 103 | "plt.savefig('bar_test.svg', bbox_inches = 'tight')\n", 104 | "plt.savefig('bar_test.png', bbox_inches = 'tight', dpi = 250, facecolor = ax.get_facecolor())" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 39, 110 | "id": "dd2b76bc", 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "import pandas as pd" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 40, 120 | "id": "dfc3cee6", 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 
| "a = [67, 56, 62, 70, 89, 45, 37, 50, 60, 34]\n", 125 | "b = [42, 55, 61, 51, 62, 25, 41, 35, 38, 22]\n", 126 | "\n", 127 | "vals = a + b\n", 128 | "gene = ['Gene_a']*10 + ['Control']*10\n", 129 | "\n", 130 | "groups = ['Treated']*5 + ['Untreated']*5 + ['Treated']*5 + ['Untreated']*5\n", 131 | "\n", 132 | "df = pd.DataFrame(zip(vals, gene, groups), columns = ['Value', 'Gene', 'Treatment'])" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 42, 138 | "id": "f8f6280a", 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "data": { 143 | "text/html": [ 144 | "
\n", 145 | "\n", 158 | "\n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | "
ValueGeneTreatment
067Gene_aTreated
156Gene_aTreated
262Gene_aTreated
370Gene_aTreated
489Gene_aTreated
\n", 200 | "
" 201 | ], 202 | "text/plain": [ 203 | " Value Gene Treatment\n", 204 | "0 67 Gene_a Treated\n", 205 | "1 56 Gene_a Treated\n", 206 | "2 62 Gene_a Treated\n", 207 | "3 70 Gene_a Treated\n", 208 | "4 89 Gene_a Treated" 209 | ] 210 | }, 211 | "execution_count": 42, 212 | "metadata": {}, 213 | "output_type": "execute_result" 214 | } 215 | ], 216 | "source": [ 217 | "df.head()" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 63, 223 | "id": "db7749d0", 224 | "metadata": {}, 225 | "outputs": [ 226 | { 227 | "data": { 228 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAARoAAAERCAYAAACkdGh5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAhMklEQVR4nO3de5wcZZ3v8c9MCCQzAVZikECQBM1EIKwh/hTwFsALL0/cI4IvBUQTDeYcRBY97LqYNQdRjrisHMiK1zUYvIEK4i5ml4OrJAIK629J0ChkCLkIGgghQOhMApOZOX88VaQz6UvVTFf17ft+vfLq7qrqqifTM79+6rn8no6hoSFERLLUWe8CiEjrU6ARkcwp0IhI5hRoRCRzCjQikrn96l2AWjKza4FZwGp3/3hdCyMiL2qpQEMIMnNmz549B7i4zmURaTcd5Xbo1klEMqdAIyKZU6ARkcy1WhtNrtasWcPSpUvZsWNH1WO7u7tZsGABM2fOzKFkIo1FgWYUli5dyl133ZXqPddcc01GpRFpXAo0oxDXZCZMmEBPT0/Z43p7eykUColqPiKtSIGmBnp6evjGN75Rdv/ChQu5//77cyyRSGNRY7CIZE6BRkQyp0AjIplToBGRzCnQiEjmFGhEJHPq3q6B3t5eFi5cWHG/SDtToBmF7u5uAAqFQqJxMvHxIu1GgWYUFixYAJB4rtP555+fdZFEGpICzSjMnDlTc5dEElBjsIhkToFGRDKnQCMimVOgEZHMKdCISOYUaEQkcwo0IpI5BRoRyZwCjYhkLteRwWbWDVwGnAEcAQwAm4CbgC+4+4CZjQUWAfOAKcAW4GZgsbs/l2d5RaQ28q7RfBX4W2A6sBF4GpgJXAF8MjrmeuAzwFHAeuBQwjrat5mZamAiTSjvP9w3R493uPtxhIAT11KOMrPZwHnR64vd/VXAWdHrOYSakIg0mbwnVd5FqKm83cx+D0wADgTuA65kT5ABuCV6XA7sAsYBpwM/Hn5SM5sPzAdmZVNsERmNvGs0HwG+Ez0/Fng50A/8FtgKHFl07BYAdx+M9hEdX8pUQo3n4NoWV0RqIe8azceBDxBqMO8C/gJYSQhAnYTG4VI6qpx3Y3SeWdQo2Fx99dWsXbs29fsee+wx+vr66OrqYsqUKanfP2PGDC655JLU7xNpZLkFGjPrAj4XvbzF3Z8AnjCzlcB7gbcC/1z0lkOBzVED8MRo26Olzu3uy4BlZraCULMZtbVr145qdclCocCWLVtqURSRppdnjaar6HqvBTCz/YHjo207gNsJPVAQGoGvA+YS2meI9udixowZI3pfvM52tfW4a31dkUbWMTQ0lNvFotpL3PO0HhgPTI5e/527X2Vm3wfOAQaBXuAVwFjgbmBO1GZT7vwrgDmzZ8+uuBZ2luJ1tutZBpE6KdvEkXdj8
BnAPxACyGGEQOPAAuAfo2PmAZ8F/kgIMluBLwFzKwUZEWlcuTYGu/vTwKXRv3LH9BNGD1+WV7lEJFsaaSsimVOgEZHMKdCISOYUaEQkcwo0IpI5BRoRyZwCjYhkToFGRDKnQCMimVOgEZHM5Z2Ppm2sW7eOhQsXVj2uu7ubBQsWMHPmzBxKJVIfCjQZ2b59e6p8Ntdcc02GpRGpLwWaDFXLSRPnrtmxY0eOpRLJnwJNhnp6eirmpIlz14i0OjUGi0jmFGhEJHMKNCKSObXRJLRmzRqWLl1ateG2t7c3pxKJNI+2DTRp121at24d27dvT3WN3t7eimNp4qBU7bjhtPaTNJu2DTSjXbcpiUKhkOgaSY8TaVZtG2hi+wOHdFRbCBOeGhqin7Duy8QKxz8xNES8gE0HlX/Au4GhBOeMbRsa4oWqR4k0nrYPNId0dDB37Niqxy3v7+fxoSEmVjn+2y+8QH/0fAhefF7JYR0dvD1FGUSaTdsHmqx0AC9LUEsZC5wwZkzm5RGpJwWajOwHiWpKIu1A42hEJHMKNCKSOQUaEcmc2mhS2jY0xPL+8n1Ju3Msi0izUKBJKG7WfQESdTGrqiiyhwJNQieMGQMDA3uNi+kfGqIADBZtiwfh7Ya9aj5xN/akToWgLCSdiwZKn1oPCjQJTers5O3DgsQd/f08VaZ2M0CJms/AwD7nkNpYunQpd911V6r3KH1qfhRoRiGurxRPYyg1VSGeOpBklLCMTFyTSZM+VbWg/CjQ1EDxNIZSUxU0dSA/adKnqhaUHwUaaVsjqQXJyCjQtBndLuxLSeSzp0DTZnS7IPWQe6Axs4nAYuBdwBHAc8DvgYvdfZWZjQUWAfOAKcAW4GZgsbs/l3d5W41uF6Qecg00UZC5D3gFoQd4HaEz5oRo2yrgeuA8wvCUh4GjgYuBWWZ2mrsPlji1pKTbBclT3jWaKwgB5U/Aqe7+MICZjQEOMLPZhCADoYZznZn9FfCvwBzgDODHeRT03t27y46RicX7nyqalpB0WzkTOzo4aT/d0Y5WuZzQxXmai7fFOZsfe+wx+vr66OrqYsqUKfu8p1puZ+VzLi2332gz6wDeG71cD3zfzI4B/gh8Cfga8I6it9wSPS4HdgHjgNMpEWjMbD4wH5hVq/I+NTSUuEu6n30H5yXdJrUVB4P41q+c4n2lcjYXCgW2bNmyzzbV8kYmz6/OScAh0fM3AU9G/44BvgKMAY4sOn4LgLsPmtlWQnvNy8uceyqhxlMzSXL4Pj00xPMpznkA8JIq501yXdlXd3c3kDwYHHTQQQwODlIoFPZqr4oDVLVt5cyYMWOU/5PWlGegKb7WU4RbqD7gLuBk4GPAyjLvrfbXtzF67yzg4NEUMpbk9uXJwUFWDZv/VI7mOmVrwYIFAIm77c8//3yWLFmSqoZSrV1Lyssz0DxJmPy8P9Ab9yCZ2X8RAs1U4DtFxx8KbDazTmBitO3RUid292XAMjNbQY1rNpWUmv8k9TFz5szU3fCVakGltsXHS3q5BRp3748CwduBHjObQKjRnBAd0gvcTmgwBjgLuA6YS2ifIdrfMFSjaW6lakHlbpPiWpCMTN7dG58m1DgmAo8QAs3UaN/l7v5fZnYjcA6wxMwuJNxiAdwN/CTX0laxamCAR9M07jbQ7O2kq2i2slK1oLhbX7dJtZVroHH335jZKYRay0mEL/qVhCBzZ3TYPML4mQ8SgsxWwoC9TzfaGJpSs7dLaaTZ22kbTZv1dkFTLRpL7gM23P1e4K0V9vcDl0X/mkK1Regaafb2SBpNm5GmWjSWRIHGzA4EDgS2ufsuM3sXcBrwgLtfn2UBpbZG0mjajDTVorEkrdF8gzDY7qRoGsGthIyVmNkh7v7FjMonMiqaatEYkrZMzga2u/tvgPdE29YTxrfMy6JgItI6kgaaI4BN0fO/BP7g7tOBDcBRWRRMRFpH0kAzAIyPnk8H1kTPt1N91K6ItLmkbTTrCGkaHgIOA
jzafjhhJrZIbsrNzC6WdMb18OPSzNQeiXad3Z000FwD3AD0AE8D3zGz4wkTJX+WUdmaRrXVK7c1SNd2q1i7dm3iBtyk44WGH6eZ2rWVKNC4+3fN7AHCbdM97v5ENAfpbYRG4baUdvXK8iNtaiPJN30ppXKwpFGvb+lKAyVLLXuT5Lik70srHrTZrhIP2HP335nZWuA4MzvM3R8ANmdXtMZXavXKcuK5TllK801fSqkcLI2s0kDJeJBkpVDRPzSUeK300c5Va6RBm/WQONCY2ScIo3UPBO4zsyXAlYSpAd/PqHwNrdFmb480F0qafCu1vG6W0tY2hyclK5mkrIHmqjWbpCOD5wNXD9v8c0IiqvcCbRloGs3w25c0830AjjjiiJaZSJikthnfJnUQMrKNLXPr1Ehz1ZpV0hrN/yKMBF5MlMbB3bea2Z+oYfpMqa208302b26dO+Ektc34duZlWmk0c0kDTQ9hkN7nzeyKou1PEVJxSgNKOt/ngQceYGBggMHBhpocLy0kaaDZAUyMVisAwMzGE9I4aDZag6s23+eUU06pmMhbZLSStmz9GngZ8B/R6yOBFcAE4J7aF0tEWknSQHM5oS3szYS2msOB10bbrqjwPhGRZIEmmrV9GiEb3s7o30rgLdE+EZGy0gzY+xUh2IiIpJJ0HM2bK+1391/Wpjgi+dtWZflizVUbvaQ1mhVEGfVKGEpxHpGGUWn0cKmRwVnPVWtlaQKE8s40qWopD3bu3JljaRpHqdHD5SZV5jFXrZUlDTSnDnt9MHAm8H7gozUtkdRM2qVVdu7cuVdAauZlSO7dvfvF26Ba6Af+c2AABgYqHjexoyPRcsrtJmmaiFJrYv+rmb0KOAP451oWSmqj1NIqjzzyCM8++2zJ4wcGBkoGpGZcNeGpoaFRTRsoOalSRmxEodfMOgijgo8Ajq9piaRmKq3EWDwtodTs7WZfhmSkuWS2F906HTSCc9Qyh00rSdrrVKm+uLE2RZE8FU9LKLUMbLMvQ6Lbl8aS9NMoF6YH0chgEakiaaC5fNjrIWALcKe7p88dKSOyPWozGE3i7FLJt5NuG6l2TcgteyRtDB4eaKQO4m7YWiTOLnWOpNtE0iobaKqNBi6mkcH5mtDdzfRp00b03ofXr6fQ18eEri6mH310qm2pr7VhA4UGbkx+cnCQVSlzPo80Z3C7q1SjWUH50cDFNDI4Z9OnTeOrX/j8iN57waWLWLVmDdOPPvrFcyTdNtJrNapVAwM8mqYLWzmDR6xagFBfnbSsuCZTadkWUM7gWqgUaIaPBpYW8vCGDVxw6aLwfP36Fx9f3LZhQ93KlrdKy7ZA4+QMTpNsvtFGdZcNNGVGA0uT6xofllAv7Nixz21Noa9vn23x8VJ/aZPNQ+OM6k6zrtOhwFxCdr29Zpe5+2drXC7JyIfPfh8AfUUTKcs1/HaNH8+Cc87OvYxSWtJk8404qjvpyODXEtbYPrDMIQo0TeK4GT1cfdnivbbVouFX8lMt2XwjjupOWqP5HHBQmX0junk1sx8B74le3uLu74m2jwUWAfOAKYSBgTcDi939uZFcS0TqK2lf3euAXcD06PW9wMnAE9G+VMzsQ+wJMsNdD3wGOApYDxwKXAzcZmbqWxRpQkn/cCcAD7n7I0TjZtz9PkJt4ytpLmhmrwD+ibCEy2PD9s0GzoteXuzurwLOil7PIaSkEJEmk/TW6VlgXPT8GeA4M3sf8EpSjLUxs/2A7xEmY74fuHPYIe8oen5L9LicUJsaB5wO/LjEeecD89HyvDICxTmDy+3PUtJu63j+WSM18iaVNNBsIASXccD9wFuA70f7/pDiepcBJwLnufsGMxu+/8ii51sA3H3QzLYS2mteXua8Uwk1HpHEKuUMrnR8raXttn788cczKkl2KgYaM1sCfItwq3MiIRD8PWCEdJ47gL9JciELUeVTwHfd/XtlDitXO6pWa9pIWGdqVlSutvL7tb1cf9MP9uqyLqdr/
Hg+fPb7OG5G+e7RdlEqZ3A5WeYMTtptvXr1agYHBxmokk60EVWr0VwEfAz4LSHgbHP3h81sCjADWO/uzyS81kzC+Jv3mNm7o21d0eMZZlYA/rHo+EOBzVED8MRo26OlTuzuy4BlZraCNqzZXH/TD7jnN+nW8Rvexd2OJnV2NuXcpUKhwCmnnALA888/z8DAAGPGjOGAAw4A9iSbT5LiI68UHtUCze7omL8ErgGuMrOfEoLOv7v74AiuOa7EtjFAN/BTQo8ThEbg6wiDBOP33D6C67W8uCZTbVZ3PJs6Sc1H8pcmJUehUNjr9eDgIP3D2pkaKcVHtUAzGTgX+ADhdml/4N3Rvy1m9m3gBnev2k4T1zqKt5nZRkI3dvE4mhuBc4AlZnYhITcxwN3ATxL8n9rW+HGlYniRqB2ieE5T/LrU9qR6jp7GJxZ+JPX7ZG/Vbp0efPBBdu7cSWdnJ11d4Wagr6+PwcHBvbYBdHZ2Mnny5BdXwihnxowZtSl8FRUDjbs/BXwJ+JKZzSD07JxLaKt5GaF95pJq50lpHvAw8EFCkNlKGLD36RHWoNrGzp07E6VlKDWnqdJ2yUfSEb+zZs3aJ7dz8bZGlGbt7bWExtxPmdl5wJcpPyUh6TmnltjWT+idumw0525H48ePr5ikqtycpsc2b2bnzp2MHz+eKZMnp75uz9EjS8Il6cSNxklSrjbN7O3hzGw8od3kA8BpJB/sJzmZMnlyxblKmtPU3OJu7TQpV5tm9raZnUa4jTmT0GALobt5kDDR8luZlU4y8cimTYnaYtQV3ljibu3Ozk5mzZoFNM+aXNXG0WwiDJSDPWNZ1hEadW9w9z9lVzTJyvbnnkvVFqOu8HwkXSO9q6ur6dbkqlajiUfqFoAfAsvc/e5siyR5UFd440i7RnpnE479qRZoVhJujW52974cyiOjUJyes9z+WLUE542eWLyVlFojvZT4lmjyCBrs661a97byBjeBSuk5pfGVWiO9lPiWqNrYmEakZVJaQKn0nOU8smkT259T/jDJlwJNCyiVnrMc3RJJPTRfq5KINB3VaESaTLWRwfG2RqJAI5KD7dGE1iSpG8pZt24dkHxk8Lp160Z8rWK1SCWhQNOm0nSFy+jFCRzyTN2wffv2hhm4p0DTZtJ2hWulytqqNlAyrXITZWty7mjQZi0o0LSZNF3hWqmy9qoNlEwry4myteyhVKBpM2m6wkVqRd3bIpI5BRoRyZwCjYhkToFGRDKnQCMimVOgEZHMKdCISOY0jkakztph7XQFGpE6a4e10xVoROqsHdZOV6ARaRRRKolq+4vXSG+WddMVaEQaRNK1z0sd1+jrpivQiDSIaqkeSqWEaJZ10xVoRBpEtVQPzbx2ugKNSJPo2xnWcEzSHtNo3eAKNCJNYvOWJ4F07TGN0g2uQCPSJAYHBoCw9varjz227HGN2A2uQCPSIKoljN+5axcAXePGNd266Qo0InWWNmF855gxWRep5hRoROosacL4uHv7sEmT8ihWTSnQiNRZ0oTx8S1Rd1dXDqWqrdwCjZldAswFZgAvBZ4Efg181t1/Fx0zFlgEzAOmAFuAm4HF7v5cXmUVkdrKMx/NRcCpwCCwETgCeA/wazObGh1zPfAZ4ChgPXAocDFwm5kpd45Ik8rzj/ebwDR3P9LdZwDxYr7dwLvNbDZwXrTtYnd/FXBW9HoOcEaOZRWRGsrt1sndrxi2aWXR8+eBdxS9viV6XA7sAsYBpwM/LnVuM5sPzAdmjb6kIo2tGddNr2dj8EXR41PAj4DPFe3bAuDug2a2ldBe8/IK55pKqPWItKxmXjc990BjZvsTbqM+AGwHznD3J82so8xbym0vtpFQQ5oFHFyDYoo0nGZeNz3XQGNmLwVuBd4IbAbmuvuqaPcfiw49FNgcNQBPjLY9Wu687r4MWGZmK1DNRlpUM6+bnltjsJkdA9xHCDKrgdcVBRmA24uex43AcwntM8P3i0gTybNGcysQZ
/XZD7jZzOJ933T3b5rZjcA5wBIzuxB4RbT/buAnOZZVRGooz0Azruj5zGH74trKPOBh4IOEILOVMGDv0+4+mHkJRSQTeXZvT01wTD9wWfRPRFqERtuKSOYUaEQkcwo0IpI5BRoRyZwCjYhkToFGRDKnQCMimVOgEZHMKdCISOYUaEQkcwo0IpI5BRoRyZwCjYhkToFGRDKnQCMimVOgEZHMKdCISOYUaEQkcwo0IpI5BRoRyZwCjYhkToFGRDKnQCMimVOgEZHMKdCISOYUaEQkcwo0IpI5BRoRyZwCjYhkToFGRDKnQCMimVOgEZHMKdCISOYUaEQkcwo0IpK5/epdgHLM7Gzgk8AxwE7gF8Cl7r6urgUTkdQaskZjZguAG4ETgM3AGOAs4B4zO6yeZROR9Bou0JjZ/sAXope3uPvRhFrNc8ChwKJ6lU1ERqZjaGio3mXYi5m9Abg7enmuu98Ybb8DeBvQ6+4zhr1nPjAfOBnYf8KECfT09FS8Tm9vL4VCgQ4a+P5xmP7osbOzk65x4+paliT6du1icHCQJJ9HGvrs8pH287v//vtXAqvd/ePD9zXi53Rk0fMtRc+fiB5fXuI9U4E58YtCocD999+f6GJD7PklaBaDg4MU+vrqXYzE0nweaeizy0eKz29OuR2NGGg6Um4H2AisBF5DaM/ZBrRio/Es4GDgWWB1XUsiac2iPT671aU2NmKg+WPR80NLPH90+BvcfRmwLLsiNQYzW0H41ljt7qfUtzSSRrt/dg3XGAz8Bngqen4WgJkdDpwUbbu9HoUSkZFruBqNu79gZouArwNnmdl6YCJwILCVPT1S7WgZsIJwqyjNZRlt/Nk1XK9TzMzeD/wNoWt7F3sG7PXWtWAiklrDBhoRaR2N2EYjIi2m4dpoRGTkinq3VjZS75YCTY1EUyc+ArwPmAlMAJ4ENgD/D1ji7tvrV0Ipx8wOAD4KvBc4FtgfeAz4JfBFd3+wxtdbBswDNrn71Fqeu1Hp1qkGzOwvgF8B1wFvAsYDvYRGbAM+S+kRzVJnZvYS4NfA/yUMoegEHgZeCnwYOL1+pQuiL7GmphpNbVxHGJUM8GXgk+7eB2Bm44D/RjSdIpp9fgXwDmASYXb6jcBl7v58dMwKouov8CPgbwld/CuB89398fjCUTqNjwPHR5vuAxa7+z3VCm1m3cD3gFcTBkTuRxgweSNwhbu/kPon0XyuI2QJAPgisMjd+wHM7FSiL2MzeyOwmBCMxhG6qb8NXFV0/EbgKOA7hJrswujY5cAF7v5c0TEAR5lZ3BtzavR4Z/S4EDiXMH/vUuBaM5tJ+NJ6M3AQ8CfgFuAz7l6oyU8jIwo0o2RmBxNulwB+C/y1uw/G+919F/Dj6NiJwL2EX7QdwIPAq4C/A44D/mrY6V8PnEgYDT0BmAtcDbw/Ot8lhD8OgEcIVf5TgTvNbI67/7pK8ccD7yLMI+slfIu/kvAHNZ4Q4FpW9Nm9N3r5AOEL4sVuWHe/MzruFOBnhL+XZwhBpofwhTETOGfYqc8m1Ga3AocRPq9NwN8Dq4Buws/6heg1wHZC8IhdF21bDwya2TGEmtcEwu/OOmAGcAlwspm9qfj3rtHo1mn0etgTsH8Zf9hm9jUzGyr690XgQkKQ2QZMd/dXA6dF731nNHO92BjgZHfvAW6Ntr0lOn8XcHm07Up3fyVhcukdwFjCN18124Hj3P0wdz/B3Y8EvhvtOzvh/7+ZFX92dxUHmWEuj457FDg6yh7wD9G+s83s+GHH7yKM/3ol4NG2twC4+7sJNRyAze5+UvRv+KzFXwFHuvuxhFrypYQg00f4zI4l/D5B+EKam/D/XBcKNKNXPNmz+Bd1PeE2ptiJ0eMhwJ+javPdRftPGnb879x9dfT8D9FjPOfrOMI3I8CnonMNAG8vc65SBoDzzKzXzJ6PznFetO/wBO9vduU+u+FeGz3e7u5PR8+/X7Tfhh3/C3f/U/Slszba9rKUZft6VBvG3
QeKynCPu29KUIaGolun0XsI2E34Wb7BzDrcfcjdrwKuKroHhz2/2AXg9yXO9UyF17uHnaP4j+QhwqzgYklGYl4KfCp6vgl4HJgCHEF7fAmtZc9n98b4s6twfNLRrc8UPR/+uSX1eJntTTnCth1+mTIVdVnfFL2cDVxZoZfgP6PHIeC8uNpMaFf5IqFhL6k1hGo0hOkZry8633zgsgTniGs9vVE36+sJbRVtwd2fBX4YvTwB+LyZvfjla2ZvNrPTCBN9Ad4R9VJBaKh98VQpLx1/bl1mljQAxWV4g5nFjcmjKUOuVKOpjYsI9+SvITTsXmhmjwAvGXbcdYQu0yOBP5jZQ4RG1yOBA4Bp7FurKcnd+8zsckJbwUeBM83scUJtZBJwA6G9ppLfAu8EesxsA6FtZ3yS67eQjxE+uxMINbwLo56hwwk9fZ8gBO2fET6n9Wa2hdC+A3CTu/8u5TUfih4nAQ+Z2dPs6XUq5wvAmYR2mt+b2SZCRwKE9pzl5d7YCFSjqQF3fwZ4A+GX8l5CjeUYQnV5JaH35hp330qoRXyT0N19DKGn4TeEXMhPDD93leteRejRuDc6Tw8hUN0QXaOaz0fHPhO9/ybgK2nK0OyiNpfXE3pv4hpnDyFH9Q3AHe6+ghAI7iD8zUwj9NItBj44gsteT6i9Phtd60RCw3+lcj5I6Oq+FXg+et+jhF7I0xu5xwk0qVJEcqBbpxZmZnMJ37qlLHf3z+VZHmlfCjStbRJ7utSHe6jMdpGa062TiGROjcEikjkFGhHJnAKNiGROgUZEMqdAIyKZU/d2QmZ2LWFZ01oruSh6dM2N7EmSVKwmKSDN7FzCCNNro9HNad57HSFNwanRyNmaqcfPOrruVELCquXu/s5o208JKRimufvGCu9dBPS5+7U1LG987ob8nNJQoEluFhUWMc/IRYRUEO8kTDX4GmFKw474ADPbz913l357VecS/oiWkXCOVU5mkf/PerQWERJdXVtqZ4t+Tokp0KS0P3BIR9oZ//vaNjREtTyZ7n4bgJlNIQSa+wjpA+40s38nZGnrDIfYpwjpH19KmGR3gbuvN7MPEDLBHQY8TZgr8zHCiOE4WdIGM9vk7lPN7J3A/yEkbVoHXOLu/xHNMr4a+BBhMmaclyUzE7q7mT5t2qjP8/CGDRR27Kh+YBVRyo97CEs2nwr8lPC53En4QuiOjrmBEBTuBFr+c0pCgSalQzo6mDt27KjPs7y/n8dHN1jyrYRZxX80s3mECZI/AH5H+AX9ISEZ0lZCCop+QuL0/0lItnUz8N8Js5b/GthoZj2EyX6rCL/07wZujba/jjBp9JfRua8cTeGTmD5tGl/9wudHfZ4LLl3EqjVralAiIExs/DRhdvc5wFcJ2Qz/jTAR8yLC7Vc8C77lP6ck1BjcvH7q7le6+/cIt1YQchfH34qvMbNDgIMJya2+yp78Jce7+xrgz9Hr26La09sIlbYTCX8QryWkJTgZOCU69rPu/mXgXzL8v9VLqRnQcfU1XsXiPne/kj25g6a6+y8ICa52uPtN7l6cWVGfE6rRNLM/Fz2P/xjeT7TaAuFLpI/QZtBFSGfwEmAJITM/7JutLT7PVYT8K7EHCd+ypY5tJU8QfiaTi7YdTghAcca7bdFj3N4Sp3coVz3V54RqNK3ituhxHiE50xzCkiu7ou37AwcCZwx7X3z/Pi/K9H8HITP/mYScKycQqt5j2bMMyP82swsJ1fmWEi13cycw28yWmtm3CA3TP4eqTWpPA5PMbJ6ZHVvmmLb9nFSjSWnb0BDL+/trcp5acfcbovWi/geh6v0YoR0Awv36EkLyre+wdya3rxN+2T8D/Nzd32pmZxKq9UsIPRy/JPyi3wZcQ8gQOIbwx3dGzf4TJTy8YQMXXLqoJudJ4UPAP7Hn//YTQrtLtb+Vq4DPERqBF7N30nmgdT+nJDR7O6GiRd1qraHWSG4E+lm3HtVoklvdZOdtZqub7LxShWo0IpI5N
QaLSOYUaEQkcwo0IpI5BRoRyZwCjYhkToFGRDKnQCMimVOgEZHMKdCISOYUaEQkc/8flh6LSg3GOCcAAAAASUVORK5CYII=\n", 229 | "text/plain": [ 230 | "
" 231 | ] 232 | }, 233 | "metadata": { 234 | "needs_background": "light" 235 | }, 236 | "output_type": "display_data" 237 | } 238 | ], 239 | "source": [ 240 | "plt.figure(figsize = (4,4))\n", 241 | "\n", 242 | "ax = sns.barplot(data = df, x = 'Gene', y = 'Value', capsize = 0.25, edgecolor = '0.2', lw = 2.5, errwidth = 2.5, \n", 243 | " palette = ['brown', 'mistyrose'], errcolor = '0.2', hue = 'Treatment')\n", 244 | "\n", 245 | "kwargs = {'edgecolor':'0.2', 'linewidth':2.5, 'fc': 'none'}\n", 246 | "\n", 247 | "ax = sns.swarmplot(data = df, x = 'Gene', y = 'Value', hue = 'Treatment',\n", 248 | " dodge = True, marker = 's', s = 10, **kwargs)\n", 249 | "\n", 250 | "prev_lim = ax.get_ylim()[1]\n", 251 | "\n", 252 | "max_val = max(vals)\n", 253 | "\n", 254 | "\n", 255 | "handles, labels = ax.get_legend_handles_labels()\n", 256 | "\n", 257 | "plt.legend(handles[2:], labels[2:], loc = 10, bbox_to_anchor = (0.5, -0.15), \n", 258 | " ncol = 2, frameon = False, fontsize = 14, labelcolor = '0.2', prop = {'weight': 'bold'})\n", 259 | "\n", 260 | "\n", 261 | "\n", 262 | "for axis in ['bottom', 'left']:\n", 263 | " ax.spines[axis].set_linewidth(2.5)\n", 264 | " ax.spines[axis].set_color('0.2')\n", 265 | " \n", 266 | "ax.spines['top'].set_visible(False)\n", 267 | "ax.spines['right'].set_visible(False)\n", 268 | "\n", 269 | "plt.xticks(size = 14, ha = 'center', weight = 'bold', color = '0.2')\n", 270 | "plt.yticks(size = 14, weight = 'bold', color = '0.2')\n", 271 | "\n", 272 | "ax.tick_params(width = 2.5, color = '0.2')\n", 273 | "\n", 274 | "plt.xlabel('')\n", 275 | "plt.ylabel('Values', size = 14, weight = 'bold', color = '0.2')\n", 276 | "\n", 277 | "#plt.ylim(top = prev_lim)\n", 278 | "\n", 279 | "\n", 280 | "plt.savefig('bar_test_big.svg', bbox_inches = 'tight')\n", 281 | "plt.savefig('bar_test_big.png', bbox_inches = 'tight', dpi = 250, facecolor = ax.get_facecolor())" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 67, 287 | "id": "8341a2ef", 
288 | "metadata": {}, 289 | "outputs": [ 290 | { 291 | "name": "stdout", 292 | "output_type": "stream", 293 | "text": [ 294 | "[[-0.2 60.4]\n", 295 | " [-0.2 79.2]]\n", 296 | "[[ 0.8 47.4]\n", 297 | " [ 0.8 60.2]]\n", 298 | "[[ 0.2 37.8 ]\n", 299 | " [ 0.2 53.415]]\n", 300 | "[[ 1.2 25.8]\n", 301 | " [ 1.2 38.6]]\n" 302 | ] 303 | } 304 | ], 305 | "source": [ 306 | "for e in ax.lines:\n", 307 | " xy = e.get_xydata()\n", 308 | " if xy[0,0] == xy[1,0]:\n", 309 | " print(xy)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "id": "a64e0933", 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [] 319 | } 320 | ], 321 | "metadata": { 322 | "kernelspec": { 323 | "display_name": "Python 3 (ipykernel)", 324 | "language": "python", 325 | "name": "python3" 326 | }, 327 | "language_info": { 328 | "codemirror_mode": { 329 | "name": "ipython", 330 | "version": 3 331 | }, 332 | "file_extension": ".py", 333 | "mimetype": "text/x-python", 334 | "name": "python", 335 | "nbconvert_exporter": "python", 336 | "pygments_lexer": "ipython3", 337 | "version": "3.10.1" 338 | } 339 | }, 340 | "nbformat": 4, 341 | "nbformat_minor": 5 342 | } 343 | -------------------------------------------------------------------------------- /high_quality_lineplots.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 16, 6 | "id": "2dde0ea3", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd\n", 11 | "import seaborn as sns\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "from scipy import stats" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 21, 19 | "id": "a6de9c7a", 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "data": { 24 | "text/html": [ 25 | "
\n", 26 | "\n", 39 | "\n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | "
Tumor sizeWeekTreatment
021Control
121Control
231Control
332Control
442Control
\n", 81 | "
" 82 | ], 83 | "text/plain": [ 84 | " Tumor size Week Treatment\n", 85 | "0 2 1 Control\n", 86 | "1 2 1 Control\n", 87 | "2 3 1 Control\n", 88 | "3 3 2 Control\n", 89 | "4 4 2 Control" 90 | ] 91 | }, 92 | "execution_count": 21, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "ctr = [2,2,3,3,4,5,5,6,4,8,6,6,9,11,12,12,15,16,20,25,27]\n", 99 | "drug = [2,3,2,3,4,3,3,4,5,5,6,6,8,7,6,7,8,11,10,11,15]\n", 100 | "\n", 101 | "week = []\n", 102 | "for x in range(1,8): #weeks 1-7\n", 103 | " week += [x,x,x]\n", 104 | "week += week\n", 105 | "\n", 106 | "vals = ctr + drug\n", 107 | "\n", 108 | "labels = ['Control']*21 + ['Drug_z']*21\n", 109 | "\n", 110 | "df = pd.DataFrame(zip(vals, week, labels), columns = ['Tumor size', 'Week', 'Treatment'])\n", 111 | "df.head()" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 50, 117 | "id": "51dfdc8f", 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAARoAAAEUCAYAAAD0ufnKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAA8bUlEQVR4nO2deXyURdbvv1nJQtgJiSQBArLvlIgMURBBQREVRBHROCKKOAvjHZ1R5xV1xhe9M1cdfN2uVzPIyJiwCeICIiooIkUWCCggEBIgJIQlgezp9P3j6W46K52mlyR9vp9Pf7q76nnqOQHy41TVqXP8zGYzgiAI7sTf2wYIgtD6EaERBMHtiNAIguB2RGgEQXA7IjSCILgdERpBENyOCI0gCG5HhEYQhDoopZKUUi4LshOhEQQBpVSQUuq6Rvq7K6UGODt+oLM3CoLQqngAeFsptRl4ytqolOoE/Al4DEgDfuXM4K1KaJRSrwLDgXSt9e+9aowgtCw+AMKAJ4AdQJGl/TDQ1tL/vLOD+7Wms05Kqa+B60aOHMk777zjbXMEocVx/vx5Fi5cyL59+wAIDg7m73//O2PHjnXkdr+GOmSNRhAEKisrWblyJXfffTf79u0jPDwcgKCgIBYtWsQLL7xAbm6u0+OL0AiCwLp161iyZAmxsbEkJSUxYcIEAD7++GPuvvtuPvvsM55++mmnx29VazSCIDjH9OnTiY2NZfTo0QCsXLkSgA4dOrBo0SJmz55NcXGx0+OL0AiCQGBgoE1k6iMqKuqyxpepkyAIdVi8eDFaa5eNJ0IjCILbEaERBMHtiNAIguB2ZDFYEAQbzz33nEPXPfvss00aVzwaQRDcjng0giDYqO2pWD2cpnowtRGPRhAEtyNCI3ic+fPno5Ri/vz53jZF8BAydWohVFRUkJKSwpdffsnhw4epqqoiMjKSESNGMHfuXHr16uXS5y1evJhPPvmE6Oho1q9f79KxBd9DhKYFUFRUxIIFC9i/fz8AYWFhxMbGkpeXx7p16+jTp4/LhaapVFZWEhQU5FUbhOaLCE0T2bNnD5s3b6awsJD27dszceJEhgwZ4tZnvvzyyzaRmTt3LgsXLiQw0Pir01pTXV0NQHp6Ou+++y579uyhoqKC6Ohobr75Zu6//37b9dOmTSM3N5epU6fSvXt3Vq9eTUVFBePGjeNPf/oT4eHhtmsAcnNzUUoB8NZbbwHwyCOPAPD000/z+eefs2fPHh577DHuuecefvnlF95++21SU1MpLi4mMjKS66+/nvnz5xMWFubWPyeh+SJC0wT27NnD+vXrqaysBKCwsNA2rXCX2Fy4cIFNmzYB0LdvX37729/i53cxv5BVBLTWLFy4EJPJREREBNHR0WRnZ/Pmm29y6NAhXnzxxRrjbty4kTZt2tChQwdOnz7NZ599RlRUFAsXLqRfv36UlpZy7tw5goKC6NevHwBt27blwoULtjFefvllwsPD6d69O/7+/hw5coRf//rXlJSUEBoaSmxsLEePHmX58uXs3r2bd999F39/WRb0RXxaaD7//HNOnjzp8PXHjh3DZDLVaKusrOTjjz9m165dDo0RFRXFTTfd5PAzjx49anvm8OHDa4iMPe+88w4mk4lu3bqxYsUK2rVrx9KlS/nXv/7Fxo0b+fWvf02fPn1s17dp04aUlBS6dOlCYmIi+/btY+fOnQD8/e9/t63RdOnShaSkJNt99gfthg4dyj//+U/atGmDyWTi+eefp6SkhJCQEJKTk4mOjmblypUsWbKE3bt3s23bNq699lqHf3ah9SD/vTSB2iJzqXZXYJ9qtSGRAdi7dy8AY8eOpV27dgA1BM2amtGKUorIyEj8/f3p0aMHAGfOnGmSbXfccQdt2rQBICAgwPaMYcOGER0dXceGn376qUnjC60Hn/ZomuJZALz66qsUFhbWaW/fvj2JiYkusqomPXv2JCAgAJPJRHp6OmazuVHBcZSIiAjb54CAAKCmqDl
C586d6213hX1C60I8miYwceLEOjsrQUFBTJw40W3PbNu2LZMmTQJg//79/M///A9VVVW2/tTUVHbu3MmgQYMA+P777ykqMhLYf/7557brBg4c2KTnhoSEAFBWVuawAFmfkZGRYVtMtrdhwACnywIJLRyPeTRKqceBm4F+QBfgFLAdeF5rvcdyzddAfUWsvtNaj/OQqQ1iXfD19K7TE088wZEjR9i/fz9JSUmkpKQQHR3NqVOnKCws5A9/+APz589n4cKF5OXlMX36dDp27Eh2djYAkydPrrE+4wg9e/YE4OzZs8yYMYN27drZdp0aIjExkS1btlBSUsKsWbOIjo4mKysLMNZzxo3z+l+h4CU8OXX6DdADOAZkAX2BmcAUpdRgrXWW3bWHMYTIyl4P2XhJhgwZ4nZhqU27du147733SElJYdOmTWRlZZGdnU2XLl1ISEhgzJgxxMfH8/bbb9u2t0+cOEFcXBxTp051alp36623kpqayo8//mgTLOs2ekP06tWL9957z7a9ffToUbp168bEiROZP3++7Dj5MB6r66SUegZYbhUUpdQfgH9Yuv+gtX7FzqN5QGud5MQzvkbqOgmCy2jiocoGF+c85tForf9aq+kbu8/ltfpeUUq9DRwHvgT+orXOa2hspVQikIhRpVIQhGaGN33Z31jeTwMpdu2lGAJzCugFPARsV0qFNzJWTwxPqL3rzRQE4XLxuNAopYKVUsuA+zHq+96mtbauxywCOmqtBwOxwH9b2nsBtzcybBaGh1R371kQBK/jUaFRSnUBNgNzgVxgvNZ6m7Vfa52mtS63fDYDH9rdHtfQuFrrJK31eCDdDWYLgnCZeHJ7ewDwCRCPIQjTtNbH7PojgXuB/6u1Pm9pvstuiCzPWCoIgqvx5Pb2GgyRsT53pfVAIPAuxqLvP4CXlFK/AOEY0yeAn4DVnjNVEAT7ozXnz58nLCzMFkXeVDw5dQqx+zwYuNruFYOx+Ps3IA2IxAjq+xlYAvxKa13mQVsFwacpKSnhxx9/tH1/8803+fHHHykpKXFqPE9ub/d04LJnLC9BELyEyWQiIyODjRs32tpKS0tt30ePHt1kz0ZCNQVBqEFJSQlbt26tt2/r1q1OeTUiNIIg1MBkMlFaWlpvX2lpqVNpUXw6TURLYf78+aSmpgLg7+9PSEgInTt3ZtCgQcycOZPhw4d710ChVREQEEBoaGi9YhMaGurUgrB4NC2IoKAgBg4cSEREBMeOHePzzz/noYce4r333mv0PmvqUUFwhLCwMK655pp6+xISEpzK/SxC4wRl+fl8N3s2ZadOXfpiF2JNq7lhwwaSk5O58sorMZvNvPHGG2it0VqjlEIpxZo1a3j44YcZO3YsKSkprF+/3tZ34sQJAE6cOGFrsy+p8vXXX3PHHXcwduxYHnroIbZt21bvdQ2xePFi2/W1X0Lz59SpU7Rv354JEybY2kJDQ5k8eTLDhg0Tj8ZTHHj9dc7s3MmBpUu9ZkOvXr1qnKhdtWpVjf6XX36ZQ4cO2RKHO8ovv/zCk08+SXZ2NoGBgZw5c4Y///nPTbItJiaGwYMH217WCgzBwcFNGkfwDjt27GDNmjWcsvuPdMGCBYwePdrpShY+vUaT+cILFDUxj211RQVn09PBbObohx9StG8f/k34BWo3YACD//KXJlpaP/379yc8PJzi4mIOHTpUo6924vBPP/3UoTGXLVuGyWSyJRiPiori9ddfr5Gg/FLMmzePefPmAbB8+XIyMzPx9/fnhRdecHgMwTtcuHCBPXv2ADUD9uxTvzqDeDRNpOT48Ua/e5qG8gnVThzuKFbBGjZsGFFRUQDceOONTtm2YcMGXnvtNQAef/xxt6Y8FVzDzp07bQLT0DqNM/i0R9NUz6IsP5/N48eD9ZfbbKayqIiRr71GSNeurjfwEuzbt88W09C7d+8afbUTh9snDLdmyrOv0VSby00w/t133/H8889jNpu5//77ueuuuy59k+B
VKisrbeV0unfvTkxMjMvGFo+mCRx4/XXMtdJZmk0mr6zVHDp0yJb9DGDGjBmNXt+xY0fb52PHjLOsW7ZsqXOdNbfw7t27bXP0L774okm27dmzhyeffBKTycSUKVN47LHHmnS/4B327Nlj+49rzJgxLq1m4dMeTVM5m5qKudZWsbmykrOWGBd3U1BQQGJiIgUFBeTl5dlKryxYsAClVI3ibrUZPHgwYWFhlJSU8PTTTxMfH8/u3bvrXDd37lw+//xzSkpKmDlzJl26dCEvr8HkhvXy3HPPUVZmHE07cuQIDzzwgK2vKWs9gucwm8388MMPgJGj2tUVK0RomsB1n3zi1edXVlayd+9eQkJCuOKKKxg4cCCzZs1ixIgRl7y3ffv2/O1vf+PVV18lNzcXk8nECy+8wFNPPVXjuj59+rBkyRKWLl3KyZMnad++PQ899BDPPGMcQbOu+zRGefnFzKw///xzE39KwRscOnTI5sE6c5bpUojQtAAcTbR+Ka8mISGBhISEGm2TJ0+uc118fDyrV1/MyvHuu+/aPvft2/eSdjgSayM0L6zeTFBQEKNGjXL5+CI0Qh0SExOJjo4mOjqavLw8m1dyyy230LNnT9auXcvatWvrvfe2227jtttu85yxwmWTn59v220cPny4rXigKxGhEeowfvx4du7cyZEjRwgICKB///7ccsst3HnnnQDk5eWRmZlZ772u3BIVPIPVmwFjEdgdiNAIdbhUDZ+HH36Yhx9+2EPWCO6kuLjYtinQr18/OnXq5JbnyPa2IPgwWmtbgJ67vBkQoREEn6WqqoqdO3cCEBUVRY8ePdz2LBEaQfBRMjMzKS4uBoy1NVcG6NVGhEYQfBD7AL22bdsyaNAgtz5PhEYQfJCsrCxbxLc7AvRqI0IjCD7I9u3bAQgMDHRLgF5tRGgEwccoKCjg4MGDgJEOxNlkVk1BhEYQfIwdO3bYPrtzS9seERpB8CFKSkpIT08H4Morr6RLly4eea4IjSD4ELt27aKqqgrwnDcDIjSC4DOYTCZbgF5kZCS9evXy2LNFaATBR9i7dy/nz58HXJ9B71J47FClUupx4GagH9AFOAVsB57XWu+xXBMEPAXcD8QA+cBK4C9a6/OeslUQWhv2AXrh4eEMGTLEo8/3pEfzG2ACUA1kAd2BmcB2pVRPyzXvAYuBHsBhIBL4HbBeKSXelyA4SXZ2Nrm5uYCRIM1aa8tTePKX912gl9Y6VmvdD3jc0h4O3K6UGgnca2n7nda6P2DNuH0dcJsHbRWEVoXVmwkICOCqq67y+PM9Jmta67/WavrG7nM5MMXuu7Xs4gagDAgBbgRWIwhCkzhz5owtS+LQoUMJDw/3uA3eTHz1G8v7aSAFsC9jmA+gta5WShVgrNfENTSQUioRSASGu8FOQWjRNCVAz76ET2Ptl0qOVhuPC41SKhhjGjUXKAJu01qfUko1tATuyNJ4T4zplSAIdpSVlZGWlgYYRQYjIyO9YkeThUYp1REI0lrnO3FvF2ANMA7IBW7WWqdZurPtLo0Eci0LwNaSizmNDJ2FMRUbDrRvql2C0FpJTU2l0lKLzJEAvaZ6Ko7i8GKwUuoOpdR+oABYq5SappT6Sik11cH7BwA7MEQmHRhtJzIAn9t9ti4C34yxPlO7vwZa6ySt9XjLuIIgYJQ+tk6bunTpUqdssidxyKNRSt0CJFNTmNIxpisngU8dGGYNEG/33JVKKWvfu1rrd5VSK4DZwGtKqYWA9U9mG7DWEVsFQTDYt28fRUVFgOcD9GrjqEfzDMZaia2SmNY6B0NkRjs4hn2xmMHA1XYvazXx+4HnMaZRvTG8p6UYU6yaRa8FQWgU65Z2aGgoQ4cO9aotjq7RDAV+0VrPV0rNs2vPA/o7MoDWuqcD11QCz1pegiA4SU5ODsePHweMAL2goCCv2uOoR1MJ1Ci6rJQKAGKBClcbJQjC5WH1Zvz9/b0SoFcbR4VmFxCrlPrA8j0SI6iuM7D
THYYJguAc586d46effgJgyJAhREREeNkix4VmieX9HsAM9AJutXz+326wSxAEJ9mxYwdmsxnwbM6ZxnBIaLTWG4G7gKMYi8J+GLErsy19giA0A8rLy20Bej179iQqKsrLFhk4HLCntV6JsSXdBfDTWp9yn1mCIDhDWloa5eXlQPPxZsDxOJr3gBLgv7TWBXbtTwO9tda/dpN9giA4iH2AXqdOnejbt6+XLbqIo2s0icAC4Hu73DEAt2DEvgiC4GX279/PuXPnAO8H6NWmKflo/IC+GGIzwk32CILgJNaicCEhIQwbNszL1tSkKULzE8ZZpSjga6XUZPeYJAhCUzl+/Dg5Oca541GjRhEcHOxli2rSFKEpBK4HvgAigPU4GBUsCIJ7sQ/QGz3a0VNBnqNJqTy11qXANODfQBCSkkEQvE5hYSH79u0DYNCgQbRr187LFtWlyTmDtdZVWuu5wCtusEcQfJrFixdjl9XAIX788Ueqq40zx81pS9seRwP2/LXWY2u1PQ6MwZhOCYLgBFVVVezatavB/vz8fI4cOdJgf0VFBampqQDExcVxxRVXuNxGV3BZqTy11j+6yhBB8EXWrVvHiy++yOjRo3n00Udt7YWFhSQlJZGcnEy/fv1477336r0/PT2dsrIyoPl6M9CI0CilTMAPWutfKaWqMc411YdZa+3NJOeC0GKZOnUqZWVlLFu2jMTERFuFgunTp1NaWsrUqVOZN29evfeazWZbgF7Hjh3p16+fx+xuKo0JhPVMk/13QRBcSEhICPfccw/Tpk1j4cKFtkXdiooKXnnlFcaOHdvgvQcOHODMmTMAjB49Gn//5ltjsTGheY6LCcHrr8EgCMJlUVlZyccff8z7779PXl4e4eHhFBcXExQUxKJFi7jllluYN28e0dHRde61bmm3adOGESOadwxtg0KjtX6uvs+CILiOdevWsWTJEpRSvPTSS6xcuZJPPvnEJj4pKSkcOXKkzhpNbm4uWVlZAIwcOZI2bdrUM3rzwdFDlT0wamXv11qfVkr9DmO3KQN4Xmtd5UYbBaHVMn36dGJjY21BditXrgSgQ4cOLFq0iNmzZ1NcXFznPqs34+fn1ywD9Grj6CLuK8B0YJhS6gbLdzPGocoA4Gn3mCcIrZvAwMBGhaK+fDLnz58nMzMTgAEDBtChQwd3mecyHF09Gg6c0VpnYoiLGdiCsUB8p3tMEwTfY/HixWitG71m586dtgC9a665xhNmXTaOCk0UFxeGBwFpWusbgP0YUypBEDxAZWWlTYhiYmKIiYm5xB3NA0eFpgzopJQKAa4E9lnaKzAqJAiC4AEyMjIoLS0FmneAXm0cFZp9GKVVTgJhGOkiwCj81lhNbEEQXIR9gF779u0ZMGCAly1yHEcXg/+KUZK2HXAI+EApdTXQEaPUrSAIl8FzzzUtgqS5B+jVxtFDlZ9jrMUoYLDWugjDy7kS+F/uM08QhNoEBwczcuRIb5vRJJpSBeE0cNru+3ngvDuMEgRf49lna1aBtno41va8vDzeeustAIYPH05ISAgtiZbjewmCD2MN0IOWtQhsRYRGEJo5Fy5cYM+ePQD079+fjh07etmipuPR9A5KqWuBJ4CrMOp3AzyntV5sd83XwHX13P6d1nqcu20UhOaG1hqTyQS0TG8GHD/r1A7Asgh8OYwEbgIOclFoGuIwYF8Nc+9lPlsQWhxVVVXs3LkTgOjoaOLi4rxskXNcUmiUUgHAWYwYmsuNAv4AeAdjynapheQXtNZJl/k8QWjR7Nmzh5KSEsA4btCcisI1hUsKjdbapJTKxogOviwsO1copdo6cPkrSqm3gePAl8BftNZ59V2olErEqKY5/HJtFITmhHUROCIigoEDB7r9eet793boummHDjVpXEcXgxcDfZRS9ecUdD2lGAJzCugFPARsV0qFN3B9T4x1HSn/IrQq8vPzASNALyAgwMvWOI+ji8GLARPwtlLqVSDfrs+stXZMBh1jEbB
Pa12ulPID/gb8GUNwbgeW13NPFvANhkcjYiO0aKwLvwBz5swhMzOTUaNGeeTZtT0Vq4fTVA+mNo56ND2AYIy0EGEYHoT9y2VordO01uWWz2bgQ7vuelfCtNZJWuvxQLorbREET1NSUsKPP14sLrJ69Wq6dOmC2dxQbYCWgaMezTIaroLgMpRSkcC9wP+1RB4D3GV3SZa7bRAEb2EymcjIyGDjxo22ttLSUjZv3kxAQECLnj45JDRa60RXPEwpdQfwMjUrKvxWKXUvxonwp4F/AC8ppX4BwjFOjQP8BKx2hR2C0BwpKSlh69at9fZt3bqVwYMHExER4WGrXIPDAXtKqTDgAYyDlQA7gSStdUkTntcOqL2e09HyOoax+Ps3YLLlulDgZ4yT4y9rrS9750sQmismk8mWa6Y2paWlNdZuWhqOBux1BbZinNa2ch+GNzJOa13gyDiWuJikS1z2jOUlCD5FVVUVoaGh9YpNaGhoi502geOLwX8F+mJMec5ZXn4YwvNXdxgmCL6C2Wzm+++/55tvvmnwiEFCQgJhYWEetsx1OCo0N2Ok7Zyite6ste4MTAWqLH2CIDiByWRiw4YNbNq0iczMTDp27MikSZNs/aGhoUyePJlhw4b5hEcTCRzUWn9hbbAkwzrApc8sCYJQD2VlZaxYsYJdu3YBRvRv165dufrqq23XLFiwgNGjR7dobwYcF5rTQG+llC2tl1JqFNAHu2RYgtCSWbx4MUqpS1/oAs6dO8f777/PIUsgXFRUFPPmzSMqKqqG5xIREdGiPRkrju46bcKIb/nRsu1sxlif8QM2NnajIDRXqqqqyMjIaDDqNj8/n+LiYnr16uXS5x4/fpwVK1bYKlD27duXGTNmEBwc7NLnNCccFZqngYlANMaisJVcZIdIaKGsW7eOF198kdGjR/Poo4/a2gsLC0lKSiI5OZl+/frVqXt9Ofz000+sXr2aqiqjivTVV1/N5MmTW1SicWdwNGAvRyk1HHiMmnE0/6O1PtXgjYLQjJk6dSplZWUsW7aMxMREwsONM7vTp0+ntLSUqVOnMm+ea84Rm81mtm/fzqZNmwCjZvZNN93UIupmu4KmJCc/BTx7yQsFoYUQEhLCPffcw7Rp01i4cCH79hl1ESsqKnjllVcYO3asS55TXV3Np59+alv0DQ4OZubMmVx55ZWXuLP10JTI4L7AeKAbNY8QoLV+3rVmCYL7qays5OOPP+b9998nLy+P8PBwiouLCQoKYtGiRdxyyy3MmzeP6Ohop59RXl5OSkqKbdE3IiKCe+65h6ioKFf9GC0CRyODHwTeouFdKhEaocWxbt06lixZglKKl156iZUrV/LJJ5/YxCclJYUjR444vUZTWFjIhx9+aMspExUVxezZs2nXrp0rf4wWgaMezV+Alr/HJgh2TJ8+ndjYWNs6ycqVKwHo0KEDixYtYvbs2badoaZy4sQJVqxYwYULFwDf2FlqDEeFphNGzuBxWuvDbrRHEDxGYGBgo4uxzk5vfHVnqTEc/cn/Y7lWdpiEVsvixYvRWjt9v/XMUnJyMlVVVfj5+TFlyhRuuukmnxYZcNyj+R0wFjiolNoG2JddMWutH3S5ZYLQgqhvZ2nGjBn07dv3Enf6Bo4KzR+BgRgRwbfbtftZ2kRoBJ/FF3aWTKWlBISGOn1/UzwagEqM6VOV008UhFZEa95Zqq66+GteUVhIqAeEphI4CgxqYkY9QWjWPPfccw5d9+yzdWNVXbmz1JAdtdvrs8MdlBcUcOzjj23fD7/3Hv0XLXLaq3F0heploAPQ2amnCEIr46effuL999+3iczVV1/NXXfd1Sq2r8/s2sW2mTPpZHfYNOuDD6goLHR6TEc9mmlACMZicCZ1F4MnOm2BIHiR2h6C1YNoyHNw15klT3kqjWE2m8latoy9L75Ij9mzObZmja2vuqKCw++/T//f/94pr8ZRobkOY9HXD7DmpLF+b9kFZwTBQVrzzlJVSQm7n36
a4+vW4R8cTMztt/P93XfXuCZr2TLiH3jAqbUaR4XmW0RQBB+mNe8sXThyBP3oo5w/cACA+HnzOPHJJ1RXVNS47nK8GkfTRIxv0qiC0IpozTtLuRs3kv7HP1JlWWvqcs019Jw7F6qq6HX//Wy+7joAJn7zDQB+/v6YysrcIzSC4Ku01jNL1VVV7P8//4df3n7b1tbnkUfot2gR/oF1ZSEsJuaynufo6e3GKleZtdYiWEKLx75A2/nz5yksLORf//pXqzuzVF5QQOrvf0/B9u0ABLZty4i//50ou+oLrsZRgfC79CWC0HIpKSkhIyPD9v3NN99kzJgxTJs2jbVr17o9G9763rULuNbPNMsakbOcTU9HL1xI2cmTAET07Yt64w3aujgvcm0cFZra0UTtgSkYVRBec6lFguBhTCYTGRkZbNx4Mc9+aWkpW7ZsYcKECTz44IN0797dixZePmazmaMffkjmCy9grqwEoPuttzL0b38j0AOlXBxdDK4TtqiUegrYA7R1tVGC4ElKSkrYunVrvX0//PADI0aMcLsNtT0Vq4dzuR4MQFVpKXueeYZja9cC4BcYyKCnn6bn3Ln4+XlmsnI5ayvlQDFwJ/CIa8wRBM9jMpnqrXcNhmdjv3bT0ig+ehT96KMU/fwzACHdujFq6dIaUb+ewNHF4K9qNQUAPYEYoMDRhymlrgWeAK7iYoXL57TWi+2uCQKeAu63jJ8PrAT+orU+7+izBMFR/Pz8CA0NrVdsQkNDW2wBt5ObN5P2+ONUnTd+bTpffTWj/vlP2nTp4nFbHF0+H48RHTze8koAYjEWiT9swvNGAjcBZxq55j1gMdADOIwhSL8D1iulWvZyv9DsOHfuHNu3b2fMmDH19ickJLS4crRmk4mf//EPds6fbxOZ3g89xJhly7wiMnAJj0Yp9V/AMeCbWl1mDE9jM/B+E573AfAOhsDV8U4sJXfvtXz9ndb6daXUNGAdhtDdBqxuwvMEoUEKCgr44IMPKCoq4vbbb2fixIls3rwZMDyZhIQEhg0b1qI8mvIzZ0hdtIiCbdsACAgPZ/jLL3PFTTd51a5LTZ0WAz9orV1S4EZrfRpAKdXQAvIUu8+rLO8bgDKMQ503IkIjuIDc3FyWL19OSYmR9eTkyZNcf/31NqFZsGABYWFhLUpkzu3ejV64kNITJwBo26cP6o03iHBw69ydNLdAu1i7z/kAWutqpVQBxnpNXH03KaUSgURguHvNE1oDR48eZcWKFZSXlwMwYcIEEhISauzAREREeMu8JmM2m8n+6CMyn3vOdj7piptvZth//zeBluqbjtJQPE/t9qbuhjkiNG2UUvX+glvRWmc36akN09Be26X24HpiTK0EoVEOHjxoSx4OMGXKlBZdltZUVsaeZ58lx1Iqxi8wkIF/+hO9EhM9tnXtCI4IzXDgSCP9ZgfHcQR7wYoEci0LwNaEWzkN3JeFsY40HCOYUBDqkJmZyZo1a6iursbPz4/p06czbNgwb5vlNCU5OexcuJCivXsBaNO1K6OWLqXzVVc5PaYr4nbqw9FdHL9LvFzF53afZ1jeb8ZYn6ndb0NrnWQ5YZ7uQluEVsSuXbtYtWoV1dXVBAQEMGvWrBYtMnlbtvDt9Ok2kel01VVcu27dZYmMO3HEEzkO/D9XPEwpdQdGWlB7cfqtUupeYIfWeo5SagUwG3hNKbUQsE4OtwFrXWGH4Ft89913fPnll4CRrOruu++ml5vP9rgLc3U1B5Yu5cDSpWA2UkTF//rXDHjiCfyDgrxsXcM4IjTH6juC4CTtuCgcVjpaXscs3+8HDgL3Wa4twAjYe0ZrXe0iOwQfwGw2s3nzZr777jvA2LKeM2dOiz23VHHuHKmLFnHq228BCAgLY/iSJVxx881etuzSeHTXSWudBCRd4ppK4FnLS2iFTJs2jVmzZrFhwwZyc3MZO3YsixcvprS0lMWLF5Oeno6/vz/x8fG88847TqVlMJvNbNiwwZZ2s23
btsydO5fIyMhL3Nk8OZeZaWxdHzP+Pw6Pj+eqN94g4sorvWyZY1xKaLKBXE8YIvgWmzZtYunSpQQHB/Pggw+yfv16Tp48SWRkpG2as2fPHqd2TkwmE2vXriUzMxOADh06cN9999GxY0eX/gyeIjslhT3/9V+2revom25i2JIlBLWgLfhGhUZr3dNDdgg+xt13303Xrl0BI8z/wIEDdOrUiYKCAnJzc4mNjXXq1HRlZSUpKSkcPHgQgK5duzJ37twG42KaWz2l2uSsXk3Gn/4EgF9AAAOeeIL4Bx9sVlvXjiBnhwSv0LnzxRJhISEhlJaWMnfuXGJjY3nssceYPn06SUlJTRqzvLycf//73zaR6d69O4mJiS0q+K42bePj8Q8OJrhzZ8Z88AG9581rcSIDzS8yWPBhwsPDWbRoEYsWLeLQoUM88sgjDBw40KGAupKSEpYvX05urjHT79mzJ3fffTdt2rRp9L7mUE/JnsK9ezlvF8tybO1a+i5aRMyttxLagisuiEcjNBu2bt1KTk4OZrOZ8PBw/P39HVoILioq4v3337eJTL9+/ZgzZ84lRaa5UFlURNa//823t97KtpkzCY+7GIif/dFHxEyb1qJFBsSjEZoR2dnZvPzyy5w9e5Z27dpx5513opRq9J4zZ87wwQcfcO7cOQCGDh3K9OnTm30CcbPZzJmdO8n+6CNOfPYZ1ZZzVz3nzq1bITIpyekKkc0FP7O59dSFU0p9DVw3cuRI3nnnHW+bI7iZvLw8PvjgA4qLiwG46qqrmDJlSrNewyg7dYpjq1eTnZxMcVZWjb7wXr246q23+HbatBrF2/yDg7l+y5aW4NU0+AcvHo3QIsnJyeHDDz+krKwMMHauJkyY0CxFprqqilNbt5L90UfkffUVZrvUoH5BQURPmkTcXXfRceRI9r/2mksrRDYXRGgEr9DQtnJt6lusPXz4MP/5z3+otGTznzx5Mtdcc41TdrizzElxdjY5KSnkrFpFWV5ejb62V15J3KxZxNx2G206dQKg4uxZ4u+/n15z57qsQmRzQYRGaFH89NNPrFq1CpPJhJ+fH9OmTfNIlQJHMZWXc/KLL8hOSaHg++9r9AWEhdH9lluImzWLDsOH1/G+gjt2hFpBhZdbIbK5IEIjeIXanorVw2lsuzk9PZ1169ZhNpvx9/dnxowZDBw48LLscFWZk6KffyY7OZlja9dSWVhYo6/jiBHEzZrFFVOnEtjWN6sTidAILYIffviBL774AoDAwEDuuusu+vTp41WbKs+f58Qnn5CdnMy53btr9AV17EjMbbcRd+edtOvXz0sWNh9EaIRmjdls5ptvvuEby1pFmzZtuOeee4iLazTpo1vtOZuaSnZyMic2bMBUq0RLl3Hj6DFrFt1uuIGAFhLH4wlEaIRmi9ls5osvvmDHjh2AETl87733EuWFbd7y06c5tmYN2cnJXKg1rQqJiiJ25kziZs4kLDa2gRF8GxEaoVlSXV3N+vXrSU9PB6Bdu3bcd999Nc5IuRNTaSn+wcGc2raN7JQUTn75pa1mNRi5ebtNnEjcrFlEJiTg14KqJXgDERofwxO5YC6XqqoqVq1axc+WMq6dO3dm7ty5tG/vuXTQJceP8+P8+ZQcPVqjPTw+nrg77yT2jju8VoytJSJC44O4MxeMM9jXti4qKiIjI8MmMlFRUdx7772EN7FsSFMpOXGC4sOHbd+zli8nMiGBrKNH8Q8J4YqpU4m76y46jRrllj8Xd5U5aS6I0Pgg7soF4wwlJSVkZGTYvr/11luMGTOG22+/nV27djF79mxCQkIaGcE5zGYz5w8c4OTGjZzctInzBw8ydsUKW3/2Rx8xbuVK2g0YwBU339yikkw1R0RofJDauWAKCgqYO3cu77zzDo899hgAt99+O4mJiW61w2QykZGRwcaNG21tpaWlbNmyheuvv545c+YQHBzssudVV1VxNjWVk5s2cXLTJkpyLlbvqe8w47F16zwW9t9
SPRVHEaERgMvLBeMsJSUlbN26td6+7du3M3z48MsWmqrSUgq2bTPE5auvqDx7ts417YcMoee99/LttGk12rOWLSP+gQcIbaFh/80JERoBMHLB9OzZk5iYmCblgnGGCxcukJGRQVxcHKW14lCslJaW1li7aQrlZ86Q/9VXnPzyS/K3bqXacvDShr8/nZQiatIkom64gZCuXfn51Vdb5WHG5oIIjQA4lwumKVRXV3Po0CHS0tLYv38/1dXVzJkzh9DQ0HrFJjQ0lIAmbBmX5ORw8ssvOblpE6d37oTqmpV5/Nu0oWtCAtGTJhF5/fW2g4zQug8zNhdEaHyM9evX1/j+8MMP2z7PmTPH5c87d+4caWlppKenU1RUVKPv8OHD/OpXv7LtdNmTkJBAWFhYg+OazWaK9u2zrbcUWXap7Anq2JFuEyYQNWkSXceNI7CB8VrzYcbmggiNh2gJ8Suuoqqqip9//pm0tDQO220ZAwQFBTFo0CBGjhxJTEwMpaWl+Pv72xaEQ0NDSUhIYNiwYXU8murKSs7s3GmIy5dfUnriRJ1nh8bEGFOiSZPoNGoU/oHyT7w5IH8LHqQ5xK9cTh6YS5Gfn09qaiq7d++uMx3q3r07I0aMYPDgwbRp04b1vXvXKJQ+0u5zwdtvs9nyecqePZzaupWTGzeSt2VLnZPRAO0GDSJ60iS63XAD7fr3b9KfX2uPX2kuiNB4kOYUv+IqysvL2bt3L6mpqRw/frxGX0hICEOHDmXkyJF069bNqfG/GDWqziKtX0AAna66yua5hLXQEre+hAiNB2kO8SvO5IGpjdls5tixY6SlpZGZmWnLdGelV69ejBw5kv79+xPYwNSloTww165fz7aZM23Juq0iExAaStdrryVq0iS6TZhAcIcODtvbGOKpeAYRGi/jjfgVZ7FG8aalpXHq1KkafREREQwfPpwRI0Y4XHrWVF5O3ubNlNmNlZ2cTNydd5K1fDnBnToRdcMNdLvhBrr+6lcEuCFCWPAMIjRexpPxK85gNps5fPgwqamp/Pzzz1TbbRv7+fnRr18/RowYQZ8+fRy2+/zBg2SnpHBszRqqLlyoE/qfsG4dV9x6K52GD5dT0a2EZic0SqnFQEN+fJDWusqD5rgdd8evNIZ9QNz58+cJCwuz7fQUFhaSnp5OWloahbUWYDt16sTIkSMZNmwYbR1MTVlVXMyJTz8l+6OPOJuWZmuvL/Q/Z+VK+v/+9yIyrYhmJzR2FAC1J9AttgiVp+NXLkXtw4xvvvkmCQkJ9O7dm02bNvHLL7/UuD4wMJCBAwcycuRI4uLiHNrZMZvNnMvIIPujjzi+YQMmS/0lK13GjaPX/ffzzdSpNdol9L/10ZyFZoPWOtHbRrRGGjrMuHHjRiZMmFDjtHR0dDQjRoxgyJAhDp+irjh7lmNr15KdnMz5Awdq9LXp2tWWjS6kWzcJ/fcRml2lSrup0wUMITwHpALPaK3TGrgnEUgEhgPtm2ulSnfGsDiKo3WMApYuZcSIEURHRzt0vbm6moLt28lOTubkxo01xMMvIIDI8eONbHTjx+MfGOjWekqC12hxlSpNwEmgCugPTAUmKqWuaUBsegLXec68lkNVVRW5ublkZ2eTk5ODo+mjptaazjREaW4uOatWkZ2SQumxYzX6wuLiiJs1i9g77iDEyTgaoXXQHIXmQ+CfWuszAEqpG4HPgTbAQmBePfdkAd9g8Wg8YqUTuCKG5VKUlZWRk5NjE5bjx49TVWW3fn7ffcyZM4fVq1dTWlrKyGXLAEi97z7AOAKwYMGCRp9RXVlJ3ldfkZ2cTP6339Y4wOgfHEz0lCnE3Xknna++Gr8GdqLEU/Etmp3QaK0P1Pr+hVLqNNAZqLfGhtY6CUhSSn2Nj3k2hYWFZGdn2175+fkNXtu+fXvi4uIwm82MGzeOTZs21bmmscOMF44cITs5mZxVq6g4fbpGX7sBA4ibNYvu06cT7MHcvkLLoNkJjVLqSWCF1jr
b8n0ShsiA4bn4LNXV1eTn59u8lezs7Donou3p1q0bcXFxxMXFERsbWyO5d/fu3fHz86PA4tE0dJixqrSU3M8+Izs5mTM7d9YYP7BtW7rfeitxs2bRfvBgj+UYFloezU5ogAXAfyulsoESjDUagGLgVW8Z5Woai2GxUllZyfHjx23CkpOTQ7klNL82gYGBxMTEEBsbS1xcHDExMY3uEoWFhTF69Gg+tXxfsGCBzQaz2UxhZibZyckcX7eOqgsXatzb6aqriJs1i+gpUwiUXSHBAZqj0LwI3AkMAqKAo8B3wAta6/3eNMxVNBTDMnjwYJuwZGdnk5ubWyMS156wsDCbpxIXF0d0dLRDiaJq73xZT01v27KFXT/8QMcjR+hy8CBhtVJeBnfuTOwddxA3axZt4+Ob9gMLPk+zExqt9TuAy/amm1semMZiWCorKzl16hSZmZl17uvUqZNtGhQXF0enTp0ue6piL0yj4+OpeuopsE/v4O9P5LXXEjdrFt2uvx7/oKDLep7guzQ7oXEHzSEPDNSMYRlZT3/hsmUEA/6JiURFRdVYX3E01L8hys+c4cLhwyT278+FQ4e4cPgw7QcP5qCl/8TKlfScMYOs5csJjYkxiqTNmEGog3E0gtAYPiE03s4DYzabybEr7XEpnnzySaey/1dXVVGSk0PxkSM2MTlvea+d/d8/OJi+jz3GwaVLActhxjVriLrxRrqMGdPgtrQgOINPCI238sAUFxfb0ioUFBQ0GMOS8cADmEwmWwzLpUSm8vx5io8cMUTEIiQXDh+mOCurRn3ohvAPDubKhQs59vHHtrbqigpy1qwxDjOKyAguxieEpj7clQemurrallbBmu3fir+/P6dOnaoTwzJq6FB+TEurEcNirq6m9ORJQ0gsYlJsEZSyvDyHbAnq2JG28fFE9O5N2/h42lreQ2NiKC8o4KsJE2pcL4cZBXfhs0Lj6jww586ds6VVqB3b0rlzZ0aMGGFLq1BSUmKIjsWjUf36EZ6VRcT335Px0Uect4iKqYGaRzXw9yc8NtYmItb38Pj4GiVF7DGVlnL4/fflMKPgMXxWaFyRB6aqqor9+/eTlpbGoVoh9YGBgbZs/7Gxsfj5+WGurqZo/37O7NpFdGAg1rxyJ1esoKPZzJHlyxt8VmDbtoT36lVDUCLi4wnr0YOANm2aZLeprEzqGAkepdULjTvywOTn55OWlsbu3bspKSmp0XfFFVfYsv0H+flxbvduftmwgTNaczY1lcqiIvyDg+tklRu7YgXZyckEd+5sExF7UWkTGemyXTGpYyR4mlYvNK6ioqLClu3/WK1TyiEhIQwZMoShvXsTmJPDmS+/RL/0EoWZmXWmJwBxd91VJ6vciU8/ZdIPP8g5IaFV0uzy0VwO1kOVtfPROJv7xGw2c/z4cVJTU9m7dy8V9qJhNhPfoQN9/P0JOXGCc2lpXKiVlc6esLg4SrKznbLDlUgeGMGNtLh8NF6lpKSE3bt3k5aWdvE0dHU1oWfP0vncOaJLS/E/epTK06cpqG8Af3/aDxxIJ6WM16hRhERGOvxLLgitDZ8QGvv/nU0mE5/27QvA+PT0GgcJ7bP9m8vKCC8oICo/n7b5+UScPo2fxaMxWV5WAsLC6Dh8uE1YOg4bRmA9kbzTDh2i4uxZTKWlmOs5w+Tn7+/2RVjxVARv4BNCY6X2YcYd27YR3qEDsbGxrF22DNMvv9A2P58++fmEnTmDXwPTyjZdu9o8lU5K0W7AAIdrPNe3ECsIrR2fERrrYcbNmzczzNI2vEcPPvvjHzlz5gw9GkkY1bZPH5uodBo1ijAHqwAIgmDgM0JTUlLC1q1buS4qinOWttzkZIYrRZZd/IpfYCAdhgy5OA0aObLBwDdBEBzDZ4TGZDJRUVHBgLFj2W5py/7oI8b+5z/s+e47hkydyhUJCXQYMkRKrwqCi/EZoQkICOCaUaM4npJia6uuqCB73ToiH3+cnuPGERER4UULBaH14hN
CY7+tXDuSJTspCZKS+BrZkREEdyH5AARBcDs+4dHYx69Um0zYR0P7+fnhHxAghwgFwY34hNCAxK8IgjeRqZMgCG5HhEYQBLcjQiMIgtsRoREEwe2I0AiC4HZEaARBcDutLcPeMaB727Zt6WvJOSMIgmdITU39BkjXWv++dl9ri6NpC3DhwgVSU1O9bYsg+BrXNdTR2oTmCNALuAA0nMAXhgPtgUIg3e1WNW87moMNYkfrsaPea1rV1MlRrEnMgW+01uN92Y7mYIPY0frtkMVgQRDcTmubOjlKEvA1kOVVK5qHHc3BBhA7apNEK7LDJ6dOgiB4Fpk6CYLgdkRoBEFwOz61RqOUuhZ4ArgKiLQ0P6e1XuxBGx4Hbgb6AV2AU8B24Hmt9R5P2WGxZR7wCBCPEYNUAKQBS7TWWz1pi8WeFGCm5esqrfXMxq534XMXA8820B2kta7yhB1WlFKdgb8A04HuwHlgL/A7rXWaB57fEyNUpCH+pbVObMqYvubRjARuAs540YbfABOAaowFtu4Yv1zbLX/BnuRXQCxGKuWfga7AVGCjp21RSj3ARZHxFgXAjlovjy5iWkRmB/A7jL+bw8AJYATgqZrK5dT9c9hr15/b1AF9yqMBPgDewRDY816y4V1gudY6C0Ap9QfgH0A4cDvwigdtWaC1LrN+UUo9aLEvBBiFh3Y8lFK9gX9ieHaxQIwnnlsPG5r6P7Ub+CuGoBwHJmitDwIopQKANp4wQGudC4yxb1NKPQcMAiqBN5o6pk8Jjdb6NIBSqm5hbM/Z8NdaTd/YfS73sC1llunkyxhC18/SVQZoT9iglAoE/o3h4c0BtnjiuQ0wQyl1F3AOSAWe8cRUxYpSyg+YZfl6GPhQKTUAw+NcCrzlKVtq2RUGLLR8/VBrndPUMXxt6tQc+Y3l/TSQ0tiFbqITcDUwGAgC8oHJWuujHnr+s5bnP6q1bmxdwN2YgJMYXlwUxhRyu1JqhAdt6Irx9wGQAPTAWMMbgOFFLGzgPnczD+iMMY38384MIELjJZRSwUqpZcD9QBFwm9b6lKft0Fqvxfh3EA28jrFI/qFSKs7dz1ZKKeDPGFPJf7v7eY3wIRCptb5Saz0AYx0PjKmKJ3+57WcYpzGmUH3AVlz1MQ/aAtimbIssXz/RWu9t7PqGEKHxAkqpLsBmYC7Gwtp4rfU2b9mjtTZrrU8CT1uaYjB2o9zNYCAAmKmUuqCUugBYBe42S1t7dxuhtT6gtT5j9/0LjF907OzxBKeACsvnA1rr81prE7DL0tbTg7ZYucvuuS85O4gIjYexzLl3AOMwTrqO9uQ6gJ0dYUqph5RS9gWtptl9DvegOSGW54UDfpa2gFrf3YZS6kl7D04pNQljqgAePAKgta7ECPcH6KuUaquU8sfYcQI44Clb7Pij5f07rfV3zg7iU0cQlFJ3YCx8+mHEjgCcxdju3qG1nuMBG37m4qJrJlBs1/2u1vpdd9tgsaMDxs9eDhzCmCZYt08rgXFa6x89YUstu7Iw1iY8GUeTheG5ZAMlQH+MfyPFGP8R7POEHRZbrgK2Yvx95Fvs6Wnpnqm1XuVBW24EPrd8vVVrvd7ZsXzNo2mH8csUb9fW0dLW3UM2hNh9HoyxEGp9eXJbtwxYDuRg/EPugTGNWw0keENkvMiLGFPZYIx/G0cxdsJGeVJkALTWO4HxFnvCMXLBfANc70mRsfCE5f0n4JPLGcinPBpBELyDr3k0giB4AREaQRDcjgiNIAhuR4RGEAS3I0IjCILbEaERBMHt+NTpbaH1YRfg1+RkTILnEKERnEIp9QTG2ZdqoIvW+qyl3T5LXnet9QlLexLGAdKjWuueHjdY8CoydRKc5VvLuz/GuS0rCXafr63ns8dThAreRzwawVk0xlmgcAwRWa+U6gt0wzijE4khOv9RSnXHKFUM8K0lwdOjwHygL8aJ5W+AP9mH/FvGex64HuiAcTTgHeAfWuvq+oxSSvXBEMFo4EuMMzq
lrvuxBWcQj0ZwCkvC7h8sX6+t9b4UQzys3+2Lv2/FSNv5OjAUIwl2KcbJ8e+VUvFgE4wdGGkKgjDO28RjHIp9rT6bLCewN2OIzBfANBGZ5oEIjXA5WKdPIy3pUa3C8gVGDpVBSqlOdu35GIc5rcmkHtZaD8RYzN2LcYDwz5a+pzC8mANAnNZ6GHCfpe9RpVRsLVuiMEQmDvgUmG6fD1nwLiI0wuVgFZpAYCzGVOkCRr7dbzFSLYzjokezFaPUjTXHzNtKKTOG+AyytFmTYl9tee8LFFmuW25p8wdG17LlRoxsdLuA27XWHs2/LDSOrNEIl8MPGFOkYOAejHQTG7XWJqXUt8CTwAyM/C5gCI19IqsMDJGx54Tl3XrdaeCXep5de0p0AaM21XCMaZinUyoIjSBCIziNpYrCToz6UNakYVYvZxtGwu977G75FqPCgBlDSFZorW3pIZVSo7hYUuRHjKTcxRhrLacs17TD8Fg+rWXOKsuY92HkPJ6mtd7oip9TuHxk6iRcLvbTJ9t3rXURsNuuvQjIsFQ6sJYNWaKUOqqUylBKncHYyZps6XsRKMRYczmqlEpTSh3G8HCSGrDlQYyMcMHAaqXUNS74+QQXIEIjXC72danKMDyR+vq+s9uSfgz4LcbUKRJj6zsXeBPLlEdrfQBjneY/GMX+BmF4O18Dv6/PEMtO2EwMwQoHNiilhjj9kwkuQzLsCYLgdsSjEQTB7YjQCILgdkRoBEFwOyI0giC4HREaQRDcjgiNIAhuR4RGEAS3I0IjCILbEaERBMHt/H+lu76hdleblQAAAABJRU5ErkJggg==\n", 123 | "text/plain": [ 124 | "
" 125 | ] 126 | }, 127 | "metadata": { 128 | "needs_background": "light" 129 | }, 130 | "output_type": "display_data" 131 | } 132 | ], 133 | "source": [ 134 | "plt.figure(figsize = (4,4))\n", 135 | "\n", 136 | "err_kws = {'capsize': 5, 'capthick': 2, 'elinewidth':2}\n", 137 | "\n", 138 | "ax = sns.lineplot(data = df, x = 'Week', y = 'Tumor size', hue = 'Treatment', lw = 2.5,\n", 139 | " style = 'Treatment', markers = ['o', '^'], dashes = False, markersize = 8 ,\n", 140 | " err_style = 'bars', err_kws = err_kws, palette = ['gray', 'firebrick'])\n", 141 | "\n", 142 | "\n", 143 | "for axis in ['bottom', 'left']:\n", 144 | " ax.spines[axis].set_linewidth(2.5)\n", 145 | " ax.spines[axis].set_color('0.2')\n", 146 | "\n", 147 | "ax.spines['top'].set_visible(False)\n", 148 | "ax.spines['right'].set_visible(False)\n", 149 | "\n", 150 | "ax.tick_params(width = 2.5, color = '0.2')\n", 151 | "\n", 152 | "plt.xticks(size = 14, weight = 'bold', color = '0.2')\n", 153 | "plt.yticks(size = 14, weight = 'bold', color = '0.2')\n", 154 | "\n", 155 | "ax.set_xlabel(ax.get_xlabel(), fontsize = 14, weight = 'bold', color = '0.2')\n", 156 | "ax.set_ylabel(ax.get_ylabel(), fontsize = 14, weight = 'bold', color = '0.2')\n", 157 | "\n", 158 | "plt.legend(frameon = False, prop = {'weight':'bold', 'size':14}, labelcolor = '0.2')\n", 159 | "\n", 160 | "\n", 161 | "for week in range(1,8):\n", 162 | " z = df[(df.Week == week) & (df.Treatment == 'Drug_z')]['Tumor size'].values\n", 163 | " c = df[(df.Week == week) & (df.Treatment == 'Control')]['Tumor size'].values\n", 164 | " \n", 165 | " p = stats.ttest_ind(z,c).pvalue\n", 166 | " \n", 167 | " max_v = df[df.Week == week]['Tumor size'].max()\n", 168 | " \n", 169 | " if p < 0.05:\n", 170 | " plt.text(x = week- 0.05, y = max_v - 0.5, s = \"*\",\n", 171 | " fontsize = 20, ha = 'center', va = 'bottom', color = '0.2', weight = 'bold')\n", 172 | " else:\n", 173 | " plt.text(x = week, y = max_v, s = \"ns\", fontsize = 12, ha = 'center', va = 'bottom', 
color = '0.2')\n", 174 | "\n", 175 | "\n", 176 | "\n", 177 | "\n", 178 | "plt.savefig('line_test.png', bbox_inches = 'tight', dpi = 250, facecolor = ax.get_facecolor())" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 45, 184 | "id": "212202ce", 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "for week in range(1,8):\n", 189 | " z = df[(df.Week == week) & (df.Treatment == 'Drug_z')]['Tumor size'].values\n", 190 | " c = df[(df.Week == week) & (df.Treatment == 'Control')]['Tumor size'].values\n", 191 | " \n", 192 | " p = stats.ttest_ind(z,c).pvalue\n", 193 | " \n", 194 | " max_v = df[df.Week == week]['Tumor size'].max()\n", 195 | " \n", 196 | " if p < 0.05:\n", 197 | " plt.text(x = week, y = max_v, s = \"*\", fontsize = 20, ha = 'center', va = 'bottom')\n", 198 | " else:\n", 199 | " plt.text(x = week, y = max_v, s = \"ns\", fontsize = 14, ha = 'center', va = 'bottom')" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 46, 205 | "id": "174fc492", 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "data": { 210 | "text/plain": [ 211 | "Ttest_indResult(statistic=-4.6475800154489, pvalue=0.009678951648207292)" 212 | ] 213 | }, 214 | "execution_count": 46, 215 | "metadata": {}, 216 | "output_type": "execute_result" 217 | } 218 | ], 219 | "source": [ 220 | "s" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "id": "f87879ca", 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [] 230 | } 231 | ], 232 | "metadata": { 233 | "kernelspec": { 234 | "display_name": "Python 3 (ipykernel)", 235 | "language": "python", 236 | "name": "python3" 237 | }, 238 | "language_info": { 239 | "codemirror_mode": { 240 | "name": "ipython", 241 | "version": 3 242 | }, 243 | "file_extension": ".py", 244 | "mimetype": "text/x-python", 245 | "name": "python", 246 | "nbconvert_exporter": "python", 247 | "pygments_lexer": "ipython3", 248 | "version": "3.10.1" 249 | } 250 | }, 
251 | "nbformat": 4, 252 | "nbformat_minor": 5 253 | } 254 | -------------------------------------------------------------------------------- /integration_comparison/harmony.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "R Notebook" 3 | output: html_notebook 4 | --- 5 | 6 | ```{r} 7 | library(Seurat) 8 | library(harmony) 9 | ``` 10 | 11 | 12 | 13 | ```{r} 14 | files <- list.files('dedif_data/') 15 | files <- grep("2i|Dox", files, value = TRUE) 16 | files <- grep("h5", files, value = TRUE) 17 | files <- grep("C1_", files, value = TRUE) 18 | files 19 | ``` 20 | 21 | 22 | 23 | ```{r} 24 | prep_integration <- function(h5_path){ 25 | #print(h5_path) 26 | day <- sub("^.*_D([^_]*)_Dox.*$", "\\1", h5_path) 27 | day <- sub("^.*_D([^_]*)_2i.*$", "\\1", day) 28 | print(day) 29 | 30 | data <- Read10X_h5(paste0('dedif_data/', h5_path)) 31 | 32 | data <- CreateSeuratObject(data, min.cells = 0, min.features = 300) 33 | 34 | ub <- quantile(data[["nFeature_RNA"]]$nFeature_RNA, probs = 0.97) 35 | data <- data[, data[["nFeature_RNA"]] < ub] 36 | data <- NormalizeData(object = data, verbose = FALSE) 37 | 38 | # 39 | 40 | 41 | data$day <- day 42 | data$Dataset <- h5_path 43 | return(data) 44 | } 45 | 46 | ``` 47 | 48 | 49 | 50 | ```{r} 51 | data_list <- sapply(files, prep_integration) 52 | ``` 53 | 54 | 55 | ```{r} 56 | data <- merge(data_list[1]$GSM3195648_D0_Dox_C1_gene_bc_mat.h5, y = data_list[2:length(data_list)]) 57 | ``` 58 | 59 | 60 | ```{r} 61 | data <- FindVariableFeatures(object = data, nfeatures = 2000, verbose = FALSE, selection.method = 'vst') 62 | data <- ScaleData(data, verbose = FALSE) 63 | data <- RunPCA(data, npcs = 40, verbose = FALSE) 64 | ``` 65 | 66 | 67 | ```{r} 68 | start <- Sys.time() 69 | data <- RunHarmony(data, "Dataset") 70 | print( Sys.time() - start ) 71 | ``` 72 | 73 | 9.710755 mins 74 | 75 | ```{r} 76 | data <- RunUMAP(data, reduction = "harmony", dims = 1:40) 77 | ``` 78 | 79 | ```{r} 80 | 
data$dayint <- data[[]]$day 81 | data$dayint <- ifelse(data$dayint == "iPSC", 20, data$dayint) 82 | data$dayint <- as.numeric(data$dayint) 83 | ``` 84 | 85 | 86 | ```{r} 87 | FeaturePlot(data, "dayint") 88 | ``` 89 | 90 | 91 | 92 | 93 | 94 | 95 | ```{r} 96 | files <- list.files('pbmc_cd16/') 97 | files <- grep("MH|new", files, value = TRUE) 98 | files 99 | ``` 100 | 101 | 102 | 103 | ```{r} 104 | prep_integration <- function(h5_path){ 105 | 106 | data <-CreateSeuratObject(counts = Read10X(paste0('pbmc_cd16/', h5_path)), min.cells = 0, min.features = 300) 107 | 108 | 109 | ub <- quantile(data[["nFeature_RNA"]]$nFeature_RNA, probs = 0.97) 110 | data <- data[, data[["nFeature_RNA"]] < ub] 111 | data <- NormalizeData(object = data, verbose = FALSE) 112 | 113 | data <- FindVariableFeatures(object = data, nfeatures = 2000, verbose = FALSE, selection.method = 'vst') 114 | 115 | 116 | data$Sample <- h5_path 117 | return(data) 118 | } 119 | 120 | ``` 121 | 122 | 123 | ```{r} 124 | data_list <- sapply(files, prep_integration) 125 | ``` 126 | 127 | 128 | 129 | 130 | ```{r} 131 | data <- merge(data_list[1]$MH8919176, y = data_list[2:length(data_list)]) 132 | ``` 133 | 134 | 135 | 136 | ```{r} 137 | data <- FindVariableFeatures(object = data, nfeatures = 2000, verbose = FALSE, selection.method = 'vst') 138 | data <- ScaleData(data, verbose = FALSE) 139 | data <- RunPCA(data, npcs = 40, verbose = FALSE) 140 | ``` 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | ```{r} 149 | start <- Sys.time() 150 | data <- RunHarmony(data, "Sample") 151 | print( Sys.time() - start ) 152 | ``` 153 | 154 | 2.746183 mins 155 | 156 | 157 | ```{r} 158 | data <- RunUMAP(data, reduction = "harmony", dims = 1:40) 159 | ``` 160 | 161 | 162 | ```{r} 163 | df <- read.csv('pbmc_cd16/cell_types.csv') 164 | df 165 | ``` 166 | 167 | 168 | ```{r} 169 | data$cell_types <- df$initial_clustering[match(rownames(data[[]]), df$covid_index)] 170 | ``` 171 | 172 | 173 | ```{r} 174 | DimPlot(data, group.by = 
'cell_types') 175 | ``` 176 | 177 | 178 | 179 | ```{r} 180 | DimPlot(data[,data$cell_types == "CD16" | data$cell_types == "CD14"], group.by = 'cell_types') 181 | ``` 182 | 183 | 184 | 185 | 186 | Two datasets 187 | 188 | 189 | 190 | ```{r} 191 | prep_integration <- function(h5_path){ 192 | 193 | data <-CreateSeuratObject(counts = Read10X(paste0('two_different/', h5_path)), min.cells = 0, min.features = 300) 194 | 195 | data$Sample <- h5_path 196 | 197 | if (h5_path == 'lung'){ 198 | data$cell_type = 'lung_na' 199 | } 200 | else{ 201 | df <- read.csv('two_different/muscle_meta.csv') 202 | data$cell_type <- df$cell_type 203 | } 204 | 205 | ub <- quantile(data[["nFeature_RNA"]]$nFeature_RNA, probs = 0.97) 206 | data <- data[, data[["nFeature_RNA"]] < ub] 207 | data <- NormalizeData(object = data, verbose = FALSE) 208 | 209 | data <- FindVariableFeatures(object = data, nfeatures = 2000, verbose = FALSE, selection.method = 'vst') 210 | 211 | 212 | 213 | return(data) 214 | } 215 | 216 | ``` 217 | 218 | 219 | 220 | ```{r} 221 | data_list <- c(prep_integration('lung'), prep_integration('muscle')) 222 | ``` 223 | 224 | 225 | ```{r} 226 | data <- merge(prep_integration('lung') , y = prep_integration('muscle')) 227 | ``` 228 | 229 | ```{r} 230 | data 231 | ``` 232 | 233 | ```{r} 234 | data <- FindVariableFeatures(object = data, nfeatures = 2000, verbose = FALSE, selection.method = 'vst') 235 | data <- ScaleData(data, verbose = FALSE) 236 | data <- RunPCA(data, npcs = 40, verbose = FALSE) 237 | ``` 238 | 239 | 240 | 241 | ```{r} 242 | start <- Sys.time() 243 | data <- RunHarmony(data, "Sample") 244 | print( Sys.time() - start ) 245 | ``` 246 | 247 | 28.89322 secs 248 | 249 | 250 | 251 | 252 | ```{r} 253 | data <- RunUMAP(data, reduction = "harmony", dims = 1:40) 254 | ``` 255 | 256 | 257 | 258 | 259 | ```{r} 260 | DimPlot(data, group.by = 'Sample') 261 | ``` 262 | 263 | 264 | 265 | 266 | ```{r} 267 | DimPlot(data, group.by = 'cell_type') 268 | ``` 269 | 270 | 271 | 272 | 
```{r} 273 | DimPlot(data[, data$cell_type == 'lung_na' | data$cell_type == 'Macrophage'], group.by = 'cell_type') 274 | ``` 275 | 276 | 277 | 278 | 279 | ```{r} 280 | DimPlot(data[, data$cell_type == 'lung_na' | data$cell_type == 'Endothelial'], group.by = 'cell_type') 281 | ``` 282 | 283 | 284 | 285 | ```{r} 286 | DimPlot(data[, data$cell_type == 'lung_na' | data$cell_type == 'SkMus_1'], group.by = 'cell_type') 287 | ``` 288 | 289 | 290 | 291 | 292 | -------------------------------------------------------------------------------- /integration_comparison/readme.txt: -------------------------------------------------------------------------------- 1 | comparison of multiple RNA integraiton methods 2 | -------------------------------------------------------------------------------- /integration_comparison/seurat_cca.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "R Notebook" 3 | output: html_notebook 4 | --- 5 | 6 | ```{r} 7 | library(Seurat) 8 | ``` 9 | 10 | 11 | ```{r} 12 | files <- list.files('dedif_data/') 13 | files <- grep("2i|Dox", files, value = TRUE) 14 | files <- grep("h5", files, value = TRUE) 15 | files <- grep("C1_", files, value = TRUE) 16 | files 17 | ``` 18 | 19 | 20 | ########################## CCA ###################################### 21 | 22 | dedif 23 | 24 | ```{r} 25 | prep_integration <- function(h5_path){ 26 | #print(h5_path) 27 | day <- sub("^.*_D([^_]*)_Dox.*$", "\\1", h5_path) 28 | day <- sub("^.*_D([^_]*)_2i.*$", "\\1", day) 29 | print(day) 30 | 31 | data <- Read10X_h5(paste0('dedif_data/', h5_path)) 32 | 33 | data <- CreateSeuratObject(data, min.cells = 0, min.features = 300) 34 | 35 | ub <- quantile(data[["nFeature_RNA"]]$nFeature_RNA, probs = 0.97) 36 | data <- data[, data[["nFeature_RNA"]] < ub] 37 | data <- NormalizeData(object = data, verbose = FALSE) 38 | 39 | data <- FindVariableFeatures(object = data, nfeatures = 2000, verbose = FALSE, selection.method = 'vst') 40 | 41 | 42 
| data$day <- day 43 | return(data) 44 | } 45 | 46 | ``` 47 | 48 | 49 | 50 | ```{r} 51 | data_list <- sapply(files, prep_integration) 52 | ``` 53 | 54 | ```{r} 55 | features <- SelectIntegrationFeatures(object.list = data_list) 56 | ``` 57 | 58 | 59 | ```{r} 60 | start <- Sys.time() 61 | anchors <- FindIntegrationAnchors(object.list = data_list, anchor.features = features, reduction = "cca") 62 | 63 | 64 | data <- IntegrateData(anchorset = anchors) 65 | 66 | print( Sys.time() - start ) 67 | ``` 68 | 69 | 70 | 71 | FAILED, out of memory 72 | 73 | 74 | 75 | 76 | CD16 removed 77 | 78 | ```{r} 79 | files <- list.files('pbmc_cd16/') 80 | files <- grep("MH|new", files, value = TRUE) 81 | files 82 | ``` 83 | 84 | 85 | 86 | ```{r} 87 | prep_integration <- function(h5_path){ 88 | 89 | data <-CreateSeuratObject(counts = Read10X(paste0('pbmc_cd16/', h5_path)), min.cells = 0, min.features = 300) 90 | 91 | 92 | ub <- quantile(data[["nFeature_RNA"]]$nFeature_RNA, probs = 0.97) 93 | data <- data[, data[["nFeature_RNA"]] < ub] 94 | data <- NormalizeData(object = data, verbose = FALSE) 95 | 96 | data <- FindVariableFeatures(object = data, nfeatures = 2000, verbose = FALSE, selection.method = 'vst') 97 | 98 | 99 | data$Sample <- h5_path 100 | return(data) 101 | } 102 | 103 | ``` 104 | 105 | 106 | ```{r} 107 | data_list <- sapply(files, prep_integration) 108 | ``` 109 | 110 | 111 | 112 | ```{r} 113 | features <- SelectIntegrationFeatures(object.list = data_list) 114 | ``` 115 | 116 | 117 | ```{r} 118 | start <- Sys.time() 119 | anchors <- FindIntegrationAnchors(object.list = data_list, anchor.features = features, reduction = "cca") 120 | 121 | 122 | data <- IntegrateData(anchorset = anchors) 123 | 124 | print( Sys.time() - start ) 125 | ``` 126 | 127 | 128 | 1.3 hours 129 | 130 | 131 | 132 | ```{r} 133 | data <- ScaleData(data, verbose = FALSE) 134 | data <- RunPCA(data, npcs = 40, verbose = FALSE) 135 | data <- RunUMAP(data, reduction = "pca", dims = 1:40) 136 | data <- 
FindNeighbors(data, reduction = "pca", dims = 1:40) 137 | data <- FindClusters(data, resolution = 0.5) 138 | 139 | ``` 140 | 141 | 142 | ```{r} 143 | data 144 | ``` 145 | 146 | 147 | ```{r} 148 | df <- read.csv('pbmc_cd16/cell_types.csv') 149 | df 150 | ``` 151 | 152 | 153 | ```{r} 154 | data$cell_types <- df$initial_clustering[match(rownames(data[[]]), df$covid_index)] 155 | ``` 156 | 157 | 158 | ```{r} 159 | DimPlot(data, group.by = 'cell_types') 160 | ``` 161 | 162 | 163 | ```{r} 164 | DimPlot(data[,data$cell_types == "CD16" | data$cell_types == "CD14"], group.by = 'cell_types') 165 | ``` 166 | 167 | 168 | 169 | 170 | 171 | Two datasets 172 | 173 | 174 | 175 | ```{r} 176 | prep_integration <- function(h5_path){ 177 | 178 | data <-CreateSeuratObject(counts = Read10X(paste0('two_different/', h5_path)), min.cells = 0, min.features = 300) 179 | 180 | data$Sample <- h5_path 181 | 182 | if (h5_path == 'lung'){ 183 | data$cell_type = 'lung_na' 184 | } 185 | else{ 186 | df <- read.csv('two_different/muscle_meta.csv') 187 | data$cell_type <- df$cell_type 188 | } 189 | 190 | ub <- quantile(data[["nFeature_RNA"]]$nFeature_RNA, probs = 0.97) 191 | data <- data[, data[["nFeature_RNA"]] < ub] 192 | data <- NormalizeData(object = data, verbose = FALSE) 193 | 194 | data <- FindVariableFeatures(object = data, nfeatures = 2000, verbose = FALSE, selection.method = 'vst') 195 | 196 | 197 | 198 | return(data) 199 | } 200 | 201 | ``` 202 | 203 | 204 | ```{r} 205 | data_list <- c(prep_integration('lung'), prep_integration('muscle')) 206 | ``` 207 | 208 | 209 | 210 | ```{r} 211 | features <- SelectIntegrationFeatures(object.list = data_list) 212 | ``` 213 | 214 | 215 | ```{r} 216 | start <- Sys.time() 217 | anchors <- FindIntegrationAnchors(object.list = data_list, anchor.features = features, reduction = "cca") 218 | 219 | 220 | data <- IntegrateData(anchorset = anchors) 221 | 222 | print( Sys.time() - start ) 223 | ``` 224 | 225 | 3.075165 mins 226 | 227 | 228 | ```{r} 229 | data <- 
ScaleData(data, verbose = FALSE) 230 | data <- RunPCA(data, npcs = 40, verbose = FALSE) 231 | data <- RunUMAP(data, reduction = "pca", dims = 1:40) 232 | data <- FindNeighbors(data, reduction = "pca", dims = 1:40) 233 | data <- FindClusters(data, resolution = 0.5) 234 | 235 | ``` 236 | 237 | 238 | 239 | ```{r} 240 | DimPlot(data, group.by = 'cell_type') 241 | ``` 242 | 243 | ```{r} 244 | DimPlot(data[, data$cell_type == 'lung_na' | data$cell_type == 'Macrophage'], group.by = 'cell_type') 245 | ``` 246 | 247 | ```{r} 248 | DimPlot(data[, data$cell_type == 'lung_na' | data$cell_type == 'Endothelial'], group.by = 'cell_type') 249 | ``` 250 | 251 | 252 | ```{r} 253 | DimPlot(data[, data$cell_type == 'lung_na' | data$cell_type == 'SkMus_1'], group.by = 'cell_type') 254 | ``` 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | ```{r} 263 | FeaturePlot(data, c("Cdh5"), max.cutoff = 2) 264 | ``` 265 | 266 | 267 | ```{r} 268 | FeaturePlot(data, c("Ptprc"), max.cutoff = 2) 269 | ``` 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | -------------------------------------------------------------------------------- /integration_comparison/seurat_rpca.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "R Notebook" 3 | output: html_notebook 4 | --- 5 | 6 | 7 | 8 | ```{r} 9 | library(Seurat) 10 | ``` 11 | 12 | 13 | ```{r} 14 | files <- list.files('dedif_data/') 15 | files <- grep("2i|Dox", files, value = TRUE) 16 | files <- grep("h5", files, value = TRUE) 17 | files <- grep("C1_", files, value = TRUE) 18 | files 19 | ``` 20 | 21 | 22 | ########################## rpca ###################################### 23 | 24 | 25 | ```{r} 26 | prep_integration <- function(h5_path){ 27 | #print(h5_path) 28 | day <- sub("^.*_D([^_]*)_Dox.*$", "\\1", h5_path) 29 | day <- sub("^.*_D([^_]*)_2i.*$", "\\1", day) 30 | print(day) 31 | 32 | data <- Read10X_h5(paste0('dedif_data/', h5_path)) 33 | 34 | data <- CreateSeuratObject(data, min.cells = 
0, min.features = 300) 35 | 36 | ub <- quantile(data[["nFeature_RNA"]]$nFeature_RNA, probs = 0.97) 37 | data <- data[, data[["nFeature_RNA"]] < ub] 38 | data <- NormalizeData(object = data, verbose = FALSE) 39 | 40 | data <- FindVariableFeatures(object = data, nfeatures = 2000, verbose = FALSE, selection.method = 'vst') 41 | 42 | 43 | data$day <- day 44 | return(data) 45 | } 46 | 47 | ``` 48 | 49 | 50 | 51 | ```{r} 52 | data_list <- sapply(files, prep_integration) 53 | ``` 54 | 55 | ```{r} 56 | features <- SelectIntegrationFeatures(object.list = data_list) 57 | ``` 58 | 59 | ```{r} 60 | #dont do in cca 61 | scale_pca <- function(x){ 62 | x <- ScaleData(x, features = features, verbose = FALSE) 63 | x <- RunPCA(x, features = features, verbose = FALSE) 64 | return(x) 65 | } 66 | 67 | 68 | data_list <- lapply(X = data_list, scale_pca) 69 | ``` 70 | 71 | 72 | 73 | ```{r} 74 | start <- Sys.time() 75 | anchors <- FindIntegrationAnchors(object.list = data_list, anchor.features = features, reduction = "rpca") 76 | 77 | 78 | data <- IntegrateData(anchorset = anchors) 79 | 80 | print( Sys.time() - start ) 81 | ``` 82 | 83 | 84 | 85 | 86 | ```{r} 87 | data <- ScaleData(data, verbose = FALSE) 88 | data <- RunPCA(data, npcs = 40, verbose = FALSE) 89 | data <- RunUMAP(data, reduction = "pca", dims = 1:40) 90 | data <- FindNeighbors(data, reduction = "pca", dims = 1:40) 91 | data <- FindClusters(data, resolution = 0.5) 92 | 93 | data$dayint <- data[[]]$day 94 | data$dayint <- ifelse(data$dayint == "iPSC", 20, data$dayint) 95 | data$dayint <- as.numeric(data$dayint) 96 | ``` 97 | 98 | ```{r} 99 | data <- readRDS("rpca_dedif.rds") 100 | ``` 101 | 102 | ```{r} 103 | FeaturePlot(data, "dayint") 104 | ``` 105 | 106 | 107 | oops, R crashed and it didn't save notebook.. 
similar enough to above that just reloading object 108 | 109 | ```{r} 110 | data <- readRDS("rpca_cd16.rds") 111 | ``` 112 | 113 | 114 | 115 | 116 | ```{r} 117 | df <- read.csv('pbmc_cd16/cell_types.csv') 118 | data$cell_types <- df$initial_clustering[match(rownames(data[[]]), df$covid_index)] 119 | ``` 120 | 121 | 122 | 123 | ```{r} 124 | DimPlot(data, group.by = 'cell_types') 125 | ``` 126 | 127 | 128 | 129 | ```{r} 130 | DimPlot(data[,data$cell_types == "CD16" | data$cell_types == "CD14"], group.by = 'cell_types') 131 | ``` 132 | 133 | 134 | 135 | 136 | 137 | Two datasets 138 | 139 | 140 | 141 | ```{r} 142 | prep_integration <- function(h5_path){ 143 | 144 | data <-CreateSeuratObject(counts = Read10X(paste0('two_different/', h5_path)), min.cells = 0, min.features = 300) 145 | 146 | data$Sample <- h5_path 147 | 148 | if (h5_path == 'lung'){ 149 | data$cell_type = 'lung_na' 150 | } 151 | else{ 152 | df <- read.csv('two_different/muscle_meta.csv') 153 | data$cell_type <- df$cell_type 154 | } 155 | 156 | ub <- quantile(data[["nFeature_RNA"]]$nFeature_RNA, probs = 0.97) 157 | data <- data[, data[["nFeature_RNA"]] < ub] 158 | data <- NormalizeData(object = data, verbose = FALSE) 159 | 160 | data <- FindVariableFeatures(object = data, nfeatures = 2000, verbose = FALSE, selection.method = 'vst') 161 | 162 | 163 | 164 | return(data) 165 | } 166 | 167 | ``` 168 | 169 | 170 | 171 | ```{r} 172 | data_list <- c(prep_integration('lung'), prep_integration('muscle')) 173 | ``` 174 | 175 | ```{r} 176 | features <- SelectIntegrationFeatures(object.list = data_list) 177 | ``` 178 | 179 | 180 | ```{r} 181 | #dont do in cca 182 | scale_pca <- function(x){ 183 | x <- ScaleData(x, features = features, verbose = FALSE) 184 | x <- RunPCA(x, features = features, verbose = FALSE) 185 | return(x) 186 | } 187 | 188 | 189 | data_list <- lapply(X = data_list, scale_pca) 190 | ``` 191 | 192 | 193 | ```{r} 194 | start <- Sys.time() 195 | anchors <- FindIntegrationAnchors(object.list = 
data_list, anchor.features = features, reduction = "rpca") 196 | 197 | 198 | data <- IntegrateData(anchorset = anchors) 199 | 200 | print( Sys.time() - start ) 201 | ``` 202 | 203 | 34.42784 secs 204 | 205 | 206 | ```{r} 207 | data <- ScaleData(data, verbose = FALSE) 208 | data <- RunPCA(data, npcs = 40, verbose = FALSE) 209 | data <- RunUMAP(data, reduction = "pca", dims = 1:40) 210 | data <- FindNeighbors(data, reduction = "pca", dims = 1:40) 211 | data <- FindClusters(data, resolution = 0.5) 212 | 213 | ``` 214 | 215 | 216 | 217 | ```{r} 218 | DimPlot(data, group.by = 'cell_type') 219 | ``` 220 | 221 | 222 | ```{r} 223 | DimPlot(data[, data$cell_type == 'lung_na' | data$cell_type == 'Macrophage'], group.by = 'cell_type') 224 | ``` 225 | 226 | 227 | 228 | ```{r} 229 | DimPlot(data[, data$cell_type == 'lung_na' | data$cell_type == 'Endothelial'], group.by = 'cell_type') 230 | ``` 231 | 232 | 233 | 234 | 235 | -------------------------------------------------------------------------------- /monocle3_tutorial.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "R Notebook" 3 | output: html_notebook 4 | --- 5 | 6 | 7 | ```{r} 8 | BiocManager::install(c('BiocGenerics', 'DelayedArray', 'DelayedMatrixStats', 9 | 'limma', 'lme4', 'S4Vectors', 'SingleCellExperiment', 10 | 'SummarizedExperiment', 'batchelor', 'HDF5Array', 11 | 'terra', 'ggrastr')) 12 | ``` 13 | 14 | ```{r} 15 | devtools::install_github('cole-trapnell-lab/monocle3') 16 | ``` 17 | 18 | ```{r} 19 | install.packages('Seurat') 20 | ``` 21 | 22 | ```{r} 23 | devtools::install_github("satijalab/seurat-wrappers") 24 | ``` 25 | 26 | 27 | 28 | ```{r} 29 | library(monocle3) 30 | library(Seurat) 31 | ``` 32 | 33 | 34 | ```{r} 35 | files <- list.files('data') 36 | files <- grep("2i|Dox", files, value = TRUE) 37 | files <- grep("h5", files, value = TRUE) 38 | files 39 | ``` 40 | 41 | 42 | 43 | ```{r} 44 | input_files <- function(h5_path){ 45 | day <- 
sub("^.*_D([^_]*)_Dox.*$", "\\1", h5_path) 46 | day <- sub("^.*_D([^_]*)_2i.*$", "\\1", day) 47 | 48 | data <- Read10X_h5(paste0('data/', h5_path)) 49 | data <- CreateSeuratObject(data, min.cells = 0, min.features = 200) 50 | data[["percent.mt"]] <- PercentageFeatureSet(data, pattern = "mt-") 51 | lb <- quantile(data[["nFeature_RNA"]]$nFeature_RNA, probs = 0.02) 52 | ub <- quantile(data[["nFeature_RNA"]]$nFeature_RNA, probs = 0.97) 53 | data <- data[, data[["nFeature_RNA"]] > lb & data[["nFeature_RNA"]] < ub & data[["percent.mt"]] < 15] 54 | 55 | data$day <- day 56 | return(data) 57 | } 58 | ``` 59 | 60 | 61 | ```{r} 62 | data_list <- sapply(files, input_files) 63 | ``` 64 | 65 | 66 | #### This is where you would integrate if you wanted to do so #### 67 | #### Example at the bottom of the notebook ###### 68 | 69 | 70 | ```{r} 71 | data <- merge(data_list[1]$GSM3195648_D0_Dox_C1_gene_bc_mat.h5, y = data_list[2:length(data_list)]) 72 | ``` 73 | 74 | 75 | ```{r} 76 | data 77 | ``` 78 | 79 | 80 | ```{r} 81 | data <- NormalizeData(object = data, verbose = FALSE) 82 | data <- FindVariableFeatures(object = data, nfeatures = 2000, verbose = FALSE, selection.method = 'vst') 83 | data <- ScaleData(data, verbose = FALSE) 84 | data <- RunPCA(data, npcs = 30, verbose = FALSE) 85 | data <- FindNeighbors(data, dims = 1:30) 86 | 87 | data <- RunUMAP(data, reduction = "pca", dims = 1:30) 88 | 89 | 90 | data@active.assay = 'RNA' 91 | ``` 92 | 93 | 94 | 95 | ```{r} 96 | DimPlot(data, group.by = c("day")) 97 | ``` 98 | 99 | 100 | ```{r} 101 | data$dayint <- data[[]]$day 102 | data$dayint <- ifelse(data$dayint == "iPSC", 20, data$dayint) 103 | data$dayint <- as.numeric(data$dayint) 104 | ``` 105 | 106 | 107 | 108 | ```{r} 109 | FeaturePlot(data, "dayint") 110 | ``` 111 | 112 | 113 | ```{r} 114 | cds <- SeuratWrappers::as.cell_data_set(data) #change to cds here 115 | ``` 116 | 117 | 118 | ```{r} 119 | cds <- cluster_cells(cds) 120 | ``` 121 | 122 | 123 | ```{r} 124 | plot_cells(cds, 
show_trajectory_graph = FALSE, 125 | color_cells_by = "partition") 126 | ``` 127 | 128 | 129 | 130 | ```{r} 131 | cds <- learn_graph(cds, use_partition = FALSE) #graph learned across all partitions 132 | ``` 133 | 134 | 135 | ```{r} 136 | cds <- order_cells(cds) 137 | ``` 138 | 139 | 140 | 141 | ```{r} 142 | plot_cells(cds, color_cells_by = "pseudotime", label_branch_points=FALSE, label_leaves=FALSE) 143 | ``` 144 | 145 | 146 | ```{r} 147 | rowData(cds)$gene_name <- rownames(cds) 148 | rowData(cds)$gene_short_name <- rowData(cds)$gene_name 149 | ``` 150 | 151 | 152 | 153 | ```{r} 154 | plot_cells(cds, 155 | genes=c('Sox2', 'Nanog', 'Col6a2'), 156 | label_cell_groups=FALSE, 157 | show_trajectory_graph=FALSE, 158 | min_expr = 3) 159 | ``` 160 | 161 | 162 | ```{r} 163 | cds_pt_res <- graph_test(cds, neighbor_graph="principal_graph", cores=8) 164 | ``` 165 | 166 | 167 | ```{r} 168 | cds_pt_res <- readRDS("cds_pt_res.rds") 169 | ``` 170 | 171 | ```{r} 172 | cds_pt_res 173 | ``` 174 | 175 | ```{r} 176 | cds_pt_res <- na.omit(cds_pt_res) 177 | cds_pt_res <- cds_pt_res[cds_pt_res$p_value < 0.05 & cds_pt_res$status == "OK", ] 178 | cds_pt_res 179 | ``` 180 | 181 | 182 | ```{r} 183 | cds_pt_res[order(-cds_pt_res$morans_test_statistic),] 184 | ``` 185 | 186 | 187 | ```{r} 188 | plot_cells(cds, genes=c("Col1a2", "Uba52", "Serpine1", "Dppa5a"), 189 | show_trajectory_graph=FALSE, 190 | label_cell_groups=FALSE, 191 | label_leaves=FALSE) 192 | ``` 193 | 194 | 195 | 196 | ```{r} 197 | cds_subset <- choose_cells(cds) 198 | ``` 199 | 200 | 201 | 202 | ```{r} 203 | cds_subset 204 | ``` 205 | 206 | 207 | ```{r} 208 | cds_subset_pt_res <- graph_test(cds_subset, neighbor_graph="principal_graph", cores=8) 209 | cds_subset_pt_res <- na.omit(cds_subset_pt_res) 210 | cds_subset_pt_res <- cds_subset_pt_res[cds_subset_pt_res$p_value < 0.05 & cds_subset_pt_res$status == "OK", ] 211 | cds_subset_pt_res 212 | ``` 213 | 214 | 215 | 216 | 217 | ```{r} 218 | 
cds_subset_pt_res[order(-cds_subset_pt_res$morans_test_statistic),] 219 | ``` 220 | 221 | 222 | ```{r} 223 | plot_cells(cds_subset, genes=c("Rpl7a", "Eef1a1", "Mgst1", "Lgals1"), 224 | show_trajectory_graph=FALSE, 225 | label_cell_groups=FALSE, 226 | label_leaves=FALSE) 227 | ``` 228 | 229 | 230 | 231 | ```{r} 232 | cds_subset_subset <- cds_subset[rowData(cds_subset)$gene_short_name %in% c("Rpl7a", "Eef1a1", "Mgst1", "Lgals1")] 233 | ``` 234 | 235 | ```{r} 236 | plot_genes_in_pseudotime(cds_subset_subset, 237 | color_cells_by="dayint", 238 | min_expr=0.5) 239 | ``` 240 | 241 | 242 | 243 | ################################ Integration Example ###################### 244 | 245 | ```{r} 246 | features <- SelectIntegrationFeatures(object.list = data_list) 247 | ``` 248 | 249 | ```{r} 250 | 251 | scale_pca <- function(x){ 252 | x <- ScaleData(x, features = features, verbose = FALSE) 253 | x <- RunPCA(x, features = features, verbose = FALSE) 254 | return(x) 255 | } 256 | 257 | 258 | data_list <- lapply(X = data_list, scale_pca) 259 | ``` 260 | 261 | 262 | ```{r} 263 | anchors <- FindIntegrationAnchors(object.list = data_list, anchor.features = features, reduction = "rpca") 264 | saveRDS(anchors, file = "integration_anchors.rds") 265 | 266 | 267 | data <- IntegrateData(anchorset = anchors) 268 | 269 | ``` 270 | 271 | 272 | -------------------------------------------------------------------------------- /python_sequence_alignment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "2bb017e7", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "#conda install -c bioconda mafft \n", 11 | "#!pip install Biopython" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "id": "c64c0ba6", 17 | "metadata": {}, 18 | "source": [ 19 | "Here I convert a SeqIO object to a string then to bytes. 
I then pass this encoded string to a subprocess call of maft through STDOUT. Mafft reads the encoded fasta through STDIN and ouputs the aligned fasta through STDOUT. This STDOUT is then decoded back into a python string and read as a new aligned SeqIO object.\n", 20 | "\n", 21 | "I have supplied a function at the bottom that takes unaligned SeqIO objects and returns alinged SeqIO objects" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "id": "146e2ffc", 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "from Bio import SeqIO\n", 32 | "import subprocess\n", 33 | "from io import StringIO" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "id": "d0655331", 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "alignment.ipynb sars_map.ipynb test_500_seqs.fasta\r\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "!ls" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 3, 57 | "id": "22bd34fd", 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "seqs = list(SeqIO.parse('test_500_seqs.fasta', 'fasta'))" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 5, 67 | "id": "3e87ed3a", 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "500" 74 | ] 75 | }, 76 | "execution_count": 5, 77 | "metadata": {}, 78 | "output_type": "execute_result" 79 | } 80 | ], 81 | "source": [ 82 | "len(seqs)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 6, 88 | "id": "ea18c1ac", 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "#This is just a command line example of what is happening in the python code below\n", 93 | "!cat test_500_seqs.fasta | mafft --quiet - > aligned_file.fasta" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 7, 99 | "id": "a2aa430b", 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | 
"name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "aligned_file.fasta alignment.ipynb sars_map.ipynb test_500_seqs.fasta\r\n" 107 | ] 108 | } 109 | ], 110 | "source": [ 111 | "!ls" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 8, 117 | "id": "28029134", 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "seq_str = ''\n", 122 | "for seq in seqs:\n", 123 | " seq_str += '>' + seq.description + '\\n'\n", 124 | " seq_str += str(seq.seq) + '\\n'" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 10, 130 | "id": "cc16fddc", 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "child = subprocess.Popen(['mafft', '--quiet', '-'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)\n", 135 | "child.stdin.write(seq_str.encode())\n", 136 | "child_out = child.communicate()[0].decode('utf8')\n", 137 | "seq_ali = list(SeqIO.parse(StringIO(child_out), 'fasta'))\n", 138 | "child.stdin.close()" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 14, 144 | "id": "b530f795", 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "#to write to file\n", 149 | "with open('aligned_file.fasta', 'w') as f:\n", 150 | " for seq in seqs:\n", 151 | " f.write( '>' + seq.description + '\\n')\n", 152 | " f.write(str(seq.seq) + '\\n')\n" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "id": "dae165f9", 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "#in a function. 
takes unaligned SeqIO object and returns aligned SeqIO object\n", 163 | "def align_seqs(seqs):\n", 164 | " seq_str = ''\n", 165 | " for seq in seqs:\n", 166 | " seq_str += '>' + seq.description + '\\n'\n", 167 | " seq_str += str(seq.seq) + '\\n'\n", 168 | " child = subprocess.Popen(['mafft', '--quiet', '-'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)\n", 169 | " child.stdin.write(seq_str.encode())\n", 170 | " child_out = child.communicate()[0].decode('utf8')\n", 171 | " seq_ali = list(SeqIO.parse(StringIO(child_out), 'fasta'))\n", 172 | " child.stdin.close()\n", 173 | " return seq_ali" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "id": "afc242a5", 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [] 183 | } 184 | ], 185 | "metadata": { 186 | "kernelspec": { 187 | "display_name": "Python 3 (ipykernel)", 188 | "language": "python", 189 | "name": "python3" 190 | }, 191 | "language_info": { 192 | "codemirror_mode": { 193 | "name": "ipython", 194 | "version": 3 195 | }, 196 | "file_extension": ".py", 197 | "mimetype": "text/x-python", 198 | "name": "python", 199 | "nbconvert_exporter": "python", 200 | "pygments_lexer": "ipython3", 201 | "version": "3.10.1" 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 5 206 | } 207 | -------------------------------------------------------------------------------- /salmon_to_deseq.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "R Notebook" 3 | output: html_notebook 4 | --- 5 | ######## PREPARE INDEX ####### 6 | ##ref: https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/ 7 | 8 | curl -O https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_44/gencode.v44.transcripts.fa.gz 9 | curl -O https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_44/GRCh38.primary_assembly.genome.fa.gz 10 | 11 | grep "^>" <(gunzip -c GRCh38.primary_assembly.genome.fa.gz) | cut -d " " -f 1 > 
decoys.txt 12 | sed -i -e 's/>//g' decoys.txt 13 | 14 | cat gencode.v44.transcripts.fa.gz GRCh38.primary_assembly.genome.fa.gz > GRCH38_and_decoys.fa.gz 15 | 16 | salmon index -t GRCH38_and_decoys.fa.gz -d decoys.txt -p 30 -i GRCh38_salmon_index --gencode 17 | 18 | 19 | ######## RUN SALMON ON ONE SAMPLE ####### 20 | 21 | #paired 22 | salmon quant -i GRCh38_salmon_index/ -l A -1 path_to_R1.fastq.gz -2 path_to_R2.fastq.gz --validateMappings -o salmon_out/out_directory 23 | 24 | #single-end 25 | salmon quant -i GRCh38_salmon_index/ -l A -r path_to.fastq.gz --validateMappings -o salmon_out/out_directory 26 | 27 | 28 | 29 | ######### START MULTIPLE FILE SALMON SCRIPT ###### 30 | 31 | #!/bin/bash 32 | 33 | # Set the path to the Salmon index 34 | salmon_index="human_salmon_index" 35 | 36 | # Set the path to the "fastq" folder 37 | fastq_dir="fastq" 38 | 39 | # Loop through all the directories within the "fastq" folder 40 | for dir in "${fastq_dir}"/SRR*; do 41 | # Find the R1 and R2 FASTQ files 42 | r1_file=$(find "$dir" -name "*_1.fastq.gz") 43 | r2_file=$(find "$dir" -name "*_2.fastq.gz") 44 | 45 | # Extract the sample name 46 | samp=$(basename "$dir") 47 | 48 | echo "Processing sample ${samp}" 49 | salmon quant -i "$salmon_index" -l A \ 50 | -1 "$r1_file" \ 51 | -2 "$r2_file" \ 52 | -p 28 --validateMappings -o "salmon_out/${samp}_quant" 53 | done 54 | 55 | ######### END MULTIPLE FILE SALMON SCRIPT ###### 56 | 57 | 58 | #### R CODE BELOW ####### 59 | 60 | ```{r} 61 | library(tximport) 62 | library(ensembldb) 63 | library(AnnotationHub) 64 | library(DESeq2) 65 | ``` 66 | 67 | ```{r} 68 | hub <- AnnotationHub() 69 | ``` 70 | 71 | ```{r} 72 | #make sure to use the right species 73 | ensdb_query <- query(hub, c("EnsDb", "sapiens", "109")) 74 | ensdb_query 75 | ``` 76 | 77 | ```{r} 78 | ensdb_109 <- ensdb_query[['AH109606']] 79 | ``` 80 | 81 | ```{r} 82 | # Extract transcript and gene information 83 | tx_data <- transcripts(ensdb_109, return.type = "DataFrame") 84 | 85 | # 
Create the tx2gene data.frame 86 | tx2gene <- tx_data[, c("tx_id", "gene_id")] 87 | 88 | tx2gene 89 | ``` 90 | 91 | 92 | 93 | 94 | ```{r} 95 | quants_dir <- "salmon_out/" 96 | 97 | quant_files <- list.files(quants_dir, pattern = "quant.sf$", recursive = TRUE, full.names = TRUE) 98 | 99 | quant_dirs <- list.files(quants_dir, pattern = "_quant$", full.names = TRUE) 100 | sample_names <- gsub("_quant$", "", basename(quant_dirs)) 101 | 102 | names(quant_files) <- sample_names 103 | 104 | quant_files 105 | ``` 106 | 107 | 108 | ```{r} 109 | txi <- tximport(quant_files, type = "salmon", tx2gene = tx2gene,ignoreTxVersion = TRUE) 110 | ``` 111 | 112 | 113 | ```{r} 114 | sample_names 115 | ``` 116 | 117 | 118 | 119 | ```{r} 120 | condition <- factor(c("KO","KO","KO","KO", "WT","WT","WT","WT")) 121 | coldata <- data.frame(row.names = sample_names, condition) 122 | coldata 123 | ``` 124 | 125 | 126 | ```{r} 127 | dds <- DESeqDataSetFromTximport(txi, coldata, ~condition) 128 | dds 129 | ``` 130 | 131 | ```{r} 132 | dds <- DESeq(dds) 133 | ``` 134 | 135 | ```{r} 136 | vsdata <- vst(dds, blind = FALSE) 137 | plotPCA(vsdata, intgroup = "condition") 138 | ``` 139 | 140 | 141 | ```{r} 142 | res <- results(dds, contrast = c('condition', 'KO', 'WT')) 143 | ``` 144 | 145 | 146 | ```{r} 147 | res 148 | ``` 149 | 150 | 151 | 152 | ```{r} 153 | sigs <- na.omit(res) 154 | sigs <- sigs[sigs$padj < 0.05 & sigs$baseMean > 10, ] 155 | 156 | sigs 157 | ``` 158 | 159 | 160 | 161 | ```{r} 162 | write.csv(counts(dds), "counts.csv") 163 | ``` 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | -------------------------------------------------------------------------------- /sc2024/readme.txt: -------------------------------------------------------------------------------- 1 | Tutorial series on updated single cell workflows 2 | -------------------------------------------------------------------------------- /scATAC_intro_R.Rmd: 
-------------------------------------------------------------------------------- 1 | --- 2 | title: "R Notebook" 3 | output: html_notebook 4 | --- 5 | 6 | 7 | ```{r} 8 | install.packages('hdf5r') #need to read h5 files 9 | ``` 10 | 11 | 12 | ```{r} 13 | install.packages("Signac") #seurat addon for analyzing chromatin 14 | ``` 15 | 16 | ```{r} 17 | install.packages('Seurat') 18 | ``` 19 | 20 | 21 | ```{r} 22 | library(Signac) 23 | library(Seurat) 24 | ``` 25 | 26 | 27 | ```{r} 28 | counts <- Read10X_h5(filename = "GSM5723631_Young_HSC_filtered_peak_bc_matrix.h5") 29 | ``` 30 | 31 | ```{r} 32 | meta <- read.csv( 33 | file = 'GSM5723631_Young_HSC_singlecell.csv.gz', 34 | header = TRUE, 35 | row.names = 1) 36 | ``` 37 | 38 | 39 | ```{r} 40 | chrom_assay <- CreateChromatinAssay( 41 | counts = counts, 42 | sep = c(":", "-"), 43 | genome = 'mm10', 44 | fragments = './GSM5723631_Young_HSC_fragments.tsv.gz', 45 | min.cells = 10, 46 | min.features = 200 47 | ) 48 | ``` 49 | 50 | 51 | ```{r} 52 | data <- CreateSeuratObject( 53 | counts = chrom_assay, 54 | assay = "peaks", 55 | meta.data = meta 56 | ) 57 | ``` 58 | 59 | 60 | ```{r} 61 | data[[]] 62 | ``` 63 | 64 | This example uses mouse data; use EnsDb.Hsapiens.v86 instead for human. 65 | ```{r} 66 | if (!requireNamespace("BiocManager", quietly = TRUE)) 67 | install.packages("BiocManager") 68 | 69 | BiocManager::install("EnsDb.Mmusculus.v79") 70 | BiocManager::install("GenomeInfoDb") #translation between chromosome names 71 | BiocManager::install("biovizBase") 72 | ``` 73 | 74 | ```{r} 75 | library(GenomeInfoDb) 76 | library(EnsDb.Mmusculus.v79) 77 | ``` 78 | 79 | 80 | ```{r} 81 | annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Mmusculus.v79) 82 | seqlevelsStyle(annotations) <- 'UCSC' 83 | ``` 84 | 85 | 86 | 87 | ```{r} 88 | Annotation(data) <- annotations 89 | ``` 90 | 91 | 92 | ```{r} 93 | data <- NucleosomeSignal(object = data) #fragment ratio 147-294: <147 94 | ``` 95 | 96 | ```{r} 97 | data <- TSSEnrichment(object = data, fast = FALSE) 98 | ``` 99 | 100 | ```{r} 101 | 
data$blacklist_ratio <- data$blacklist_region_fragments / data$peak_region_fragments 102 | 103 | #data[[]] 104 | ``` 105 | 106 | 107 | ```{r} 108 | data$pct_reads_in_peaks <- data$peak_region_fragments / data$passed_filters * 100 109 | ``` 110 | 111 | 112 | ```{r} 113 | VlnPlot( 114 | object = data, 115 | features = c('peak_region_fragments', 'pct_reads_in_peaks', 116 | 'blacklist_ratio', 'nucleosome_signal', 'TSS.enrichment'), 117 | pt.size = 0.1, 118 | ncol = 5 119 | ) 120 | ``` 121 | 122 | 123 | 124 | could do this.... 125 | ```{r} 126 | data <- subset( 127 | x = data, 128 | subset = peak_region_fragments > 3000 & 129 | peak_region_fragments < 20000 & 130 | pct_reads_in_peaks > 15 & 131 | blacklist_ratio < 0.05 & 132 | nucleosome_signal < 4 & 133 | TSS.enrichment > 2 134 | ) 135 | ``` 136 | 137 | 138 | 139 | ```{r} 140 | low_prf <- quantile(data[["peak_region_fragments"]]$peak_region_fragments, probs = 0.02) 141 | hig_prf <- quantile(data[["peak_region_fragments"]]$peak_region_fragments, probs = 0.98) 142 | low_prp <- quantile(data[["pct_reads_in_peaks"]]$pct_reads_in_peaks, probs = 0.02) 143 | 144 | high_blr <- quantile(data[["blacklist_ratio"]]$blacklist_ratio, probs = 0.98) 145 | 146 | hig_ns <- quantile(data[["nucleosome_signal"]]$nucleosome_signal, probs = 0.98) 147 | 148 | low_ts <- quantile(data[["TSS.enrichment"]]$TSS.enrichment, probs = 0.02) 149 | ``` 150 | 151 | ```{r} 152 | print(low_prf) 153 | print(hig_prf) 154 | print(low_prp) 155 | print(high_blr) 156 | print(hig_ns) 157 | print(low_ts) 158 | ``` 159 | 160 | ```{r} 161 | data <- subset( 162 | x = data, 163 | subset = peak_region_fragments > low_prf & 164 | peak_region_fragments < hig_prf & 165 | pct_reads_in_peaks > low_prp & 166 | blacklist_ratio < high_blr & 167 | nucleosome_signal < hig_ns & 168 | TSS.enrichment > low_ts 169 | ) 170 | ``` 171 | 172 | 173 | 174 | 175 | 176 | ```{r} 177 | data 178 | ``` 179 | 180 | 181 | Normalization, dimension reduction 182 | 183 | ```{r} 184 | data <- 
RunTFIDF(data) 185 | ``` 186 | 187 | ```{r} 188 | data <- FindTopFeatures(data, min.cutoff = 'q0') 189 | data 190 | ``` 191 | 192 | 193 | ```{r} 194 | data <- RunSVD(data) 195 | ``` 196 | 197 | ```{r} 198 | DepthCor(data) 199 | ``` 200 | 201 | ```{r} 202 | data <- RunUMAP(object = data, reduction = 'lsi', dims = 2:30) 203 | data <- FindNeighbors(object = data, reduction = 'lsi', dims = 2:30) 204 | data <- FindClusters(object = data, verbose = FALSE, algorithm = 3) 205 | DimPlot(object = data, label = TRUE) + NoLegend() 206 | ``` 207 | 208 | 209 | Multiple samples.... 210 | 211 | ```{r} 212 | import_atac <- function(count_path, meta_path, fragment_path, genome = 'mm10', annotation = annotations){ # load one sample from its peak matrix, metadata CSV and fragment file, add QC metrics, filter cells on 2%/98% quantile cut-offs and return the Seurat object; `genome` and `annotation` default to 'mm10' and the global `annotations` object 213 | counts <- Read10X_h5(filename = count_path) 214 | # per-barcode metadata; the first CSV column becomes the row names 215 | meta <- read.csv( 216 | file = meta_path, 217 | header = TRUE, 218 | row.names = 1) 219 | 220 | 221 | 222 | chrom_assay <- CreateChromatinAssay( 223 | counts = counts, 224 | sep = c(":", "-"), 225 | genome = genome, 226 | fragments = fragment_path, 227 | min.cells = 10, 228 | min.features = 200 229 | ) 230 | 231 | data <- CreateSeuratObject( 232 | counts = chrom_assay, 233 | assay = "peaks", 234 | meta.data = meta 235 | ) 236 | 237 | Annotation(data) <- annotation 238 | 239 | 240 | data <- NucleosomeSignal(object = data) #fragment ratio 147-294: <147 --- mononucleosome:nucleosome-free 241 | 242 | 243 | data <- TSSEnrichment(object = data, fast = FALSE) 244 | 245 | data$blacklist_ratio <- data$blacklist_region_fragments / data$peak_region_fragments 246 | 247 | data$pct_reads_in_peaks <- data$peak_region_fragments / data$passed_filters * 100 248 | # per-sample QC thresholds taken from the 2% / 98% quantiles of each metric 249 | low_prf <- quantile(data[["peak_region_fragments"]]$peak_region_fragments, probs = 0.02) 250 | hig_prf <- quantile(data[["peak_region_fragments"]]$peak_region_fragments, probs = 0.98) 251 | low_prp <- quantile(data[["pct_reads_in_peaks"]]$pct_reads_in_peaks, probs = 0.02) 252 | 253 | high_blr <- quantile(data[["blacklist_ratio"]]$blacklist_ratio, probs = 0.98) 254 | 255 | hig_ns <- 
quantile(data[["nucleosome_signal"]]$nucleosome_signal, probs = 0.98) 256 | 257 | low_ts <- quantile(data[["TSS.enrichment"]]$TSS.enrichment, probs = 0.02) 258 | # keep only the cells that pass every threshold 259 | data <- subset( 260 | x = data, 261 | subset = peak_region_fragments > low_prf & 262 | peak_region_fragments < hig_prf & 263 | pct_reads_in_peaks > low_prp & 264 | blacklist_ratio < high_blr & 265 | nucleosome_signal < hig_ns & 266 | TSS.enrichment > low_ts 267 | ) 268 | 269 | 270 | 271 | #data <- RunTFIDF(data) 272 | #data <- FindTopFeatures(data, min.cutoff = 'q0') 273 | #data <- RunSVD(data) 274 | 275 | return(data) 276 | } 277 | ``` 278 | 279 | 280 | ```{r} 281 | young <- import_atac("GSM5723631_Young_HSC_filtered_peak_bc_matrix.h5", 282 | 'GSM5723631_Young_HSC_singlecell.csv.gz', 283 | './GSM5723631_Young_HSC_fragments.tsv.gz') 284 | 285 | old <- import_atac("GSM5723632_Aged_HSC_filtered_peak_bc_matrix.h5", 286 | 'GSM5723632_Aged_HSC_singlecell.csv.gz', 287 | './GSM5723632_Aged_HSC_fragments.tsv.gz') 288 | ``` 289 | 290 | ```{r} 291 | young$dataset <- "young" 292 | old$dataset <- "old" 293 | ``` 294 | 295 | 296 | ```{r} 297 | data <- merge(young, old) 298 | ``` 299 | 300 | 301 | ```{r} 302 | data 303 | ``` 304 | 305 | 306 | ```{r} 307 | data <- FindTopFeatures(data, min.cutoff = 'q0') 308 | data <- RunTFIDF(data) 309 | data <- RunSVD(data) 310 | data 311 | ``` 312 | 313 | 314 | ```{r} 315 | data <- RunUMAP(object = data, reduction = 'lsi', dims = 2:30) 316 | data <- FindNeighbors(object = data, reduction = 'lsi', dims = 2:30) 317 | ``` 318 | 319 | ```{r} 320 | data <- FindClusters(object = data, verbose = FALSE, algorithm = 3, resolution = .4) 321 | ``` 322 | 323 | ```{r} 324 | DimPlot(object = data, label = TRUE) + NoLegend() 325 | ``` 326 | 327 | ```{r} 328 | DimPlot(object = data, label = TRUE, group.by = "dataset") + NoLegend() 329 | ``` 330 | 331 | Data analysis 332 | 333 | ```{r} 334 | gene.activities <- GeneActivity(data) 335 | ``` 336 | 337 | ```{r} 338 | data[['RNA']] <- 
CreateAssayObject(counts = gene.activities) 339 | 340 | data <- NormalizeData( 341 | object = data, 342 | assay = 'RNA', 343 | normalization.method = 'LogNormalize', 344 | scale.factor = median(data$nCount_RNA) 345 | ) 346 | ``` 347 | 348 | ```{r} 349 | data[['RNA']] 350 | ``` 351 | 352 | 353 | ```{r} 354 | DefaultAssay(data) <- 'RNA' 355 | 356 | 357 | FeaturePlot( 358 | object = data, 359 | features = c('Kit', 'Pecam1', 'Itgam'), 360 | max.cutoff = 'q95' 361 | ) 362 | ``` 363 | 364 | 365 | ```{r} 366 | DefaultAssay(data) <- 'peaks' 367 | 368 | da_peaks <- FindMarkers( 369 | object = data, 370 | ident.1 = rownames(data[[]][data$dataset == "old",]), 371 | ident.2 = rownames(data[[]][data$dataset == "young",]), 372 | min.pct = 0.05, 373 | test.use = 'LR', 374 | latent.vars = 'peak_region_fragments' 375 | ) 376 | 377 | 378 | ``` 379 | 380 | 381 | ```{r} 382 | da_peaks 383 | ``` 384 | 385 | ```{r} 386 | da_peaks$closest_gene <-ClosestFeature(data, regions = rownames(da_peaks))$gene_name 387 | da_peaks$distance <- ClosestFeature(data, regions = rownames(da_peaks))$distance 388 | da_peaks 389 | ``` 390 | 391 | ```{r} 392 | CoveragePlot( 393 | object = data, 394 | region = rownames(da_peaks)[2], 395 | extend.upstream = 10000, 396 | extend.downstream = 5000, 397 | group.by = "dataset" 398 | ) 399 | ``` 400 | 401 | ```{r} 402 | plot1 <- VlnPlot( 403 | object = data, 404 | features = rownames(da_peaks)[2], 405 | group.by = "dataset" 406 | ) 407 | plot2 <- FeaturePlot( 408 | object = data, 409 | features = rownames(da_peaks)[2], 410 | max.cutoff = 'q95' 411 | ) 412 | 413 | plot1 | plot2 414 | ``` 415 | 416 | 417 | 418 | -------------------------------------------------------------------------------- /shifted_transformation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 10, 6 | "id": "ee4391f7", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import 
numpy as np\n", 11 | "import scanpy as sc\n", 12 | "from scipy.sparse import diags\n", 13 | "\n", 14 | "#from scipy.sparse import csr_matrix\n", 15 | "#will only work with sparse, convert prior : adata.X = csr_matrix(adata.X)\n", 16 | "\n", 17 | "def shifted_transformation(adata, y0=1):\n", 18 | " size_factors = adata.X.sum(axis=1) / np.mean(adata.X.sum(axis=1))\n", 19 | "\n", 20 | " adata.X = diags(1 / size_factors.A1).dot(adata.X)\n", 21 | " adata.X.data = np.log(adata.X.data + y0)\n", 22 | " \n", 23 | " return adata\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "id": "1d135da4", 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "adata = sc.datasets.pbmc3k() #loading pbmc toy dataset\n", 34 | "adata = shifted_transformation(adata)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 11, 40 | "id": "8ff05ad0", 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "id": "07b475dc", 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [] 52 | } 53 | ], 54 | "metadata": { 55 | "kernelspec": { 56 | "display_name": "Python 3 (ipykernel)", 57 | "language": "python", 58 | "name": "python3" 59 | }, 60 | "language_info": { 61 | "codemirror_mode": { 62 | "name": "ipython", 63 | "version": 3 64 | }, 65 | "file_extension": ".py", 66 | "mimetype": "text/x-python", 67 | "name": "python", 68 | "nbconvert_exporter": "python", 69 | "pygments_lexer": "ipython3", 70 | "version": "3.10.6" 71 | } 72 | }, 73 | "nbformat": 4, 74 | "nbformat_minor": 5 75 | } 76 | -------------------------------------------------------------------------------- /simpleaf_alevin_fry_tutorial.txt: -------------------------------------------------------------------------------- 1 | ##### setup ##### 2 | conda create -n af -y -c bioconda -c conda-forge simpleaf piscem 3 | export ALEVIN_FRY_HOME="$PWD" 4 | simpleaf set-paths 5 | ulimit -n 4096 6 | 7 | 8 | ##### 
index ##### 9 | gunzip -c fastq/pbmc_1k_v3_S1_L002_R2_001.fastq.gz | head | sed -n '2p' | wc -c 10 | # ***make sure to subtract 1 11 | simpleaf index --output simpleaf_index --fasta genome.fa --gtf genes.gtf --rlen 91 --threads 28 --use-piscem 12 | 13 | 14 | ##### quant ##### 15 | 16 | simpleaf quant --reads1 a_r1.fastq.gz,b_r1.fastq.gz --reads2 a_r2.fastq.gz,b_R2_001.fastq.gz --threads 28 --index simpleaf_index/index --chemistry 10xv3 --resolution cr-like --unfiltered-pl --expected-ori fw --t2g-map simpleaf_index/index/t2g_3col.tsv --output simpleaf_output 17 | 18 | 19 | 20 | %pip install pyroe 21 | 22 | import scanpy as sc 23 | from pyroe import load_fry 24 | 25 | 26 | # all counts summed in .X 27 | adata = load_fry("output_dir/af_quant", output_format = {'X' : ['U','S','A']}) 28 | 29 | # unspliced in unspliced layer 30 | adata = load_fry("output_dir/af_quant", output_format = {'X' : ['S', 'A'],'unspliced' : ['U']}) -------------------------------------------------------------------------------- /single_cell_gene_co-expression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "a664602f", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "'''\n", 11 | "Are two selected genes co-expressed?\n", 12 | "\n", 13 | "Are any genes co-expressed with a selected gene?\n", 14 | "\n", 15 | "'''" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "id": "afc181a0", 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import scanpy as sc\n", 26 | "from scipy import stats\n", 27 | "import numpy as np\n", 28 | "import pandas as pd" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "id": "e3b218c6", 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "name": "stderr", 39 | "output_type": "stream", 40 | "text": [ 41 | 
"/home/jrlab2019/miniconda3/envs/sc/lib/python3.9/site-packages/scanpy/preprocessing/_normalization.py:170: UserWarning: Received a view of an AnnData. Making a copy.\n", 42 | " view_to_actual(adata)\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "#simple import and preprocessing\n", 48 | "adata = sc.read_10x_mtx('../tutorial_sample/outs/filtered_feature_bc_matrix/')\n", 49 | "sc.pp.filter_cells(adata, min_genes=200)\n", 50 | "sc.pp.filter_genes(adata, min_cells=3)\n", 51 | "adata.var['mt'] = adata.var_names.str.startswith('MT-') \n", 52 | "sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)\n", 53 | "adata = adata[adata.obs.pct_counts_mt < 20]\n", 54 | "sc.pp.normalize_total(adata, target_sum=1e4)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 3, 60 | "id": "fb29b19e", 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "AnnData object with n_obs × n_vars = 8093 × 21949\n", 67 | " obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt'\n", 68 | " var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'" 69 | ] 70 | }, 71 | "execution_count": 3, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "adata" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 4, 83 | "id": "89fb17ac", 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "text/plain": [ 89 | "Index(['AL627309.1', 'AL627309.5', 'LINC01409', 'FAM87B', 'LINC01128',\n", 90 | " 'LINC00115', 'FAM41C', 'AL645608.2', 'AL645608.4', 'LINC02593',\n", 91 | " ...\n", 92 | " 'MT-CYB', 'BX004987.1', 'AC145212.1', 'MAFIP', 'AC011043.1',\n", 93 | " 'AL354822.1', 'AL592183.1', 'AC240274.1', 'AC007325.4', 'zika'],\n", 94 | " dtype='object', length=21949)" 95 | ] 96 | }, 97 | "execution_count": 4, 98 | "metadata": {}, 99 | "output_type": 
"execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "adata.var_names" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 10, 109 | "id": "b060f1a0", 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "i1 = np.where(adata.var_names == 'zika')[0][0]" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 11, 119 | "id": "6e02a079", 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "i2 = np.where(adata.var_names == 'IFITM1')[0][0]" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 9, 129 | "id": "87801fdb", 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | "(8093, 21949)" 136 | ] 137 | }, 138 | "execution_count": 9, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "data = adata.X.toarray()\n", 145 | "data.shape" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 12, 151 | "id": "4a024686", 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "data": { 156 | "text/plain": [ 157 | "array([ 0. , 0.3849559 , 0.47232196, ..., 83.49873 ,\n", 158 | " 0. , 0. 
], dtype=float32)" 159 | ] 160 | }, 161 | "execution_count": 12, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "data[:, i1]" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 13, 173 | "id": "c4f5a8ca", 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "(-0.004018901858421179, 0.7177328380898428)" 180 | ] 181 | }, 182 | "execution_count": 13, 183 | "metadata": {}, 184 | "output_type": "execute_result" 185 | } 186 | ], 187 | "source": [ 188 | "stats.pearsonr(data[:, i1], data[:, i2])" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 14, 194 | "id": "4914ceda", 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "out = []\n", 199 | "for gene in adata.var_names:\n", 200 | " i2 = np.where(adata.var_names == gene)[0][0]\n", 201 | " res = stats.pearsonr(data[:, i1], data[:, i2])\n", 202 | " out.append([gene, res[0], res[1]])" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 15, 208 | "id": "8ae63171", 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "data": { 213 | "text/html": [ 214 | "
\n", 215 | "\n", 228 | "\n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | "
generp
0AL627309.10.0077124.878679e-01
1AL627309.5-0.0007739.445847e-01
2LINC014090.0376107.140523e-04
3FAM87B-0.0039927.195206e-01
4LINC01128-0.0085124.438714e-01
............
21944AL354822.1-0.0133472.299292e-01
21945AL592183.10.0045406.830051e-01
21946AC240274.10.1102362.633133e-23
21947AC007325.40.0201177.034826e-02
21948zika1.0000000.000000e+00
\n", 306 | "

21949 rows × 3 columns

\n", 307 | "
" 308 | ], 309 | "text/plain": [ 310 | " gene r p\n", 311 | "0 AL627309.1 0.007712 4.878679e-01\n", 312 | "1 AL627309.5 -0.000773 9.445847e-01\n", 313 | "2 LINC01409 0.037610 7.140523e-04\n", 314 | "3 FAM87B -0.003992 7.195206e-01\n", 315 | "4 LINC01128 -0.008512 4.438714e-01\n", 316 | "... ... ... ...\n", 317 | "21944 AL354822.1 -0.013347 2.299292e-01\n", 318 | "21945 AL592183.1 0.004540 6.830051e-01\n", 319 | "21946 AC240274.1 0.110236 2.633133e-23\n", 320 | "21947 AC007325.4 0.020117 7.034826e-02\n", 321 | "21948 zika 1.000000 0.000000e+00\n", 322 | "\n", 323 | "[21949 rows x 3 columns]" 324 | ] 325 | }, 326 | "execution_count": 15, 327 | "metadata": {}, 328 | "output_type": "execute_result" 329 | } 330 | ], 331 | "source": [ 332 | "df = pd.DataFrame(out, columns = ['gene', 'r', 'p'])\n", 333 | "df" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 16, 339 | "id": "6dd2805b", 340 | "metadata": {}, 341 | "outputs": [ 342 | { 343 | "name": "stderr", 344 | "output_type": "stream", 345 | "text": [ 346 | "/home/jrlab2019/miniconda3/envs/sc/lib/python3.9/site-packages/pandas/core/arraylike.py:397: RuntimeWarning: divide by zero encountered in log10\n", 347 | " result = getattr(ufunc, method)(*inputs, **kwargs)\n" 348 | ] 349 | }, 350 | { 351 | "data": { 352 | "text/html": [ 353 | "
\n", 354 | "\n", 367 | "\n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | "
generpbon-log10_p
0AL627309.10.0077124.878679e-011.070821e+040.311698
1AL627309.5-0.0007739.445847e-012.073269e+040.024759
2LINC014090.0376107.140523e-041.567273e+013.146270
3FAM87B-0.0039927.195206e-011.579276e+040.142957
4LINC01128-0.0085124.438714e-019.742533e+030.352743
..................
21944AL354822.1-0.0133472.299292e-015.046717e+030.638406
21945AL592183.10.0045406.830051e-011.499128e+040.165576
21946AC240274.10.1102362.633133e-235.779463e-1922.579527
21947AC007325.40.0201177.034826e-021.544074e+031.152747
21948zika1.0000000.000000e+000.000000e+00inf
\n", 469 | "

21949 rows × 5 columns

\n", 470 | "
" 471 | ], 472 | "text/plain": [ 473 | " gene r p bon -log10_p\n", 474 | "0 AL627309.1 0.007712 4.878679e-01 1.070821e+04 0.311698\n", 475 | "1 AL627309.5 -0.000773 9.445847e-01 2.073269e+04 0.024759\n", 476 | "2 LINC01409 0.037610 7.140523e-04 1.567273e+01 3.146270\n", 477 | "3 FAM87B -0.003992 7.195206e-01 1.579276e+04 0.142957\n", 478 | "4 LINC01128 -0.008512 4.438714e-01 9.742533e+03 0.352743\n", 479 | "... ... ... ... ... ...\n", 480 | "21944 AL354822.1 -0.013347 2.299292e-01 5.046717e+03 0.638406\n", 481 | "21945 AL592183.1 0.004540 6.830051e-01 1.499128e+04 0.165576\n", 482 | "21946 AC240274.1 0.110236 2.633133e-23 5.779463e-19 22.579527\n", 483 | "21947 AC007325.4 0.020117 7.034826e-02 1.544074e+03 1.152747\n", 484 | "21948 zika 1.000000 0.000000e+00 0.000000e+00 inf\n", 485 | "\n", 486 | "[21949 rows x 5 columns]" 487 | ] 488 | }, 489 | "execution_count": 16, 490 | "metadata": {}, 491 | "output_type": "execute_result" 492 | } 493 | ], 494 | "source": [ 495 | "df['bon'] = df.p * len(df)\n", 496 | "df['-log10_p'] = -np.log10(df.p)\n", 497 | "\n", 498 | "df" 499 | ] 500 | }, 501 | { 502 | "cell_type": "code", 503 | "execution_count": 18, 504 | "id": "e58c8438", 505 | "metadata": {}, 506 | "outputs": [ 507 | { 508 | "data": { 509 | "text/html": [ 510 | "
\n", 511 | "\n", 524 | "\n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | "
generpbon-log10_p
0zika1.0000000.000000e+000.000000e+00inf
1IFIT20.3190317.119685e-1911.562700e-186190.147539
2OASL0.2991206.186299e-1671.357831e-162166.208569
3IFIT10.2479111.291226e-1132.834112e-109112.888998
4DDX580.2437379.033933e-1101.982858e-105109.044123
..................
693RAE10.0526282.169152e-064.761072e-025.663710
694EXOSC40.0526282.169169e-064.761109e-025.663707
695AC016747.10.0526232.173228e-064.770018e-025.662895
696PPM1L0.0526112.184895e-064.795627e-025.660569
697FUNDC2-0.0525382.256946e-064.953771e-025.646479
\n", 626 | "

698 rows × 5 columns

\n", 627 | "
" 628 | ], 629 | "text/plain": [ 630 | " gene r p bon -log10_p\n", 631 | "0 zika 1.000000 0.000000e+00 0.000000e+00 inf\n", 632 | "1 IFIT2 0.319031 7.119685e-191 1.562700e-186 190.147539\n", 633 | "2 OASL 0.299120 6.186299e-167 1.357831e-162 166.208569\n", 634 | "3 IFIT1 0.247911 1.291226e-113 2.834112e-109 112.888998\n", 635 | "4 DDX58 0.243737 9.033933e-110 1.982858e-105 109.044123\n", 636 | ".. ... ... ... ... ...\n", 637 | "693 RAE1 0.052628 2.169152e-06 4.761072e-02 5.663710\n", 638 | "694 EXOSC4 0.052628 2.169169e-06 4.761109e-02 5.663707\n", 639 | "695 AC016747.1 0.052623 2.173228e-06 4.770018e-02 5.662895\n", 640 | "696 PPM1L 0.052611 2.184895e-06 4.795627e-02 5.660569\n", 641 | "697 FUNDC2 -0.052538 2.256946e-06 4.953771e-02 5.646479\n", 642 | "\n", 643 | "[698 rows x 5 columns]" 644 | ] 645 | }, 646 | "execution_count": 18, 647 | "metadata": {}, 648 | "output_type": "execute_result" 649 | } 650 | ], 651 | "source": [ 652 | "df = df[df.bon < 0.05].sort_values('bon').reset_index(drop = True)\n", 653 | "df" 654 | ] 655 | }, 656 | { 657 | "cell_type": "code", 658 | "execution_count": 20, 659 | "id": "14cacf15", 660 | "metadata": {}, 661 | "outputs": [ 662 | { 663 | "data": { 664 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAQcAAAE+CAYAAACA3tXCAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAh10lEQVR4nO3debxd473H8c/XWBFCK0WEHiKo8SBNh0tFUDOlVYm2htJwb9wOSvHiorfctrSlatZL1DWrqca2UZe6qIQMEkVCEFGqqhpHQ5Lf/WM9m2VnnTFnz9/363VeZ+9nPWvtZ6XOr2vYz3cpIjAzK7dMrQdgZvXJxcHMCrk4mFkhFwczK+TiYGaFXBzMrNBytR5AK1hjjTWira2t1sMwKzR58uTXImJwebuLQxW0tbUxadKkWg/DrJCk54vaXRyqYN68eXzve9+r9TCsRZ166ql9Ws/XHMyskIuDmRVqqdMKSfMjYqCkNuBJ4Knc4pHAQcAI4GXggNS+BTA9vb6M7N/sCGAh8BfgaxFReM5m1shaqjiUmR0R7fkGSQBExBnAGaltfr6fpB2BERHRIelfgTOBA6s0ZrOq8WlFL0XE7yOiI719GBhay/GYVUorF4dhkqakn/P7uI3Dgbv6c1Bm9cKnFX0k6Stk1yd26GT5OGAcwKBBg/r6MWY108pHDn0maWfgJGCfiFhQ1CciLomIERExYsCAAdUdoFk/aOUjhz6RtDVwMbBbRLxa6/GYVYqLQ++dBQwEbkh3N16IiH1qOySz/tdSxSEiBqbfc4DNC5ZPACYUrZN7v3PFBmhWR3zNwcwKuTiYWSE5mr7yRowYEZ6ybfVK0uSIGFHe7iMHMyvk4mBmhVwczKxQS93KrBUnQVlv9DW5qb/5yMHMCrk4mFmhlikOkoZKulXSM5JmS/qZpBVyy2+R9HDZOhtLui9N635S0iWpfZSk26u9D2bV1BLFQdkkiJuAWyJiOLAR2fyIUtrTasC2wCBJG+RWPRc4OyLaI+LjwM+rOnCzGmqJ4gCMBv4ZEZcDRMQi4NvA1yQNAPYHfg1cC4zJrbc2MLf0JiKmY9YiWqU4bAZMzjdExJvAC8CGwFjgmvQzNtftbOBeSXdJ+nY6wjBrCa1SHLqyOjAc+ENEPA28K2lzgHSk8XHgBmAU8LCkFXuyUUnjJE2SNKmjo6P7FczqTKsUh5lk1xTeI2lVYD2gnaxAPCdpDtBG7ughIuZFxGURsS9ZHP0SU72LOAnKGl2rFIeJwABJBwNIWhb4CVl2w4FkqU5tEdFGVkTGpH67SVo+vV4L+AjwUtVHb1YDLVEcIpt6uh9wgKRngKeBfwKXAB8ji5gv9X0O+LukTwKfA56QNBW4BzguIv6cuu4kaW7u59NV3CWzimuZr09HxIvA3gWL1inou016+QhwTMHy+4CV+nN8ZvWmJY4czKz3XBzMrJCLg5kVckxcFTgmzuqZY+LMrFdcHMysUMvcyqwlJ0HVTr2kKjUiHzmYWSEXBzMr1FKnFZLmR8RASW3Ak8BTucUjgYOAEcDLwAGpfQuglONwGTAFOAfYEhgTETdWfOBmNdBSxaHM7Ihozzekp2YTEWfwfkrU/Hy/VFgOBY6tzjDNaqOVi0OfpCd0I2lxjYdiVlGtfM1hWAqOnSLp/FoPxqzetPKRwxKnFf1J0jhgHMCgQYMq9TFmFdPKRw4V5SQoa3QuDmZWyMWhlyR9QtJcsludF0uaUesxmVVCS11ziIiB6fccCoJiI2ICWa7kEuvk3j8KDK3UGM3qhY8czKyQi4OZFXJxMLNCToKqAidBWT1zEpSZ9YqLg5kVaqlbmbXiJKjqcvpT//CRg5kVcnEws0IuDmZWqOGLg6RFKZNhhqSpkr4jaZm0bJSkv0t6XNJTku6XtFdu3XMlnZJ7f1Ip20HSaZJeymU+7JHal5d0haTpkp6UdGK199msGprhguTbpVwGSR8FrgZWBUpXpR6IiL3S8nbgFklvR8RE4GRgiqT
/SX2PALbObfvsiPhx2ecdAKwYEVtIGgDMlHRNKSHKrFk0/JFDXkS8ShawcrRKgZAfXD4F+E/g6PT+TeAk4Lz0c0pEvNHdxwArS1oOWAl4B3izn3bBrG40VXEAiIhngWWBj3bS5TFgk1z/a4DVgVUj4sqyvkdLmibpMkmrp7YbgbfIEqpfAH4cEa/35z6Y1YOmKw498IEjCklDgbWBIZLy07MvBIYB7WSF4CepfSSwCBgCrA98R9IGS3yINE7SJEmTOjo6+n0nzCqt6YpD+kNdBLzaSZetyZ5ZUfIzsusT1/P+dQoi4pWIWBQRi4FLyYoCZM+2uDsi3k2nMQ+SPeviAxwTZ42uqYqDpMHARcB5UTCjTNKWwH8ApTsSu5OdfvwS+D6wv6RN07K1c6vuBzyRXr8AjE59VgY+BfypEvtjVkvNcLdiJUlTgOWBhcCVwE9zy7eX9DgwgOxo4hsRMVHSh8ieXPXFVEjeknQc2YXJ0cCZ6e5GAHOAI9P2zgcuT/FwAi6PiGkV3UOzGmj44hARy3ax7D6gMBc+Iv4JbFzWdhNwU3r91U7Wm8/7j8oza1pNdVphZv3HxcHMCjkJqgqcBGX1zElQZtYrLg5mVqjh71Y0AidBVYcToPqXjxzMrJCLg5kVcnEws0INWxxSatOMNKV6iqRPSrovJT5Nk/QnSedJWi23Tik1qvTTJmkXSZNTstNkSaNz/edIWqPsczeR9JCkBZKOreIum1VVQ16QlPRpYC9gm4hYkP6AV0iLvxwRkyStAPwAuBXYIS17LzUqt63Vgb0jYp6kzYF7gHW6+PjXgW8An++v/TGrR4165LA28FpELACIiNciYl6+Q0S8A3wXWE/SVp1tKCIez607g2wi14pd9H81Ih4F3l3anTCrZ41aHH4DrCvpaUkXSNqhqFNELAKm8n7y00q5U4qbC1b5AvBYqeiYtbKGPK2IiPmStgW2B3YErpN0Qifd88lPS5xWvNdJ2gz4EfC5/hijpHFkeZYMGlQ4MdSsrjVkcYD3jgruA+6TNB04pLyPpGWBLfhg8tMSUlTczcDBETG7n8Z3CXAJwJAhQzyBxRpOQ55WSNpY0vBcUzvwfFmf5ckuSL7YVRhLuptxB3BCRDzY/6M1a0wNWRyAgcAVkmZKmgZsCpyWll2V2p4AVgb27WZbRwMbAqfkrkfkk6unSZqbfn4qaS1Jc4FjgJNT+6r9uXNm9aAhTysiYjLwmYJFo7pZb2BB2+nA6Z30b+tkU0O7HqFZ42vUIwczqzAXBzMr5CSoKnASlNUzJ0GZWa+4OJhZIRcHMyvUkLcyG41j4nrPkW+15yMHMyvk4mBmhZq2OEian34vI+lcSU+ktKdHJa2flg2UdKGk2ZIeS0lQX89t425Jb0i6vWzbEyQ9l/u6dXtVd86sClrhmsOBwBBgy4hYnGZgvpWW/QJ4Fhielg0GvpZb9yyyp3MfyZKOi4gbKzhus5pqheKwNvByRCwGiIi5AJKGASOBg3LL/kKW6UB6P1HSqGoP2KweNO1pRc71wN7p8P8nkrZO7ZsBU0uFoQ/OSEG2Z3cVK2fWqJq+OKQjhY2BE4HFwERJO5X3S2nWUyTNK19W4ESy6LlPAB8Gji/Y3jhJkyRN6ujoWKp9MKuFpi8OABGxICLuiojjgP8iS46eCWwlaZnU54wUIddtNkNEvByZBcDlZKcn5X0uiYgRETFiwIAB/bg3ZtXR9MVB0jaShqTXywBbAs9HxCxgEnB6ipND0of4YOZkZ9tcO/0WWaF5ojKjN6udVrgg+VHg0tx1gT8C56XXR5DdkZgl6a/A22Rx9gBIeoDs9GFgSn86PCLuIUubGkxWSKYAR1VjR8yqqWmLQyn1KSLuBu7upM+bFN+mLC3fvpP20UXtZs2k6U8rzKxvXBzMrJCToKrASVBWz5wEZWa94uJgZoVcHMysUNPeyqwnrZwE5USnxuUjBzMr5OJgZoUatjikB9pem1KcJku6U9JISTMkrZD
6DJP0rKRVO+m/kaQ2SUvMjZB0QNrWYkkjypadKGmWpKck7VqtfTarpoYsDmnC083AfRExLCK2JZtGvSLwv8Cxqev5wEnAPzrpv2YXH/MEsD9wf9lnbwqMIcuD2A24oDRxy6yZNOoFyR2BdyPiolJDREwFkDQdeFzSQmC5iLhG0ugu+rcVfUBEPJmWly/aF7g2Tdd+TtIssinbD/XTvpnVhUYtDpsDk4sWRMQbkn4IXABs2l3/PlgHeDj3fm5qM2sqDXla0QO7A6/wfnGoOidBWaNr1OIwA9i2aIGkvYBBwK7AWZIGdNW/D14C1s29H5raPsBJUNboGrU43AusKGlcqUHSlpK2B34KjI+I6cCtZBcku+rfW7cBYyStmJ5/MZwsQMasqTTkNYeICEn7AedIOh74JzAHeBO4OSJmpq6nAVOBCUBR/2+lfhunpKeSbwMLgZ8Dg4E7JE2JiF0jYoak68kyKBeSFaJFFdpVs5ppyOIAEBHzgC910+cfwAa5ps76L99J+82dbPcM4IzuxmjWyBr1tMLMKszFwcwKOQmqCpwEZfVsqZOgJG0n6bD0enDpSdVm1px6VBwknUr2yLcTU9PywP9UalBmVns9PXLYD9iH9Oj6dKdglUoNysxqr6e3Mt9J3y0IAEkrV3BMTcdJUNaIenrkcL2ki4HVJH0d+B1waeWGZWa11qMjh4j4saRdyL6BuDFwSkT8tqIjM7Oa6vE3JFMxaPiCIGl+RAxMOQ5PAk/lFo8EDgJGAC8DB6T2LYDp6fVlwDvAeGARMB8Yl/vKtllT6FFxkLQ/8COyJ1Yr/URErFrBsVXD7IhozzeUwl3yX5FOBaU912fVUnCMpH3IJnvtVp0hm1VHT48czgT2LqUjtbr0dO6SlQF/k8yaTk+LwytNWhiGSZqSXj8YEeN7uqKk8cAxwArA6AqMzaymelocJkm6DrgFWFBqjIibKjGoKlritKKnIuJ84HxJBwEnA4fkl6fsiHEAgwYNWsphmlVfT4vDqkAH8LlcWwCNXhz6w7XAheWNEXEJcAnAkCFDfNphDaentzIPq/RAGomk4RHxTHq7J/BMV/3NGlFP51ZsJGli6eEvKWLt5MoOra4dnR54M4XsusMh3fQ3azg9Pa24FDgOuBggIqZJuho4vVIDq5SIGJh+zyGLrC9fPoEsVm6JdXLvv1mxAZrViZ5+fXpARJSHqC7s78GYWf3oaXF4TdIw0v18SV8k+wahmTWpHiVBSdqA7Mr7Z4C/Ac8BX46I5ys7vObgJCirZ50lQfX0msPngTuB35MdbbwF7Jw2OqW/Bmlm9aOnpxUjgKOA1YHVgCPJ5hJcKum7lRmamdVST48chgLbRMR8eC827g7gs2QPqD2zMsMzs1rpaXH4KLmvTQPvAmtGxNuSFnSyjiWtmATlBKjG19PicBXwiKRb0/u9gatTXJxzDMyaUE+/Pv19SXcB/5KajoqI0uX3L1dkZGZWU71JgpoE1P39OEmLyFKbliO75frViHijLPlpBeB+4N+A9YDbI2Lzsu1cRxaJB9lF2DfKAl/OIUuKWjciFlduj8xqoxkfh/d2RLSnP/bXyeLcSkpTtLcENiW7RVsoIg5M22kHfkVuBqqkZcji+l8EdujvHTCrB81YHPIeAtYpb4yIhcD/ARt2twFluXFfAq7JNY8CZpBN1R7bHwM1qzdNWxwkLQvsBNxWsGxAWja9fFmB7cmSsPLTsseSFYubgT0lLb/0IzarL81YHFZKU6n/DKzJBxOzS7FwDwJ3RMRdPdheqRAAIGkFYA/glpQl+Qiwa/lKksZJmiRpUkdHR1/3xaxmenxBsoG8HRHt6ejgHrJrDuemZb2KhZO0HLA/sG2ueVeyC5TTU1L1AOBt4Pb8uk6CskbXjEcOAEREB/AN4Dvpj7wvdgb+FBFzc21jgSMioi0i2oD1gV1SMTJrGk1bHAAi4nFgGt1fNNxY0tzcT+lhNmP44CnFALI5JXfkPuMt4A9kXww
zaxpNd1pRkNqU/6MtSn6aAxReUIyIQ8vedwAfLui3fx+GalbXmvrIwcz6zsXBzAr1KAnKlo6ToKyedZYE5SMHMyvk4mBmhVwczKxQ093KrEetlATlBKjm4SMHMyvk4mBmhVqmOEhaS9K1kmZLmizpzvSA4I3S62ckPSbpeklrShol6faC7UyQ9JykKemnvQa7Y1ZxLXHNIQW23AxcERFjUttWZFO6LwOOiYhfp/ZRwOBuNnlcRNxYsQGb1YFWOXLYEXg3Ii4qNUTEVGA48FCpMKT2+yLiiRqM0ayutEpx2Jzs4Ts9be/OGZKmSTpb0opLNzSz+tQqxaE/nQhsAnyCbIbm8UWdnARlja5VisMMPpjm1F17pyLi5cgsAC4HRnbS75KIGBERIwYMcA6MNZ5WKQ73AitKGldqkLQl8DTwGUl75to/K2mJ3Ifc8rXTb5FF2/v6hDWlligOkU093Q/YOd3KnAH8gCyEdi/g39OtzJlkD7r5S1p1p7KEqE8DV0maTpZcvQZwetV3yKwKWuJWJkBEzCN7/kSR3QraXgFWKmgf3W+DMqtjLXHkYGa95+JgZoWcBFUFToKyeuYkKDPrFRcHMyvk4mBmhVrmVmYttUoSlFOgmouPHMyskIuDmRVqmeIgaX763Sbp7VyS0xRJK0g6VNJ5qc9pko4tW3+OpDXS68skvSrJ8yqsabVMcSgzOyLacz/v9HL9CRR/5dqsabRqcVgqEXE/8Hqtx2FWSa16t2KYpCnp9YMRMb6gz7clfSX3fkjlh2VWP1q1OMyOiPZu+pwdET8uvZE0pzcfkLIjxgEMGjSot+MzqzmfVlSIk6Cs0bk4mFkhF4c+kHQN8BCwcUqIOrzWYzLrby1zzSEiBqbfc8gi6cuXTyC7RUlEnFawvC33emxFBmlWR3zkYGaFXBzMrJCLg5kVckxcFTgmzuqZY+LMrFdcHMysUMvcyqwlJ0FZI/KRg5kVcnEws0IuDmZWqGbFQdKakq6W9KykyZIekrRfWradpD9K+lP6GZdbb4kIt+62l+tzjqSXJC1T1j4qxcXNkPS/ufb5BZ9zjKSZkqZJmijpY/3x72FWb2pSHCQJuAW4PyI2iIhtgTHAUElrAVcDR0XEJsB2wJGS9uzL9nJ9lgH2A14Edsi1rwZcAOwTEZsBB3Qz/MeBERGxJXAjcGYvdt2sYdTqyGE08E5EXFRqiIjnI+LnwHhgQkQ8ltpfA74LnNDH7ZWMAmYAFwL5iVMHATdFxAtpvVe7GnhE/D4iOtLbh8kVILNmUqvisBnwWBfLJpe1TUrtfdleyVjgGuBmYE9Jy6f2jYDVJd2XTkcO7mY7eYcDdxUtkDRO0iRJkzo6Ooq6mNW1urggKel8SVMlPVqJ7UlaAdgDuCUi3gQeAXZN3ZcDtgX2TG3/IWmjHnzGV4ARwFlFy50EZY2uVsVhBrBN6U0KeN0JGAzMJPtjzds2rdOX7UH2R78aMD1lQW7H+6cWc4F7IuKtdApzP7BVV4OXtDNwEtl1igVd9TVrVLUqDvcCH5L0r7m20v+9ng8cKqkdQNJHgB/R9YW/rrYHWSE4IiLaUmjL+sAukgYAtwLbSVouvf8k8GRnHyRpa+BissLQ5fUJs0ZWk69PR0RI+jxwtqTvAn8B3gKOj4iX0yH7pZJWAQScExG/zm3iZEnfym1vaGfbS3/wuwFH5fq/JekPwN4RcZ2ku4FpwGLgFxFRepLVAElzc5/7U7LTk4HADdlNEl6IiH3651/GrH54ynYVDBkyJI488shaD6PiPLeiMXnKtpn1iouDmRXyaUUVOAnK6plPK8ysV1wczKyQk6CqoBWSoHynovn4yMHMCrk4mFkhFwczK9Q0xUHSWpKulTQ7Tb2+U9LIlAg1IyU3HZjrv7ykH0p6RtJjqd/uadkcSdPTz0xJp0v6UG7d9ST9RtKTaXlbDXbZrKKa4oJkSoK6GbgiIsaktq3IZmIeHBHPSBoCTJZ0T0S8AXwfWBvYPCI
WSFqTXEIUsGNEvCZpIHAJ2WSrQ9KyXwJnRMRv0/LFld9Ls+pqiuIA7Ai8W5YENTXfISLmSXoVGCzpHeDrwPqlKdcR8QpwffmGI2K+pKOAFyV9GFgLWC4ifltaXqmdMqulZjmt2Jwl06M+QNJIYAVgNrAh2WzKN3uy8dTvOWA4WXLUG5JukvS4pLMkLbtUozerQ81SHLokaW3gSuCwiOjrKYDS7+WA7YFjgU8AGwCHFnymY+KsoTVLcZjBkulRAEhaFbgDOCkiHk7Ns4D10rJupVyJNuBpsuSoKRHxbEQsJEu93qZ8HcfEWaNrluJwL7Bi2fMttpS0A9mFyl9GxI2lZSk9+r+Bn6V8SSQNlrRELH264HgBWf7k34BHgdUklSLoRpNF25k1laYoDpFNLd0P2DndypwB/AD4bPo5ND20Zkopfg44mSwxaqakJ4Dbgfw1iN+n9j8CLwBHps9aRHZKMVHSdLLTjUsrvY9m1dYsdyuIiHnAlwoWfb+T/u+QPQ/juwXL2rr5rN8CW/Z+lGaNoymOHMys/7k4mFkhJ0FVgZOgrJ45CcrMesXFwcwKNc3dinrWbElQTn1qDT5yMLNCLg5mVsjFwcwKVbQ4SPq8pJC0Sa5tpKT7JT2Vpjz/QtIAZc6VNCulNm2T+rd3keZ0VdrOE5Iuk7R8ah8k6deSpqb1DsutU5jiJGmnlAg1RdIfJG1YsD+7pJSp6en36Ar+85nVVKWPHMYCf0i/SWlLN5A9TXvjiNgauBtYBdidLC9hODAOuDBto4MszWkzsqdlnyNptbTsKmATYAtgJeCI1D4emBkRWwGjgJ+UJliRpTidFREfB0YCr6b2C4EvR0Q7cDXZ3Ityr5E9mXsLslSoK/v0r2LWACpWHNJsxu2Aw4ExqXk8WZTbQ6V+EXFjSmHal2z2ZKSp1atJWjsino6IZ1LfeWR/zIPT+ztT/yCbIDW0tFlglRQfNxB4HVgoaVPKUpzSDM3SOqUp3IOAeeX7FBGPpzFANk18JUkrLs2/k1m9quStzH2BuyPiaUl/lbQtWWLTFZ30Xwd4Mfd+bmp7udRQluZErn154KvAN1PTecBtZH/gqwAHRsRiSe+lOAHrA78DTkgzLY8A7pT0NtnszE91s39fAB4rxcyZNZtKnlaMBa5Nr69N7/usmzSnC4D7I+KB9H5XYAowBGgHzkvBLl2lOH0b2CMihgKXAz/tYiybAT8iTePupI+ToKyhVeTIIQWxjga2kBTAsmSH7VeQJTbdWrDaS8C6ufdDU1tnaU6lzzqV7DQj/4d6GPDDdLoxS9JzZNcm3ktxSuveAnxK0m3AVhHxSFr/OrJrIUX7NpQsQObgiJhd1AeyJCiy1GqGDBniCSzWcCp15PBF4MqI+FhEtEXEumQBrb8DDpH0yVJHSfunC5W3AQenuxafAv4eES+nC4lLpDmldY8gO0oYW3Y08QKwU+qzJrAx8Cydpzj9DRiUTjsAdgGeLN+pdCH0DrJTkQf7+o9j1ggqVRzGkv1B5/2K7MLkGODH6Rbkk2R/3P8A7iT7A55Flqz0b2m9L9F5mtNFwJrAQ6n9lNT+feAzKalpItndkdc6S3FKWZBfB34laSrZ9YvjACTtI+k/03aPJkuuPiU3lo8u/T+XWf3xlO0qGDJkSBx5ZKeXJxqO51Y0F0/ZNrNecXEws0I+ragCJ0FZPfNphZn1iouDmRVycTCzQo6Jq4JGjYnzLcvW5iMHMyvk4mBmhZqqOPQieWp87uvP76RkpymSfpjW2U3SHyX9KbVfJ2m9tOyAlC61WNISt3/MmkWzXXPIJ0+dmkueGlMKmJH0ReCBiDg/vZ8D7BgRr6X3mwM/B/aJiCdT2z5AG9mErieA/YGLq7dbZtXXNMUhlzy1I/Br4FQ6SZ7qZlPHA/9VKgxpndtyr0sFo/8Gb1aHmum04r3kKSCfPDW5l9vZDHisvwdn1miaqTj0a/IUgKSPpGsOT0s6tpfrOgnKGlpTnFb0MXm
qMzOAbYCpEfFXoD0VhoG9GZOToKzRNcuRQ1+SpzpzJnCSpI/n2gZUZNRmdaxZikNfkqcKRcR0shTrX6Z1HgQ+TvYsCyTtJ2ku8GngDkn39PvemNWBpjitiIgdC9rOzb3dvot12wra7iDLiizqfzNLFiKzptMsRw5m1s9cHMyskJOgqsBJUFbPnARlZr3iI4cqkPQP4Klaj6MH1iB7kni98zj718ciYnB5Y1PcrWgATxUdttUbSZM8zv7TKOPsjE8rzKyQi4OZFXJxqI5Laj2AHvI4+1ejjLOQL0iaWSEfOZhZIReHCkt5lE9JmiXphFqPJ0/SnFx+5qTU9mFJv5X0TPq9eg3GdZmkVyU9kWsrHJcy56Z/32mStqnxOE+T9FIuo3SP3LIT0zifkrRrtcbZVy4OFSRpWeB8YHdgU2CspE1rO6ol7BgR7blbbicAEyNiODAxva+2CcBuZW2djWt3YHj6GQdcWKUxQvE4Ac5O/6btEXEnQPrffQxZ0thuwAXpv4+65eJQWSOBWRHxbES8Q5ZQtW+Nx9SdfclCcki/P1/tAUTE/cDrZc2djWtf4JeReRhYTdLaNRxnZ/YFro2IBRHxHDCL7L+PuuXiUFnrAC/m3s9NbfUigN9ImixpXGpbMyJeTq//DHQVjFNNnY2rHv+Nj06nOJflTsvqcZxdcnFobdtFxDZkh+bjJX02vzCyW1l1dzurXseVXAgMA9qBl4Gf1HQ0S8HFobJeAtbNvR+a2upCRLyUfr9KFmAzEnildFiefr9auxF+QGfjqqt/44h4JSIWRcRi4FLeP3Woq3H2hItDZT0KDJe0vqQVyC5I3dbNOlUhaWVJq5ReA58je2DPbcAhqdsh9C6ct5I6G9dtwMHprsWngL/nTj+qrux6x35k/6aQjXOMpBUlrU92AfWP1R5fb3jiVQVFxEJJRwP3kCViXxYRM2o8rJI1gZvTw3mWA66OiLslPQpcL+lw4HngS9UemKRrgFHAGimv81Tgh52M605gD7ILfB3AYTUe5yhJ7WSnPXOAIwEiYoak64GZwEJgfEQsqtZY+8LfkDSzQj6tMLNCLg5mVsjFwcwKuTiYWSEXBzMr5OJgZoVcHKxqJB0q6bylWP9uSW9Iur2sfX1Jj6Tp0NelL5zZUnJxsEZyFvDVgvYfkU2T3hD4G3B4VUfVpFwcrCYktUm6N81enChpvdQ+TNLDKYTmdEnzS+tExETKnpCu7Cueo4EbU1OX08wlTZB0kaRJkp6WtFd/71uzcHGwWvk5cEVEbAlcBZSeiv4z4GcRsQXZtObufAR4IyIWpvc9mQrdRjYhak/gIkkf6uXYW4KLg9XKp4Gr0+srge1y7Tek11eXr9RPro+IxRHxDPAssEmFPqehuThYRUkaX8pTBIZU4CP+Spb+VJpE2JOp0OUTijzBqICLg1VURJxfylME5uUW/R/ZFHaALwMPpNcPA19Ir8fQjRT88nvgi6mpJ9PMD5C0jKRhwAY0xnNMq87FwWrl34HDJE0juwPxzdT+LeCY1L4h8PfSCpIeIDvl2EnS3FyC8/FpnVlk1yD+u5vPfoEsS+Eu4KiI+Gf/7FJz8ZRtqyuSBgBvR0RIGgOMjYh+C+WVNAG4PSJu7K5vq3PYi9WbbYHz0i3KN4Cv1XY4rctHDtaUJJ0EHFDWfENEnFGL8TQiFwczK+QLkmZWyMXBzAq5OJhZIRcHMyvk4mBmhf4f8ZhKk2Vv70QAAAAASUVORK5CYII=\n", 665 | "text/plain": [ 666 | "
" 667 | ] 668 | }, 669 | "metadata": { 670 | "needs_background": "light" 671 | }, 672 | "output_type": "display_data" 673 | } 674 | ], 675 | "source": [ 676 | "import seaborn as sns\n", 677 | "import matplotlib.pyplot as plt\n", 678 | "\n", 679 | "plt.figure(figsize = (3,5))\n", 680 | "\n", 681 | "ax = sns.barplot(data = df[1:16], x = '-log10_p', y = 'gene', color = 'grey')\n", 682 | "\n", 683 | "plt.show()\n" 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": null, 689 | "id": "cb162306", 690 | "metadata": {}, 691 | "outputs": [], 692 | "source": [] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": null, 697 | "id": "b9e4915d", 698 | "metadata": {}, 699 | "outputs": [], 700 | "source": [] 701 | }, 702 | { 703 | "cell_type": "code", 704 | "execution_count": null, 705 | "id": "c7c9b80d", 706 | "metadata": {}, 707 | "outputs": [], 708 | "source": [] 709 | }, 710 | { 711 | "cell_type": "code", 712 | "execution_count": null, 713 | "id": "3f4d5cd6", 714 | "metadata": {}, 715 | "outputs": [], 716 | "source": [] 717 | }, 718 | { 719 | "cell_type": "code", 720 | "execution_count": null, 721 | "id": "9aad4a5c", 722 | "metadata": {}, 723 | "outputs": [], 724 | "source": [] 725 | }, 726 | { 727 | "cell_type": "code", 728 | "execution_count": null, 729 | "id": "aeee62a7", 730 | "metadata": {}, 731 | "outputs": [], 732 | "source": [] 733 | }, 734 | { 735 | "cell_type": "code", 736 | "execution_count": null, 737 | "id": "fae68899", 738 | "metadata": {}, 739 | "outputs": [], 740 | "source": [] 741 | }, 742 | { 743 | "cell_type": "code", 744 | "execution_count": null, 745 | "id": "9a14a5f8", 746 | "metadata": {}, 747 | "outputs": [], 748 | "source": [] 749 | }, 750 | { 751 | "cell_type": "code", 752 | "execution_count": null, 753 | "id": "45697472", 754 | "metadata": {}, 755 | "outputs": [], 756 | "source": [] 757 | } 758 | ], 759 | "metadata": { 760 | "kernelspec": { 761 | "display_name": "Python 3 (ipykernel)", 762 | "language": 
"python", 763 | "name": "python3" 764 | }, 765 | "language_info": { 766 | "codemirror_mode": { 767 | "name": "ipython", 768 | "version": 3 769 | }, 770 | "file_extension": ".py", 771 | "mimetype": "text/x-python", 772 | "name": "python", 773 | "nbconvert_exporter": "python", 774 | "pygments_lexer": "ipython3", 775 | "version": "3.9.12" 776 | } 777 | }, 778 | "nbformat": 4, 779 | "nbformat_minor": 5 780 | } 781 |
--------------------------------------------------------------------------------
/single_r.Rmd:
--------------------------------------------------------------------------------
---
title: "R Notebook"
output: html_notebook
---

```{r}
# requireNamespace() only checks availability; require() would also attach
# the package, which is not needed because install() is called with `::`.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# celldex supplies the reference used further down (celldex::MouseRNAseqData).
BiocManager::install(c("SingleR", "celldex"))
```
```{r}
install.packages('Seurat')
```

```{r}
# Seurat provides Read10X(), CreateSeuratObject(), DimPlot(), ... used below.
library(Seurat)

# Very basic Seurat preprocessing.
#
# Reads a 10x matrix directory, filters cells, and runs the standard
# normalise -> variable features -> scale -> PCA -> neighbours -> clusters
# -> UMAP sequence.
#
# Args:
#   Data.path: directory passed to Read10X() (e.g. ".../filtered_feature_bc_matrix").
#
# Returns:
#   The processed Seurat object (with "percent.mt" metadata, clusters at
#   resolution 0.3, and a UMAP computed from PCs 1-20).
prep_data <- function(Data.path) {
  raw_counts <- Read10X(data.dir = Data.path)
  seuset_data <- CreateSeuratObject(counts = raw_counts, min.cells = 3, min.features = 200)

  # Anchor the pattern: an unanchored "mt-" also matches genes that merely
  # contain "mt-" somewhere in their name.
  seuset_data[["percent.mt"]] <- PercentageFeatureSet(seuset_data, pattern = "^mt-")

  # Keep cells strictly inside the 1st-99th percentile of detected features
  # and with less than 15 percent.mt.
  n_features <- seuset_data$nFeature_RNA
  bounds <- quantile(n_features, probs = c(0.01, 0.99))
  keep <- n_features > bounds[[1]] &
    n_features < bounds[[2]] &
    seuset_data$percent.mt < 15
  # Subset by cell name; which() drops any NA comparison results.
  seuset_data <- subset(seuset_data, cells = colnames(seuset_data)[which(keep)])

  seuset_data <- NormalizeData(object = seuset_data, verbose = FALSE)
  seuset_data <- FindVariableFeatures(
    object = seuset_data,
    nfeatures = 3000,
    verbose = FALSE,
    selection.method = 'vst'
  )
  seuset_data <- ScaleData(seuset_data, verbose = FALSE)
  seuset_data <- RunPCA(seuset_data, npcs = 20, verbose = FALSE)
  seuset_data <- FindNeighbors(seuset_data, dims = 1:20)
  seuset_data <- FindClusters(seuset_data, resolution = 0.3)
  seuset_data <- RunUMAP(seuset_data, reduction = "pca", dims = 1:20)
  seuset_data
}
```


```{r}
data <- prep_data("Lung1/outs/filtered_feature_bc_matrix")
```
```{r}
a <- DimPlot(data, reduction = "umap", label = TRUE)

# Write the UMAP to disk, then also show it inline.
png("./umap_u.png", res = 250, width = 1500, height = 1500)

print(a)
dev.off()
#a + b + c
a
```

```{r}
library(SingleR)
```

```{r}
ref <- celldex::MouseRNAseqData()
```

```{r}
# Annotate every cell against the celldex reference's main labels.
results <- SingleR(test = as.SingleCellExperiment(data), ref = ref, labels = ref$label.main)
```

```{r}
data$singlr_labels <- results$labels
```

```{r}
DimPlot(data, reduction = 'umap', group.by = 'singlr_labels', label = TRUE)
```

```{r}
FeaturePlot(data, features = c("Ptprc", "Cd3e"))
```

```{r}
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

BiocManager::install("scRNAseq")
```
```{r}
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

BiocManager::install("scuttle")
```
```{r}
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

BiocManager::install("TabulaMurisData")
```
```{r}
library(ExperimentHub)
```
```{r}
eh <- ExperimentHub()
```
```{r}
query(eh, "TabulaMurisData")
```
```{r}
eh[['EH1617']]
```

```{r}
lung_ref <- eh[['EH1617']]
# %in% never yields NA, so cells with a missing tissue are dropped instead of
# producing NA column indices.
lung_ref <- lung_ref[, lung_ref$tissue %in% 'Lung']
lung_ref <- lung_ref[, !is.na(lung_ref$cell_ontology_class)]
```

```{r}
lung_ref
```

```{r}
library(scuttle)
```

```{r}
lung_ref <- logNormCounts(lung_ref)
```


```{r}
# Second annotation pass, this time against the lung-only reference.
results <- SingleR(test = as.SingleCellExperiment(data), ref = lung_ref, labels = lung_ref$cell_ontology_class)
```


```{r}
data$singlr_label <- results$labels
```

```{r}
a <- DimPlot(data, reduction = "umap", group.by = 'singlr_label', label = FALSE)

# Write the labelled UMAP to disk, then also show it inline.
png("./umap_l.png", res = 250, width = 2500, height = 1500)

print(a)
dev.off()
#a + b + c
a
```

--------------------------------------------------------------------------------
/soupX/readme.txt:
--------------------------------------------------------------------------------
R and python scripts for soupX

--------------------------------------------------------------------------------
/soupX/soupX_R_tutorial.Rmd:
--------------------------------------------------------------------------------
---
title: "soupX tutorial"
output: html_notebook
---


install.packages('SoupX')

```{r}
library(Seurat)
library(SoupX)
```


```{r}

# Flag cells whose metadata value lies more than `nmads` MADs from the median.
#
# Args:
#   sobj:   object whose @meta.data holds the metric column.
#   metric: name of the metadata column to test.
#   nmads:  number of median absolute deviations tolerated on either side.
#
# Returns:
#   Logical vector, TRUE where the value is outside median +/- nmads * MAD
#   (NA where the metric itself is NA).
mad_outlier <- function(sobj, metric, nmads) {
  values <- sobj@meta.data[[metric]]
  centre <- median(values, na.rm = TRUE)
  spread <- mad(values, na.rm = TRUE)
  (values < (centre - nmads * spread)) | (values > (centre + nmads * spread))
}

# Load one sample's filtered 10x matrix and drop QC outliers.
#
# Args:
#   sample_id: sample directory name; the matrix is read from
#              "<sample_id>/outs/filtered_feature_bc_matrix/".
#
# Returns:
#   Seurat object with sample_id, log1p count/gene metrics and percent.mt in
#   its metadata, restricted to cells that are not MAD outliers.
pp <- function(sample_id) {
  path <- paste0(sample_id, "/outs/filtered_feature_bc_matrix/")
  counts <- Read10X(data.dir = path)
  sobj <- CreateSeuratObject(counts = counts, min.cells = 0, min.features = 200)
  sobj$sample_id <- sample_id

  # add QC metrics
  sobj$log1p_total_counts <- log1p(sobj@meta.data$nCount_RNA)
  sobj$log1p_n_genes_by_counts <- log1p(sobj@meta.data$nFeature_RNA)
  sobj[["percent.mt"]] <- PercentageFeatureSet(sobj, pattern = "^mt-")

  # find outliers and subset: 5 MADs for the count metrics, 3 for percent.mt
  keep <- !mad_outlier(sobj, 'log1p_total_counts', 5) &
    !mad_outlier(sobj, 'log1p_n_genes_by_counts', 5) &
    !mad_outlier(sobj, 'percent.mt', 3)
  # Select by cell name; which() drops NA entries.
  sobj <- subset(sobj, cells = colnames(sobj)[which(keep)])

  sobj
}
```

```{r}
samples <- c('Lung1', 'Lung2', 'Lung3', 'Lung4', 'Lung6')
```


```{r}
# simplify = FALSE guarantees a list named by sample id, whatever pp() returns.
data_list <- sapply(samples, pp, simplify = FALSE)
```


```{r}
# Quick clustering used only to give SoupX cluster labels.
#
# Runs normalise -> variable features -> scale -> PCA -> neighbours ->
# clusters on a copy of `sobj` and returns the 'seurat_clusters' metadata
# column; the caller's object is not modified.
get_soup_groups <- function(sobj) {
  sobj <- NormalizeData(sobj, verbose = FALSE)
  sobj <- FindVariableFeatures(
    object = sobj,
    nfeatures = 2000,
    verbose = FALSE,
    selection.method = 'vst'
  )
  sobj <- ScaleData(sobj, verbose = FALSE)
  sobj <- RunPCA(sobj, npcs = 20, verbose = FALSE)
  sobj <- FindNeighbors(sobj, dims = 1:20, verbose = FALSE)
  sobj <- FindClusters(sobj, resolution = 0.5, verbose = FALSE)

  sobj@meta.data[['seurat_clusters']]
}
```

```{r}
# Store the clusters from get_soup_groups() as the 'soup_group' metadata column.
add_soup_groups <- function(sobj) {
  sobj$soup_group <- get_soup_groups(sobj)
  sobj
}
data_list <- lapply(data_list, add_soup_groups)
```

```{r}
data_list[["Lung1"]][[]]
```

```{r}
# Replace a sample's RNA counts with SoupX-adjusted counts.
#
# Args:
#   sobj: Seurat object carrying 'sample_id' and 'soup_group' metadata
#         (as produced by pp() and add_soup_groups()).
#
# Returns:
#   The same object with the unadjusted counts kept in an "original.counts"
#   assay and the RNA counts overwritten by adjustCounts() output.
#
# NOTE(review): the direct @assays$RNA@counts slot access looks like it
# assumes a Seurat v4-style Assay object - confirm before running on Seurat v5.
make_soup <- function(sobj) {
  sample_id <- as.character(sobj$sample_id[1]) # e.g. Lung1
  path <- paste0(sample_id, "/outs/raw_feature_bc_matrix/")
  raw <- Read10X(data.dir = path)

  # raw = all droplets, filtered counts = retained cells
  sc <- SoupChannel(raw, sobj@assays$RNA@counts)
  sc <- setClusters(sc, sobj$soup_group)
  sc <- autoEstCont(sc, doPlot = FALSE)
  out <- adjustCounts(sc, roundToInt = TRUE)

  # optional keep original
  sobj[["original.counts"]] <- CreateAssayObject(counts = sobj@assays$RNA@counts)

  sobj@assays$RNA@counts <- out

  sobj
}
```

```{r}
data_list <- lapply(data_list, make_soup)
```

```{r}
sum(data_list[["Lung1"]]@assays$original.counts@counts)
```

```{r}
# Fraction of the original counts that remain after soup removal.
sum(data_list[["Lung1"]]@assays$RNA@counts) / sum(data_list[["Lung1"]]@assays$original.counts@counts)
```

```{r}

```

-------------------------------------------------------------------------------- /test_significance_t_u_shapiro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a5975753", 6 | "metadata": {}, 7 | "source": [ 8 | "## Three steps that will cover the majority of cases:\n", 9 | "\n", 10 | "### 1) are my data normally distributed? shapiro test\n", 11 | "### 2) if yes: t-test\n", 12 | "### 3) if no: u-test" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "id": "3fe379a1", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "from scipy import stats #the only import you actually need for this\n", 23 | "import numpy as np\n", 24 | "import seaborn as sns\n", 25 | "import pandas as pd\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "%config InlineBackend.print_figure_kwargs={'facecolor' : \"w\"}" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "id": "74cbab56", 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "id": "24acca47", 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 2, 49 | "id": "06d7bc02", 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXgAAAEGCAYAAABvtY4XAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAUO0lEQVR4nO3de3CU9b3H8U+uhIQkQAi3XEiQWxIuuWwEGyBBRbBUpAEBBwVFmko7PToMc2Z0ysCMc06tQxXQlmmoRYVW2iP2IFLtjHECRYm4gjKQAwIm3NJCAiSQhCUk2fNHash9seHZJb/n/Zpxxt3fZp9vZpg3D88+z7N+brfbLQCAcfx9PQAAwBoEHgAMReABwFAEHgAMReABwFCBvh6gpQEDBighIcHXYwBAj1FaWqqKiooO1+6owCckJMjpdPp6DADoMRwOR6drHKIBAEMReAAwFIEHAEMReAAwFIEHAEMReAAwFIEHAEMReAAwFIEHAEMReAAwFIEHAEMReAAwFIEHAEMReAAwFIEHAEMReAAwFIEHAEMReAAwFIEHAEMReAAwFIEHAEMReAAwFIEHAEMReAAwlKWBf+WVV5SSkqKxY8fq0UcflcvlsnJzAIAWLAv8uXPntGHDBjmdTh0+fFgNDQ3atm2bVZsDALRh6R58fX29rl27pvr6etXW1mro0KFWbg4A0IJlgY+JidHKlSsVHx+vIUOGKDIyUg888EC71+Xn58vhcMjhcKi8vNyqcQDAdiwL/OXLl7Vjxw6VlJSorKxMNTU12rp1a7vX5eXlyel0yul0Kjo62qpxAMB2LAv8Rx99pMTEREVHRysoKEi5ubn69NNPrdocAKANywIfHx+voqIi1dbWyu12q6CgQElJSVZtDgDQhmWBnzhxoubNm6f09HSNGzdOjY2NysvLs2pzAIA2/Nxut9vXQ3zL4XDI6XT6egwA6DG66iZXsgKAoQg8ABiKwAOAoQg8ABiKwAOAoQg8ABiKwAOAoQg8ABgq0NcDALCJxkbpZIH0jy+lsGgpeY7Uu6+PhzIbgQdgvapz0h8ekS4cufnch89LP9woJT/su7kMxyEaANZ790et4y5JN2qk7cuky6d8M5MNEHgA1jpfLJ36pOO1hjrpwFvencdGCDwAa10u6Xr90jfemcOGCDwAa/VL7Hq9/3DvzGFDBB6AtQYlS8OyOl4LCJbSF3t3Hhsh8ACsN/d30qCxrZ8LCpPmvi71G+abmWyA0yQBWC9iqPT0XulEi/PgU+ZIIZG+nsxoBB6Ad/j5SSPvb/oPXsEhGgAwFIEHAEMReAAwFIEHAEMReAAwFIEHAEMReAAwFIEHAEMReAAwFIEHAEMReAAwFPeiAeA9Fcelf3zVdLOxhCmSP/uYViLwAKznuiK9myd9/cHN5/oOa7qNcNzdvpvLcPz1CcB6/7u8ddwlqfKUtHWeVF3um5lsgMADsNalEunoro7XrldJB/nSbasQeADWuvB/ktydr58/4rVR7IbAA7BWn0Ee1gd7Zw4bIvAArBWbIQ1M6WTRT0pb5NVx7ITAA7De3E1S2MA2T/pJM/5bGtRZ/NFdnCYJwHqDUqSffSEd+tPNL91OXSQNGOnryYxmaeArKyu1bNkyHT58WH5+fvr973+ve+65x8pNArhThURId//I11PYiqWBf+aZZzRz5ky98847qqurU21trZWbAwC0YFngr1y5oj179uiNN96QJAUHBys4ONiqzQEA2rDsQ9ZvvvlG0dHRevLJJ5WWlqZly5appqam3evy8/PlcDjkcDhUXs4VbQBwu1gW+Pr6eh04cEDLly/XwYMHFRYWphdffLHd6/Ly8uR0OuV0OhUdHW3VOABgO5YFPjY2VrGxsZo4caIkad68eTpw4IBVmwMAtGFZ4AcPHqy4uDgdO3ZMklRQUKDk5GSrNgcAaMPSs2heffVVLVq0SHV1dRo+fLg2b95s5eYAAC1YGvjU1FQ5nU4rNwEA6AS3KgAAQxF4ADA
UgQcAQxF4ADAUgQcAQxF4ADAUgQcAQxF4ADAUgQcAQxF4ADAUgQcAQxF4ADAUgQcAQxF4ADAUgQcAQxF4ADAUgQcAQ3kM/Pnz5/XUU0/pwQcflCQVFxfr9ddft3wwAED3eAz8E088oRkzZqisrEySNGrUKK1bt87quQAA3eQx8BUVFZo/f778/ZteGhgYqICAAMsHAwB0j8fAh4WF6eLFi/Lz85MkFRUVKTIy0vLBAADdE+jpBS+//LJmz56tkydPKisrS+Xl5XrnnXe8MRsAoBs8Bj49PV27d+/WsWPH5Ha7NXr0aAUFBXljNgBAN3gM/FtvvdXq8YEDByRJixcvtmYiAMBt4THwn3/+efP/u1wuFRQUKD09ncADwB3OY+BfffXVVo+rqqr0+OOPWzYQAOD2+M5XsoaGhur48eNWzAIAuI087sE/9NBDzadINjY2qri4WPPnz7d8MABA93gM/MqVK2++ODBQw4YNU2xsrKVDAQC6z2Pgs7OzvTEHAOA26zTw4eHhzYdmWnK73fLz89OVK1csHQyAYS6VSM7XpbIvpbBoKW2RNOJ+X09ltE4Df/XqVW/OAcBkJX+X/jhfulF787kj70qTfiLN/IXv5jKcx0M037pw4YJcLlfz4/j4eEsGAmCYxkZpx09ax/1bRb+RkudI8RO9PpYdeDxN8r333tPIkSOVmJio7OxsJSQkNN8bHgA8Or1Pqjzd+fqhP3lvFpvxGPhVq1apqKhIo0aNUklJiQoKCpSVleWN2QCYwFXlYb3SK2PYkcfABwUFKSoqSo2NjWpsbNS0adP05ZdfemE0AEaISZf8uzgaHDfJe7PYjMdj8H379lV1dbWmTJmiRYsWaeDAgQoMvOVD9wDsLnywlPaY9MUb7dciYqQJC70+kl143IOfOnWqKisrtX79es2cOVN33XWXdu7c6Y3ZAJji+2uliU9Lgb1vPjcsS1qyUwqJ8N1chvO4K+52uzVjxgz1799fCxcu1IIFCxQVFXXLG2hoaJDD4VBMTIzef//9bg0LoIcKCJIe/KWU85x08YQUGiX1T/T1VMbzuAe/evVqHTlyRL/+9a9VVlam7Oxs3X//rV+csH79eiUlJXVrSACG6N1XinUQdy+55btJDhw4UIMHD1ZUVJQuXLhwSz9z9uxZ7dq1S8uWLfu3BwQA/Hs8Bn7jxo3KycnRfffdp4qKCm3atEmHDh26pTd/9tln9dJLL8nf/zvflRgA0E0ej8GfOnVK69atU2pq6nd64/fff18DBw5URkaGCgsLO31dfn6+8vPzJUnl5eXfaRsAgM75ud1utxVv/Nxzz2nLli0KDAyUy+XSlStXlJubq61bt3b6Mw6HQ06n04pxbOfr81d1/UajRg8OV3Ag/4ICTNVVNy0LfEuFhYVau3atx7NoCHz3fXqyQmveO6Kvz1dLkgb06aWf3TtCS76X4NvBAFiiq25yxZJBjv7zip7c/Lmu1zc2P1dRfV2r3zuikCB/LcjkBnGAnXjl3+45OTmcA28Rt9ut6uv1crvd2rSnpFXcW/pN4Ul54R9rgNRQ7+sJ8C/swfdQNxoa9drHJ/SHz06rovq6BvTppfqGjuMuSacu1qr86nUNjAjx4pSwjfo6ae/LknOzVP1Pqf/wpitX786TWn5xUF2NdOmbpgudIob6bl6bIPA91Io/f6WdX5U1P66ovt7l6wP8/dQ7OMDqsWBX//OEdGzXzceXvpE++E/pcmnTF3o01EsfvyB9/rpUd1WSn3TXvdIPXpH6DfPR0Obj9IoeqLjsSqu434r7kwYqPCTIoolga6f2tY57S5/9Vqo6K/3tOemTdf+KuyS5pZMF0ps/kK5Xe2tS2yHwPVDh17d2JfG3hkSG6Oezki2aBrZ3/G+dr7kbpMPvdnwnSanpi0AObbNkLBD4HinoO1wZ3C8sSJufyFRc/1ALJ4Kt+Xk49Fd1Rmqo63z91L7bOw+aEfgeaEbK4FafW3Xlcs0NvbCr2NqBYG9
jZnW+5h8kJUzp+ud7hd/eedCMwPdA8VGh+tGU4bf8+k9OXFRpRY2FE8HWYtKl8Z18aceUFU1/AUTEdv7z4+dbMxcIfE/1/PeT9MqCCUqP76uosGBFhHR9QlRZ1TUvTQZbmrNRevAlKTpJCg6XhqZLuZukac9L/gHS7A1SYAen6DqWSsO+5/15bYLTJHuwH6bF6odpTXtG/7WrWJv+XtLh6wL8/ZQ4IMybo8Fu/P2liT9u+q8jI+6Tfvx3aX++9I+vpLBoKW1R14d30G0E3hALM+P1u70l6uhi1Yz4fhoS2bv9AuBN0aOkWWt9PYWtcIjGEMcvVHcYd0n6Z9U1blMA2BCBN8TOQ51f+HT68jUdOlvlxWkA3AkIvCFcdQ1drl+70fU6APMQeEPcc1dUp2vhvQI1PjbSi9MAuBMQeEPMz4xTTN+OP0jNmzpcocF8ng7YDYE3RERIkP7040manjxIAf5Nl7kOiuiln89K0s/uG+nj6QD4Art1BontF6pNix2qqr2hK64bGhIZosAA/g4H7IrAGygyNEiRodwaGLA7du8AwFDswRumsdGt/aWXVFlbp3GxfTv94BWA+Qi8QfaXXNKKP3+ps5ebbizm7yc9nBqjX+SOU0gQX9cH2A2BN0RZ5TU9uXm/alpc8NTolv5y8JyCA/z1y3njfTgdAF/gGLwh/vjZ6VZxb+kvB895/FJuAOYh8IY4Utb5vWbqGhr19fmrna4DMBOBN0R0eK+u1/t0vQ7APATeEI844jpdS43rq5GD+N5LwG4IvCEyE/rrP+4d0e75QRG9tPaRCT6YCICvcRaNQVY8MFoPpAzW9gNnVVl7Q6lxfZWbHqPwEK5qBeyIwBtmbEykxsZwa2AAHKIBAGMReAAwFIEHAEMReAAwFIEHAEMReAAwFIEHAEMReAAwFIEHAEMReAAwlGWBP3PmjKZNm6akpCSlpKRo/fr1Vm0KANABy+5FExgYqF/96ldKT0/X1atXlZGRoenTpys5OdmqTQIAWrBsD37IkCFKT0+XJIWHhyspKUnnzp2zanMAgDa8cjfJ0tJSHTx4UBMnTmy3lp+fr/z8fElSeXm5N8YBAFuw/EPW6upqzZ07V+vWrVNERES79by8PDmdTjmdTkVHR1s9DgDYhqWBv3HjhubOnatFixYpNzfXyk0BANqwLPBut1tPPfWUkpKStGLFCqs2AwDohGWB/+STT7RlyxZ9/PHHSk1NVWpqqv76179atTkAQBuWfcg6efJkud1uq94eAOABV7ICgKEIPAAYisADgKEIPAAYisADgKEIPAAYisADgKEIPAAYisADgKEIPAAYisADgKEIPAAYisADgKEIPAAYisADgKEIPAAYisADgKEIPAAYisADgKEIPAAYisADgKEIPAAYisADgKEIPAAYisADgKEIPAAYisADgKEIPAAYisADgKEIPAAYisADgKEIPAAYisADgKEIPAAYisADgKEIPAAYisADgKEIPAAYytLAf/jhhxo9erRGjBihF1980cpNAQDasCzwDQ0N+ulPf6oPPvhAxcXFevvtt1VcXGzV5gAAbVgW+P3792vEiBEaPny4goODtXDhQu3YscOqzQEA2gi06o3PnTunuLi45sexsbH67LPP2r0uPz9f+fn5kqSjR4/K4XBYNZKtlJeXKzo62tdjAB3iz+ftU1pa2umaZYF3u93tnvPz82v3XF5envLy8qwaw7YcDoecTqevxwA6xJ9P77DsEE1sbKzOnDnT/Pjs2bMaOnSoVZsDALRhWeAzMzN1/PhxlZSUqK6uTtu2bdPs2bOt2hwAoA3LDtEEBgbqtdde04wZM9TQ0KClS5cqJSXFqs2hDQ574U7Gn0/v8HN3dLAcANDjcSUrABiKwAOAoQg8AK8oLS3V2LFjfT2GrRB4ADAUgTfQnDlzlJGRoZSUlOarhIE7QX19vZYsWaLx48dr3rx5qq2t9fVIRuMsGgNdunRJ/fv317Vr15SZmandu3c
rKirK12PB5kpLS5WYmKi9e/cqKytLS5cuVXJyslauXOnr0YzFHryBNmzYoAkTJmjSpEk6c+aMjh8/7uuRAElSXFycsrKyJEmPPfaY9u7d6+OJzGbZhU7wjcLCQn300Ufat2+fQkNDlZOTI5fL5euxAEnt70fV0f2pcPuwB2+Yqqoq9evXT6GhoTp69KiKiop8PRLQ7PTp09q3b58k6e2339bkyZN9PJHZCLxhZs6cqfr6eo0fP16rVq3SpEmTfD0S0CwpKUlvvvmmxo8fr0uXLmn58uW+HslofMgKAIZiDx4ADEXgAcBQBB4ADEXgAcBQBB4ADEXgAcBQBB5oo76+3tcjALcFgYftvPDCCxozZoymT5+uRx99VGvXrlVOTo6ef/55ZWdna/369SooKFBaWprGjRunpUuX6vr165KkhIQEVVRUSJKcTqdycnIkSWvWrNHjjz+ue++9VyNHjtSmTZt89esBzbgXDWzF6XRq+/btOnjwoOrr65Wenq6MjAxJUmVlpXbv3i2Xy6WRI0eqoKBAo0aN0uLFi7Vx40Y9++yzXb73oUOHVFRUpJqaGqWlpWnWrFkaOnSoF34roGPswcNW9u7dq4cffli9e/dWeHi4Hnrooea1BQsWSJKOHTumxMREjRo1SpK0ZMkS7dmzx+N7f/u+AwYM0LRp07R//35rfgngFhF42EpXd+YICwvz+JrAwEA1NjZKUru7dHKnRNxpCDxsZfLkydq5c6dcLpeqq6u1a9eudq8ZM2aMSktLdeLECUnSli1blJ2dLanpGPwXX3whSdq+fXurn9uxY4dcLpcuXryowsJCZWZmWvzbAF0j8LCVzMxMzZ49WxMmTFBubq4cDociIyNbvSYkJESbN2/WI488onHjxsnf319PP/20JGn16tV65plnNGXKFAUEBLT6ubvvvluzZs3SpEmTtGrVKo6/w+e4myRsp7q6Wn369FFtba2mTp2q/Px8paend+s916xZoz59+vD1c7ijcBYNbCcvL0/FxcVyuVxasmRJt+MO3KnYgwcAQ3EMHgAMReABwFAEHgAMReABwFAEHgAM9f8Ule/46Sio0AAAAABJRU5ErkJggg==\n", 55 | "text/plain": [ 56 | "
" 57 | ] 58 | }, 59 | "metadata": { 60 | "needs_background": "light" 61 | }, 62 | "output_type": "display_data" 63 | } 64 | ], 65 | "source": [ 66 | "#generate normally distributed values\n", 67 | "group_a = np.random.normal(4, 1, 5) #mu, sigma, n\n", 68 | "group_b = np.random.normal(5, 1, 5) #mu, sigma, n\n", 69 | "df = pd.DataFrame(np.concatenate((np.vstack((group_a, np.array(['a']*len(group_a)))).T,\n", 70 | " np.vstack((group_b, np.array(['b']*len(group_a)))).T)), columns = ['value', 'group'])\n", 71 | "df['value'] = df['value'].astype('float')\n", 72 | "ax = sns.swarmplot(data = df, x = 'group', y = 'value', s = 8)\n", 73 | "plt.ylim(bottom = 0, top = df.value.max() + 2)\n", 74 | "plt.show()" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 3, 80 | "id": "0db544c9", 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "Ttest_indResult(statistic=-2.141696254533902, pvalue=0.0646055063603198)" 87 | ] 88 | }, 89 | "execution_count": 3, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "stats.ttest_ind(group_a, group_b)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 4, 101 | "id": "52f5d905", 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "text/plain": [ 107 | "Ttest_indResult(statistic=-4.273273869671518, pvalue=0.0016285682897101618)" 108 | ] 109 | }, 110 | "execution_count": 4, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "stats.ttest_ind([1,3,4,3,2], [4,5,6,6,5,4,5]) #input can just be list of numbers" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 5, 122 | "id": "d23231ee", 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "data": { 127 | "text/plain": [ 128 | "ShapiroResult(statistic=0.9919436573982239, pvalue=0.9807631969451904)" 129 | ] 130 | }, 131 | "execution_count": 5, 132 | "metadata": {}, 133 | "output_type": 
"execute_result" 134 | } 135 | ], 136 | "source": [ 137 | "#shapiro test for normal distribution\n", 138 | "stats.shapiro(np.random.normal(10, 1, 50) ) #p value > 0.05 if it is normally distributed" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 6, 144 | "id": "90f0bc0d", 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "data": { 149 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEGCAYAAAB/+QKOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAigUlEQVR4nO3df1TUdaL/8ecA/kJ+qCgKjokKiIpCMJheFTXDH3ijbCt13dQvFse+tdXd2u/tbqdbu93ddU+7e63cq9HXutZ+F3ezdm1d9Grkj9VUdlJLJYlMFAH5oaKIAgLz/WNqaOSDrJszH2Jej3M4Z96fHzMvPJ558fltcTgcDkRERK7hZ3YAERHpnFQQIiJiSAUhIiKGVBAiImJIBSEiIoYCzA5wM/Xv35+oqCizY4iIfGsUFxdTXV1tOK9LFURUVBR2u93sGCIi3xo2m63dedrFJCIihlQQIiJiyGO7mDIzM9m0aRPh4eEcOXIEgPnz51NYWAhATU0Nffr04dChQ23WjYqKIjg4GH9/fwICArTbSETEBB4riKVLl/Loo4+yePFi17Tf//73rtdPPvkkoaGh7a6/fft2+vfv76l4IiLSAY8VRGpqKsXFxYbzHA4Hf/jDH/jggw889fEiIvINmXIM4q9//SsDBw4kJibGcL7FYmHmzJkkJyeTnZ193ffKzs7GZrNhs9moqqryRFwREZ9kymmuOTk5LFy4sN35e/bsITIyksrKStLS0oiLiyM1NdVw2aysLLKysoDrn64lIiI3xutbEE1NTbz77rvMnz+/3WUiIyMBCA8PZ968eeTn53srnoiIfMnrBfH+++8TFxeH1Wo1nF9XV0dtba3r9datW4mPj/dmRBERwYMFsXDhQiZOnEhhYSFWq5W1a9cCsH79+ja7l8rKykhPTwegoqKCyZMnk5CQwPjx45k7dy6zZ8/2VEwREWmHpSs9Uc5ms+maCRGRG3C9701dSS0iIoZUECIiYkgFISIihlQQIiJiSAUhIiKGVBAiImJIBSEiIoZUECIiYkgFIW00tzi41NBkdgwRMZkpd3OVzmvz4XKe3XiU6ksNTI0dwMsLbiU0sJvZsUTEBNqCEJeL9Vd58u2Pqb7UAMDOz6pYmfeZyalExCwqCHE5UVXH5cZmt2lHyy6alEZEzKaCEJe4iGDCend3mzZphJ4LLuKrVBA+7njVJTZ8dJovqi7RI8CftUtTSInqy6CQnmROGsbD00aYHVFETKKD1D7sD38r4V/f/QSHAywW+OW9CXwn2cov70ug9PwVkob2pXuA/oYQ8VUqCB/24tZCvnoaiMMBv9payMlzl3nlgyIcDhgY0oOchyYwfECQuUFFxBT689CH1V1zrUNtQxO/2f65qzQqLjbwygefG65bVFHLxyU1dKHnTYnINVQQPux7E4a6jWePGURzi/sX/kcnz3Pbz95nxq92kHu4HIfDwWM5B0n7z13c9Zs93P2bPdTWX/VmbBHxEhWED8tIiCSwu79rHBHag7hBwW7LnDp3mYqLDRyvquP7OQfZeKiM9z4uc83/+PQF1ueXeC2ziHiPCsKHvfJBkdt1D6t3fsHiCUMJ6RmABeh/zSmvzS0O9p842+Z9
yi/UezqqiJhABeHDzl5qdBtfbXbw09xPuVjfhAOormtss869yVaCe7ae2+BngX9OiPB0VBExgccKIjMzk/DwcOLj413Tnn/+eQYPHkxiYiKJiYnk5uYarrtlyxZGjhxJdHQ0K1as8FREn/edZKvbeOSgYOquuZI6PLgH3f396BPYjRfuGkPy0H68vXwi9yQNZk78INZljifplr7ejC0iXmJxeOg0lF27dhEUFMTixYs5cuQI4CyIoKAgnnrqqXbXa25uJjY2lm3btmG1WklJSSEnJ4fRo0d3+Jk2mw273X7Tfgdf8N7HZWw9eoaosN4suu0WZq3cxcX61rObHpk+gifuiMXfYsHPz2JiUhHxhOt9b3psCyI1NZV+/frd8Hr5+flER0czfPhwunfvzoIFC9i4caMHEgo4D1Sv+m4ST80aSUSfXrx4bwL9enfH38/CxOFhPDo9hm7+fioHER/k9WMQq1atYty4cWRmZnL+/Pk280tLSxkyZIhrbLVaKS0tbff9srOzsdls2Gw2qqqqPJLZl6za/jnn6hppbnGw94uzbC04Y3YkETGJVwvi4Ycf5vjx4xw6dIiIiAiefPLJNssY7fGyWNr/6zUrKwu73Y7dbmfAgAE3Na+v+bT8IodLL7hNe9t+2qQ0ImI2rxbEwIED8ff3x8/Pj4ceeoj8/Pw2y1itVkpKWs+rP336NJGRkd6M6bP6BHbj2j1J/a451VVEfIdXC6K8vNz1+o9//KPbGU5fSUlJoaioiBMnTtDY2Mj69evJyMjwZkyfFRHai4emDHeN+/XuzqO3R5uYSETM5LGb9S1cuJAdO3ZQXV2N1Wrlxz/+MTt27ODQoUNYLBaioqJ49dVXASgrK+PBBx8kNzeXgIAAVq1axaxZs2hubiYzM5MxY8Z4KqZcY1b8IHZ9VkXphSvMiR9EVFhvsyOJL2tqcN5JsltPs5P4JI+d5moGneb6zdRfbWbSig84+7UL5J64I4Yn7og1MZX4rB0rYM9L0NIEtkyYvcJ5X3q5qa73vanbfYtL4Zlat3IA+PD4WZ64w6RA4rtOfgg7ft463r8GhoyHc1/AsVwIi4YZz0KfW8zL6ANUEOIyfEBvArv7u92fKT4y1MRE4rPKP247Lf//wqkPna/LDsCZw/C/92qrwoN0LyZxCe7ZjV/fn8jAkB5YLHB7XDiP3xFjdizxRVGTgWu++C9VuI+rPoWzx70WyRdpC0LczI4fxMzRA2loaqHX124FLuJVX+wAvnZ4NPQWGDQWzn2tELr1guBB3k7mU7QFIW34+VlUDmKuwxvcxxdO0WaLwr+nsyTEY1QQItL5hAx2Hwf0gurP3KfVn9cuJg9TQYhI53P7MxA00PnaLwDueB6sye7L9Oqrs5g8TMcgRKTzGTgGnjgMpR9B32EQEgF7XnFfJmSwLqDzMG1BiEjnFNADhv6TsxwAPr3mtv8VR+B8sddj+RIVhIh8O/S65vkyft2gR4g5WXyECkJEvh2m/Sv0+NqFm6lPQeCNP5RM/n46BiFt1FxupOJiA7EDg677LA4RrxqcDP9yGIp3O2+1MWCk2Ym6PBWEuPnvPSf42eZjNDa1EBMexLrM8UT20bnm0kl0D3KeuRQUbnYSn6BdTOJy9lIDP839lMamFgCKKi/xcl6RyalEvnT2OLySDGsmw6/i4KN1Zifq8lQQ4lJ+oZ6rze53fy8+W2dSGpFrfPACnD/hfN1UD1v+DRpqzc3UxakgxGVURAi39At0mzYnPsKkNCLXOHfCfXy1Di5VmpPFR+gYhLj4+1l4a9l4Vr5fRMm5y6SPjWDxxKFmxxJxGp0B5YdaxwPjIWyEaXF8gQpC3AwN681/zk80O4YIXDkPH/031FXD2Hth0r84b7txLBf6R8O0H0HlMefDhJqvQsoyGJxkduouRQUhIp1PSzO8MRcqjzrH+9fA0r/AxEdhxAwItTqPQ6xNg4aLzmUOvw3L/6rTX28iFYSIdD4n97SWAzifS/3h
K1Bx1HmgOqAnjMpoLQeA5gY48g5M/5H383ZROkgtIp1P995tp31VDuDceijY2HaZ3gM8m8vHeKwgMjMzCQ8PJz4+3jXthz/8IXFxcYwbN4558+ZRU1NjuG5UVBRjx44lMTERm83mqYgi0lkNTobYOa3jXv2cN+/7uuYGsI5vHUckQsJCr8TzFR4riKVLl7Jlyxa3aWlpaRw5coRPPvmE2NhYfv7zn7e7/vbt2zl06BB2u91TEUWkM1vwO/jeuzDvVfj+R3DLBPf5gWHw4DZYts15fOKh7dAjyJysXZTHjkGkpqZSXFzsNm3mzJmu1xMmTGDDhmseKygi8hU/P4ie0To+f8p9/pUa5xlOQ8YjnmHaMYjXX3+dOXPmGM6zWCzMnDmT5ORksrOzr/s+2dnZ2Gw2bDYbVVVVnogqIp3B1Wuu6nc0w9Ur5mTxEaacxfTTn/6UgIAAFi1aZDh/z549REZGUllZSVpaGnFxcaSmphoum5WVRVZWFoCOV4h0ZbZMKNnfOo5Ogz5DzMvjA7xeEOvWrWPTpk3k5eW1eyvpyMhIAMLDw5k3bx75+fntFoTcfGU1VyiruULCkD5089eJbtJJJCyA3v2/vFAuBpKWmJ2oy/NqQWzZsoVf/OIX7Ny5k8DAQMNl6urqaGlpITg4mLq6OrZu3cq///u/ezOmT1v1QRG/3vYZLQ4Y3KcXv3voNoaGGZxyKGKG6DucP+IVHvvzcOHChUycOJHCwkKsVitr167l0Ucfpba2lrS0NBITE1m+fDkAZWVlpKenA1BRUcHkyZNJSEhg/PjxzJ07l9mzZ3sqpnxNZW09K98vouXLG7qW1lzh5bzPzQ0lIqbx2BZETk5Om2nLli0zXDYyMpLc3FwAhg8fzscff+ypWHIdlRcbaGpxv913+QUdBJROorkJdr0Ix/7ivBfTjOeg3zCzU3Vp2sEs/K34HH/4Wwn9encnJtz9PPJjZ2qZ+uJ21uefamdtES/Z9SLsXAEVh+HoH+F394PD0fF68g/TvZh83PPvHeW/PywGoEeAHy8tSOTD42c5XHqBg6dqOFfXyLm6Rp5+9zCxg4JJuqWvuYHFd3222X1c/ZnzKXP9o83J4wO0BeHDqmobeHNvsWvc0NTCW3tP0q93dxqutrRZfu/xs15MJ3KN/rHu4+5BEKIHWnmStiB8WENTM9cccuBQSQ172imCcdZQL6QSaUfYNVsKQeHGN/WTm0ZbED7M2jeQ2+PC3abVNTa7jf0s0KubP9+/PZopMbpTppioaKv7+NwXbR9DKjeVtiB83H8tSuJtewlfVNeRdEtfHlt/0O2435z4CFYuSNQFc2K+oEHuY/8e0KuPKVF8hQrCx/Xs5s8DE6Nc41d3HedIqfMhLBbgzoRIlYN0DtP/DUr2weWzYPGD25+BXjppwpNUEOJSfuEKBWWtT+hyALmHy5kdP6j9lUS8ZdBYeOKI835MYSOgzy1mJ+ryVBDiUnP5apuD1ucvN5oTRsRI90AYMd3sFD5D+w7EZVRESJszle6z6W6ZIr5KWxDi5s3M8by+p5jT5y6TPjaCO0YPNDuSiJhEBSFu+gR25wdpsR0vKOJpJz+EPS9DcyPcthxiZ3a8jtxUKggR6XzOF8Obd0Nzg3P8xXZ46APnvZc+2wJhMTBmHvjrK8yT9K8rIp3PZ//TWg4AjhbYvRI+fc/5Gpz3Zrr3dVPi+QodpBaRzqdvVNtpFUdaywHgyLtwscxrkXyRCkJEOp/oNBg3v3UcMwuCr7kex2IBi793c/kY7WISkc7Hzw/uyYbpz0BLk/PCuM/fh1P7nGOAxEUQrLPsPEkFISKdV9+hra+j74CH9zpv2tc/xrmVIR6lgpA2KmvrOXOhnjGRofj7WcyOI9JqQKzzR7xCBSFuXt15nBf/p5CmFgdDwwL57bLbGNIv0OxYImICHaQWl6raBlc5AJw8e5mX84pMTiUiZvFYQWRmZhIeHk58fLxr2rlz50hLSyMmJoa0tDTO
nz9vuO6WLVsYOXIk0dHRrFixwlMR5RoVF+td5fCV0+evmJRGRMzmsYJYunQpW7ZscZu2YsUKZsyYQVFRETNmzDD88m9ubuaRRx5h8+bNFBQUkJOTQ0FBgadi+rzDpy/w0cnzOBwORkeEMLy/+yMc547TM39FfJXHCiI1NZV+/fq5Tdu4cSNLliwBYMmSJfzpT39qs15+fj7R0dEMHz6c7t27s2DBAjZu3OipmD6rqbmF//VGPneu2s13Vn/IPas/pL6pmUemjyCoh/Pc8vjBIcy7dbDJSUXELF49BlFRUUFEhPMv0oiICCorK9ssU1paypAhrbeYtlqtlJaWtvue2dnZ2Gw2bDYbVVVVNz90F5V3rJLtha3/XgdP1fC7/ad4/r0CLjU4n0t9pPQiL3+gYxAivqrTHaR2OBxtplks7Z9qmZWVhd1ux263M2DAAE9G61IqaxvaTPusopbahia3aQdP1XgpkYh0Nh0WREVFBcuWLWPOnDkAFBQUsHbt2n/owwYOHEh5eTkA5eXlhIeHt1nGarVSUlLiGp8+fZrIyMh/6POkfTNHDySoR+tZzt38LSy6bSihvbq5LTc+qt+1q4qY58g7sCETtv8c6i+YnabL67Agli5dyqxZsygrc94UKzY2lpUrV/5DH5aRkcG6desAWLduHXfddVebZVJSUigqKuLEiRM0Njayfv16MjIy/qHPk/YNDOnJ28sncl+ylbsTI1mfNYGEIX1Y8k9D8f9yiy08uAffm6Dn/konYX/dWQ5H3oGdKyBnodmJurwOC6K6upr7778fPz/nogEBAfj7d3yDrIULFzJx4kQKCwuxWq2sXbuWp59+mm3bthETE8O2bdt4+umnASgrKyM9Pd31/qtWrWLWrFmMGjWK+++/nzFjxnyT31HaMSoihBfvS2DlgltJHtqPi/VXeW3XCZq/3M1XWdvAmp1fmJxS5EsH/5/7+OQe53MjxGM6vJK6d+/enD171nUcYN++fYSGhnawFuTk5BhOz8vLazMtMjKS3Nxc1zg9Pd1VGOI9xdV1XLna7DatoPyiSWlErtG7v/vYvzv0CDEni4/osCB+/etfk5GRwfHjx5k0aRJVVVVs2LDBG9nEy0YOCqZ/UA+qL7UewE6N6X+dNUS8aNrTcGpv67GH1P8DgTpG5kkWh9FpQ9doamqisLAQh8PByJEj6datW0ermMJms2G3282O8a12+PQFXvhLAafOXSYjIZIfzhpJN/9Od7Kb+Kr6i85nVYdFQ/9os9N0Cdf73uxwC+LNN990Gx84cACAxYsX34Ro0tkUVtTyadlFahuasBef48KVq/QP6mF2LPE1tWfgvceg+K8QeSvc+bKzEK5egfoa5494XIcF8be//c31ur6+nry8PJKSklQQXVDN5Uae+eNhGpqcj3U8cKqGl94v4oW74ztYU+Qm2/QDKPof5+uTe+DdB2HWz+G398DVy87p//R9mPkf5mX0AR0WxCuvvOI2vnDhAg888IDHAol5Tp277CqHr3xWUWtSGvFpJ/e4j8sOwo4VreUAsPe/YNITbQ9ey01zwzuXAwMDKSrS7Re6olERIQwK6ek27fa4thczinicNcV9PDAemq65s7Cj2bnLSTymwy2IO++803WKa0tLCwUFBdx///0eDybe183fj3WZ4/nFlmOUnLtM+tgIHpwy3OxY4ov++T/hj8vh5G6ISIC7V8OZw1Cyv3WZ6DToM6T995BvrMOzmHbu3Ol6HRAQwNChQ7FarR4P9o/QWUwiXUxLC/h9bUfH53lQmOs8iyl5KXTrZVq0ruIbncU0derUmx5IROTv4nfNXvDoGc4f8Yp2CyI4ONjwLqoOhwOLxcLFi7rCVkSkK2u3IGprdfaKiIgv63AX01cqKyupr693jW+5RXf5FBHpyjo8zfW9994jJiaGYcOGMXXqVKKiolzPhhAR8aqLZXDwt3Bqf8fLyjfWYUE8++yz7Nu3j9jYWE6cOEFeXh6TJk3yRjYR
kVbFe+DlJNj4CLw+E7b8yOxEXV6HBdGtWzfCwsJoaWmhpaWF6dOnc+jQIS9EExH5mr/+0v1iuf1roK7avDw+oMNjEH369OHSpUtMmTKFRYsWER4eTkDA333oQkTk5mi87D7WldQe1+EWRGpqKjU1Nbz00kvMnj2bESNG8Oc//9kb2UREWqU86D6Ona0rqT2sw00Bh8PBrFmz6NevHwsWLGD+/PmEhYV5I5uISKtx90HQADiWC/1j4FbdNNTTOtyCeO655zh69Ci/+c1vKCsrY+rUqdxxxx3eyCYi4q6x7sufS9DSZHaaLu/vPpgQHh7OoEGDCAsLo7Ky0pOZRETaOvAWvPdo6/iLHbB4o2lxfEGHWxCrV69m2rRpzJgxg+rqal577TU++eQTb2QTEWl1YJ37+IsdcP6kKVF8RYcFcfLkSVauXMnRo0f58Y9/zOjRo7/RBxYWFpKYmOj6CQkJYeXKlW7L7Nixg9DQUNcyP/nJT77RZ4pIF9Czj/vYrxv0CDYliq/ocBfTihUrbuoHjhw50nUdRXNzM4MHD2bevHltlpsyZQqbNm26qZ8tIt9iU/8VTu11Hn8AmPQ4BPYzN1MXZ+oFDXl5eYwYMYKhQ4eaGUNEvg2GpMATh+HETgiLgUF6Vrqn3fAjR2+m9evXs3DhQsN5e/fuJSEhgTlz5nD06FEvJxORTuns51D+MZQfgqZGs9N0eR0+Uc5TGhsbiYyM5OjRowwcONBt3sWLF/Hz8yMoKIjc3Fwef/zxdp+DnZ2dTXZ2NgBVVVWcPKmDViJd0tE/wdtLgS+/skZlwPy3TAzUNVzviXKmbUFs3ryZpKSkNuUAEBISQlBQEADp6elcvXqV6mrje65kZWVht9ux2+0MGDDAo5lFxET7X8VVDgCfvgcXSk2L4wtMK4icnJx2dy+dOXOGrzZs8vPzaWlp0dXbIr7Ov5v72OLXdprcVKYcpL58+TLbtm3j1VdfdU1bs2YNAMuXL2fDhg2sXr2agIAAevXqxfr16w0ffyoiPmTyvzjPYmr+8thD8lIICjc1Uldn2jEIT7jevjQR6QLOnYDP34ewETB8OugPx2/set+bum+3iHx79BsG4x8yO4XPMPU0VxER6bxUECIiYkgFISIihlQQIiJiSAUhIiKGVBAiImJIBSEiIoZUECIiYkgFISIihlQQIiJiSAUhIiKGVBAiImJIBSEiIoZUECIiYkgFISIihlQQIiJiSAUhIiKGVBAiImJIBSEiIoZUECIiYsiUgoiKimLs2LEkJiZis9nazHc4HDz22GNER0czbtw4Dhw4YEJKERHfFmDWB2/fvp3+/fsbztu8eTNFRUUUFRWxf/9+Hn74Yfbv3+/lhCIivq1T7mLauHEjixcvxmKxMGHCBGpqaigvLzc7loiITzGlICwWCzNnziQ5OZns7Ow280tLSxkyZIhrbLVaKS0t9WZEERGfZ8oupj179hAZGUllZSVpaWnExcWRmprqmu9wONqsY7FYDN8rOzvbVTJVVVWeCSwi4oNM2YKIjIwEIDw8nHnz5pGfn+8232q1UlJS4hqfPn3atc61srKysNvt2O12BgwY4LnQIiI+xusFUVdXR21trev11q1biY+Pd1smIyODN998E4fDwb59+wgNDSUiIsLbUUVEfJrXdzFVVFQwb948AJqamvjud7/L7NmzWbNmDQDLly8nPT2d3NxcoqOjCQwM5I033vB2TBERn2dxGO3w/5ay2WzY7XazY4iIfGtc73uzU57mKiIi5lNBiIiIIRWEiIgYUkGIiIghFYSIiBhSQYiIiCEVhIiIGFJBiIiIIRWEiIgYUkGIiIghFYSIiBhSQYiIiCEVhIiIGFJBiIiIIRWEiIgYUkGIiIghFYSIiBhSQYiIiCEVhIiIGFJBiIiIIRWEiIgYUkGIiIghrxdESUkJ06dPZ9SoUYwZM4aXXnqpzTI7duwgNDSUxMREEhMT+clPfuLtmCIiPi/A6x8YEMCvfvUr
kpKSqK2tJTk5mbS0NEaPHu223JQpU9i0aZO344mIyJe8vgURERFBUlISAMHBwYwaNYrS0lJvxxARkQ6YegyiuLiYgwcPctttt7WZt3fvXhISEpgzZw5Hjx5t9z2ys7Ox2WzYbDaqqqo8GVdExKdYHA6Hw4wPvnTpElOnTuWZZ57hnnvucZt38eJF/Pz8CAoKIjc3l8cff5yioqIO39Nms2G32z0VWUSky7ne96YpWxBXr17lO9/5DosWLWpTDgAhISEEBQUBkJ6eztWrV6murvZ2TBERn+b1gnA4HCxbtoxRo0bxgx/8wHCZM2fO8NWGTX5+Pi0tLYSFhXkzpoiIz/P6WUx79uzhrbfeYuzYsSQmJgLws5/9jFOnTgGwfPlyNmzYwOrVqwkICKBXr16sX78ei8Xi7agiIj7NtGMQnqBjECIiN6bTHYMQEZHOTwUhIiKGVBAiImJIBSEiIoZUECIiYkgFISIihlQQIiJiSAUhIiKGVBAiImJIBSEiIoZUECIiYkgFISIihlQQIiJiSAUhIiKGVBAiImJIBSEiIoZUECIiYkgFISIihlQQIiJiSAUhIiKGVBAiImLIlILYsmULI0eOJDo6mhUrVrSZ73A4eOyxx4iOjmbcuHEcOHDAhJQiIr7N6wXR3NzMI488wubNmykoKCAnJ4eCggK3ZTZv3kxRURFFRUVkZ2fz8MMPezumiIjP83pB5OfnEx0dzfDhw+nevTsLFixg48aNbsts3LiRxYsXY7FYmDBhAjU1NZSXl3s7qoiITwvw9geWlpYyZMgQ19hqtbJ///4OlyktLSUiIqLN+2VnZ5OdnQ3AsWPHsNlsHkruW6qqqhgwYIDZMUQM6f/nzVNcXNzuPK8XhMPhaDPNYrHc8DJfycrKIisr6+aEExebzYbdbjc7hogh/f/0Dq/vYrJarZSUlLjGp0+fJjIy8oaXERERz/J6QaSkpFBUVMSJEydobGxk/fr1ZGRkuC2TkZHBm2++icPhYN++fYSGhhruXhIREc/x+i6mgIAAVq1axaxZs2hubiYzM5MxY8awZs0aAJYvX056ejq5ublER0cTGBjIG2+84e2YPk+77aQz0/9P77A4jHb4i4iIz9OV1CIiYkgFISIihlQQIvKtUFxcTHx8vNkxfIoKQkREDKkgpI27776b5ORkxowZ47pKXaQzaGpqYsmSJYwbN457772Xy5cvmx2pS9NZTNLGuXPn6NevH1euXCElJYWdO3cSFhZmdizxccXFxQwbNozdu3czadIkMjMzGT16NE899ZTZ0bosbUFIGy+//DIJCQlMmDCBkpISioqKzI4kAsCQIUOYNGkSAN/73vfYvXu3yYm6Nq9fKCed244dO3j//ffZu3cvgYGBTJs2jfr6erNjiQBt78nW3j3a5ObQFoS4uXDhAn379iUwMJBjx46xb98+syOJuJw6dYq9e/cCkJOTw+TJk01O1LWpIMTN7NmzaWpqYty4cTz77LNMmDDB7EgiLqNGjWLdunWMGzeOc+fO6WFiHqaD1CIiYkhbECIiYkgFISIihlQQIiJiSAUhIiKGVBAiImJIBSEiIoZUECI3WVNTk9kRRG4KFYTIDXrhhReIi4sjLS2NhQsX8stf/pJp06bxox/9iKlTp/LSSy+Rl5fHrbfeytixY8nMzKShoQGAqKgoqqurAbDb7UybNg2A559/ngceeIDbb7+dmJgYXnvtNbN+PREX3YtJ5AbY7XbeeecdDh48SFNTE0lJSSQnJwNQU1PDzp07qa+vJyYmhry8PGJjY1m8eDGrV6/miSeeuO57f/LJJ+zbt4+6ujpuvfVW5s6dS2RkpBd+KxFj2oIQuQG7d+/mrrvuolevXgQHB3PnnXe65s2fPx+AwsJChg0bRmxsLABLlixh165dHb73V+/bv39/pk+fTn5+vmd+CZG/kwpC5AZc7840vXv37nCZgIAAWlpaANrcJVd3KpXORgUhcgMmT57Mn//8Z+rr67l0
6RJ/+ctf2iwTFxdHcXExn3/+OQBvvfUWU6dOBZzHID766CMA3nnnHbf1Nm7cSH19PWfPnmXHjh2kpKR4+LcRuT4VhMgNSElJISMjg4SEBO655x5sNhuhoaFuy/Ts2ZM33niD++67j7Fjx+Ln58fy5csBeO6553j88ceZMmUK/v7+buuNHz+euXPnMmHCBJ599lkdfxDT6W6uIjfo0qVLBAUFcfnyZVJTU8nOziYpKekbvefzzz9PUFCQHp8pnYrOYhK5QVlZWRQUFFBfX8+SJUu+cTmIdFbaghAREUM6BiEiIoZUECIiYkgFISIihlQQIiJiSAUhIiKG/j9OoRMXxp6IigAAAABJRU5ErkJggg==\n", 150 | "text/plain": [ 151 | "
" 152 | ] 153 | }, 154 | "metadata": { 155 | "needs_background": "light" 156 | }, 157 | "output_type": "display_data" 158 | } 159 | ], 160 | "source": [ 161 | "#generate random distribution of numbers, i.e., not normally distributed\n", 162 | "group_a = np.random.uniform(low=9, high=17, size=(20,))\n", 163 | "group_b = np.random.uniform(low=7, high=15, size=(20,))\n", 164 | "df = pd.DataFrame(np.concatenate((np.vstack((group_a, np.array(['a']*len(group_a)))).T,\n", 165 | " np.vstack((group_b, np.array(['b']*len(group_a)))).T)), columns = ['value', 'group'])\n", 166 | "df['value'] = df['value'].astype('float')\n", 167 | "ax = sns.swarmplot(data = df, x = 'group', y = 'value')\n", 168 | "plt.ylim(bottom = 0, top = df.value.max() + 2)\n", 169 | "plt.show()" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 7, 175 | "id": "e41b85da", 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/plain": [ 181 | "ShapiroResult(statistic=0.8799253702163696, pvalue=0.017637841403484344)" 182 | ] 183 | }, 184 | "execution_count": 7, 185 | "metadata": {}, 186 | "output_type": "execute_result" 187 | } 188 | ], 189 | "source": [ 190 | "stats.shapiro(group_a)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 8, 196 | "id": "212ccb52", 197 | "metadata": {}, 198 | "outputs": [ 199 | { 200 | "data": { 201 | "text/plain": [ 202 | "ShapiroResult(statistic=0.9161470532417297, pvalue=0.08355055004358292)" 203 | ] 204 | }, 205 | "execution_count": 8, 206 | "metadata": {}, 207 | "output_type": "execute_result" 208 | } 209 | ], 210 | "source": [ 211 | "stats.shapiro(group_b)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 9, 217 | "id": "000c02b4", 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "data": { 222 | "text/plain": [ 223 | "MannwhitneyuResult(statistic=267.0, pvalue=0.0720454304673439)" 224 | ] 225 | }, 226 | "execution_count": 9, 227 | "metadata": {}, 228 | "output_type": 
"execute_result" 229 | } 230 | ], 231 | "source": [ 232 | "stats.mannwhitneyu(group_a, group_b)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 10, 238 | "id": "f40c3e62", 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "data": { 243 | "text/plain": [ 244 | "ShapiroResult(statistic=0.8577129244804382, pvalue=0.14439702033996582)" 245 | ] 246 | }, 247 | "execution_count": 10, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "stats.shapiro([4,5,6,6,5,4,5]) #example from value list" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 11, 259 | "id": "9386650d", 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "data": { 264 | "text/plain": [ 265 | "MannwhitneyuResult(statistic=1.0, pvalue=0.008167958654692957)" 266 | ] 267 | }, 268 | "execution_count": 11, 269 | "metadata": {}, 270 | "output_type": "execute_result" 271 | } 272 | ], 273 | "source": [ 274 | "stats.mannwhitneyu([1,3,4,3,2], [4,5,6,6,5,4,5]) #example from value list" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "id": "4aec45f5", 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 12, 288 | "id": "f558a08c", 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "#example function that tests distribution then picks test accordingly\n", 293 | "def test_sig(x, y):\n", 294 | " if stats.shapiro(x).pvalue >= 0.05 and stats.shapiro(y).pvalue >= 0.05: #if they are normally distriuted\n", 295 | " print(\"t-test\")\n", 296 | " return stats.ttest_ind(x,y)\n", 297 | " else:\n", 298 | " print(\"mann-whitney\")\n", 299 | " return stats.mannwhitneyu(x, y)" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 13, 305 | "id": "752d96d8", 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "name": "stdout", 310 | "output_type": "stream", 311 | 
"text": [ 312 | "t-test\n" 313 | ] 314 | }, 315 | { 316 | "data": { 317 | "text/plain": [ 318 | "Ttest_indResult(statistic=-10.808567545708417, pvalue=8.002021519806372e-18)" 319 | ] 320 | }, 321 | "execution_count": 13, 322 | "metadata": {}, 323 | "output_type": "execute_result" 324 | } 325 | ], 326 | "source": [ 327 | "test_sig(np.random.normal(10, 1, 50), np.random.normal(13, 2, 40))" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 14, 333 | "id": "4540acbf", 334 | "metadata": {}, 335 | "outputs": [ 336 | { 337 | "name": "stdout", 338 | "output_type": "stream", 339 | "text": [ 340 | "mann-whitney\n" 341 | ] 342 | }, 343 | { 344 | "data": { 345 | "text/plain": [ 346 | "MannwhitneyuResult(statistic=7075.0, pvalue=4.0031006370913845e-07)" 347 | ] 348 | }, 349 | "execution_count": 14, 350 | "metadata": {}, 351 | "output_type": "execute_result" 352 | } 353 | ], 354 | "source": [ 355 | "test_sig(np.random.random(100)*1.4,np.random.random(100))" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "id": "5e348816", 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [] 365 | } 366 | ], 367 | "metadata": { 368 | "kernelspec": { 369 | "display_name": "Python 3 (ipykernel)", 370 | "language": "python", 371 | "name": "python3" 372 | }, 373 | "language_info": { 374 | "codemirror_mode": { 375 | "name": "ipython", 376 | "version": 3 377 | }, 378 | "file_extension": ".py", 379 | "mimetype": "text/x-python", 380 | "name": "python", 381 | "nbconvert_exporter": "python", 382 | "pygments_lexer": "ipython3", 383 | "version": "3.10.1" 384 | } 385 | }, 386 | "nbformat": 4, 387 | "nbformat_minor": 5 388 | } 389 | -------------------------------------------------------------------------------- /tutorial_complex_Heatmap.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "R Notebook" 3 | output: html_notebook 4 | --- 5 | 6 | ```{r} 7 | if 
(!requireNamespace("BiocManager", quietly = TRUE)) 8 | install.packages("BiocManager") 9 | 10 | BiocManager::install("DESeq2") 11 | ``` 12 | 13 | ```{r} 14 | library(DESeq2) 15 | library(ggplot2) 16 | ``` 17 | 18 | ```{bash} 19 | ls 20 | ``` 21 | 22 | 23 | ```{r} 24 | Counts <- read.delim("count_table.csv", header = TRUE, row.names = 1, sep = ",") 25 | ``` 26 | 27 | ```{r} 28 | Counts 29 | ``` 30 | 31 | ```{r} 32 | Counts <- Counts[which(rowSums(Counts) > 0),] 33 | ``` 34 | 35 | 36 | ```{r} 37 | Counts 38 | ``` 39 | 40 | ```{r} 41 | condition <- factor(c("C","C","C","C", "S","S","S","S")) 42 | ``` 43 | 44 | ```{r} 45 | coldata <- data.frame(row.names = colnames(Counts), condition) 46 | ``` 47 | 48 | ```{r} 49 | coldata 50 | ``` 51 | 52 | ```{r} 53 | dds <- DESeqDataSetFromMatrix(countData = Counts, colData = coldata, design = ~condition) 54 | ``` 55 | 56 | ```{r} 57 | dds <- DESeq(dds) 58 | ``` 59 | 60 | ```{r} 61 | vsdata <- vst(dds, blind=FALSE) 62 | ``` 63 | 64 | ```{r} 65 | plotPCA(vsdata, intgroup = "condition") 66 | ``` 67 | 68 | ```{r} 69 | plotDispEsts(dds) 70 | ``` 71 | 72 | ```{r} 73 | res <- results(dds, contrast = c("condition", "S", "C")) 74 | ``` 75 | 76 | ```{r} 77 | res 78 | ``` 79 | 80 | 81 | ```{r} 82 | sigs <- na.omit(res) 83 | ``` 84 | 85 | ```{r} 86 | sigs <- sigs[sigs$padj < 0.05,] 87 | ``` 88 | 89 | ```{r} 90 | sigs 91 | ``` 92 | 93 | ```{r} 94 | write.csv(sigs, file = "deseq_results.csv") 95 | ``` 96 | 97 | 98 | ```{bash} 99 | ls 100 | ``` 101 | 102 | ```{r} 103 | sigs 104 | ``` 105 | 106 | ```{r} 107 | df <- as.data.frame(sigs) 108 | df 109 | ``` 110 | 111 | 112 | 113 | ```{r} 114 | ensembl_map <- read.csv('ensemble_key_mapper.csv', header = FALSE) 115 | 116 | #lookup list: column V1 (keys) -> column V2 (values); used below to fill df$symbol 117 | keys <- ensembl_map$V1 118 | values <- ensembl_map$V2 119 | 120 | l <- list() 121 | for (i in seq_along(keys)){ 122 | l[keys[i]] <- values[i] 123 | } 124 | 125 | 126 | ``` 127 | 128 | 129 | 130 | 131 | 132 | ```{r} 133 | #for non-mapped labels 134 | no_values <-
setdiff(rownames(df), keys) 135 | for (i in seq_along(no_values)){ 136 | l[no_values[i]] <- 'NA' 137 | } 138 | ``` 139 | 140 | 141 | ```{r} 142 | df$symbol <- unlist(l[rownames(df)], use.names = FALSE) 143 | ``` 144 | 145 | 146 | 147 | ```{r} 148 | df.top <- df[ (df$baseMean > 50) & (abs(df$log2FoldChange) > 2),] 149 | df.top 150 | ``` 151 | 152 | ```{r} 153 | df.top <- df.top[order(df.top$log2FoldChange, decreasing = TRUE),] 154 | ``` 155 | 156 | 157 | ```{r} 158 | rlog_out <- rlog(dds, blind=FALSE) #get normalized count data from dds object 159 | mat<-assay(rlog_out)[rownames(df.top), rownames(coldata)] #sig genes x samples 160 | colnames(mat) <- rownames(coldata) 161 | base_mean <- rowMeans(mat) 162 | mat.scaled <- t(apply(mat, 1, scale)) #center and scale each row, i.e. each gene (Z-score); apply() returns genes as columns, so transpose back 163 | colnames(mat.scaled)<-colnames(mat) 164 | ``` 165 | 166 | 167 | ```{r} 168 | num_keep <- 25 169 | #first num_keep and last num_keep rows (df.top is sorted by log2FoldChange, decreasing); unique() avoids duplicates when there are fewer than 2*num_keep rows 170 | rows_keep <- unique(c(head(seq_len(nrow(mat.scaled)), num_keep), tail(seq_len(nrow(mat.scaled)), num_keep))) 171 | ``` 172 | 173 | 174 | 175 | ```{r} 176 | l2_val <- as.matrix(df.top[rows_keep,]$log2FoldChange) #getting log2 value for each gene we are keeping 177 | colnames(l2_val)<-"logFC" 178 | 179 | ave_expr <- as.matrix(df.top[rows_keep,]$baseMean) #getting mean value for each gene we are keeping 180 | colnames(ave_expr)<-"AveExpr" 181 | ``` 182 | 183 | 184 | 185 | 186 | 187 | ```{r} 188 | if (!requireNamespace("BiocManager", quietly = TRUE)) 189 | install.packages("BiocManager") 190 | 191 | BiocManager::install("ComplexHeatmap") 192 | ``` 193 | 194 | 195 | 196 | ```{r} 197 | library(ComplexHeatmap) 198 | library(RColorBrewer) 199 | library(circlize) 200 | ``` 201 | 202 | 203 | 204 | ```{r} 205 | #maps values between b/w/r for min and max l2 values 206 | col_logFC <- colorRamp2(c(min(l2_val),0, max(l2_val)), c("blue", "white", "red")) 207 | 208 | #maps between 0% quantile, and 75% quantile of mean values --- 0, 25, 50, 75, 100 209 | col_AveExpr <-
colorRamp2(c(quantile(ave_expr)[1], quantile(ave_expr)[4]), c("white", "red")) 210 | ``` 211 | 212 | 213 | 214 | ```{r} 215 | #three side-by-side heatmaps: row Z-scores (h1), log2 fold change (h2), base mean (h3) 216 | ha <- HeatmapAnnotation(summary = anno_summary(gp = gpar(fill = 2), 217 | height = unit(2, "cm"))) 218 | 219 | h1 <- Heatmap(mat.scaled[rows_keep,], cluster_rows = FALSE, 220 | column_labels = colnames(mat.scaled), name="Z-score", 221 | cluster_columns = TRUE) 222 | h2 <- Heatmap(l2_val, row_labels = df.top$symbol[rows_keep], 223 | cluster_rows = FALSE, name="logFC", top_annotation = ha, col = col_logFC, 224 | cell_fun = function(j, i, x, y, w, h, col) { # add text to each grid 225 | grid.text(round(l2_val[i, j],2), x, y) 226 | }) 227 | h3 <- Heatmap(ave_expr, row_labels = df.top$symbol[rows_keep], 228 | cluster_rows = FALSE, name = "AveExpr", col=col_AveExpr, 229 | cell_fun = function(j, i, x, y, w, h, col) { # add text to each grid 230 | grid.text(round(ave_expr[i, j],2), x, y) 231 | }) 232 | 233 | h<-h1+h2+h3 234 | h 235 | ``` 236 | 237 | 238 | ```{r} 239 | png("./heatmap_v1.png", res = 300, width = 3000, height = 5500) 240 | print(h) 241 | dev.off() 242 | ``` 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | --------------------------------------------------------------------------------