├── README.md ├── cellchat ├── cellchat_lr_breast.ipynb ├── cellchat_lr_glioblastoma.ipynb ├── cellchat_lr_lung.ipynb ├── cellchat_lr_melanoma.ipynb ├── cellchat_lr_pancreas.ipynb ├── cellchat_lr_prostate.ipynb └── cellchat_lr_sarcoma.ipynb ├── differential_expression ├── bright_pb_vs_tr_deg.csv ├── de_up_adaptive.csv ├── de_up_brights.csv ├── de_up_cd57_pos.csv ├── de_up_kir_pos.csv ├── de_up_nkg2a_pos.csv ├── deg_pseudobulk.ipynb └── dim_pb_vs_tr_deg.csv ├── environment.yml ├── mapping_tumor_data └── map_tink_to_ref.ipynb ├── milo └── milo_r.ipynb ├── nk_reference └── normal_reference_nk.ipynb ├── pb_nk ├── meta_nk_aucell.ipynb ├── metank_gene_sig.txt ├── pb_nk_integration.ipynb └── pseudotime_trends.ipynb ├── regulatory_networks └── scenic_all_nk.ipynb ├── scripts └── decontx_all.py ├── spatial └── spatial_data_melanoma.ipynb ├── survival_analysis ├── nhood_markers_all.csv ├── tcga_survival_breast.ipynb ├── tcga_survival_gbm.ipynb ├── tcga_survival_lung.ipynb ├── tcga_survival_pancreas.ipynb ├── tcga_survival_prostate.ipynb ├── tcga_survival_sarc.ipynb └── tcga_survival_skcm.ipynb ├── tissue_integration ├── train_scvi_brain_normal_tissue_all_cells.ipynb ├── train_scvi_breast_normal_tissue_all_cells.ipynb ├── train_scvi_lung_normal_tissue_all_cells.ipynb ├── train_scvi_pancreas_normal_tissue_all_cells.ipynb ├── train_scvi_prostate_normal_tissue_all_cells.ipynb └── train_scvi_skin_normal_tissue_all_cells.ipynb └── tumor_integration ├── train_scvi_breast_tumor_all_cells.ipynb ├── train_scvi_glioblastoma_tumor_all_cells.ipynb ├── train_scvi_lung_tumor_all_cells.ipynb ├── train_scvi_melanoma_tumor_all_cells.ipynb ├── train_scvi_pancreas_tumor.ipynb ├── train_scvi_prostate_tumor_all_cells.ipynb └── train_scvi_sarcoma_tumor_all_cells.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Pan-cancer profiling of tumor-infiltrating natural killer cells through transcriptional reference mapping 2 | We describe a 
transcriptional reference map of human natural killer (NK) cells based on analysis of scRNA-seq data from both blood and tissue. We also analyze data from various solid tumors and map these onto this reference. 3 | 4 | 5 | ### Data 6 | The gene expression data generated for this paper is available at NCBI GEO with accession number GSE245690 and raw sequencing data is available at EGA with accession number EGAS50000000014. The details about the publicly available data included in the analysis are available in Supplemental tables S1, S2 and S3. Processed data and models have also been made available on Zenodo (https://doi.org/10.5281/zenodo.10139343) and as an online resource at http://nk-scrna.malmberglab.com/. 7 | 8 | ### Citation 9 | 10 | Netskar, H., Pfefferle, A., Goodridge, J.P. et al. Pan-cancer profiling of tumor-infiltrating natural killer cells through transcriptional reference mapping. Nat Immunol (2024). https://doi.org/10.1038/s41590-024-01884-z 11 | -------------------------------------------------------------------------------- /differential_expression/bright_pb_vs_tr_deg.csv: -------------------------------------------------------------------------------- 1 | ,p_val,lfc_mean,comparison,group1,group2 2 | BOP1,0.0,-2.76402115,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 3 | PSMA2,0.0003999999999999,-3.7520975,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 4 | CNBD2,0.0006,-2.90462925,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 5 | S1PR2,0.001,-2.906996,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 6 | MLLT1,0.0018,-2.5398525,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 7 | SLC5A3,0.0018,-2.932918,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 8 | CCL4L2,0.0036,-4.2268445,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 9 | NAA60,0.0036,-2.31744265,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 10 | ZNF26,0.0048,-2.1598449,pb_CD56bright vs 
tr_CD56bright,pb_CD56bright,tr_CD56bright 11 | MAP1LC3B2,0.0053999999999999,-2.4647608,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 12 | CLN3,0.0056,-2.98025725,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 13 | IKZF3,0.0058,-1.9523335,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 14 | PHKG1,0.007,-2.42111635,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 15 | SNRPN,0.007,-1.9984381,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 16 | FCRL1,0.0076,-2.31681085,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 17 | GPR89B,0.0076,-2.19977975,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 18 | LRRC32,0.008,-2.22786615,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 19 | PROB1,0.0088,-1.9505713,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 20 | SCGB1A1,0.0091999999999999,-2.9693825,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 21 | TAS2R4,0.0091999999999999,-2.007922,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 22 | KCNC4,0.01,-2.0154915,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 23 | GMPPB,0.011,-2.0619165,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 24 | CD4,0.0111999999999999,-2.22581365,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 25 | KRT19,0.0111999999999999,-1.9637259,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 26 | ALPL,0.0118,-2.11624815,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 27 | CCL4,0.0121999999999999,-2.1702975,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 28 | AREG,0.0123999999999999,-2.9210632,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 29 | NEK5,0.0125999999999999,-1.9284998,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 30 | CD101,0.013,-2.71466065,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 31 | ADAMTSL4,0.0131999999999999,-2.32180165,pb_CD56bright vs 
tr_CD56bright,pb_CD56bright,tr_CD56bright 32 | DAB2IP,0.0135999999999999,-2.34844635,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 33 | RAPGEF5,0.014,-2.005295,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 34 | TLCD2,0.0143999999999999,-1.9427197,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 35 | CD200,0.0148,-2.004511,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 36 | GOLGA6L9,0.0148,-2.1279032,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 37 | KIR2DL4,0.0151999999999999,-2.26822375,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 38 | FABP4,0.0158,-2.31095575,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 39 | HGH1,0.0158,-2.0082375,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 40 | VCAM1,0.0178,-2.07527875,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 41 | ADAMTS17,0.018,-2.11274815,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 42 | SSTR2,0.019,-1.96810915,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 43 | ADIRF,0.0191999999999999,-1.903949,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 44 | COLEC12,0.0193999999999999,-2.5455155,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 45 | CSF3,0.02,-1.9936505,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 46 | CCL3,0.0208,-2.197789,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 47 | LINC00623,0.0211999999999999,-2.41912415,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 48 | PLEKHG1,0.0218,-2.105088,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 49 | SLC22A31,0.0228,-2.1296122,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 50 | CCL20,0.0235999999999999,-2.071835,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 51 | FCN3,0.024,-2.14468265,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 52 | MAPK11,0.0242,-1.96474375,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 53 | 
PDCD1,0.0245999999999999,-2.11278485,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 54 | PPARG,0.0245999999999999,-2.17645025,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 55 | NR4A1,0.026,-1.95851265,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 56 | EIF4EBP3,0.0268,-1.98449455,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 57 | IFNG,0.0272,-1.94155835,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 58 | MSMB,0.0273999999999999,-1.96967185,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 59 | LIF,0.0285999999999999,-2.1386855,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 60 | LAG3,0.0285999999999999,-1.9229406,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 61 | TFF3,0.0288,-1.9135463,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 62 | ZBTB32,0.0295999999999999,-1.9855144,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 63 | CH25H,0.0298,-1.93597815,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 64 | GCSAM,0.0302,-1.9912733,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 65 | SCGB3A1,0.0302,-2.1193465,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 66 | NR5A2,0.0308,-2.143848,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 67 | SFTPA2,0.0328,-2.2648795,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 68 | SFTPA1,0.0358,-2.16216325,pb_CD56bright vs tr_CD56bright,pb_CD56bright,tr_CD56bright 69 | -------------------------------------------------------------------------------- /differential_expression/de_up_adaptive.csv: -------------------------------------------------------------------------------- 1 | ,p_val,lfc_mean,comparison,group1,group2 2 | LAG3,0.01140456182472993,3.429098,Adaptive vs Rest,Adaptive,Rest 3 | TPRG1,0.014805922368947622,3.223274,Adaptive vs Rest,Adaptive,Rest 4 | CD3G,0.021608643457383003,3.2557805,Adaptive vs Rest,Adaptive,Rest 5 | CD3D,0.02200880352140855,2.8584,Adaptive vs 
Rest,Adaptive,Rest 6 | LINC00944,0.024609843937575038,2.4433143,Adaptive vs Rest,Adaptive,Rest 7 | MT1E,0.025210084033613467,2.5400834,Adaptive vs Rest,Adaptive,Rest 8 | PTMS,0.027611044417767072,2.0949793,Adaptive vs Rest,Adaptive,Rest 9 | NSG1,0.027611044417767072,2.1801746,Adaptive vs Rest,Adaptive,Rest 10 | GDPD5,0.03221288515406162,1.9429904,Adaptive vs Rest,Adaptive,Rest 11 | LILRB1,0.03461384553821534,2.173475,Adaptive vs Rest,Adaptive,Rest 12 | ZNF683,0.03461384553821534,1.7962058,Adaptive vs Rest,Adaptive,Rest 13 | MCOLN2,0.037214885954381716,1.8035908,Adaptive vs Rest,Adaptive,Rest 14 | KLRC2,0.039015606242497,1.730746,Adaptive vs Rest,Adaptive,Rest 15 | HBA2,0.03941576630652266,1.9326701,Adaptive vs Rest,Adaptive,Rest 16 | JAKMIP1,0.04001600640256098,1.6203659,Adaptive vs Rest,Adaptive,Rest 17 | CDKN2A,0.04001600640256098,1.7987171,Adaptive vs Rest,Adaptive,Rest 18 | B3GAT1,0.041816726690676265,2.0496426,Adaptive vs Rest,Adaptive,Rest 19 | RAB11FIP5,0.042617046818727466,1.7063745,Adaptive vs Rest,Adaptive,Rest 20 | CADM1,0.04361744697879155,1.8269724,Adaptive vs Rest,Adaptive,Rest 21 | HBA1,0.044617847138855526,1.805651,Adaptive vs Rest,Adaptive,Rest 22 | CD3E,0.04681872749099636,1.572491,Adaptive vs Rest,Adaptive,Rest 23 | CD52,0.04801920768307322,1.5066476,Adaptive vs Rest,Adaptive,Rest 24 | MIAT,0.04801920768307322,1.8538712,Adaptive vs Rest,Adaptive,Rest 25 | MT1F,0.048419367747098874,1.392345,Adaptive vs Rest,Adaptive,Rest 26 | GSC,0.04961984793917562,1.5933071,Adaptive vs Rest,Adaptive,Rest 27 | -------------------------------------------------------------------------------- /differential_expression/de_up_brights.csv: -------------------------------------------------------------------------------- 1 | ,p_val,lfc_mean,comparison,group1,group2 2 | GPR183,0.009003601440576214,6.285398,CD56bright vs Rest,CD56bright,Rest 3 | LSR,0.012404961984793905,4.5500937,CD56bright vs Rest,CD56bright,Rest 4 | SCML1,0.01340536214485799,4.5577207,CD56bright vs 
Rest,CD56bright,Rest 5 | COTL1,0.014805922368947622,3.879388,CD56bright vs Rest,CD56bright,Rest 6 | TNFRSF11A,0.01540616246498594,4.5678854,CD56bright vs Rest,CD56bright,Rest 7 | IL7R,0.016606642657062798,4.3428907,CD56bright vs Rest,CD56bright,Rest 8 | SELL,0.017607042817126883,3.4740772,CD56bright vs Rest,CD56bright,Rest 9 | MYC,0.01880752300920363,4.1500235,CD56bright vs Rest,CD56bright,Rest 10 | ANO9,0.019207683073229287,3.7253182,CD56bright vs Rest,CD56bright,Rest 11 | TOX2,0.02000800320128049,4.383712,CD56bright vs Rest,CD56bright,Rest 12 | LEF1,0.02200880352140855,3.1148262,CD56bright vs Rest,CD56bright,Rest 13 | S100A13,0.02280912364945975,2.6687405,CD56bright vs Rest,CD56bright,Rest 14 | GAB1,0.025610244097639012,2.933383,CD56bright vs Rest,CD56bright,Rest 15 | ID3,0.02601040416166467,4.5148683,CD56bright vs Rest,CD56bright,Rest 16 | RUNX2,0.02621048419367744,3.6585102,CD56bright vs Rest,CD56bright,Rest 17 | DLL1,0.026610644257703098,3.962849,CD56bright vs Rest,CD56bright,Rest 18 | GNA15,0.027210884353741527,3.7796278,CD56bright vs Rest,CD56bright,Rest 19 | GZMK,0.027811124449779956,4.635734,CD56bright vs Rest,CD56bright,Rest 20 | PPP1R9A,0.029011604641856703,2.4981825,CD56bright vs Rest,CD56bright,Rest 21 | TSPAN4,0.029211684673869587,3.1267219,CD56bright vs Rest,CD56bright,Rest 22 | WNT11,0.029211684673869587,3.7044983,CD56bright vs Rest,CD56bright,Rest 23 | HVCN1,0.03081232492997199,2.7438998,CD56bright vs Rest,CD56bright,Rest 24 | YBX3,0.03261304521808728,3.2776875,CD56bright vs Rest,CD56bright,Rest 25 | MAML3,0.03301320528211282,3.1240966,CD56bright vs Rest,CD56bright,Rest 26 | XCL1,0.033213285314125596,3.621864,CD56bright vs Rest,CD56bright,Rest 27 | TIAM1,0.03701480592236894,2.731915,CD56bright vs Rest,CD56bright,Rest 28 | SPRY1,0.043217286914765896,3.0067346,CD56bright vs Rest,CD56bright,Rest 29 | TMEM14C,0.0440176070428171,2.1036067,CD56bright vs Rest,CD56bright,Rest 30 | TTLL10,0.04541816726690673,2.618513,CD56bright vs Rest,CD56bright,Rest 31 | 
HAPLN3,0.04561824729891961,2.9495122,CD56bright vs Rest,CD56bright,Rest 32 | MMP25,0.047218887555022016,2.4840047,CD56bright vs Rest,CD56bright,Rest 33 | B3GALNT1,0.047218887555022016,2.7522516,CD56bright vs Rest,CD56bright,Rest 34 | CLNK,0.049219687875150075,2.540661,CD56bright vs Rest,CD56bright,Rest 35 | FXYD7,0.04941976790716285,2.8110745,CD56bright vs Rest,CD56bright,Rest 36 | -------------------------------------------------------------------------------- /differential_expression/de_up_cd57_pos.csv: -------------------------------------------------------------------------------- 1 | ,p_val,lfc_mean,comparison,group1,group2 2 | PTGDS,0.031212484993997647,2.4036758,CD57+ vs Rest,CD57+,Rest 3 | ZNF683,0.03301320528211282,1.93766,CD57+ vs Rest,CD57+,Rest 4 | S100B,0.039215686274509776,1.4937865,CD57+ vs Rest,CD57+,Rest 5 | B3GAT1,0.04081632653061229,2.1979692,CD57+ vs Rest,CD57+,Rest 6 | COL13A1,0.04081632653061229,1.8461539,CD57+ vs Rest,CD57+,Rest 7 | HBA1,0.04361744697879155,1.5930859,CD57+ vs Rest,CD57+,Rest 8 | HBA2,0.04361744697879155,1.4869615,CD57+ vs Rest,CD57+,Rest 9 | IFI27,0.046618647458983586,1.3252943,CD57+ vs Rest,CD57+,Rest 10 | NSG1,0.04741896758703479,1.1485823,CD57+ vs Rest,CD57+,Rest 11 | SORBS2,0.04821928771508599,1.4136593,CD57+ vs Rest,CD57+,Rest 12 | PALLD,0.04961984793917562,1.6887579,CD57+ vs Rest,CD57+,Rest 13 | -------------------------------------------------------------------------------- /differential_expression/de_up_kir_pos.csv: -------------------------------------------------------------------------------- 1 | ,p_val,lfc_mean,comparison,group1,group2 2 | PTGDS,0.02941176470588236,1.9585695,KIR+ vs Rest,KIR+,Rest 3 | PCSK5,0.039215686274509776,1.8463434,KIR+ vs Rest,KIR+,Rest 4 | SORBS2,0.039215686274509776,1.5607915,KIR+ vs Rest,KIR+,Rest 5 | PDGFRB,0.03941576630652266,1.6379219,KIR+ vs Rest,KIR+,Rest 6 | B3GAT1,0.04001600640256098,1.2591028,KIR+ vs Rest,KIR+,Rest 7 | COL13A1,0.04421768707482998,1.0165076,KIR+ vs Rest,KIR+,Rest 
8 | PCDH1,0.04601840736294516,1.5785737,KIR+ vs Rest,KIR+,Rest 9 | LGR6,0.04621848739495793,1.8811342,KIR+ vs Rest,KIR+,Rest 10 | NUAK1,0.046618647458983586,1.0643895,KIR+ vs Rest,KIR+,Rest 11 | GNAL,0.047819127651060445,1.5318292,KIR+ vs Rest,KIR+,Rest 12 | DAB2,0.049219687875150075,1.5454047,KIR+ vs Rest,KIR+,Rest 13 | -------------------------------------------------------------------------------- /differential_expression/de_up_nkg2a_pos.csv: -------------------------------------------------------------------------------- 1 | ,p_val,lfc_mean,comparison,group1,group2 2 | GPR183,0.016406562625050025,2.1510997,NKG2A+ vs Rest,NKG2A+,Rest 3 | GZMK,0.019807923169267716,2.241302,NKG2A+ vs Rest,NKG2A+,Rest 4 | MYC,0.024609843937575038,1.7149199,NKG2A+ vs Rest,NKG2A+,Rest 5 | SCML1,0.025610244097639012,1.1689938,NKG2A+ vs Rest,NKG2A+,Rest 6 | ID3,0.027811124449779956,1.9320335,NKG2A+ vs Rest,NKG2A+,Rest 7 | TNFRSF11A,0.0282112845138055,1.5531603,NKG2A+ vs Rest,NKG2A+,Rest 8 | XCL1,0.03001200480192079,1.5373559,NKG2A+ vs Rest,NKG2A+,Rest 9 | ANO9,0.03081232492997199,1.3135897,NKG2A+ vs Rest,NKG2A+,Rest 10 | IL7R,0.03281312525010005,1.2158722,NKG2A+ vs Rest,NKG2A+,Rest 11 | RUNX2,0.036414565826330514,1.4067564,NKG2A+ vs Rest,NKG2A+,Rest 12 | LSR,0.037214885954381716,1.0812525,NKG2A+ vs Rest,NKG2A+,Rest 13 | LDB2,0.044417767106842754,1.5218979,NKG2A+ vs Rest,NKG2A+,Rest 14 | SPRY1,0.04561824729891961,1.0180645,NKG2A+ vs Rest,NKG2A+,Rest 15 | ANK3,0.04601840736294516,1.4278271,NKG2A+ vs Rest,NKG2A+,Rest 16 | PDGFA,0.047819127651060445,1.200781,NKG2A+ vs Rest,NKG2A+,Rest 17 | HOXA10,0.04801920768307322,1.0112121,NKG2A+ vs Rest,NKG2A+,Rest 18 | -------------------------------------------------------------------------------- /differential_expression/dim_pb_vs_tr_deg.csv: -------------------------------------------------------------------------------- 1 | ,p_val,lfc_mean,comparison,group1,group2 2 | BOP1,0.0,-3.072363,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 3 | 
MLLT1,0.0,-2.79313875,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 4 | ZNF26,0.0,-2.3058375,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 5 | CNBD2,0.0003999999999999,-3.15787075,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 6 | PSMA2,0.0003999999999999,-3.4815605,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 7 | CLN3,0.0011999999999999,-2.8690903,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 8 | MAP1LC3B2,0.0013999999999999,-2.739333,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 9 | SLC5A3,0.0021999999999999,-2.9390855,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 10 | GPR89B,0.0021999999999999,-2.151487,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 11 | THTPA,0.0028,-2.6392795,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 12 | S1PR2,0.003,-2.4175923,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 13 | GMPPB,0.0036,-2.08686925,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 14 | NAA60,0.004,-2.0106585,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 15 | NEK5,0.0043999999999999,-2.32111835,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 16 | CCL4L2,0.0046,-3.3302903,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 17 | TAS2R4,0.0048,-2.009121,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 18 | PHKG1,0.0058,-2.4281675,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 19 | PROB1,0.0061999999999999,-1.9860959,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 20 | TLCD2,0.0063999999999999,-2.4841597,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 21 | HGH1,0.0076,-2.0555285,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 22 | FXYD3,0.0091999999999999,-2.4454746,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 23 | AREG,0.0096,-3.19706,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 24 | LIMS2,0.0106,-2.0491335,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 25 | CD4,0.0106,-2.1975955,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 26 | TMEM100,0.0108,-1.9190438,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 27 | PLLP,0.011,-1.9782828,pb_CD56dim vs 
tr_CD56dim,pb_CD56dim,tr_CD56dim 28 | SAA1,0.0121999999999999,-2.1271913,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 29 | TIMP3,0.013,-1.9060068,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 30 | FCRL1,0.0131999999999999,-2.067314,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 31 | COLEC12,0.0133999999999999,-2.5503862,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 32 | RNASE1,0.0133999999999999,-2.1744415,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 33 | MANSC1,0.0135999999999999,-1.9382111,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 34 | GOLGA6L9,0.014,-1.9357234,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 35 | GSTA1,0.0153999999999999,-1.98287775,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 36 | SCGB1A1,0.0158,-2.95024015,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 37 | TMEM190,0.016,-1.95152805,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 38 | KRT19,0.016,-2.034782,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 39 | TFF3,0.0161999999999999,-1.93687855,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 40 | AGR2,0.0163999999999999,-1.9171795,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 41 | TACSTD2,0.0168,-1.9605725,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 42 | DLX2,0.017,-2.0536418,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 43 | PCDHGA5,0.0171999999999999,-1.9068047,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 44 | AGR3,0.0173999999999999,-2.1755805,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 45 | CDH13,0.0178,-2.22078515,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 46 | KIR2DL4,0.018,-2.07412385,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 47 | PCDHGB6,0.0188,-2.0085295,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 48 | MS4A2,0.019,-2.10072,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 49 | FCN3,0.019,-2.09395765,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 50 | SCGB3A2,0.0193999999999999,-2.479611,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 51 | NR4A3,0.0203999999999999,-2.14060185,pb_CD56dim vs 
tr_CD56dim,pb_CD56dim,tr_CD56dim 52 | SCGB3A1,0.0205999999999999,-2.76610015,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 53 | BGN,0.0211999999999999,-1.95564045,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 54 | SSTR2,0.0213999999999999,-1.9162601,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 55 | PPARG,0.0228,-1.9421674,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 56 | NR4A2,0.0242,-2.03139685,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 57 | TNFSF9,0.0248,-1.97289,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 58 | NR4A1,0.0252,-2.05915665,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 59 | SPARCL1,0.0268,-1.9343699,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 60 | FOSB,0.0278,-1.91559375,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 61 | PTGDS,0.03,-1.9413053,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 62 | SLPI,0.0318,-2.078176,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 63 | TM4SF1,0.0335999999999999,-1.9220258,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 64 | GREM1,0.0345999999999999,-1.9036832,pb_CD56dim vs tr_CD56dim,pb_CD56dim,tr_CD56dim 65 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: nk_reference 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - _libgcc_mutex=0.1=conda_forge 8 | - _openmp_mutex=4.5=2_gnu 9 | - _r-mutex=1.0.1=anacondar_1 10 | - aiosignal=1.3.1=pyhd8ed1ab_0 11 | - anndata=0.10.3=pyhd8ed1ab_0 12 | - anndata2ri=1.1=pyhdfd78af_0 13 | - annotated-types=0.6.0=pyhd8ed1ab_0 14 | - anyio=4.1.0=pyhd8ed1ab_0 15 | - aom=3.7.1=h59595ed_0 16 | - argon2-cffi=23.1.0=pyhd8ed1ab_0 17 | - argon2-cffi-bindings=21.2.0=py39hd1e30aa_4 18 | - arpack=3.7.0=hdefa2d7_2 19 | - array-api-compat=1.4=pyhd8ed1ab_0 20 | - asttokens=2.4.1=pyhd8ed1ab_0 21 | - attrs=23.1.0=pyh71513ae_1 22 | - aws-c-auth=0.7.11=h0b4cabd_1 23 | - aws-c-cal=0.6.9=h14ec70c_3 24 | - 
aws-c-common=0.9.12=hd590300_0 25 | - aws-c-compression=0.2.17=h572eabf_8 26 | - aws-c-event-stream=0.4.1=h97bb272_2 27 | - aws-c-http=0.8.0=h9129f04_2 28 | - aws-c-io=0.14.0=hf8f278a_1 29 | - aws-c-mqtt=0.10.1=h2b97f5f_0 30 | - aws-c-s3=0.4.9=hca09fc5_0 31 | - aws-c-sdkutils=0.1.13=h572eabf_1 32 | - aws-checksums=0.1.17=h572eabf_7 33 | - aws-crt-cpp=0.26.0=h04327c0_8 34 | - aws-sdk-cpp=1.11.210=hba3e011_10 35 | - babel=2.13.1=pyhd8ed1ab_0 36 | - beautifulsoup4=4.12.2=pyha770c72_0 37 | - binutils_impl_linux-64=2.40=hf600244_0 38 | - bleach=6.1.0=pyhd8ed1ab_0 39 | - blosc=1.21.5=h0f2a231_0 40 | - brotli=1.1.0=hd590300_1 41 | - brotli-bin=1.1.0=hd590300_1 42 | - brotli-python=1.1.0=py39h3d6467e_1 43 | - brunsli=0.1=h9c3ff4c_0 44 | - bwidget=1.9.14=ha770c72_1 45 | - bzip2=1.0.8=hd590300_5 46 | - c-ares=1.23.0=hd590300_0 47 | - c-blosc2=2.12.0=hb4ffafa_0 48 | - ca-certificates=2023.11.17=hbcca054_0 49 | - cached-property=1.5.2=hd8ed1ab_1 50 | - cached_property=1.5.2=pyha770c72_1 51 | - cairo=1.18.0=h3faef2a_0 52 | - certifi=2023.11.17=pyhd8ed1ab_0 53 | - cffi=1.16.0=py39h7a31438_0 54 | - charls=2.4.2=h59595ed_0 55 | - charset-normalizer=3.3.2=pyhd8ed1ab_0 56 | - colorama=0.4.6=pyhd8ed1ab_0 57 | - comm=0.1.4=pyhd8ed1ab_0 58 | - contourpy=1.2.0=py39h7633fee_0 59 | - curl=8.5.0=hca28451_0 60 | - cycler=0.12.1=pyhd8ed1ab_0 61 | - dask-core=2024.1.0=pyhd8ed1ab_0 62 | - dask-image=2023.3.0=pyhd8ed1ab_0 63 | - dav1d=1.2.1=hd590300_0 64 | - debugpy=1.8.0=py39h3d6467e_1 65 | - decorator=5.1.1=pyhd8ed1ab_0 66 | - defusedxml=0.7.1=pyhd8ed1ab_0 67 | - docrep=0.3.2=pyh44b312d_0 68 | - dunamai=1.19.0=pyhd8ed1ab_0 69 | - entrypoints=0.4=pyhd8ed1ab_0 70 | - executing=2.0.1=pyhd8ed1ab_0 71 | - expat=2.5.0=hcb278e6_1 72 | - fa2=0.3.5=py39hb9d737c_2 73 | - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 74 | - font-ttf-inconsolata=3.000=h77eed37_0 75 | - font-ttf-source-code-pro=2.038=h77eed37_0 76 | - font-ttf-ubuntu=0.83=h77eed37_1 77 | - fontconfig=2.14.2=h14ed4e7_0 78 | - 
fonts-conda-ecosystem=1=0 79 | - fonts-conda-forge=1=0 80 | - fonttools=4.46.0=py39hd1e30aa_0 81 | - fqdn=1.5.1=pyhd8ed1ab_0 82 | - freetype=2.12.1=h267a509_2 83 | - fribidi=1.0.10=h36c2ea0_0 84 | - gcc_impl_linux-64=13.2.0=h338b0a0_3 85 | - get_version=3.5.5=pyhd8ed1ab_0 86 | - gettext=0.21.1=h27087fc_0 87 | - gflags=2.2.2=he1b5a44_1004 88 | - gfortran_impl_linux-64=13.2.0=h76e1118_3 89 | - giflib=5.2.1=h0b41bf4_3 90 | - git=2.43.0=pl5321h7bc287a_0 91 | - glog=0.6.0=h6f12383_0 92 | - glpk=5.0=h445213a_0 93 | - gmp=6.3.0=h59595ed_0 94 | - graphite2=1.3.13=h58526e2_1001 95 | - gxx_impl_linux-64=13.2.0=h338b0a0_3 96 | - h5py=3.10.0=nompi_py39h87cadad_100 97 | - harfbuzz=8.3.0=h3d44ed6_0 98 | - hdf5=1.14.2=nompi_h4f84152_100 99 | - icu=73.2=h59595ed_0 100 | - idna=3.6=pyhd8ed1ab_0 101 | - igraph=0.10.4=hb9ddf80_2 102 | - imagecodecs=2024.1.1=py39hf9b8f0e_0 103 | - imageio=2.33.1=pyh8c1a49c_0 104 | - importlib-metadata=7.0.0=pyha770c72_0 105 | - importlib-resources=6.1.1=pyhd8ed1ab_0 106 | - importlib_metadata=7.0.0=hd8ed1ab_0 107 | - importlib_resources=6.1.1=pyhd8ed1ab_0 108 | - inflect=7.0.0=pyhd8ed1ab_0 109 | - ipykernel=6.26.0=pyhf8b6a83_0 110 | - ipython=8.18.1=pyh707e725_3 111 | - ipython_genutils=0.2.0=py_1 112 | - isoduration=20.11.0=pyhd8ed1ab_0 113 | - jedi=0.19.1=pyhd8ed1ab_0 114 | - jinja2=3.1.2=pyhd8ed1ab_1 115 | - joblib=1.3.2=pyhd8ed1ab_0 116 | - json5=0.9.14=pyhd8ed1ab_0 117 | - jsonpointer=2.4=py39hf3d152e_3 118 | - jsonschema=4.20.0=pyhd8ed1ab_0 119 | - jsonschema-specifications=2023.11.2=pyhd8ed1ab_0 120 | - jsonschema-with-format-nongpl=4.20.0=pyhd8ed1ab_0 121 | - jupyter_client=8.6.0=pyhd8ed1ab_0 122 | - jupyter_core=5.5.0=py39hf3d152e_0 123 | - jupyter_events=0.9.0=pyhd8ed1ab_0 124 | - jupyter_server=2.12.1=pyhd8ed1ab_0 125 | - jupyter_server_terminals=0.4.4=pyhd8ed1ab_1 126 | - jupyterlab=3.5.3=pyhd8ed1ab_0 127 | - jupyterlab_pygments=0.3.0=pyhd8ed1ab_0 128 | - jupyterlab_server=2.25.2=pyhd8ed1ab_0 129 | - jxrlib=1.1=hd590300_3 130 | - 
kernel-headers_linux-64=2.6.32=he073ed8_16 131 | - keyutils=1.6.1=h166bdaf_0 132 | - kiwisolver=1.4.5=py39h7633fee_1 133 | - krb5=1.21.2=h659d440_0 134 | - lazy_loader=0.3=pyhd8ed1ab_0 135 | - lcms2=2.16=hb7c19ff_0 136 | - ld_impl_linux-64=2.40=h41732ed_0 137 | - leidenalg=0.9.1=py39h227be39_0 138 | - lerc=4.0.0=h27087fc_0 139 | - libabseil=20230802.1=cxx17_h59595ed_0 140 | - libaec=1.1.2=h59595ed_1 141 | - libarrow=14.0.2=h84dd17c_2_cpu 142 | - libarrow-acero=14.0.2=h59595ed_2_cpu 143 | - libarrow-dataset=14.0.2=h59595ed_2_cpu 144 | - libarrow-flight=14.0.2=h120cb0d_2_cpu 145 | - libarrow-flight-sql=14.0.2=h61ff412_2_cpu 146 | - libarrow-gandiva=14.0.2=hacb8726_2_cpu 147 | - libarrow-substrait=14.0.2=h61ff412_2_cpu 148 | - libavif16=1.0.3=hef5bec9_1 149 | - libblas=3.9.0=20_linux64_openblas 150 | - libbrotlicommon=1.1.0=hd590300_1 151 | - libbrotlidec=1.1.0=hd590300_1 152 | - libbrotlienc=1.1.0=hd590300_1 153 | - libcblas=3.9.0=20_linux64_openblas 154 | - libcrc32c=1.1.2=h9c3ff4c_0 155 | - libcurl=8.5.0=hca28451_0 156 | - libdeflate=1.19=hd590300_0 157 | - libedit=3.1.20191231=he28a2e2_2 158 | - libev=4.33=h516909a_1 159 | - libevent=2.1.12=hf998b51_1 160 | - libexpat=2.5.0=hcb278e6_1 161 | - libffi=3.4.2=h7f98852_5 162 | - libgcc-devel_linux-64=13.2.0=ha9c7c90_103 163 | - libgcc-ng=13.2.0=h807b86a_3 164 | - libgfortran-ng=13.2.0=h69a702a_3 165 | - libgfortran5=13.2.0=ha4646dd_3 166 | - libglib=2.78.2=h783c2da_0 167 | - libgomp=13.2.0=h807b86a_3 168 | - libgoogle-cloud=2.12.0=h5206363_4 169 | - libgrpc=1.59.3=hd6c4280_0 170 | - libhwloc=2.9.1=hd6dc26d_0 171 | - libiconv=1.17=h166bdaf_0 172 | - libjpeg-turbo=3.0.0=hd590300_1 173 | - liblapack=3.9.0=20_linux64_openblas 174 | - libllvm14=14.0.6=hcd5def8_4 175 | - libllvm15=15.0.7=hadd5161_1 176 | - libnghttp2=1.58.0=h47da74e_0 177 | - libnl=3.9.0=hd590300_0 178 | - libnsl=2.0.1=hd590300_0 179 | - libnuma=2.0.16=h0b41bf4_1 180 | - libopenblas=0.3.25=pthreads_h413a1c8_0 181 | - libparquet=14.0.2=h352af49_2_cpu 182 | - 
libpng=1.6.39=h753d276_0 183 | - libprotobuf=4.24.4=hf27288f_0 184 | - libre2-11=2023.06.02=h7a70373_0 185 | - libsanitizer=13.2.0=h7e041cc_3 186 | - libsodium=1.0.18=h36c2ea0_1 187 | - libsqlite=3.44.2=h2797004_0 188 | - libssh2=1.11.0=h0841786_0 189 | - libstdcxx-devel_linux-64=13.2.0=ha9c7c90_103 190 | - libstdcxx-ng=13.2.0=h7e041cc_3 191 | - libthrift=0.19.0=hb90f79a_1 192 | - libtiff=4.6.0=ha9c0a0a_2 193 | - libutf8proc=2.8.0=h166bdaf_0 194 | - libuuid=2.38.1=h0b41bf4_0 195 | - libwebp-base=1.3.2=hd590300_0 196 | - libxcb=1.15=h0b41bf4_0 197 | - libxml2=2.10.4=hf1b16e4_1 198 | - libzlib=1.2.13=hd590300_5 199 | - libzopfli=1.0.3=h9c3ff4c_0 200 | - locket=1.0.0=pyhd8ed1ab_0 201 | - lz4-c=1.9.4=hcb278e6_0 202 | - make=4.3=hd18ef5c_1 203 | - markupsafe=2.1.3=py39hd1e30aa_1 204 | - matplotlib-base=3.8.2=py39he9076e7_0 205 | - matplotlib-inline=0.1.6=pyhd8ed1ab_0 206 | - matplotlib-scalebar=0.8.1=pyhd8ed1ab_0 207 | - metis=5.1.0=h59595ed_1007 208 | - mistune=3.0.2=pyhd8ed1ab_0 209 | - mpfr=4.2.1=h9458935_0 210 | - msgpack-python=1.0.7=py39h7633fee_0 211 | - multidict=6.0.4=py39hd1e30aa_1 212 | - munkres=1.1.4=pyh9f0ad1d_0 213 | - natsort=8.4.0=pyhd8ed1ab_0 214 | - nb_conda_kernels=2.3.1=py39hf3d152e_2 215 | - nbclassic=1.0.0=pyh8b2e9e2_0 216 | - nbconvert-core=7.12.0=pyhd8ed1ab_0 217 | - nbformat=5.9.2=pyhd8ed1ab_0 218 | - ncurses=6.4=h59595ed_2 219 | - nest-asyncio=1.5.8=pyhd8ed1ab_0 220 | - networkx=3.2.1=pyhd8ed1ab_0 221 | - notebook=6.5.4=pyha770c72_0 222 | - notebook-shim=0.2.3=pyhd8ed1ab_0 223 | - numba=0.57.0=py39hb75a051_0 224 | - numcodecs=0.12.1=py39h3d6467e_0 225 | - numpy=1.23.5=py39h3d75532_0 226 | - omnipath=1.0.8=pyhd8ed1ab_0 227 | - openjpeg=2.5.0=h488ebb8_3 228 | - openssl=3.2.1=hd590300_0 229 | - orc=1.9.2=h4b38347_0 230 | - overrides=7.4.0=pyhd8ed1ab_0 231 | - packaging=23.2=pyhd8ed1ab_0 232 | - pandas=1.5.3=py39h2ad29b5_1 233 | - pandocfilters=1.5.0=pyhd8ed1ab_0 234 | - pango=1.50.14=ha41ecd1_2 235 | - parso=0.8.3=pyhd8ed1ab_0 236 | - 
patsy=0.5.4=pyhd8ed1ab_0 237 | - pcre2=10.42=hcad00b1_0 238 | - perl=5.32.1=4_hd590300_perl5 239 | - pexpect=4.8.0=pyh1a96a4e_2 240 | - pickleshare=0.7.5=py_1003 241 | - pillow=10.1.0=py39had0adad_0 242 | - pims=0.6.1=pyhd8ed1ab_1 243 | - pip=23.3.1=pyhd8ed1ab_0 244 | - pixman=0.42.2=h59595ed_0 245 | - pkg-config=0.29.2=h36c2ea0_1008 246 | - pkgutil-resolve-name=1.3.10=pyhd8ed1ab_1 247 | - prometheus_client=0.19.0=pyhd8ed1ab_0 248 | - prompt-toolkit=3.0.41=pyha770c72_0 249 | - psutil=5.9.5=py39hd1e30aa_1 250 | - pthread-stubs=0.4=h36c2ea0_1001 251 | - ptyprocess=0.7.0=pyhd3deb0d_0 252 | - pure_eval=0.2.2=pyhd8ed1ab_0 253 | - pyarrow-hotfix=0.6=pyhd8ed1ab_0 254 | - pycparser=2.21=pyhd8ed1ab_0 255 | - pydantic=2.5.3=pyhd8ed1ab_0 256 | - pydantic-core=2.14.6=py39h9fdd4d6_1 257 | - pygments=2.17.2=pyhd8ed1ab_0 258 | - pynndescent=0.5.11=pyhca7485f_0 259 | - pyparsing=3.1.1=pyhd8ed1ab_0 260 | - pysocks=1.7.1=pyha2e5f31_6 261 | - python=3.9.16=h2782a2a_0_cpython 262 | - python-dateutil=2.8.2=pyhd8ed1ab_0 263 | - python-fastjsonschema=2.19.0=pyhd8ed1ab_0 264 | - python-igraph=0.10.4=py39hfac54ea_0 265 | - python-json-logger=2.0.7=pyhd8ed1ab_0 266 | - python_abi=3.9=4_cp39 267 | - pytz=2023.3.post1=pyhd8ed1ab_0 268 | - pywavelets=1.4.1=py39h44dd56e_1 269 | - pyyaml=6.0.1=py39hd1e30aa_1 270 | - pyzmq=25.1.2=py39h8c080ef_0 271 | - r-base=4.3.2=hb8ee39d_1 272 | - rav1e=0.6.6=he8a937b_2 273 | - rdma-core=49.0=hd3aeb46_2 274 | - re2=2023.06.02=h2873b5e_0 275 | - readline=8.2=h8228510_1 276 | - referencing=0.31.1=pyhd8ed1ab_0 277 | - requests=2.31.0=pyhd8ed1ab_0 278 | - rfc3339-validator=0.1.4=pyhd8ed1ab_0 279 | - rfc3986-validator=0.1.1=pyh9f0ad1d_0 280 | - rpds-py=0.13.2=py39h9fdd4d6_0 281 | - rpy2=3.5.11=py39r43h44dd56e_3 282 | - s2n=1.4.1=h06160fa_0 283 | - scanpy==1.10.1 284 | - scikit-image==0.22.0 285 | - scikit-learn==1.3.2 286 | - scipy==1.11.4 287 | - scvelo==0.3.2 288 | - scvi-tools==1.0.4 289 | - seaborn==0.13.2 290 | - seaborn-base=0.13.0=pyhd8ed1ab_0 291 | - 
sed=4.8=he412f7d_0 292 | - send2trash=1.8.2=pyh41d4057_0 293 | - session-info=1.0.0=pyhd8ed1ab_0 294 | - setuptools=68.2.2=pyhd8ed1ab_0 295 | - simplegeneric=0.8.1=py_1 296 | - six=1.16.0=pyh6c4a22f_0 297 | - slicerator=1.1.0=pyhd8ed1ab_0 298 | - snappy=1.1.10=h9fff704_0 299 | - sniffio=1.3.0=pyhd8ed1ab_0 300 | - sortedcontainers=2.4.0=pyhd8ed1ab_0 301 | - soupsieve=2.5=pyhd8ed1ab_1 302 | - squidpy=1.2.2=pyhd8ed1ab_0 303 | - stack_data=0.6.2=pyhd8ed1ab_0 304 | - statsmodels=0.14.0=py39h44dd56e_2 305 | - stdlib-list=0.8.0=pyhd8ed1ab_0 306 | - suitesparse=5.10.1=h9e50725_1 307 | - svt-av1=1.8.0=h59595ed_0 308 | - sysroot_linux-64=2.12=he073ed8_16 309 | - tangram-sc==1.0.4 310 | - tbb=2021.9.0=hf52228f_0 311 | - terminado=0.18.0=pyh0d859eb_0 312 | - texttable=1.7.0=pyhd8ed1ab_0 313 | - threadpoolctl=3.2.0=pyha21a80b_0 314 | - tifffile=2023.12.9=pyhd8ed1ab_0 315 | - tinycss2=1.2.1=pyhd8ed1ab_0 316 | - tk=8.6.13=noxft_h4845f30_101 317 | - tktable=2.10=h0c5db8f_5 318 | - tomli=2.0.1=pyhd8ed1ab_0 319 | - toolz=0.12.0=pyhd8ed1ab_0 320 | - tornado=6.3.3=py39hd1e30aa_1 321 | - tqdm=4.66.1=pyhd8ed1ab_0 322 | - traitlets=5.9.0=pyhd8ed1ab_0 323 | - types-python-dateutil=2.8.19.14=pyhd8ed1ab_0 324 | - typing-extensions=4.8.0=hd8ed1ab_0 325 | - typing_extensions=4.8.0=pyha770c72_0 326 | - typing_utils=0.1.0=pyhd8ed1ab_0 327 | - tzdata=2023c=h71feb2d_0 328 | - tzlocal=5.2=py39hf3d152e_0 329 | - ucx=1.15.0=h75e419f_3 330 | - umap-learn=0.5.5=py39hf3d152e_0 331 | - unicodedata2=15.1.0=py39hd1e30aa_0 332 | - uri-template=1.3.0=pyhd8ed1ab_0 333 | - urllib3=2.1.0=pyhd8ed1ab_0 334 | - validators=0.22.0=pyhd8ed1ab_0 335 | - wcwidth=0.2.12=pyhd8ed1ab_0 336 | - webcolors=1.13=pyhd8ed1ab_0 337 | - webencodings=0.5.1=pyhd8ed1ab_2 338 | - websocket-client=1.7.0=pyhd8ed1ab_0 339 | - wheel=0.42.0=pyhd8ed1ab_0 340 | - wrapt=1.16.0=py39hd1e30aa_0 341 | - xarray=2024.1.1=pyhd8ed1ab_0 342 | - xorg-kbproto=1.0.7=h7f98852_1002 343 | - xorg-libice=1.1.1=hd590300_0 344 | - xorg-libsm=1.2.4=h7391055_0 
345 | - xorg-libx11=1.8.7=h8ee46fc_0 346 | - xorg-libxau=1.0.11=hd590300_0 347 | - xorg-libxdmcp=1.1.3=h7f98852_0 348 | - xorg-libxext=1.3.4=h0b41bf4_2 349 | - xorg-libxrender=0.9.11=hd590300_0 350 | - xorg-libxt=1.3.0=hd590300_1 351 | - xorg-renderproto=0.11.1=h7f98852_1002 352 | - xorg-xextproto=7.3.0=h0b41bf4_1003 353 | - xorg-xproto=7.0.31=h7f98852_1007 354 | - xz=5.4.5=h5eee18b_0 355 | - yaml=0.2.5=h7f98852_2 356 | - zarr=2.16.1=pyhd8ed1ab_0 357 | - zeromq=4.3.5=h59595ed_0 358 | - zfp=1.0.1=h59595ed_0 359 | - zict=3.0.0=pyhd8ed1ab_0 360 | - zipp=3.17.0=pyhd8ed1ab_0 361 | - zlib=1.2.13=hd590300_5 362 | - zlib-ng=2.0.7=h0b41bf4_0 363 | - zstd=1.5.5=hfc55251_0 364 | - pip: 365 | - accessible-pygments==0.0.4 366 | - adjusttext==0.8 367 | - aiohttp==3.8.4 368 | - alabaster==0.7.13 369 | - arboreto==0.1.6 370 | - arrow==1.2.3 371 | - asciitree==0.3.3 372 | - async-timeout==4.0.2 373 | - binaryornot==0.4.4 374 | - bioinfokit==2.1.0 375 | - blosc2==2.0.0 376 | - bokeh==3.1.0 377 | - boltons==23.0.0 378 | - cfgv==3.4.0 379 | - chardet==5.1.0 380 | - click==8.1.3 381 | - cloudpickle==2.2.1 382 | - cmake==3.26.3 383 | - cookiecutter==2.1.1 384 | - ctxcore==0.2.0 385 | - cython==0.29.34 386 | - cytoolz==0.12.1 387 | - dask==2023.4.0 388 | - dill==0.3.6 389 | - distlib==0.3.7 390 | - distributed==2023.4.0 391 | - docutils==0.17.1 392 | - et-xmlfile==1.1.0 393 | - exceptiongroup==1.1.3 394 | - fasteners==0.19 395 | - fcsparser==0.2.6 396 | - filelock==3.12.4 397 | - frozendict==2.3.7 398 | - frozenlist==1.3.3 399 | - fsspec==2023.4.0 400 | - get-annotations==0.1.2 401 | - greenlet==3.0.1 402 | - gtfparse==2.5.0 403 | - identify==2.5.30 404 | - imagesize==1.4.1 405 | - infercnvpy==0.4.5 406 | - iniconfig==2.0.0 407 | - interlap==0.2.7 408 | - jinja2-time==0.2.0 409 | - jupyter-cache==0.6.1 410 | - llvmlite==0.40.0 411 | - loompy==3.0.7 412 | - lz4==4.3.2 413 | - markdown-it-py==2.2.0 414 | - matplotlib-venn==0.11.9 415 | - mdit-py-plugins==0.3.5 416 | - mdurl==0.1.2 417 | - 
milopy==0.1.1 418 | - more-itertools==9.1.0 419 | - msgpack==1.0.5 420 | - multiprocessing-on-dill==3.5.0a4 421 | - myst-nb==0.17.2 422 | - myst-parser==0.18.1 423 | - nbclient==0.7.4 424 | - nbsphinx==0.9.3 425 | - nodeenv==1.8.0 426 | - numexpr==2.8.4 427 | - numpy-groupies==0.9.20 428 | - nxviz==0.6.3 429 | - openpyxl==3.1.2 430 | - palantir==1.2 431 | - palettable==3.3.3 432 | - partd==1.4.0 433 | - pbr==5.11.1 434 | - phenograph==1.5.7 435 | - platformdirs==3.11.0 436 | - pluggy==1.3.0 437 | - polars==0.20.31 438 | - pre-commit==3.5.0 439 | - profimp==0.1.0 440 | - progressbar2==4.2.0 441 | - py-cpuinfo==9.0.0 442 | - pyarrow==14.0.2 443 | - pydata-sphinx-theme==0.13.3 444 | - pygam==0.9.0 445 | - pypi-latest==0.1.2 446 | - pyreadr==0.5.0 447 | - pysam==0.21.0 448 | - pyscenic==0.12.1 449 | - pytest==7.4.3 450 | - pytest-mock==3.12.0 451 | - pytest-nunit==1.0.4 452 | - python-slugify==8.0.1 453 | - python-utils==3.5.2 454 | - pytoml==0.1.21 455 | - questionary==1.10.0 456 | - rich==13.3.5 457 | - sc-toolbox==0.12.1 458 | - scanpydoc==0.9.5 459 | - scvelo==0.2.5 460 | - setuptools-scm==8.0.4 461 | - snowballstemmer==2.2.0 462 | - sphinx==4.5.0 463 | - sphinx-autodoc-typehints==1.19.1 464 | - sphinx-book-theme==1.0.1 465 | - sphinx-copybutton==0.5.2 466 | - sphinx-design==0.4.1 467 | - sphinxcontrib-applehelp==1.0.4 468 | - sphinxcontrib-devhelp==1.0.2 469 | - sphinxcontrib-htmlhelp==2.0.1 470 | - sphinxcontrib-jsmath==1.0.1 471 | - sphinxcontrib-qthelp==1.0.3 472 | - sphinxcontrib-serializinghtml==1.1.5 473 | - sphinxext-opengraph==0.8.2 474 | - sqlalchemy==2.0.22 475 | - tables==3.8.0 476 | - tabulate==0.9.0 477 | - tblib==1.7.0 478 | - text-unidecode==1.3 479 | - textwrap3==0.9.2 480 | - velocyto==0.17.17 481 | - virtualenv==20.24.6 482 | - xyzservices==2023.2.0 483 | - yarl==1.9.2 484 | -------------------------------------------------------------------------------- /mapping_tumor_data/map_tink_to_ref.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "0606f7fd", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "#import scvi\n", 11 | "import scanpy as sc\n", 12 | "import pandas as pd\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "id": "1aa95077", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import matplotlib.pyplot as plt" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "id": "3fe5bd3d", 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "import anndata" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 4, 39 | "id": "ecb149bf", 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "import os" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 5, 49 | "id": "4c13c8c3", 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "import random" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 6, 59 | "id": "7fc2a272", 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "#from pyscenic.aucell import aucell\n", 64 | "#from pyscenic.genesig import GeneSignature\n", 65 | "#from pyscenic.aucell import create_rankings, enrichment" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 154, 71 | "id": "81b53796-af8f-42f6-87da-1d55ea93acda", 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "data_dir = ''" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "id": "54acaaf3", 81 | "metadata": {}, 82 | "source": [ 83 | "### Load data and reference model" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 108, 89 | "id": "d87c69e5-13b2-4e3c-971c-20ef158b8850", 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "name": "stderr", 94 | "output_type": "stream", 95 | "text": [ 96 | 
"/home/ubuntu/miniconda3/envs/scenic/lib/python3.9/site-packages/anndata/__init__.py:51: FutureWarning: `anndata.read` is deprecated, use `anndata.read_h5ad` instead. `ad.read` will be removed in mid 2024.\n", 97 | " warnings.warn(\n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "adata = sc.read(os.path.join(data_dir, 'zenodo', 'adata_ref_after_training.h5ad'))" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 137, 108 | "id": "ad8cb5cf-25c2-4fef-b962-d7a3611d9605", 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "name": "stderr", 113 | "output_type": "stream", 114 | "text": [ 115 | "/home/ubuntu/miniconda3/envs/scenic/lib/python3.9/site-packages/anndata/__init__.py:51: FutureWarning: `anndata.read` is deprecated, use `anndata.read_h5ad` instead. `ad.read` will be removed in mid 2024.\n", 116 | " warnings.warn(\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "adata_query = sc.read(os.path.join(data_dir, 'zenodo', 'adata_nk_tumor_query.h5ad'))" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 111, 127 | "id": "f71acd40", 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "scvi_model = scvi.model.SCVI.load(os.path.join(data_dir, 'zenodo', 'nk_ref_scvi_model'), adata=adata)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "id": "a224fd9d", 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "id": "3747fbab", 145 | "metadata": {}, 146 | "source": [ 147 | "#### Map query scvi model" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "id": "fac6079b", 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "name": "stdout", 158 | "output_type": "stream", 159 | "text": [ 160 | "\u001b[34mINFO \u001b[0m Found \u001b[1;36m97.47352972009644\u001b[0m% reference vars in query data. 
\n" 161 | ] 162 | }, 163 | { 164 | "name": "stderr", 165 | "output_type": "stream", 166 | "text": [ 167 | "/itf-fi-ml/home/hermankn/.local/lib/python3.8/site-packages/scvi/model/base/_archesmixin.py:211: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.\n", 168 | " adata_padding = AnnData(\n" 169 | ] 170 | } 171 | ], 172 | "source": [ 173 | "scvi.model.SCVI.prepare_query_anndata(adata_query, scvi_model)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "id": "df180371", 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "query_model = scvi.model.SCVI.load_query_data(adata_query, scvi_model)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "id": "d60338ff", 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "query_model.train(max_epochs=100, plan_kwargs=dict(weight_decay=0.0))" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 102, 199 | "id": "d1c2a0f7", 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/html": [ 205 | "
SCVI Model with the following params: \n",
206 |        "n_hidden: 128, n_latent: 40, n_layers: 2, dropout_rate: 0.1, dispersion: gene, gene_likelihood: nb, \n",
207 |        "latent_distribution: normal\n",
208 |        "Training status: Not Trained\n",
209 |        "Model's adata is minified?: False\n",
210 |        "
\n" 211 | ], 212 | "text/plain": [ 213 | "SCVI Model with the following params: \n", 214 | "n_hidden: \u001b[1;36m128\u001b[0m, n_latent: \u001b[1;36m40\u001b[0m, n_layers: \u001b[1;36m2\u001b[0m, dropout_rate: \u001b[1;36m0.1\u001b[0m, dispersion: gene, gene_likelihood: nb, \n", 215 | "latent_distribution: normal\n", 216 | "Training status: Not Trained\n", 217 | "Model's adata is minified?: \u001b[3;91mFalse\u001b[0m\n" 218 | ] 219 | }, 220 | "metadata": {}, 221 | "output_type": "display_data" 222 | }, 223 | { 224 | "data": { 225 | "text/plain": [] 226 | }, 227 | "execution_count": 102, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "query_model" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "id": "68d1cc3c", 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "plt.plot(query_model.history[\"elbo_train\"], label=\"train\")\n", 244 | "#plt.plot(vae.history[\"elbo_validation\"], label=\"val\")\n", 245 | "plt.title(\"Negative ELBO over training epochs\")\n", 246 | "#plt.ylim(1100, 1500)\n", 247 | "plt.legend()" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 171, 253 | "id": "548a95a8", 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "adata_query.obsm['X_scVI'] = query_model.get_latent_representation()" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 172, 263 | "id": "99302dc6", 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "name": "stderr", 268 | "output_type": "stream", 269 | "text": [ 270 | "/itf-fi-ml/home/hermankn/.local/lib/python3.8/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. 
To make them unique, call `.obs_names_make_unique`.\n", 271 | " utils.warn_names_duplicates(\"obs\")\n" 272 | ] 273 | } 274 | ], 275 | "source": [ 276 | "adata_full = anndata.concat([adata, adata_query])" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 174, 282 | "id": "f9cdc741", 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "sc.pp.neighbors(adata_full, use_rep='X_scVI')\n", 287 | "sc.tl.umap(adata_full)" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "id": "a2cb855f-d3fa-4406-833d-de6e7aedf9c2", 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "id": "71f2009f-8fd2-4536-b9c8-153729e34b89", 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [ 305 | "adata_full = sc.read(os.path.join(data_dir, 'zenodo_revision', 'adata_all_nk_after_mapping.h5ad'))" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "id": "47e7ecbb-c0a7-49d4-9913-c6ee9b973de7", 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "id": "1312a4e2-5158-49de-a40c-cc27d629973e", 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [] 323 | } 324 | ], 325 | "metadata": { 326 | "kernelspec": { 327 | "display_name": "Python 3 (ipykernel)", 328 | "language": "python", 329 | "name": "python3" 330 | }, 331 | "language_info": { 332 | "codemirror_mode": { 333 | "name": "ipython", 334 | "version": 3 335 | }, 336 | "file_extension": ".py", 337 | "mimetype": "text/x-python", 338 | "name": "python", 339 | "nbconvert_exporter": "python", 340 | "pygments_lexer": "ipython3", 341 | "version": "3.9.16" 342 | } 343 | }, 344 | "nbformat": 4, 345 | "nbformat_minor": 5 346 | } 347 | -------------------------------------------------------------------------------- /nk_reference/normal_reference_nk.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#import scvi\n", 10 | "import scanpy as sc\n", 11 | "import pandas as pd\n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "import matplotlib.pyplot as plt" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import anndata" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 4, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import os" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 5, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "import random" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 6, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "#from pyscenic.aucell import aucell\n", 58 | "#from pyscenic.genesig import GeneSignature\n", 59 | "#from pyscenic.aucell import create_rankings, enrichment" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 6, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "data_dir = ''" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "#### Load data" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 7, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "name": "stderr", 85 | "output_type": "stream", 86 | "text": [ 87 | "/home/ubuntu/miniconda3/envs/scenic/lib/python3.9/site-packages/anndata/__init__.py:51: FutureWarning: `anndata.read` is deprecated, use `anndata.read_h5ad` instead. 
`ad.read` will be removed in mid 2024.\n", 88 | " warnings.warn(\n" 89 | ] 90 | } 91 | ], 92 | "source": [ 93 | "adata_ref = sc.read(os.path.join(data_dir, 'zenodo', 'adata_ref_nk.h5ad'))" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "#### Train scVI model" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 56, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "name": "stderr", 117 | "output_type": "stream", 118 | "text": [ 119 | "/itf-fi-ml/home/hermankn/.local/lib/python3.8/site-packages/scvi/data/fields/_layer_field.py:91: UserWarning: adata.X does not contain unnormalized count data. Are you sure this is what you want?\n", 120 | " warnings.warn(\n" 121 | ] 122 | } 123 | ], 124 | "source": [ 125 | "scvi.model.SCVI.setup_anndata(adata,\n", 126 | " batch_key='batch')" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 57, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "scvi_model = scvi.model.SCVI(adata,\n", 136 | " n_latent=20,#n_latent=40,\n", 137 | " n_layers=2,\n", 138 | " gene_likelihood = \"nb\")" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 58, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "name": "stderr", 148 | "output_type": "stream", 149 | "text": [ 150 | "GPU available: True (cuda), used: True\n", 151 | "TPU available: False, using: 0 TPU cores\n", 152 | "IPU available: False, using: 0 IPUs\n", 153 | "HPU available: False, using: 0 HPUs\n", 154 | "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n" 155 | ] 156 | }, 157 | { 158 | "name": "stdout", 159 | "output_type": "stream", 160 | "text": [ 161 | "Epoch 158/158: 100%|██████████| 158/158 [20:49<00:00, 8.29s/it, loss=2.52e+03, v_num=1]" 162 | ] 163 | }, 164 | { 165 | "name": "stderr", 166 | 
"output_type": "stream", 167 | "text": [ 168 | "`Trainer.fit` stopped: `max_epochs=158` reached.\n" 169 | ] 170 | }, 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "Epoch 158/158: 100%|██████████| 158/158 [20:49<00:00, 7.91s/it, loss=2.52e+03, v_num=1]\n" 176 | ] 177 | } 178 | ], 179 | "source": [ 180 | "scvi_model.train()" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 59, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "data": { 190 | "text/plain": [ 191 | "" 192 | ] 193 | }, 194 | "execution_count": 59, 195 | "metadata": {}, 196 | "output_type": "execute_result" 197 | }, 198 | { 199 | "data": { 200 | "image/png": "\n", 201 | "text/plain": [ 202 | "
" 203 | ] 204 | }, 205 | "metadata": { 206 | "needs_background": "light" 207 | }, 208 | "output_type": "display_data" 209 | } 210 | ], 211 | "source": [ 212 | "plt.plot(scvi_model.history[\"elbo_train\"], label=\"train\")\n", 213 | "#plt.plot(vae.history[\"elbo_validation\"], label=\"val\")\n", 214 | "plt.title(\"Negative ELBO over training epochs\")\n", 215 | "#plt.ylim(1100, 1500)\n", 216 | "plt.legend()" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 65, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "data": { 226 | "text/plain": [ 227 | "" 228 | ] 229 | }, 230 | "execution_count": 65, 231 | "metadata": {}, 232 | "output_type": "execute_result" 233 | }, 234 | { 235 | "data": { 236 | "image/png": "\n", 237 | "text/plain": [ 238 | "
" 239 | ] 240 | }, 241 | "metadata": { 242 | "needs_background": "light" 243 | }, 244 | "output_type": "display_data" 245 | } 246 | ], 247 | "source": [ 248 | "plt.plot(scvi_model.history[\"elbo_train\"], label=\"train\")\n", 249 | "#plt.plot(vae.history[\"elbo_validation\"], label=\"val\")\n", 250 | "plt.title(\"Negative ELBO over training epochs\")\n", 251 | "#plt.ylim(1100, 1500)\n", 252 | "plt.legend()" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 61, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "adata.obsm[\"X_scVI\"] = scvi_model.get_latent_representation()" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 62, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "sc.pp.neighbors(adata, use_rep=\"X_scVI\")" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "#### Load adata" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 95, 290 | "metadata": {}, 291 | "outputs": [ 292 | { 293 | "name": "stderr", 294 | "output_type": "stream", 295 | "text": [ 296 | "/home/ubuntu/miniconda3/envs/scenic/lib/python3.9/site-packages/anndata/__init__.py:51: FutureWarning: `anndata.read` is deprecated, use `anndata.read_h5ad` instead. 
`ad.read` will be removed in mid 2024.\n", 297 | " warnings.warn(\n" 298 | ] 299 | } 300 | ], 301 | "source": [ 302 | "adata = sc.read(os.path.join(data_dir, 'zenodo', 'adata_ref_after_training.h5ad'))" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [] 311 | } 312 | ], 313 | "metadata": { 314 | "kernelspec": { 315 | "display_name": "Python 3 (ipykernel)", 316 | "language": "python", 317 | "name": "python3" 318 | }, 319 | "language_info": { 320 | "codemirror_mode": { 321 | "name": "ipython", 322 | "version": 3 323 | }, 324 | "file_extension": ".py", 325 | "mimetype": "text/x-python", 326 | "name": "python", 327 | "nbconvert_exporter": "python", 328 | "pygments_lexer": "ipython3", 329 | "version": "3.9.16" 330 | } 331 | }, 332 | "nbformat": 4, 333 | "nbformat_minor": 4 334 | } 335 | -------------------------------------------------------------------------------- /pb_nk/meta_nk_aucell.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 28, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import scvi\n", 10 | "import scanpy as sc\n", 11 | "import pandas as pd\n", 12 | "import numpy as np\n", 13 | "\n", 14 | "import json\n", 15 | "\n", 16 | "from pyscenic.aucell import aucell\n", 17 | "from ctxcore.genesig import GeneSignature\n", 18 | "from pyscenic.aucell import create_rankings, enrichment" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 29, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "adata_bulk = sc.read('zenodo/pb_12_donors_bulk.h5ad')" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 30, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "filename 
= 'metank_gene_sig.txt'\n", 44 | "\n", 45 | "with open(filename, 'r') as file:\n", 46 | " gene_sig = json.load(file)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 31, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "signature_genes = [x for x in gene_sig.values()]" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 32, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "gene_sigs = [GeneSignature(name,gene_sig[name]) for name in gene_sig]" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 33, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "AnnData object with n_obs × n_vars = 23253 × 32549\n", 83 | " obs: 'total_counts', 'n_genes_by_counts', 'pct_counts_mt', 'sample', 'batch', 'subset', 'label', 'dataset', '_scvi_batch', '_scvi_labels', 'C_scANVI', 'source', 'tumor_type'\n", 84 | " obsm: 'X_scANVI', 'X_scVI', 'X_umap'" 85 | ] 86 | }, 87 | "execution_count": 33, 88 | "metadata": {}, 89 | "output_type": "execute_result" 90 | } 91 | ], 92 | "source": [ 93 | "adata_bulk" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 35, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "exp_df = adata_bulk.to_df()" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 36, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "auc_mtx = aucell(exp_df, gene_sigs, num_workers=4)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 37, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "adata_bulk.obs[[x + '_auc' for x in auc_mtx.columns]] = auc_mtx.to_numpy()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [] 129 | 
}, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 140, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "name": "stdout", 137 | "output_type": "stream", 138 | "text": [ 139 | "WARNING: saving figure to file /itf-fi-ml/home/hermankn/uni_res/figures/nk_all_cells_figs/matrixplot_meatank_sig_scores.pdf\n" 140 | ] 141 | }, 142 | { 143 | "data": { 144 | "image/png": "\n", 145 | "text/plain": [ 146 | "
" 147 | ] 148 | }, 149 | "metadata": { 150 | "image/png": { 151 | "height": 256, 152 | "width": 341 153 | }, 154 | "needs_background": "light" 155 | }, 156 | "output_type": "display_data" 157 | } 158 | ], 159 | "source": [ 160 | "sc.pl.matrixplot(adata_bulk, ['NK2A_auc', 'NK2B_auc', 'NK1A_auc', 'NK1B_auc', 'NK1C_auc', 'NK3_auc'], \n", 161 | " standard_scale='var',groupby='C_scANVI', save='metank_sig_scores.pdf')" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [] 177 | } 178 | ], 179 | "metadata": { 180 | "kernelspec": { 181 | "display_name": "Python 3 (ipykernel)", 182 | "language": "python", 183 | "name": "python3" 184 | }, 185 | "language_info": { 186 | "codemirror_mode": { 187 | "name": "ipython", 188 | "version": 3 189 | }, 190 | "file_extension": ".py", 191 | "mimetype": "text/x-python", 192 | "name": "python", 193 | "nbconvert_exporter": "python", 194 | "pygments_lexer": "ipython3", 195 | "version": "3.10.4" 196 | } 197 | }, 198 | "nbformat": 4, 199 | "nbformat_minor": 4 200 | } 201 | -------------------------------------------------------------------------------- /pb_nk/metank_gene_sig.txt: -------------------------------------------------------------------------------- 1 | { 2 | "NK1C": [ 3 | "ACTB", 4 | "ACTG1", 5 | "SPON2", 6 | "PRF1", 7 | "FCER1G", 8 | "PFN1", 9 | "GZMA", 10 | "GZMB", 11 | "CORO1A", 12 | "CFL1", 13 | "ITGB2", 14 | "RAC2", 15 | "NKG7", 16 | "AKR1C3", 17 | "MYL12A", 18 | "ARPC2", 19 | "CLIC1", 20 | "TMSB10", 21 | "CST7", 22 | "ALOX5AP" 23 | ], 24 | "NK3": [ 25 | "CD52", 26 | "KLRC2", 27 | "CCL5", 28 | "GZMH", 29 | "IL32", 30 | "CD3E", 31 | "CD3D", 32 | "S100A6", 33 | "S100A4", 34 | "VIM", 35 | "RPL27A", 36 | "KLRC3", 37 | "LGALS1", 38 | "RPS29", 39 | "TMSB4X", 40 | "RPS27", 41 | "RPL23A", 42 | "HLA-DPB1" 43 | ], 
44 | "NK1A": [ 45 | "KLRB1", 46 | "TXNIP", 47 | "CXCR4", 48 | "JUNB", 49 | "H3F3B", 50 | "CCL3", 51 | "ZFP36", 52 | "BTG1", 53 | "IER2", 54 | "C1orf56", 55 | "JUN" 56 | ], 57 | "NK1B": [ 58 | "KLRB1", 59 | "CLIC3", 60 | "CD38", 61 | "SPON2", 62 | "FCER1G", 63 | "JAK1", 64 | "HIST1H1E", 65 | "AOAH", 66 | "UTRN", 67 | "GNG2", 68 | "CD160", 69 | "GZMB", 70 | "MT-ND6", 71 | "SYNE1", 72 | "ETS1", 73 | "CEP78", 74 | "MT-CO1", 75 | "LBH", 76 | "CD247", 77 | "MT-ATP8" 78 | ], 79 | "NK2B": [ 80 | "GZMK", 81 | "TPT1", 82 | "EEF1A1", 83 | "RPL10", 84 | "RPL13", 85 | "XCL1", 86 | "RPS15A", 87 | "RPS23", 88 | "RPLP1", 89 | "RPL32", 90 | "FOS", 91 | "RPS28", 92 | "RPL19", 93 | "RPS24", 94 | "RPL34", 95 | "RPS12", 96 | "RPL18A", 97 | "RPS18", 98 | "RPSA", 99 | "RPL10A" 100 | ], 101 | "NK2A": [ 102 | "GZMK", 103 | "SELL", 104 | "CMC1", 105 | "XCL1", 106 | "IL7R", 107 | "XCL2", 108 | "RPS18", 109 | "RPL13A", 110 | "TPT1", 111 | "KLRC1", 112 | "LTB", 113 | "EEF1G", 114 | "RPLP1", 115 | "RPL36A", 116 | "CD44", 117 | "FOS", 118 | "RPL10A", 119 | "RPSA", 120 | "EEF1A1" 121 | ] 122 | } -------------------------------------------------------------------------------- /scripts/decontx_all.py: -------------------------------------------------------------------------------- 1 | import scanpy as sc 2 | import pandas as pd 3 | import numpy as np 4 | import os 5 | 6 | import anndata 7 | import anndata2ri 8 | import logging 9 | 10 | import rpy2.rinterface_lib.callbacks as rcb 11 | import rpy2.robjects as ro 12 | 13 | rcb.logger.setLevel(logging.ERROR) 14 | ro.pandas2ri.activate() 15 | anndata2ri.activate() 16 | from rpy2.robjects.packages import importr 17 | decontX = importr('decontX') 18 | 19 | data_dir = '' 20 | for f in os.listdir(data_dir): 21 | adata = sc.read_h5ad(os.path.join(data_dir, f)) 22 | 23 | batch_adatas = [] 24 | 25 | for batch in adata.obs['batch'].cat.categories: 26 | adata_temp = adata[adata.obs['batch'] == batch].copy() 27 | adata_temp.layers['counts'] = adata_temp.X 28 | 
adata_temp_adj = adata_temp.copy() 29 | adata_temp_adj = decontX.decontX(adata_temp_adj) 30 | adata_temp.layers['counts_adj'] = np.round(adata_temp_adj.layers['decontXcounts']) 31 | batch_adatas += [adata_temp] 32 | 33 | adata_adj = anndata.concat(batch_adatas) 34 | 35 | adata_adj.X = adata_adj.layers['counts_adj'] 36 | 37 | adata_adj.write(f) 38 | -------------------------------------------------------------------------------- /survival_analysis/tcga_survival_sarc.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "0ea4aebb-52ef-44d6-b23b-8c77da7a372e", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import scanpy as sc" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "f014eadd-55d4-42e6-997b-f9d1a2394124", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import pandas as pd\n", 21 | "import numpy as np\n", 22 | "import rpy2.rinterface_lib.callbacks\n", 23 | "import anndata2ri\n", 24 | "import logging\n", 25 | "\n", 26 | "from rpy2.robjects import pandas2ri\n", 27 | "from rpy2.robjects import r\n", 28 | "\n", 29 | "sc.settings.verbosity = 0\n", 30 | "rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)\n", 31 | "\n", 32 | "pandas2ri.activate()\n", 33 | "anndata2ri.activate()\n", 34 | "\n", 35 | "%load_ext rpy2.ipython" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "id": "e2d2d730-55a4-49cd-9f4d-bfaffc6e705f", 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "import os" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "id": "4c120c57-6d65-40ea-9906-93da7a2d5ae7", 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "import matplotlib.pyplot as plt" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 5, 61 | "id": "636fc6aa-cf16-480f-8370-42743e9a4c7f", 62 | 
"metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "import anndata2ri\n", 66 | "import rpy2.rinterface_lib.callbacks\n", 67 | "import logging" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "id": "8e1836b8-0d02-4419-86ff-94e57c9771c9", 73 | "metadata": {}, 74 | "source": [ 75 | "### Prepare sc data" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 6, 81 | "id": "1199eb34-0797-4489-b30d-090225441123", 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "data_dir = 'zenodo'" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "id": "dfd24ed1-df46-4b9f-8c28-e69458f27f46", 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "name": "stderr", 96 | "output_type": "stream", 97 | "text": [ 98 | "/home/ubuntu/miniconda3/envs/scenic/lib/python3.9/site-packages/anndata/__init__.py:51: FutureWarning: `anndata.read` is deprecated, use `anndata.read_h5ad` instead. `ad.read` will be removed in mid 2024.\n", 99 | " warnings.warn(\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "adata = sc.read(os.path.join(data_dir, 'sarcoma.h5ad'))" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "id": "2a57bce0-fffa-46d5-8498-90cb3b787c5b", 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "adata.obs['label'] = adata.obs['level2_cell_type_new']" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "id": "57dba386-dd05-42cd-a5d2-d42c990bbd3f", 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "adata.obs['replicate'] = adata.obs['dataset']" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "id": "be7c8d59-c136-4209-97b1-47c955829f88", 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "adata.obs[\"sample\"] = [\n", 135 | " f\"{rep}_{l}\" for rep, l in zip(adata.obs[\"replicate\"], adata.obs[\"label\"])\n", 136 | "]" 137 | ] 138 | }, 139 | { 140 | 
"cell_type": "code", 141 | "execution_count": 17, 142 | "id": "d8fb6cf7-9796-4c57-b919-e2dfce3f958c", 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "adata.obs[\"replicate\"] = adata.obs[\"replicate\"].astype(\"category\")\n", 147 | "adata.obs[\"label\"] = adata.obs[\"label\"].astype(\"category\")\n", 148 | "adata.obs[\"sample\"] = adata.obs[\"sample\"].astype(\"category\")" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 31, 154 | "id": "e91620cb-f3c2-4191-9246-021dddcbfbe5", 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "del adata.obsp\n", 159 | "del adata.uns\n", 160 | "del adata.obsm\n", 161 | "#adata.obsm.pop('X_scVI')" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 20, 167 | "id": "4855e74b-21af-4602-9d3c-c25c8a6d56c0", 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "adata.obs['cellstate'] = adata.obs['label']" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 26, 177 | "id": "d555a094-2bca-4462-8262-951c2db0bf6b", 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "cellstate_celltype_dict = {'B cells' : 'B cells', 'CD56bright NK cells' : 'NK cells', 'CD56dim NK cells' : 'NK cells', 'Dendritic cells' : 'Myeloid',\n", 182 | " 'Endothelial cells' : 'Endothelial cells', 'Epithelial Stroma' : 'Epithelial Stroma', 'Fibroblasts' : 'Fibroblasts', 'ILC' : 'ILC', 'Late erythroid' : 'Late erythroid',\n", 183 | " 'MAIT cells' : 'T cells', 'Macrophages' : 'Myeloid', 'Mast cells' : 'Myeloid', 'NKT cells' : 'NKT cells', 'Plasma cells' : 'B cells',\n", 184 | " 'Melanoma cells' : 'Melanoma cells', 'Memory B cells' : 'B cells', 'Monocytes' : 'Myeloid', 'Regulatory T cells' : 'T cells',\n", 185 | " 'Tcm/Naive helper T cells' : 'T cells', 'Tem/Effector helper T cells' : 'T cells', 'Naive B cells' : 'B cells', 'pDC' : 'Myeloid',\n", 186 | " 'Tem/Trm cytotoxic T cells' : 'T cells', 'Type 1 helper T cells' : 'T cells', 
'CAFs' : 'CAFs', 'Osteoblasts' : 'Osteoblasts', 'Osteoclasts' : 'Osteoclasts', 'Tcm/Naive cytotoxic T cells' : 'T cells'}" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 32, 192 | "id": "68c296df-7050-48e4-8a48-be4097a2bbd2", 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "adata.obs['celltype'] = [cellstate_celltype_dict[x] for x in adata.obs['cellstate']]" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 33, 202 | "id": "4b8914c0-57ca-4879-a1b6-e9ad2ba9cf40", 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "adata_subsample = adata#sc.pp.subsample(adata, fraction=0.1, copy=True)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 34, 212 | "id": "4aa6f5f2-523f-4b1c-8fb8-59380e2f3894", 213 | "metadata": {}, 214 | "outputs": [ 215 | { 216 | "name": "stdout", 217 | "output_type": "stream", 218 | "text": [ 219 | "class: SingleCellExperiment \n", 220 | "dim: 21505 150851 \n", 221 | "metadata(0):\n", 222 | "assays(3): X counts counts_adj\n", 223 | "rownames(21505): FAM138A OR4F5 ... MT-ND6 MT-CYB\n", 224 | "rowData names(0):\n", 225 | "colnames(150851): AAACCTGAGATCCCGC-1-BC2_zhou_BC2\n", 226 | " AAACCTGCAGGAATCG-1-BC2_zhou_BC2 ...\n", 227 | " TTTGTCAGTAGCCTAT-1_P4_cillo_cillio_osteosarcoma_4\n", 228 | " TTTGTCATCGTTACAG-1_P4_cillo_cillio_osteosarcoma_4\n", 229 | "colData names(23): sample patient ... 
cellstate celltype\n", 230 | "reducedDimNames(0):\n", 231 | "mainExpName: NULL\n", 232 | "altExpNames(0):\n" 233 | ] 234 | } 235 | ], 236 | "source": [ 237 | "%%R -i adata_subsample\n", 238 | "adata_subsample" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 35, 244 | "id": "f438e1dd-2a5a-4cfe-9c14-a7e656fde064", 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "%%R\n", 249 | "sc.dat <- t(assay(adata_subsample))" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 36, 255 | "id": "32b2054c-08cb-43a7-a687-e1a0f2fd339e", 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "%%R\n", 260 | "sc.dat <- as.matrix(sc.dat)" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 37, 266 | "id": "e06da537-0374-46ff-b434-44dfc367b11f", 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "%%R\n", 271 | "cell.type.labels <- colData(adata_subsample)$celltype" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 38, 277 | "id": "8b117c99-4322-424e-b364-d49f4332eb16", 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "%%R\n", 282 | "cell.type.labels <- as.vector(cell.type.labels)" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 39, 288 | "id": "79962a98-b844-470c-8871-54b673a4dd13", 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "%%R\n", 293 | "cell.state.labels <- colData(adata_subsample)$cellstate" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 40, 299 | "id": "c20a8b1b-4ef8-49f8-ba45-8d782d9dcc0d", 300 | "metadata": {}, 301 | "outputs": [], 302 | "source": [ 303 | "%%R\n", 304 | "cell.state.labels <- as.vector(cell.state.labels)" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "id": "02a0f24a-fd60-4607-a050-843a972357bb", 311 | "metadata": {}, 312 | "outputs": [], 313 | "source": [] 314 | }, 315 | { 
316 | "cell_type": "markdown", 317 | "id": "e50d9bcd-93c5-4da1-b987-6eac737d7119", 318 | "metadata": {}, 319 | "source": [ 320 | "### download and prepare tcga data" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 32, 326 | "id": "d01783aa-ec4b-4e4c-8f29-68c3b030aae2", 327 | "metadata": {}, 328 | "outputs": [], 329 | "source": [ 330 | "%%R\n", 331 | "library(TCGAbiolinks)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 33, 337 | "id": "4dc026e3-9c63-47fd-ad1b-787f0ede45a8", 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [ 341 | "%%R\n", 342 | "library(scran)" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "id": "bd8c9517-fc1d-4353-b862-a9d449e12943", 349 | "metadata": { 350 | "scrolled": true, 351 | "tags": [] 352 | }, 353 | "outputs": [], 354 | "source": [ 355 | "%%R\n", 356 | "my.dir = '/tumor-data/GDCdata'\n", 357 | "\n", 358 | "# Gene expression aligned against hg38\n", 359 | "query <- GDCquery(\n", 360 | " project = \"TARGET-OS\",\n", 361 | " data.category = \"Transcriptome Profiling\",\n", 362 | " data.type = \"Gene Expression Quantification\", \n", 363 | " workflow.type = \"STAR - Counts\"\n", 364 | ")\n", 365 | "#GDCdownload(query = query,\n", 366 | "# directory = \"/tumor-data/GDCdata\")\n", 367 | "\n", 368 | "data <- GDCprepare(query = query,\n", 369 | " directory = my.dir,\n", 370 | " save = TRUE,\n", 371 | " save.filename = \"/tumor-data/SARC_OS_exp.rda\")" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 44, 377 | "id": "6a557993-eb93-4a24-9563-d64d0872a5e1", 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "%%R\n", 382 | "load(file='/tumor-data/SARC_OS_exp.rda')" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 45, 388 | "id": "55c18344-ad02-462b-9848-965a1d81a6e9", 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [ 392 | "%%R\n", 393 | "bk.mtx <- 
assay(data)" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 46, 399 | "id": "84d6e95d-04f9-4f46-8dde-06ce23201818", 400 | "metadata": {}, 401 | "outputs": [], 402 | "source": [ 403 | "%%R\n", 404 | "rownames(bk.mtx) <- rowData(data)$gene_name" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 47, 410 | "id": "99939271-7c24-4139-b24a-09a2c59c83f8", 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [ 414 | "%%R\n", 415 | "bk.dat <- t(bk.mtx)" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": 34, 421 | "id": "92f83d3e-50a6-4cc1-b9b4-6d2ccca64de9", 422 | "metadata": {}, 423 | "outputs": [], 424 | "source": [ 425 | "%%R\n", 426 | "clin.dat <- GDCquery_clinic(\"TARGET-OS\", \"clinical\")" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": 49, 432 | "id": "aaed5275-64f3-4473-8bf3-6c46ead158b3", 433 | "metadata": {}, 434 | "outputs": [], 435 | "source": [ 436 | "%%R\n", 437 | "colnames(bk.dat) <- make.unique(colnames(bk.dat), sep = \".\")" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "execution_count": 50, 443 | "id": "fbbe0d62-9dd0-4608-8290-a7a8e32fe363", 444 | "metadata": {}, 445 | "outputs": [ 446 | { 447 | "name": "stdout", 448 | "output_type": "stream", 449 | "text": [ 450 | "[1] 88 60660\n" 451 | ] 452 | } 453 | ], 454 | "source": [ 455 | "%%R\n", 456 | "dim(bk.dat)" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": 14, 462 | "id": "f64e84ed-8038-4730-964c-65f57d1b2bb1", 463 | "metadata": {}, 464 | "outputs": [ 465 | { 466 | "name": "stdout", 467 | "output_type": "stream", 468 | "text": [ 469 | "[1] 88 60660\n" 470 | ] 471 | } 472 | ], 473 | "source": [ 474 | "%%R\n", 475 | "dim(bk.dat)" 476 | ] 477 | }, 478 | { 479 | "cell_type": "markdown", 480 | "id": "faf67e40-94b3-48d5-96d5-b3385380603f", 481 | "metadata": {}, 482 | "source": [ 483 | "### BayesPrism" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 
| "execution_count": 51, 489 | "id": "322e477a-14c0-4223-a30d-6f54eed952fa", 490 | "metadata": {}, 491 | "outputs": [], 492 | "source": [ 493 | "%%R\n", 494 | "library(BayesPrism)" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": 52, 500 | "id": "ab08cc58-e270-4154-8e91-90f9b4908465", 501 | "metadata": {}, 502 | "outputs": [ 503 | { 504 | "name": "stdout", 505 | "output_type": "stream", 506 | "text": [ 507 | "Gene symbols detected. Recommend to use EMSEMBLE IDs for more unique mapping.\n", 508 | "number of genes filtered in each category: \n", 509 | " Rb Mrp other_Rb chrM MALAT1 chrX chrY \n", 510 | " 89 78 17 13 1 882 115 \n", 511 | "A total of 1154 genes from Rb Mrp other_Rb chrM MALAT1 chrX chrY have been excluded \n", 512 | "A total of 2080 gene expressed in fewer than 5 cells have been excluded \n" 513 | ] 514 | } 515 | ], 516 | "source": [ 517 | "%%R\n", 518 | "sc.dat.filtered <- cleanup.genes (input=sc.dat, input.type=\"count.matrix\",\n", 519 | " species=\"hs\",\n", 520 | " gene.group=c( \"Rb\",\"Mrp\",\"other_Rb\",\"chrM\",\"MALAT1\",\"chrX\",\"chrY\") , exp.cells=5)" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": 53, 526 | "id": "30182eba-cb55-439a-8d5c-3c55d2a397d9", 527 | "metadata": {}, 528 | "outputs": [ 529 | { 530 | "name": "stdout", 531 | "output_type": "stream", 532 | "text": [ 533 | "[1] 150851 18271\n" 534 | ] 535 | } 536 | ], 537 | "source": [ 538 | "%%R\n", 539 | "dim(sc.dat.filtered)" 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": null, 545 | "id": "5a205390-81cb-4df2-97c3-7febed8fe9fa", 546 | "metadata": {}, 547 | "outputs": [ 548 | { 549 | "name": "stdout", 550 | "output_type": "stream", 551 | "text": [ 552 | "number of cells in each cell state \n", 553 | "cell.state.labels\n", 554 | " ILC MAIT cells \n", 555 | " 14 21 \n", 556 | " Osteoclasts Fibroblasts \n", 557 | " 31 105 \n", 558 | " Naive B cells CD56bright NK cells \n", 559 | " 237 393 \n", 560 | " 
Osteoblasts pDC \n", 561 | " 395 427 \n", 562 | "Tcm/Naive cytotoxic T cells Memory B cells \n", 563 | " 435 819 \n", 564 | " Plasma cells Tem/Effector helper T cells \n", 565 | " 886 887 \n", 566 | " Tcm/Naive helper T cells Dendritic cells \n", 567 | " 986 1057 \n", 568 | " CD56dim NK cells Regulatory T cells \n", 569 | " 1299 1361 \n", 570 | " NKT cells Monocytes \n", 571 | " 1405 1884 \n", 572 | " Endothelial cells Tem/Trm cytotoxic T cells \n", 573 | " 4891 7585 \n", 574 | " Macrophages CAFs \n", 575 | " 51616 74117 \n", 576 | "recommend to have sufficient number of cells in each cell state \n", 577 | "No tumor reference is speficied. Reference cell types are treated equally. \n", 578 | "Number of outlier genes filtered from mixture = 12 \n" 579 | ] 580 | } 581 | ], 582 | "source": [ 583 | "%%R\n", 584 | "myPrism <- new.prism(reference=sc.dat.filtered,\n", 585 | " mixture=bk.dat,\n", 586 | " input.type=\"count.matrix\",\n", 587 | " cell.type.labels = cell.type.labels,\n", 588 | " cell.state.labels = cell.state.labels,\n", 589 | " key=NULL,\n", 590 | " outlier.cut=0.01,\n", 591 | " outlier.fraction=0.1,\n", 592 | ")" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": null, 598 | "id": "606301eb-2f5f-4bc0-9a2c-b9bece1e9fec", 599 | "metadata": {}, 600 | "outputs": [], 601 | "source": [ 602 | "%%R\n", 603 | "bp.res <- run.prism(prism = myPrism, n.cores=5)" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": null, 609 | "id": "cfd406a5-6a9a-4288-9bd6-01ae97a33aad", 610 | "metadata": {}, 611 | "outputs": [], 612 | "source": [] 613 | }, 614 | { 615 | "cell_type": "code", 616 | "execution_count": 7, 617 | "id": "e3242f34-c50b-4697-8a32-44f1ef1721f1", 618 | "metadata": {}, 619 | "outputs": [], 620 | "source": [ 621 | "%%R -o Z_nk,Z_nk_colnames,Z_nk_rownames\n", 622 | "Z_nk <- get.exp(bp.res, state.or.type='type', cell.name='NK cells')\n", 623 | "Z_nk_colnames <- colnames(Z_nk)\n", 624 | "Z_nk_rownames <- 
rownames(Z_nk)" 625 | ] 626 | }, 627 | { 628 | "cell_type": "code", 629 | "execution_count": 8, 630 | "id": "544ed6cc-eef0-4470-9f18-7b813d0bb699", 631 | "metadata": {}, 632 | "outputs": [], 633 | "source": [ 634 | "Z_nk = pd.DataFrame(Z_nk,index=Z_nk_rownames,columns=Z_nk_colnames) ### NK expression matrix (patients x genes)" 635 | ] 636 | }, 637 | { 638 | "cell_type": "code", 639 | "execution_count": 9, 640 | "id": "09a613af-fd35-4143-bb55-ffdad4dab2f7", 641 | "metadata": {}, 642 | "outputs": [], 643 | "source": [ 644 | "Z_nk_adata = sc.AnnData(Z_nk)" 645 | ] 646 | }, 647 | { 648 | "cell_type": "code", 649 | "execution_count": 10, 650 | "id": "d9260841-f247-43b2-99ec-d80e32f73e7c", 651 | "metadata": {}, 652 | "outputs": [ 653 | { 654 | "data": { 655 | "text/plain": [ 656 | "AnnData object with n_obs × n_vars = 88 × 17836" 657 | ] 658 | }, 659 | "execution_count": 10, 660 | "metadata": {}, 661 | "output_type": "execute_result" 662 | } 663 | ], 664 | "source": [ 665 | "Z_nk_adata" 666 | ] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "execution_count": 11, 671 | "id": "c1495ff9-6d9d-4979-bd9b-1739309942c6", 672 | "metadata": {}, 673 | "outputs": [], 674 | "source": [ 675 | "%%R -o theta,theta_colnames,theta_rownames\n", 676 | "theta <- get.fraction(bp=bp.res, \n", 677 | " which.theta=\"final\",\n", 678 | " state.or.type=\"type\")\n", 679 | "\n", 680 | "theta_colnames <- colnames(theta)\n", 681 | "theta_rownames <- rownames(theta)" 682 | ] 683 | }, 684 | { 685 | "cell_type": "code", 686 | "execution_count": 12, 687 | "id": "9c2d9683-5d40-4fb4-9831-d3569f1c3a22", 688 | "metadata": {}, 689 | "outputs": [], 690 | "source": [ 691 | "%%R -o theta_state,theta_state_colnames,theta_state_rownames\n", 692 | "theta_state <- get.fraction(bp=bp.res, \n", 693 | " which.theta=\"first\",\n", 694 | " state.or.type=\"state\")\n", 695 | "\n", 696 | "theta_state_colnames <- colnames(theta_state)\n", 697 | "theta_state_rownames <- rownames(theta_state)" 698 | ] 699 | }, 700 | { 701 
| "cell_type": "code", 702 | "execution_count": 13, 703 | "id": "831b7ab0-fc6c-4985-9ad3-bf72501dccc7", 704 | "metadata": {}, 705 | "outputs": [], 706 | "source": [ 707 | "theta_type_df = pd.DataFrame(theta,index=theta_rownames,columns=theta_colnames) ### Cell type fractions (patients x cell types)" 708 | ] 709 | }, 710 | { 711 | "cell_type": "code", 712 | "execution_count": 14, 713 | "id": "75facb78-3a3a-4643-ba5f-0f107233443d", 714 | "metadata": {}, 715 | "outputs": [], 716 | "source": [ 717 | "theta_state_df = pd.DataFrame(theta_state,index=theta_state_rownames,columns=theta_state_colnames) ### Cell state fractions (patients x cell states)" 718 | ] 719 | }, 720 | { 721 | "cell_type": "code", 722 | "execution_count": 15, 723 | "id": "efbc8a2a-7f67-4182-82a7-6af224c38594", 724 | "metadata": {}, 725 | "outputs": [ 726 | { 727 | "data": { 728 | "image/png": "", 729 | "text/plain": [ 730 | "
" 731 | ] 732 | }, 733 | "metadata": {}, 734 | "output_type": "display_data" 735 | } 736 | ], 737 | "source": [ 738 | "bp_plot = pd.DataFrame(theta_state_df[['CD56bright NK cells', 'CD56dim NK cells']].T/theta_state_df[['CD56bright NK cells', 'CD56dim NK cells']].sum(axis=1).T).T.boxplot()" 739 | ] 740 | }, 741 | { 742 | "cell_type": "code", 743 | "execution_count": 16, 744 | "id": "05e30b54-4072-4f09-a520-cd8f0e9fadac", 745 | "metadata": {}, 746 | "outputs": [], 747 | "source": [ 748 | "figure = bp_plot.get_figure() \n", 749 | "figure.savefig('cd56bright_cd56dim_sarcoma_tcga.pdf', dpi=400)" 750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": null, 755 | "id": "ec80c5e2-4a99-4347-9ec1-881e8ca8687a", 756 | "metadata": {}, 757 | "outputs": [], 758 | "source": [] 759 | }, 760 | { 761 | "cell_type": "code", 762 | "execution_count": 17, 763 | "id": "a2d83d04-88e3-4b52-bbbd-4a35a7601e4a", 764 | "metadata": {}, 765 | "outputs": [], 766 | "source": [ 767 | "theta_state_groups = theta_state_df[['CD56bright NK cells', 'CD56dim NK cells']].copy()" 768 | ] 769 | }, 770 | { 771 | "cell_type": "code", 772 | "execution_count": 18, 773 | "id": "01cbf038-fbbc-40f4-8538-1b7701188a5e", 774 | "metadata": {}, 775 | "outputs": [], 776 | "source": [ 777 | "theta_type_immune = theta_type_df[['T cells', 'Myeloid', 'B cells', 'NK cells', 'NKT cells',]].copy()" 778 | ] 779 | }, 780 | { 781 | "cell_type": "code", 782 | "execution_count": null, 783 | "id": "ac2e6d76-8481-4993-b427-7f6f8ac713bb", 784 | "metadata": {}, 785 | "outputs": [], 786 | "source": [] 787 | }, 788 | { 789 | "cell_type": "code", 790 | "execution_count": 19, 791 | "id": "5b73f275-1c42-4cef-b0a1-011fa3a5154f", 792 | "metadata": {}, 793 | "outputs": [], 794 | "source": [ 795 | "theta_type_immune.index = ['-'.join(x.split('-')[:3]) for x in theta_type_immune.index]" 796 | ] 797 | }, 798 | { 799 | "cell_type": "code", 800 | "execution_count": 20, 801 | "id": "5b3c9aa2-8403-441b-b6d4-813772430f0e", 802 | 
"metadata": {}, 803 | "outputs": [], 804 | "source": [ 805 | "theta_type_immune = theta_type_immune[~theta_type_immune.index.duplicated(keep='first')]" 806 | ] 807 | }, 808 | { 809 | "cell_type": "code", 810 | "execution_count": 21, 811 | "id": "2022c5a6-2a15-496e-8098-9b79addc9329", 812 | "metadata": {}, 813 | "outputs": [], 814 | "source": [ 815 | "theta_state_groups.index = ['-'.join(x.split('-')[:3]) for x in theta_state_groups.index]" 816 | ] 817 | }, 818 | { 819 | "cell_type": "code", 820 | "execution_count": 22, 821 | "id": "8dcab9cd-2046-4ad0-bea3-d30ed4135467", 822 | "metadata": {}, 823 | "outputs": [], 824 | "source": [ 825 | "theta_state_groups = theta_state_groups[~theta_state_groups.index.duplicated(keep='first')]" 826 | ] 827 | }, 828 | { 829 | "cell_type": "code", 830 | "execution_count": null, 831 | "id": "3d309734-9df1-4818-a06a-593bb012eb81", 832 | "metadata": {}, 833 | "outputs": [], 834 | "source": [] 835 | }, 836 | { 837 | "cell_type": "code", 838 | "execution_count": 23, 839 | "id": "301b9a52-d27e-419f-808e-ca284d19327d", 840 | "metadata": {}, 841 | "outputs": [], 842 | "source": [ 843 | "theta_state_groups_fraction = (theta_state_groups.T/theta_state_groups.sum(axis=1)).T.copy()" 844 | ] 845 | }, 846 | { 847 | "cell_type": "code", 848 | "execution_count": 24, 849 | "id": "27291254-145e-4376-b30d-b749c2ee1020", 850 | "metadata": {}, 851 | "outputs": [], 852 | "source": [ 853 | "theta_type_immune_fraction = (theta_type_immune.T/theta_type_immune.sum(axis=1)).T.copy()" 854 | ] 855 | }, 856 | { 857 | "cell_type": "markdown", 858 | "id": "5f173cd0-59f1-4529-824b-17816cbd837b", 859 | "metadata": {}, 860 | "source": [ 861 | "#### Clinical SARC" 862 | ] 863 | }, 864 | { 865 | "cell_type": "code", 866 | "execution_count": 35, 867 | "id": "00d19a4a-d93d-4594-b436-23436a985276", 868 | "metadata": {}, 869 | "outputs": [], 870 | "source": [ 871 | "%%R -o clin_sarc\n", 872 | "clin_sarc <- clin.dat[1:20]" 873 | ] 874 | }, 875 | { 876 | "cell_type": "code", 
877 | "execution_count": 36, 878 | "id": "3d9a9806-6b68-4106-adbf-69bd29350ef7", 879 | "metadata": {}, 880 | "outputs": [], 881 | "source": [ 882 | "sarc_survival_df = pd.read_csv('/tumor-data/TARGET-OS.survival.tsv', sep='\\t')" 883 | ] 884 | }, 885 | { 886 | "cell_type": "code", 887 | "execution_count": 37, 888 | "id": "c0dd160d-5732-4dbe-ad15-d5aa82408607", 889 | "metadata": {}, 890 | "outputs": [], 891 | "source": [ 892 | "sarc_survival_df = sarc_survival_df.drop_duplicates(subset='_PATIENT')" 893 | ] 894 | }, 895 | { 896 | "cell_type": "code", 897 | "execution_count": 38, 898 | "id": "ea19ec9a-0da3-497e-ac62-2ce5d4255581", 899 | "metadata": {}, 900 | "outputs": [], 901 | "source": [ 902 | "clin_sarc = clin_sarc[[x in theta_type_immune.index for x in clin_sarc['submitter_id']]]" 903 | ] 904 | }, 905 | { 906 | "cell_type": "code", 907 | "execution_count": 39, 908 | "id": "8533de78-bf3d-47db-b9d8-328bb7888266", 909 | "metadata": {}, 910 | "outputs": [], 911 | "source": [ 912 | "sarc_survival_df.index = sarc_survival_df['_PATIENT']" 913 | ] 914 | }, 915 | { 916 | "cell_type": "code", 917 | "execution_count": 40, 918 | "id": "7d706a96-4886-4803-a157-fef261913071", 919 | "metadata": {}, 920 | "outputs": [], 921 | "source": [ 922 | "clin_sarc = clin_sarc[[x in sarc_survival_df.index for x in clin_sarc['submitter_id']]]" 923 | ] 924 | }, 925 | { 926 | "cell_type": "code", 927 | "execution_count": 41, 928 | "id": "a3fc78e9-830e-406d-80f1-124877f531c9", 929 | "metadata": {}, 930 | "outputs": [], 931 | "source": [ 932 | "sarc_survival_df = sarc_survival_df.loc[clin_sarc['submitter_id']].copy()" 933 | ] 934 | }, 935 | { 936 | "cell_type": "code", 937 | "execution_count": 42, 938 | "id": "f7985dbd-6a8d-4af8-94f3-31bfbeb80beb", 939 | "metadata": {}, 940 | "outputs": [ 941 | { 942 | "name": "stderr", 943 | "output_type": "stream", 944 | "text": [ 945 | "/tmp/ipykernel_584069/3320016945.py:1: DeprecationWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to 
set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`\n", 946 | " clin_sarc.loc[:,sarc_survival_df.columns] = sarc_survival_df.values\n" 947 | ] 948 | } 949 | ], 950 | "source": [ 951 | "clin_sarc.loc[:,sarc_survival_df.columns] = sarc_survival_df.values" 952 | ] 953 | }, 954 | { 955 | "cell_type": "code", 956 | "execution_count": null, 957 | "id": "e4197c1f-72ae-43d9-8540-01c1a4ec5617", 958 | "metadata": {}, 959 | "outputs": [], 960 | "source": [] 961 | }, 962 | { 963 | "cell_type": "code", 964 | "execution_count": 43, 965 | "id": "b5ee2ddc-531f-4d48-a23a-f0467651b0b1", 966 | "metadata": {}, 967 | "outputs": [], 968 | "source": [ 969 | "clin_sarc['total_nk'] = ['high' if x in theta_type_immune.sort_values('NK cells')[-int(theta_type_immune.shape[0]/3):].index \n", 970 | " else 'low' if x in theta_type_immune.sort_values('NK cells')[:int(theta_type_immune.shape[0]/3)].index else 'middle' for x in clin_sarc['submitter_id']]" 971 | ] 972 | }, 973 | { 974 | "cell_type": "code", 975 | "execution_count": 44, 976 | "id": "af56602c-12b6-4a99-9a54-2f11cf608dd6", 977 | "metadata": {}, 978 | "outputs": [], 979 | "source": [ 980 | "cell_state='CD56bright NK cells'\n", 981 | "clin_sarc['cd56bright'] = ['high' if x in theta_state_groups.sort_values(cell_state)[-int(theta_state_groups.shape[0]/3):].index \n", 982 | " else 'low' if x in theta_state_groups.sort_values(cell_state)[:int(theta_state_groups.shape[0]/3)].index else 'middle' for x in clin_sarc['submitter_id']]" 983 | ] 984 | }, 985 | { 986 | "cell_type": "code", 987 | "execution_count": 45, 988 | "id": "6af859c3-dfce-4f2c-802d-c7f5a44a0c76", 989 | "metadata": {}, 990 | "outputs": [], 991 | "source": [ 992 | "cell_state='CD56dim NK cells'\n", 993 | "clin_sarc['cd56dim'] = ['high' if x in theta_state_groups.sort_values(cell_state)[-int(theta_state_groups.shape[0]/3):].index \n", 994 | " 
else 'low' if x in theta_state_groups.sort_values(cell_state)[:int(theta_state_groups.shape[0]/3)].index else 'middle' for x in clin_sarc['submitter_id']]" 995 | ] 996 | }, 997 | { 998 | "cell_type": "code", 999 | "execution_count": null, 1000 | "id": "3751f140-a483-4f94-a1a4-ad129e041a50", 1001 | "metadata": {}, 1002 | "outputs": [], 1003 | "source": [] 1004 | }, 1005 | { 1006 | "cell_type": "markdown", 1007 | "id": "3bcfd948-e1f8-4851-b80c-09b1c9cf7c0c", 1008 | "metadata": {}, 1009 | "source": [ 1010 | "#### Stratify by NK, CD56bright and CD56dim" 1011 | ] 1012 | }, 1013 | { 1014 | "cell_type": "code", 1015 | "execution_count": 46, 1016 | "id": "d3955ba3-1080-4eb3-ada1-0b3609f021e3", 1017 | "metadata": {}, 1018 | "outputs": [], 1019 | "source": [ 1020 | "test_group='total_nk'" 1021 | ] 1022 | }, 1023 | { 1024 | "cell_type": "code", 1025 | "execution_count": 47, 1026 | "id": "23a0a87a-427f-46df-b0e3-6452572d51e6", 1027 | "metadata": {}, 1028 | "outputs": [], 1029 | "source": [ 1030 | "clin_sarc_test = clin_sarc[clin_sarc[f'{test_group}'] != 'middle'].copy()" 1031 | ] 1032 | }, 1033 | { 1034 | "cell_type": "code", 1035 | "execution_count": 48, 1036 | "id": "da9cfe55-9587-46a7-92d8-dfe5eee32b37", 1037 | "metadata": {}, 1038 | "outputs": [], 1039 | "source": [ 1040 | "%%R\n", 1041 | "library(survival)\n", 1042 | "library(\"survminer\")" 1043 | ] 1044 | }, 1045 | { 1046 | "cell_type": "code", 1047 | "execution_count": 49, 1048 | "id": "d1102d5a-324b-4cce-a741-ac70f1801512", 1049 | "metadata": {}, 1050 | "outputs": [], 1051 | "source": [ 1052 | "%%R -i clin_sarc_test\n", 1053 | "clin_sarc_test$OS=as.numeric(clin_sarc_test$OS)\n", 1054 | "clin_sarc_test$OS.time=as.numeric(clin_sarc_test$OS.time)\n", 1055 | "\n", 1056 | "clin_sarc_test$OS.time <- clin_sarc_test$OS.time / 365\n", 1057 | "\n", 1058 | "clin_sarc_test$OS[clin_sarc_test$OS.time>7] <- 0\n", 1059 | "clin_sarc_test$OS.time[clin_sarc_test$OS.time>7] <- 7\n", 1060 | "\n", 1061 | 
"clin_sarc_test$OS.use=clin_sarc_test$OS\n", 1062 | "clin_sarc_test$OS.time.use=clin_sarc_test$OS.time" 1063 | ] 1064 | }, 1065 | { 1066 | "cell_type": "code", 1067 | "execution_count": 50, 1068 | "id": "636acda2-0513-4dc2-9bac-febf8eb73ba6", 1069 | "metadata": {}, 1070 | "outputs": [], 1071 | "source": [ 1072 | "#%%R\n", 1073 | "#cox = coxph(Surv(OS.time.use,OS.use) ~ g1 + gender + age_at_index + ajcc_pathologic_stage, data=clin_sarc_test)" 1074 | ] 1075 | }, 1076 | { 1077 | "cell_type": "code", 1078 | "execution_count": 51, 1079 | "id": "558ee1ad-5212-4225-afc2-72c44cad8184", 1080 | "metadata": {}, 1081 | "outputs": [], 1082 | "source": [ 1083 | "%%R -i test_group\n", 1084 | "#form = as.formula(paste('Surv(OS.time.use,OS.use)', '~', test_group))\n", 1085 | "fit = survfit(as.formula(paste('Surv(OS.time.use,OS.use)', '~', test_group)), data=clin_sarc_test)" 1086 | ] 1087 | }, 1088 | { 1089 | "cell_type": "code", 1090 | "execution_count": 52, 1091 | "id": "d9fcbe81-9ad3-4478-853c-1ba3d28da488", 1092 | "metadata": {}, 1093 | "outputs": [ 1094 | { 1095 | "name": "stdout", 1096 | "output_type": "stream", 1097 | "text": [ 1098 | "png \n", 1099 | " 2 \n" 1100 | ] 1101 | } 1102 | ], 1103 | "source": [ 1104 | "%%R\n", 1105 | "dir=\"\"\n", 1106 | "cancer=paste('SARC', '_', test_group)\n", 1107 | "p = ggsurvplot(fit, clin_sarc_test, size=0.3, vlegend.labs=unique(clin_sarc_test[[test_group]]),\n", 1108 | " surv.median.line=\"none\", pval=T, conf.int=F,\n", 1109 | " palette=c(\"#990066\",\"#CCCCCC\"),title=cancer) + \n", 1110 | " xlab(\"Years\")\n", 1111 | " \n", 1112 | "pdf(file=sprintf(\"survival_%s.pdf\", cancer), width=3, height=3,onefile = FALSE)\n", 1113 | "print(p)\n", 1114 | "dev.off()" 1115 | ] 1116 | }, 1117 | { 1118 | "cell_type": "code", 1119 | "execution_count": null, 1120 | "id": "b4886f2e-69a8-4966-8b36-184058c6e808", 1121 | "metadata": {}, 1122 | "outputs": [], 1123 | "source": [] 1124 | }, 1125 | { 1126 | "cell_type": "code", 1127 | "execution_count": null, 
1128 | "id": "d8d1dfa4-7a88-4c99-9203-9c7a296493be", 1129 | "metadata": {}, 1130 | "outputs": [], 1131 | "source": [] 1132 | }, 1133 | { 1134 | "cell_type": "markdown", 1135 | "id": "4af1fd89-99dd-4ce7-a615-3b1c9df13b6b", 1136 | "metadata": {}, 1137 | "source": [ 1138 | "#### Score NK cell states" 1139 | ] 1140 | }, 1141 | { 1142 | "cell_type": "code", 1143 | "execution_count": 53, 1144 | "id": "e8a4b598-0897-4fcd-8081-5e976e7f64b3", 1145 | "metadata": {}, 1146 | "outputs": [], 1147 | "source": [ 1148 | "from pyscenic.aucell import aucell\n", 1149 | "from ctxcore.genesig import GeneSignature\n", 1150 | "from pyscenic.aucell import create_rankings, enrichment" 1151 | ] 1152 | }, 1153 | { 1154 | "cell_type": "code", 1155 | "execution_count": null, 1156 | "id": "9a0c1a11-f727-4db3-9322-e9b1fec8d583", 1157 | "metadata": {}, 1158 | "outputs": [], 1159 | "source": [] 1160 | }, 1161 | { 1162 | "cell_type": "code", 1163 | "execution_count": 82, 1164 | "id": "5f887f91-1580-48a0-b3fb-1e53e27c6a73", 1165 | "metadata": {}, 1166 | "outputs": [], 1167 | "source": [ 1168 | "all_groups_deg = pd.read_csv('nhood_markers_all.csv')" 1169 | ] 1170 | }, 1171 | { 1172 | "cell_type": "code", 1173 | "execution_count": 83, 1174 | "id": "2df736c9-f190-4aef-9151-b6f45dd8d0b4", 1175 | "metadata": {}, 1176 | "outputs": [], 1177 | "source": [ 1178 | "for group in range(1,7):\n", 1179 | " all_groups_deg[f'rank_{group}'] = all_groups_deg[f'logFC_{group}']*-np.log10(all_groups_deg[f'adj.P.Val_{group}'])" 1180 | ] 1181 | }, 1182 | { 1183 | "cell_type": "code", 1184 | "execution_count": 84, 1185 | "id": "174b5fa8-6658-48f8-b65e-56884fdca455", 1186 | "metadata": {}, 1187 | "outputs": [], 1188 | "source": [ 1189 | "group = 1\n", 1190 | "g1_sig = np.array(all_groups_deg.sort_values(f'rank_{group}')[-30:]['GeneID'])" 1191 | ] 1192 | }, 1193 | { 1194 | "cell_type": "code", 1195 | "execution_count": 85, 1196 | "id": "532598c3-5e9d-4ecd-a7f5-b577ed9afc67", 1197 | "metadata": {}, 1198 | "outputs": [], 1199 | 
"source": [ 1200 | "group = 2\n", 1201 | "g2_sig = np.array(all_groups_deg.sort_values(f'rank_{group}')[-30:]['GeneID'])" 1202 | ] 1203 | }, 1204 | { 1205 | "cell_type": "code", 1206 | "execution_count": 86, 1207 | "id": "44c5db66-14c5-45ab-ac8b-e6a68e011ac8", 1208 | "metadata": {}, 1209 | "outputs": [], 1210 | "source": [ 1211 | "group = 3\n", 1212 | "g3_sig = np.array(all_groups_deg.sort_values(f'rank_{group}')[-30:]['GeneID'])" 1213 | ] 1214 | }, 1215 | { 1216 | "cell_type": "code", 1217 | "execution_count": 87, 1218 | "id": "4de9ce49-0fd7-4d56-828b-45b19b61bc18", 1219 | "metadata": {}, 1220 | "outputs": [], 1221 | "source": [ 1222 | "group = 4\n", 1223 | "g4_sig = np.array(all_groups_deg.sort_values(f'rank_{group}')[-30:]['GeneID'])" 1224 | ] 1225 | }, 1226 | { 1227 | "cell_type": "code", 1228 | "execution_count": 88, 1229 | "id": "73b54f8d-43bb-4bb0-8781-d40d3285f74c", 1230 | "metadata": {}, 1231 | "outputs": [], 1232 | "source": [ 1233 | "group = 5\n", 1234 | "g5_sig = np.array(all_groups_deg.sort_values(f'rank_{group}')[-30:]['GeneID'])" 1235 | ] 1236 | }, 1237 | { 1238 | "cell_type": "code", 1239 | "execution_count": 89, 1240 | "id": "fb6c6089-a94e-4d39-86cc-cc5b558f2f1b", 1241 | "metadata": {}, 1242 | "outputs": [], 1243 | "source": [ 1244 | "group = 6\n", 1245 | "g6_sig = np.array(all_groups_deg.sort_values(f'rank_{group}')[-30:]['GeneID'])" 1246 | ] 1247 | }, 1248 | { 1249 | "cell_type": "code", 1250 | "execution_count": null, 1251 | "id": "ccbc8921-f606-432a-b457-4bb6ef26c7c1", 1252 | "metadata": {}, 1253 | "outputs": [], 1254 | "source": [] 1255 | }, 1256 | { 1257 | "cell_type": "code", 1258 | "execution_count": 64, 1259 | "id": "8d26171f-c3e8-4758-9e7a-8152f4851ff8", 1260 | "metadata": {}, 1261 | "outputs": [], 1262 | "source": [ 1263 | "signature_genes = [g1_sig,g2_sig,g3_sig,g4_sig,g5_sig,g6_sig]\n", 1264 | "signature_names = [f'g{x}' for x in range(1,7)]" 1265 | ] 1266 | }, 1267 | { 1268 | "cell_type": "code", 1269 | "execution_count": null, 1270 | 
"id": "c2837b2e-8325-4ef0-a27d-080ce0bda7c3", 1271 | "metadata": {}, 1272 | "outputs": [], 1273 | "source": [] 1274 | }, 1275 | { 1276 | "cell_type": "code", 1277 | "execution_count": 65, 1278 | "id": "179e1677-e787-440e-b883-a50d85f9fa2b", 1279 | "metadata": {}, 1280 | "outputs": [], 1281 | "source": [ 1282 | "gene_sigs = [GeneSignature(name,genes) for (name,genes) in zip(signature_names, signature_genes)]" 1283 | ] 1284 | }, 1285 | { 1286 | "cell_type": "code", 1287 | "execution_count": 66, 1288 | "id": "a3cd91fe-766a-4701-91c7-cf0e71140924", 1289 | "metadata": {}, 1290 | "outputs": [], 1291 | "source": [ 1292 | "exp_df = Z_nk_adata.to_df()#pd.DataFrame(adata.X.todense(), columns=adata.var.index, index=adata.obs.index)" 1293 | ] 1294 | }, 1295 | { 1296 | "cell_type": "code", 1297 | "execution_count": 67, 1298 | "id": "7f06c825-276a-4603-af83-bf74664a1505", 1299 | "metadata": {}, 1300 | "outputs": [], 1301 | "source": [ 1302 | "import random" 1303 | ] 1304 | }, 1305 | { 1306 | "cell_type": "code", 1307 | "execution_count": 68, 1308 | "id": "f3b78f49-b4ab-4781-88fa-7c59685a608b", 1309 | "metadata": {}, 1310 | "outputs": [], 1311 | "source": [ 1312 | "random.seed(0)" 1313 | ] 1314 | }, 1315 | { 1316 | "cell_type": "code", 1317 | "execution_count": 69, 1318 | "id": "417bfe4d-a333-4b63-84d0-2610818021c0", 1319 | "metadata": {}, 1320 | "outputs": [], 1321 | "source": [ 1322 | "auc_mtx = aucell(exp_df, gene_sigs, num_workers=4, seed=0) #normalize=True, " 1323 | ] 1324 | }, 1325 | { 1326 | "cell_type": "code", 1327 | "execution_count": 70, 1328 | "id": "79427fa9-b6a1-4ef7-9467-61318a929dd7", 1329 | "metadata": {}, 1330 | "outputs": [], 1331 | "source": [ 1332 | "auc_mtx.index = ['-'.join(x.split('-')[:3]) for x in auc_mtx.index]" 1333 | ] 1334 | }, 1335 | { 1336 | "cell_type": "code", 1337 | "execution_count": 71, 1338 | "id": "d9f7edb2-8f9b-48d1-b5f5-7c0582a9fb93", 1339 | "metadata": {}, 1340 | "outputs": [], 1341 | "source": [ 1342 | "auc_mtx = 
auc_mtx[~auc_mtx.index.duplicated(keep='first')]" 1343 | ] 1344 | }, 1345 | { 1346 | "cell_type": "code", 1347 | "execution_count": 72, 1348 | "id": "4ccfd324-cf00-402f-a328-38d650d3c7a6", 1349 | "metadata": {}, 1350 | "outputs": [], 1351 | "source": [ 1352 | "auc_mtx = auc_mtx.loc[clin_sarc['submitter_id']]" 1353 | ] 1354 | }, 1355 | { 1356 | "cell_type": "code", 1357 | "execution_count": null, 1358 | "id": "b2952d29-f759-4c47-b48e-4fdb825e7a26", 1359 | "metadata": {}, 1360 | "outputs": [], 1361 | "source": [] 1362 | }, 1363 | { 1364 | "cell_type": "code", 1365 | "execution_count": 73, 1366 | "id": "c749a55b-f672-432f-92cd-008a4c21318f", 1367 | "metadata": {}, 1368 | "outputs": [], 1369 | "source": [ 1370 | "for i in range(1,7):\n", 1371 | " nk_state=i\n", 1372 | " clin_sarc[f'g{nk_state}'] = ['high' if x in auc_mtx.sort_values(f'g{nk_state}')[-int(auc_mtx.shape[0]/2):].index \n", 1373 | " else 'low' if x in auc_mtx.sort_values(f'g{nk_state}')[:int(auc_mtx.shape[0]/2)].index else 'middle' for x in clin_sarc['submitter_id']]" 1374 | ] 1375 | }, 1376 | { 1377 | "cell_type": "code", 1378 | "execution_count": 74, 1379 | "id": "9c04681f-53fe-4508-a761-e664195df435", 1380 | "metadata": {}, 1381 | "outputs": [], 1382 | "source": [ 1383 | "clin_sarc['g1g3'] = [f'{x[:2]}_{y[:2]}' for x,y in zip(clin_sarc['g1'], clin_sarc['g3'])]" 1384 | ] 1385 | }, 1386 | { 1387 | "cell_type": "code", 1388 | "execution_count": null, 1389 | "id": "a752c09a-7668-4af3-a7a7-fdf04888378d", 1390 | "metadata": {}, 1391 | "outputs": [], 1392 | "source": [] 1393 | }, 1394 | { 1395 | "cell_type": "code", 1396 | "execution_count": 75, 1397 | "id": "a11b0f38-1f09-4b04-a1e2-8b80e4d1f5ee", 1398 | "metadata": {}, 1399 | "outputs": [], 1400 | "source": [ 1401 | "clin_sarc_test = clin_sarc[clin_sarc['g1g3'].isin(['lo_hi', 'hi_lo'])].copy()" 1402 | ] 1403 | }, 1404 | { 1405 | "cell_type": "code", 1406 | "execution_count": 76, 1407 | "id": "94aff084-d75a-4a8a-b97b-b45aa79fb9e1", 1408 | "metadata": {}, 1409 | 
"outputs": [], 1410 | "source": [ 1411 | "%%R\n", 1412 | "library(survival)\n", 1413 | "library(\"survminer\")" 1414 | ] 1415 | }, 1416 | { 1417 | "cell_type": "code", 1418 | "execution_count": 77, 1419 | "id": "3faf821e-2ba2-4766-b546-f9ec6b87ee51", 1420 | "metadata": {}, 1421 | "outputs": [], 1422 | "source": [ 1423 | "%%R -i clin_sarc_test\n", 1424 | "clin_sarc_test$OS=as.numeric(clin_sarc_test$OS)\n", 1425 | "clin_sarc_test$OS.time=as.numeric(clin_sarc_test$OS.time)\n", 1426 | "\n", 1427 | "clin_sarc_test$OS.time <- clin_sarc_test$OS.time / 365\n", 1428 | "\n", 1429 | "clin_sarc_test$OS[clin_sarc_test$OS.time>7] <- 0\n", 1430 | "clin_sarc_test$OS.time[clin_sarc_test$OS.time>7] <- 7\n", 1431 | "\n", 1432 | "clin_sarc_test$OS.use=clin_sarc_test$OS\n", 1433 | "clin_sarc_test$OS.time.use=clin_sarc_test$OS.time" 1434 | ] 1435 | }, 1436 | { 1437 | "cell_type": "code", 1438 | "execution_count": 78, 1439 | "id": "b2ab76f1-343d-47e8-be17-b0f96a81910d", 1440 | "metadata": {}, 1441 | "outputs": [], 1442 | "source": [ 1443 | "#%%R\n", 1444 | "#cox = coxph(Surv(OS.time.use,OS.use) ~ g1 + gender + age_at_index + ajcc_pathologic_stage, data=clin_sarc_test)" 1445 | ] 1446 | }, 1447 | { 1448 | "cell_type": "code", 1449 | "execution_count": 79, 1450 | "id": "b2092821-c1a9-4d1a-bebf-a61f6912a706", 1451 | "metadata": {}, 1452 | "outputs": [], 1453 | "source": [ 1454 | "%%R\n", 1455 | "fit = survfit(Surv(OS.time.use,OS.use) ~ g1g3, data=clin_sarc_test)" 1456 | ] 1457 | }, 1458 | { 1459 | "cell_type": "code", 1460 | "execution_count": 80, 1461 | "id": "141d3089-a904-44f5-b554-2f9919d92a1b", 1462 | "metadata": {}, 1463 | "outputs": [ 1464 | { 1465 | "name": "stdout", 1466 | "output_type": "stream", 1467 | "text": [ 1468 | "png \n", 1469 | " 2 \n" 1470 | ] 1471 | } 1472 | ], 1473 | "source": [ 1474 | "%%R\n", 1475 | "dir=\"\"\n", 1476 | "cancer='SARC_g1g3'\n", 1477 | "p = ggsurvplot(fit, clin_sarc_test, size=0.3, legend.labs=sort(unique(clin_sarc_test$g1g3)),\n", 1478 | " 
surv.median.line=\"none\", pval=T, conf.int=F,\n", 1479 | " #risk.table=T, risk.table.y.text.col=T,\n", 1480 | " palette=c(\"#CCCCCC\", \"#990066\"),title=cancer) + \n", 1481 | " xlab(\"Years\")\n", 1482 | " \n", 1483 | "pdf(file=sprintf(\"survival_%s.pdf\", cancer), width=3, height=3,onefile = FALSE)\n", 1484 | "print(p)\n", 1485 | "dev.off()" 1486 | ] 1487 | }, 1488 | { 1489 | "cell_type": "code", 1490 | "execution_count": null, 1491 | "id": "4efc8de6-2a32-4a5f-8623-06db71f7a1c1", 1492 | "metadata": {}, 1493 | "outputs": [], 1494 | "source": [] 1495 | }, 1496 | { 1497 | "cell_type": "code", 1498 | "execution_count": null, 1499 | "id": "fb80ca63-82b6-4df5-9d20-4114204bb002", 1500 | "metadata": {}, 1501 | "outputs": [], 1502 | "source": [] 1503 | }, 1504 | { 1505 | "cell_type": "code", 1506 | "execution_count": null, 1507 | "id": "83879ef4-f5b7-4cb7-b5af-abb0b944d9c1", 1508 | "metadata": {}, 1509 | "outputs": [], 1510 | "source": [] 1511 | }, 1512 | { 1513 | "cell_type": "code", 1514 | "execution_count": null, 1515 | "id": "9369a174-49f5-48b5-9cb3-967d04618bbf", 1516 | "metadata": {}, 1517 | "outputs": [], 1518 | "source": [] 1519 | } 1520 | ], 1521 | "metadata": { 1522 | "kernelspec": { 1523 | "display_name": "Python 3 (ipykernel)", 1524 | "language": "python", 1525 | "name": "python3" 1526 | }, 1527 | "language_info": { 1528 | "codemirror_mode": { 1529 | "name": "ipython", 1530 | "version": 3 1531 | }, 1532 | "file_extension": ".py", 1533 | "mimetype": "text/x-python", 1534 | "name": "python", 1535 | "nbconvert_exporter": "python", 1536 | "pygments_lexer": "ipython3", 1537 | "version": "3.9.16" 1538 | } 1539 | }, 1540 | "nbformat": 4, 1541 | "nbformat_minor": 5 1542 | } 1543 | --------------------------------------------------------------------------------