├── .gitignore ├── README.md ├── main_data ├── B_cell_aggregates.xlsx ├── CD8Tex_relevant_clonotype_number_over2.csv ├── GSE200996_HNSCC_T_with_TCR_annotation.csv ├── NMF_all_group_5.csv ├── expanded_CD4Treg_clonotype_number_over2.csv └── survival.metadata.final.csv └── main_figure ├── figure1_and_related_supplemental_figure ├── README.md ├── assessment.csv ├── logistic_regression_analysis_of_clinical_metadata_in_association_with_MPR_rate.Rmd ├── metadata_analysis.Rmd └── pathological_assessment.Rmd ├── figure2_and_related_supplemental_figure ├── NMF.R ├── README.md ├── proportion_plot.R └── robustness_of_NMF.R ├── figure3_and_related_supplemental_figure ├── TNBC_zyy.R ├── analysis_of_chemo_only_NSCLC.R ├── number_B_aggrates.R ├── startrac.R └── visualization_of_NKT_clones.Rmd ├── figure4_and_related_supplemental_figure ├── CCR8.IF.Rmd ├── CCR8_IF.csv ├── CCR8_Treg_in_all_Treg.R ├── DEG_volcano_plot.R └── T_cell_clonal_composition_in_individual_patients.Rmd ├── figure5_and_related_supplemental_figure ├── CCR8IHC.xlsm ├── CCR8_IHC_non-MPR_subtypes.Rmd ├── Tex_relevant_clononumber_6group.R ├── Treg_clonenumber_6group.R ├── fig5D_Tex_Treg_clone_number_scatter.R └── figure5E_alluvium.R └── figure6_and_related_supplemental_figure ├── HNSCC.analysis.Rmd ├── HNSCC.meta.csv ├── README.md ├── analysis_of_bulk_RNAseq_survival.Rmd ├── celltypist.ipynb └── survival_analysis.Rmd /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # User-specific files 9 | .Ruserdata 10 | 11 | # Example code in package build process 12 | *-Ex.R 13 | 14 | # Output files from R CMD build 15 | /*.tar.gz 16 | 17 | # Output files from R CMD check 18 | /*.Rcheck/ 19 | 20 | # RStudio files 21 | .Rproj.user/ 22 | 23 | # produced vignettes 24 | vignettes/*.html 25 | vignettes/*.pdf 26 | 27 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 28 | 
.httr-oauth 29 | 30 | # knitr and R markdown default cache directories 31 | *_cache/ 32 | /cache/ 33 | 34 | # Temporary files created by R markdown 35 | *.utf8.md 36 | *.knit.md 37 | 38 | # R Environment Variables 39 | .Renviron 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # neoadjuvant treatment of NSCLC 2 | 3 | This code is used to produce most of the figure panels. 4 | Contact: liuzedaosk@163.com 5 | -------------------------------------------------------------------------------- /main_data/B_cell_aggregates.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zwj-tina/neoadjuvant-treatment-of-NSCLC/63ba67130a8e4c11e0bb7f5fac31b12ffcb2fab4/main_data/B_cell_aggregates.xlsx -------------------------------------------------------------------------------- /main_data/CD8Tex_relevant_clonotype_number_over2.csv: -------------------------------------------------------------------------------- 1 | "","samples","number" 2 | "1","P1","24" 3 | "2","P102","51" 4 | "3","P103","28" 5 | "4","P105","19" 6 | "5","P106","17" 7 | "6","P107","1" 8 | "7","P113","10" 9 | "8","P114","3" 10 | "9","P118","10" 11 | "10","P119","28" 12 | "11","P121","21" 13 | "12","P122","5" 14 | "13","P124","30" 15 | "14","P125","13" 16 | "15","P127","9" 17 | "16","P130","8" 18 | "17","P131","31" 19 | "18","P137","12" 20 | "19","P142","12" 21 | "20","P146","21" 22 | "21","P150","4" 23 | "22","P156","4" 24 | "23","P158","10" 25 | "24","P16","8" 26 | "25","P164","1" 27 | "26","P166","3" 28 | "27","P172","1" 29 | "28","P174","9" 30 | "29","P179","7" 31 | "30","P18","5" 32 | "31","P180","46" 33 | "32","P182","35" 34 | "33","P184","5" 35 | "34","P189","13" 36 | "35","P190","12" 37 | "36","P192","5" 38 | "37","P199","8" 39 | "38","P2","9" 40 | "39","P20","21" 41 | "40","P200","14" 42 | "41","P201","15" 43 | 
"42","P204","8" 44 | "43","P205","22" 45 | "44","P209","5" 46 | "45","P213","12" 47 | "46","P218","14" 48 | "47","P221","7" 49 | "48","P226","14" 50 | "49","P227","6" 51 | "50","P23","5" 52 | "51","P24","3" 53 | "52","P243","8" 54 | "53","P247","2" 55 | "54","P248","2" 56 | "55","P249","3" 57 | "56","P258","1" 58 | "57","P259","11" 59 | "58","P26","24" 60 | "59","P261","8" 61 | "60","P264","3" 62 | "61","P265","1" 63 | "62","P266","22" 64 | "63","P267","3" 65 | "64","P268","8" 66 | "65","P270","5" 67 | "66","P273","13" 68 | "67","P277","3" 69 | "68","P278","3" 70 | "69","P280","3" 71 | "70","P281","6" 72 | "71","P282","1" 73 | "72","P285","7" 74 | "73","P286","8" 75 | "74","P287","13" 76 | "75","P288","2" 77 | "76","P289","94" 78 | "77","P291","6" 79 | "78","P298","7" 80 | "79","P3","3" 81 | "80","P304","8" 82 | "81","P309","10" 83 | "82","P31","45" 84 | "83","P312","25" 85 | "84","P32","10" 86 | "85","P321","7" 87 | "86","P322","10" 88 | "87","P323","12" 89 | "88","P328","2" 90 | "89","P329","7" 91 | "90","P33","17" 92 | "91","P334","3" 93 | "92","P337","13" 94 | "93","P339","1" 95 | "94","P34","27" 96 | "95","P340","1" 97 | "96","P346","2" 98 | "97","P347","7" 99 | "98","P349","9" 100 | "99","P35","8" 101 | "100","P36","6" 102 | "101","P365","3" 103 | "102","P367","3" 104 | "103","P368","4" 105 | "104","P372","9" 106 | "105","P380","1" 107 | "106","P387","6" 108 | "107","P389","29" 109 | "108","P39","3" 110 | "109","P390","6" 111 | "110","P391","30" 112 | "111","P394","3" 113 | "112","P395","18" 114 | "113","P399","21" 115 | "114","P4","5" 116 | "115","P402","1" 117 | "116","P403","5" 118 | "117","P404","6" 119 | "118","P405","2" 120 | "119","P407","3" 121 | "120","P412","2" 122 | "121","P414","10" 123 | "122","P416","8" 124 | "123","P417","3" 125 | "124","P418","9" 126 | "125","P420","3" 127 | "126","P423","25" 128 | "127","P427","1" 129 | "128","P428","18" 130 | "129","P43","30" 131 | "130","P432","6" 132 | "131","P44","25" 133 | "132","P445","16" 134 | 
"133","P45","13" 135 | "134","P453","15" 136 | "135","P454","15" 137 | "136","P457","3" 138 | "137","P459","2" 139 | "138","P46","18" 140 | "139","P462","11" 141 | "140","P463","1" 142 | "141","P464","6" 143 | "142","P467","2" 144 | "143","P469","15" 145 | "144","P47","34" 146 | "145","P470","25" 147 | "146","P471","3" 148 | "147","P472","14" 149 | "148","P473","19" 150 | "149","P477","5" 151 | "150","P481","27" 152 | "151","P482","31" 153 | "152","P485","15" 154 | "153","P486","4" 155 | "154","P491","18" 156 | "155","P494","1" 157 | "156","P498","1" 158 | "157","P5","41" 159 | "158","P509","11" 160 | "159","P510","18" 161 | "160","P511","14" 162 | "161","P520","26" 163 | "162","P523","23" 164 | "163","P524","6" 165 | "164","P525","5" 166 | "165","P527","3" 167 | "166","P528","28" 168 | "167","P53","25" 169 | "168","P533","3" 170 | "169","P540","21" 171 | "170","P550","12" 172 | "171","P567","13" 173 | "172","P57","22" 174 | "173","P574","5" 175 | "174","P579","15" 176 | "175","P58","6" 177 | "176","P587","22" 178 | "177","P592","7" 179 | "178","P595","3" 180 | "179","P6","4" 181 | "180","P63","66" 182 | "181","P64","9" 183 | "182","P66","3" 184 | "183","P69","13" 185 | "184","P70","12" 186 | "185","P71","2" 187 | "186","P73","4" 188 | "187","P74","2" 189 | "188","P9","15" 190 | "189","P90","5" 191 | "190","P92","4" 192 | "191","P93","3" 193 | "192","P96","30" 194 | "193","P115","0" 195 | "194","P161","0" 196 | "195","P168","0" 197 | "196","P185","0" 198 | "197","P206","0" 199 | "198","P207","0" 200 | "199","P233","0" 201 | "200","P234","0" 202 | "201","P235","0" 203 | "202","P257","0" 204 | "203","P279","0" 205 | "204","P296","0" 206 | "205","P325","0" 207 | "206","P496","0" 208 | "207","P502","0" 209 | "208","P59","0" 210 | "209","P67","0" 211 | "210","P68","0" 212 | "211","P160","0" 213 | "212","P292","0" 214 | "213","P293","0" 215 | "214","P483","0" 216 | -------------------------------------------------------------------------------- 
/main_data/NMF_all_group_5.csv: -------------------------------------------------------------------------------- 1 | "","sampleID","group" 2 | "1","P1","4" 3 | "2","P102","1" 4 | "3","P103","3" 5 | "4","P105","3" 6 | "5","P106","4" 7 | "6","P107","1" 8 | "7","P111","2" 9 | "8","P113","3" 10 | "9","P114","2" 11 | "10","P115","5" 12 | "11","P118","3" 13 | "12","P119","3" 14 | "13","P121","3" 15 | "14","P122","2" 16 | "15","P124","3" 17 | "16","P125","3" 18 | "17","P127","2" 19 | "18","P130","2" 20 | "19","P131","4" 21 | "20","P137","3" 22 | "21","P142","3" 23 | "22","P146","3" 24 | "23","P150","1" 25 | "24","P156","4" 26 | "25","P158","2" 27 | "26","P16","3" 28 | "27","P160","3" 29 | "28","P161","5" 30 | "29","P164","5" 31 | "30","P166","2" 32 | "31","P168","5" 33 | "32","P172","5" 34 | "33","P174","2" 35 | "34","P179","2" 36 | "35","P18","1" 37 | "36","P180","3" 38 | "37","P182","4" 39 | "38","P184","3" 40 | "39","P185","2" 41 | "40","P189","1" 42 | "41","P190","3" 43 | "42","P192","3" 44 | "43","P199","2" 45 | "44","P2","3" 46 | "45","P20","3" 47 | "46","P200","2" 48 | "47","P201","3" 49 | "48","P204","3" 50 | "49","P205","3" 51 | "50","P206","5" 52 | "51","P207","2" 53 | "52","P209","2" 54 | "53","P213","3" 55 | "54","P218","3" 56 | "55","P22","4" 57 | "56","P221","4" 58 | "57","P223","4" 59 | "58","P226","3" 60 | "59","P227","1" 61 | "60","P23","1" 62 | "61","P233","3" 63 | "62","P234","1" 64 | "63","P235","3" 65 | "64","P24","1" 66 | "65","P243","3" 67 | "66","P247","5" 68 | "67","P248","2" 69 | "68","P249","2" 70 | "69","P257","1" 71 | "70","P258","1" 72 | "71","P259","4" 73 | "72","P26","4" 74 | "73","P261","3" 75 | "74","P264","3" 76 | "75","P265","5" 77 | "76","P266","5" 78 | "77","P267","2" 79 | "78","P268","2" 80 | "79","P270","3" 81 | "80","P273","3" 82 | "81","P277","1" 83 | "82","P278","5" 84 | "83","P279","2" 85 | "84","P280","2" 86 | "85","P281","4" 87 | "86","P282","5" 88 | "87","P285","2" 89 | "88","P286","1" 90 | "89","P287","1" 91 | 
"90","P288","3" 92 | "91","P289","4" 93 | "92","P29","3" 94 | "93","P291","2" 95 | "94","P292","2" 96 | "95","P293","5" 97 | "96","P296","1" 98 | "97","P298","3" 99 | "98","P3","2" 100 | "99","P304","3" 101 | "100","P309","2" 102 | "101","P31","3" 103 | "102","P312","3" 104 | "103","P32","4" 105 | "104","P321","5" 106 | "105","P322","2" 107 | "106","P323","4" 108 | "107","P325","1" 109 | "108","P328","3" 110 | "109","P329","3" 111 | "110","P33","3" 112 | "111","P334","3" 113 | "112","P337","3" 114 | "113","P339","2" 115 | "114","P34","3" 116 | "115","P340","5" 117 | "116","P346","5" 118 | "117","P347","2" 119 | "118","P349","2" 120 | "119","P35","3" 121 | "120","P36","3" 122 | "121","P365","1" 123 | "122","P367","1" 124 | "123","P368","2" 125 | "124","P372","2" 126 | "125","P380","2" 127 | "126","P387","3" 128 | "127","P389","3" 129 | "128","P39","5" 130 | "129","P390","1" 131 | "130","P391","4" 132 | "131","P394","2" 133 | "132","P395","4" 134 | "133","P399","2" 135 | "134","P4","3" 136 | "135","P402","5" 137 | "136","P403","2" 138 | "137","P404","4" 139 | "138","P405","5" 140 | "139","P407","3" 141 | "140","P412","1" 142 | "141","P414","3" 143 | "142","P416","4" 144 | "143","P417","1" 145 | "144","P418","3" 146 | "145","P420","3" 147 | "146","P423","3" 148 | "147","P427","3" 149 | "148","P428","4" 150 | "149","P43","2" 151 | "150","P432","4" 152 | "151","P44","2" 153 | "152","P445","2" 154 | "153","P45","3" 155 | "154","P453","3" 156 | "155","P454","4" 157 | "156","P457","1" 158 | "157","P459","5" 159 | "158","P46","3" 160 | "159","P462","4" 161 | "160","P463","2" 162 | "161","P464","3" 163 | "162","P467","4" 164 | "163","P469","3" 165 | "164","P47","3" 166 | "165","P470","3" 167 | "166","P471","1" 168 | "167","P472","3" 169 | "168","P473","2" 170 | "169","P477","5" 171 | "170","P481","3" 172 | "171","P482","4" 173 | "172","P483","5" 174 | "173","P485","3" 175 | "174","P486","3" 176 | "175","P491","4" 177 | "176","P494","3" 178 | "177","P496","5" 179 | 
"178","P498","3" 180 | "179","P5","3" 181 | "180","P502","5" 182 | "181","P509","4" 183 | "182","P510","2" 184 | "183","P511","2" 185 | "184","P520","3" 186 | "185","P523","3" 187 | "186","P524","2" 188 | "187","P525","2" 189 | "188","P527","3" 190 | "189","P528","5" 191 | "190","P53","3" 192 | "191","P533","2" 193 | "192","P540","4" 194 | "193","P550","2" 195 | "194","P567","4" 196 | "195","P57","3" 197 | "196","P574","3" 198 | "197","P579","3" 199 | "198","P58","3" 200 | "199","P587","3" 201 | "200","P59","1" 202 | "201","P592","3" 203 | "202","P595","5" 204 | "203","P6","3" 205 | "204","P62","4" 206 | "205","P63","4" 207 | "206","P64","3" 208 | "207","P66","3" 209 | "208","P67","2" 210 | "209","P68","2" 211 | "210","P69","4" 212 | "211","P70","2" 213 | "212","P71","1" 214 | "213","P73","1" 215 | "214","P74","1" 216 | "215","P84","2" 217 | "216","P9","3" 218 | "217","P90","3" 219 | "218","P92","1" 220 | "219","P93","3" 221 | "220","P94","1" 222 | "221","P96","3" 223 | "222","P97","2" 224 | -------------------------------------------------------------------------------- /main_data/expanded_CD4Treg_clonotype_number_over2.csv: -------------------------------------------------------------------------------- 1 | "","sampleID","number" 2 | "1","P1","45" 3 | "2","P102","9" 4 | "3","P103","21" 5 | "4","P105","10" 6 | "5","P106","45" 7 | "6","P113","1" 8 | "7","P118","8" 9 | "8","P119","2" 10 | "9","P121","34" 11 | "10","P122","1" 12 | "11","P124","9" 13 | "12","P127","1" 14 | "13","P130","3" 15 | "14","P131","56" 16 | "15","P137","3" 17 | "16","P142","1" 18 | "17","P150","1" 19 | "18","P156","5" 20 | "19","P164","4" 21 | "20","P168","1" 22 | "21","P179","6" 23 | "22","P18","1" 24 | "23","P180","10" 25 | "24","P182","15" 26 | "25","P184","1" 27 | "26","P189","6" 28 | "27","P190","1" 29 | "28","P20","9" 30 | "29","P201","14" 31 | "30","P204","25" 32 | "31","P205","4" 33 | "32","P209","4" 34 | "33","P218","5" 35 | "34","P221","37" 36 | "35","P226","10" 37 | "36","P234","2" 
38 | "37","P249","1" 39 | "38","P259","29" 40 | "39","P26","75" 41 | "40","P261","2" 42 | "41","P264","2" 43 | "42","P265","2" 44 | "43","P266","2" 45 | "44","P273","1" 46 | "45","P279","5" 47 | "46","P281","9" 48 | "47","P282","1" 49 | "48","P288","2" 50 | "49","P289","19" 51 | "50","P291","1" 52 | "51","P293","3" 53 | "52","P298","3" 54 | "53","P3","3" 55 | "54","P304","4" 56 | "55","P309","7" 57 | "56","P31","7" 58 | "57","P312","5" 59 | "58","P32","54" 60 | "59","P321","2" 61 | "60","P323","10" 62 | "61","P325","1" 63 | "62","P328","1" 64 | "63","P329","14" 65 | "64","P33","59" 66 | "65","P334","1" 67 | "66","P337","6" 68 | "67","P34","10" 69 | "68","P340","4" 70 | "69","P346","1" 71 | "70","P347","2" 72 | "71","P349","3" 73 | "72","P35","9" 74 | "73","P36","28" 75 | "74","P367","8" 76 | "75","P368","1" 77 | "76","P372","2" 78 | "77","P380","1" 79 | "78","P387","1" 80 | "79","P389","12" 81 | "80","P39","1" 82 | "81","P390","5" 83 | "82","P391","37" 84 | "83","P395","7" 85 | "84","P399","3" 86 | "85","P4","1" 87 | "86","P403","13" 88 | "87","P404","22" 89 | "88","P407","3" 90 | "89","P414","8" 91 | "90","P416","6" 92 | "91","P418","6" 93 | "92","P423","1" 94 | "93","P427","4" 95 | "94","P428","43" 96 | "95","P43","2" 97 | "96","P432","21" 98 | "97","P445","3" 99 | "98","P45","34" 100 | "99","P453","18" 101 | "100","P454","53" 102 | "101","P457","3" 103 | "102","P459","4" 104 | "103","P462","25" 105 | "104","P464","2" 106 | "105","P467","7" 107 | "106","P469","9" 108 | "107","P47","6" 109 | "108","P470","1" 110 | "109","P471","1" 111 | "110","P472","17" 112 | "111","P481","7" 113 | "112","P482","17" 114 | "113","P483","13" 115 | "114","P486","8" 116 | "115","P491","20" 117 | "116","P5","14" 118 | "117","P509","36" 119 | "118","P510","18" 120 | "119","P511","64" 121 | "120","P520","14" 122 | "121","P523","21" 123 | "122","P525","6" 124 | "123","P527","16" 125 | "124","P528","33" 126 | "125","P53","2" 127 | "126","P540","28" 128 | "127","P550","1" 129 | 
"128","P567","14" 130 | "129","P57","3" 131 | "130","P574","4" 132 | "131","P579","5" 133 | "132","P58","2" 134 | "133","P587","8" 135 | "134","P592","3" 136 | "135","P595","6" 137 | "136","P6","1" 138 | "137","P63","35" 139 | "138","P64","1" 140 | "139","P66","4" 141 | "140","P67","4" 142 | "141","P69","19" 143 | "142","P70","21" 144 | "143","P73","15" 145 | "144","P9","8" 146 | "145","P90","3" 147 | "146","P92","2" 148 | "147","P93","4" 149 | "148","P96","3" 150 | "149","P107","0" 151 | "150","P114","0" 152 | "151","P115","0" 153 | "152","P125","0" 154 | "153","P146","0" 155 | "154","P158","0" 156 | "155","P16","0" 157 | "156","P161","0" 158 | "157","P166","0" 159 | "158","P174","0" 160 | "159","P185","0" 161 | "160","P192","0" 162 | "161","P199","0" 163 | "162","P2","0" 164 | "163","P200","0" 165 | "164","P206","0" 166 | "165","P207","0" 167 | "166","P227","0" 168 | "167","P23","0" 169 | "168","P233","0" 170 | "169","P235","0" 171 | "170","P24","0" 172 | "171","P243","0" 173 | "172","P247","0" 174 | "173","P248","0" 175 | "174","P257","0" 176 | "175","P258","0" 177 | "176","P267","0" 178 | "177","P268","0" 179 | "178","P277","0" 180 | "179","P278","0" 181 | "180","P280","0" 182 | "181","P285","0" 183 | "182","P286","0" 184 | "183","P287","0" 185 | "184","P296","0" 186 | "185","P322","0" 187 | "186","P339","0" 188 | "187","P365","0" 189 | "188","P405","0" 190 | "189","P412","0" 191 | "190","P417","0" 192 | "191","P420","0" 193 | "192","P44","0" 194 | "193","P46","0" 195 | "194","P463","0" 196 | "195","P473","0" 197 | "196","P477","0" 198 | "197","P485","0" 199 | "198","P494","0" 200 | "199","P496","0" 201 | "200","P502","0" 202 | "201","P524","0" 203 | "202","P59","0" 204 | "203","P68","0" 205 | "204","P74","0" 206 | "205","P160","0" 207 | "206","P172","0" 208 | "207","P213","0" 209 | "208","P270","0" 210 | "209","P292","0" 211 | "210","P394","0" 212 | "211","P402","0" 213 | "212","P498","0" 214 | "213","P533","0" 215 | "214","P71","0" 216 | 
-------------------------------------------------------------------------------- /main_figure/figure1_and_related_supplemental_figure/README.md: -------------------------------------------------------------------------------- 1 | # description of the code 2 | 3 | ## logistic regression analysis 4 | used to generate the forest plot in Supplemental Figure S1 5 | 6 | ## pathological assessment 7 | used to generate the figure for consistency assessment of MPR in Supplemental Figure S1 8 | 9 | ## metadata analysis 10 | used to generate the waterfall plot in Figure 1 and to calculate the pCR and MPR rates. 11 | Note: the "clean_metadata_all_surgical_sample_lusc_luad_only_excluded_egfr_alk.csv" file can be replaced with Supplemental Table S1, which has the same content. 12 | -------------------------------------------------------------------------------- /main_figure/figure1_and_related_supplemental_figure/assessment.csv: -------------------------------------------------------------------------------- 1 | patient_number,histology,CICAMS,SPH,GDPH,notes, 2 | P10,LUAD,50,30,65,, 3 | P102,LUSC,100,100,100,, 4 | P105,LUAD,90,90,97,, 5 | P106,LUSC,40,20,50,, 6 | P110,LUSC/LUAD,20,40,60,LUSC/LUAD, 7 | P12,LUSC,30,30,50,, 8 | P122,LUAD,100,100,100,, 9 | P124,LUSC,100,100,100,tumor bed, 10 | P130,LUAD,80,100,100,, 11 | P131,LUSC,25,30,25,, 12 | P136,LUAD (Large cell lung cancer),100,100,100,bad staining, 13 | P137,LUSC,100,100,100,, 14 | P141,LUAD,80,50,95,, 15 | P142,LUSC,100,100,100,, 16 | P145,LUAD,60,50,85,, 17 | P146,LUSC,100,100,100,, 18 | P149,LUAD,60,65,92,, 19 | P163,LUAD,100,99,100,bad staining, 20 | P166,LUSC,90,40,100,LUAD?, 21 | P173,LUAD,5,10,20,, 22 | P175,LUAD,95,30,20,controversial viable or not, 23 | P176,LUAD,70,70,90,, 24 | P18,LUAD,95,90,100,, 25 | P182,LUAD,85,40,70,bad staining, 26 | P188-1,LUSC,60,,50,, 27 | P188-2,LUSC,70,20,15,, 28 | P19,LUAD,70,50,80,, 29 | P209,LUAD,95,99,100,, 30 | P22,LUSC,30,30,45,, 31 | P25,LUSC,100,70,100,bad staining,bad tissue 32 | 
P26,LUSC,40,40,50,, 33 | P3,LUSC,10,20,35,, 34 | P32,LUSC,15,20,30,, 35 | P33,LUAD,100,100,100,, 36 | P35,LUSC,60,50,85,, 37 | P36,LUAD,100,99,100,, 38 | P38,LUAD,90,98,50,, 39 | P4,LUSC,100,100,100,, 40 | P43,LUAD,100,100,100,, 41 | P44,LUSC,100,100,100,tumor bed, 42 | P46,LUSC,100,100,100,, 43 | P47,LUSC,100,100,100,bad imaging, 44 | P50,LUAD,100,100,100,, 45 | P52,LUAD,50,55,60,, 46 | P53,LUSC,100,99,100,, 47 | P62,LUSC,50,50,60,, 48 | P63,LUSC,90,65,75,, 49 | P64,LUAD,100,100,100,, 50 | P66,LUSC,100,98,100,, 51 | P69,LUSC,100,100,100,, 52 | P70,LUAD,5,10,65,, 53 | P71,LUAD,100,100,100,normal tissue, 54 | P73,LUSC,100,100,100,, 55 | P75,LUAD,40,25,70,, 56 | P84,LUSC,100,100,100,, 57 | P88,LUAD,95,98,99,, 58 | P90,LUSC,100,100,100,, 59 | P92,LUSC,100,100,100,, 60 | P96,LUAD,100,100,100,, 61 | P97,LUSC,60,65,80,, 62 | -------------------------------------------------------------------------------- /main_figure/figure1_and_related_supplemental_figure/logistic_regression_analysis_of_clinical_metadata_in_association_with_MPR_rate.Rmd: --------------------------------------------------------------------------------
```{r}
library(tidyverse)
library(ggpubr)
library(ggplot2)

# The cleaned metadata table holds both ICI+chemo patients and chemo-only patients.
metadata.lusc.luad.no.egfr.alk <- read_csv(
  "clean_metadata_all_surgical_sample_lusc_luad_only_excluded_egfr_alk.csv"
)

nrow(metadata.lusc.luad.no.egfr.alk)

# Keep rows whose pathological response is one of the three recorded
# categories and whose PD1 field is not missing.
table.for.clinical.metadata.summary <- filter(
  metadata.lusc.luad.no.egfr.alk,
  `Pathological Response` %in% c("MPR", "non-MPR", "pCR"),
  !is.na(PD1)
)

table.for.clinical.metadata.summary
```

```{r}
# Columns carried forward into the regression table, in this order.
table.for.clinical.metadata.regression <- table.for.clinical.metadata.summary %>%
  select(all_of(c(
    "Age", "Gender", "issmoke", "pathology", "Platinum", "Cycles",
    "Pathological Response", "center", "PD1", "Chemotherapy", "before_N",
    "PD-L1TPS", "Pre-treatment Staging", "grouped_staging"
  )))
# Drop the rows whose PD1 field is "No" (rows with a missing PD1 were already
# removed when the summary table was built).
table.for.clinical.metadata.regression <- table.for.clinical.metadata.regression %>%
  filter(PD1 != "No")

# Collapse the raw clinical fields into the categories used as regression
# covariates. Unless a comment says otherwise, a missing input stays NA.
table.for.clinical.metadata.regression <- table.for.clinical.metadata.regression %>%
  mutate(
    # pCR is counted as MPR for the binary outcome.
    `Pathological Response` = ifelse(
      `Pathological Response` %in% c("MPR", "pCR"), "MPR", "non-MPR"
    ),
    Age = ifelse(Age <= 65, "<=65", ">65"),
    Platinum = case_when(
      Platinum %in% c("Carboplatin", "Cisplatin") ~ as.character(Platinum),
      !is.na(Platinum) ~ "others"
    ),
    Cycles = case_when(
      Cycles %in% c("2", "3", "4") ~ paste(Cycles, "cycles"),
      !is.na(Cycles) ~ "others"
    ),
    # The first matching drug name wins, in the order listed.
    PD1 = case_when(
      grepl("Pembrolizumab", PD1) ~ "Pembrolizumab",
      grepl("Nivolumab", PD1) ~ "Nivolumab",
      grepl("Sintilimab", PD1) ~ "Sintilimab",
      grepl("Tislelizumab", PD1) ~ "Tislelizumab",
      grepl("Camrelizumab", PD1) ~ "Camrelizumab",
      TRUE ~ "others"
    ),
    # Abraxane and Docetaxel are pooled with Paclitaxel. A missing
    # Chemotherapy is labelled "others" explicitly; the previous nested
    # ifelse() left it NA and relied on filter() silently dropping NA rows.
    # Either way those rows are removed by the filter below.
    second_chemotherapy = case_when(
      is.na(Chemotherapy) ~ "others",
      Chemotherapy == "No" ~ "No",
      grepl("Paclitaxel|Abraxane|Docetaxel", Chemotherapy) ~ "Paclitaxel",
      grepl("Gemcitabine", Chemotherapy) ~ "Gemcitabine",
      grepl("Pemetrexed", Chemotherapy) ~ "Pemetrexed",
      TRUE ~ "others"
    ),
    `PD-L1TPS` = case_when(
      `PD-L1TPS` < 0.01 ~ "<1%",
      `PD-L1TPS` >= 0.5 ~ ">=50%",
      !is.na(`PD-L1TPS`) ~ "1~50%"
    )
  )

# Remove the catch-all categories. filter() also drops rows where a
# condition evaluates to NA.
table.for.clinical.metadata.regression <- table.for.clinical.metadata.regression %>%
  filter(
    PD1 != "others",
    Cycles != "others",
    second_chemotherapy != "No",
    second_chemotherapy != "others",
    grouped_staging != "not available"
  )


table.for.clinical.metadata.regression <- table.for.clinical.metadata.regression[c(
  "Age", "Gender", "issmoke", "pathology", "Platinum", "Cycles",
  "center", "PD1", "before_N", "PD-L1TPS", "Pathological Response", "second_chemotherapy",
  "grouped_staging")]
table.for.clinical.metadata.regression

write.csv(table.for.clinical.metadata.regression, "metadata_table_cleaned_for_regression_analysis.csv")
```

```{r}
library("autoReg")
library("dplyr")
library("ggplot2")
library("ggpubr")
library("ggsci")

# read.csv() rewrites non-syntactic column names, so "Pathological Response"
# becomes Pathological.Response and "PD-L1TPS" becomes PD.L1TPS from here on.
regression_table <- read.csv("metadata_table_cleaned_for_regression_analysis.csv")
# Binary outcome for the logistic models: 1 = MPR (including pCR), 0 = non-MPR.
regression_table <- regression_table %>%
  mutate(Pathological.Response = ifelse(Pathological.Response == "MPR", 1, 0))
regression_table
```

```{r}
LUSC.regression_table <- regression_table %>% filter(pathology == "LUSC")
LUAD.regression_table <- regression_table %>% filter(pathology == "LUAD")

# Number of rows and number of missing values per column for each histology.
nrow(LUSC.regression_table)
vapply(LUSC.regression_table, function(x) sum(is.na(x)), integer(1))

nrow(LUAD.regression_table)
vapply(LUAD.regression_table, function(x) sum(is.na(x)), integer(1))
```


```{r}
LUSC.LUAD.overall.log <- glm(Pathological.Response ~ Age + Gender + issmoke + Platinum +
                               Cycles + center + pathology + PD1 + before_N + second_chemotherapy +
                               grouped_staging, data=regression_table, family=binomial)


summary(LUSC.LUAD.overall.log)
#LUSC.LUAD.result<-autoReg(LUSC.LUAD.overall.log,uni=TRUE,multi=TRUE, threshold=0.01)
#LUSC.LUAD.result %>% myft()

#myplot <- modelPlot(LUSC.LUAD.overall.log, uni=TRUE, show.ref = TRUE, threshold=0.01, change.pointsize = TRUE)
#myplot$p <- myplot$p + scale_fill_nejm() + scale_color_nejm()
#myplot


#summary(LUSC.LUAD.overall.log)
LUSC.LUAD.result <- autoReg(LUSC.LUAD.overall.log, uni = TRUE, multi = FALSE, threshold = 0.01)
LUSC.LUAD.result %>% myft()

myplot <- modelPlot(LUSC.LUAD.overall.log, uni = TRUE, multi = FALSE, show.ref = TRUE,
                    threshold = 0.01, change.pointsize = TRUE)
myplot
#myplot$p <- myplot$p + scale_fill_nejm() + scale_color_nejm()
#myplot

# Saves the most recently displayed plot (no plot object is passed).
ggsave("metadata_regression_analysis.pdf")
```


```{r}
#======== LUSC only: same covariates as the overall model, without pathology
LUSC.overall.log <- glm(Pathological.Response ~ Age + Gender + issmoke + Platinum +
                          Cycles + center + PD1 + before_N + second_chemotherapy +
                          grouped_staging, data=LUSC.regression_table, family=binomial)


summary(LUSC.overall.log)
LUSC.result <- autoReg(LUSC.overall.log, uni = TRUE, multi = TRUE, threshold = 0.1)
LUSC.result
```


```{r}
#===== LUAD only: same covariates as the LUSC model

LUAD.overall.log <- glm(Pathological.Response ~ Age + Gender + issmoke + Platinum +
                          Cycles + center + PD1 + before_N + second_chemotherapy +
                          grouped_staging, data=LUAD.regression_table, family=binomial)

summary(LUAD.overall.log)
LUAD.result <- autoReg(LUAD.overall.log, uni = TRUE, multi = TRUE, threshold = 0.1)
LUAD.result
```

## Analyze PD-L1 only
```{r}
# PD.L1TPS is the read.csv() spelling of the "PD-L1TPS" column.
LUSC.PDL1.log <- glm(Pathological.Response ~ PD.L1TPS, data=LUSC.regression_table, family=binomial)


summary(LUSC.PDL1.log)
LUSC.PDL1.result <- autoReg(LUSC.PDL1.log, uni = TRUE, multi = FALSE, threshold = 1)
LUSC.PDL1.result

myplot <- modelPlot(LUSC.PDL1.log, uni = TRUE, show.ref = TRUE, multi = FALSE,
                    threshold = 1, change.pointsize = TRUE)
myplot
ggsave("LUSC_PD-L1_regression_analysis.pdf")
```


```{r}
LUAD.PDL1.log <- glm(Pathological.Response ~ PD.L1TPS, data=LUAD.regression_table, family=binomial)

summary(LUAD.PDL1.log)
LUAD.PDL1.result <- autoReg(LUAD.PDL1.log, uni = TRUE, multi = FALSE, threshold = 1)
LUAD.PDL1.result

myplot <- modelPlot(LUAD.PDL1.log, uni = TRUE, show.ref = TRUE, multi = FALSE,
                    threshold = 1, change.pointsize = TRUE)
myplot
ggsave("LUAD_PD-L1_regression.pdf", height = 5, width = 15)
```


--------------------------------------------------------------------------------
/main_figure/figure1_and_related_supplemental_figure/metadata_analysis.Rmd: --------------------------------------------------------------------------------
```{r}
# do not use renv or any project lib here

library(tidyverse)
library(ggpubr)
library(ggplot2)

# note this has ICI+chemo patients, and chemo-only patients
metadata.lusc.luad.no.egfr.alk <- read_csv("clean_metadata_all_surgical_sample_lusc_luad_only_excluded_egfr_alk.csv")

nrow(metadata.lusc.luad.no.egfr.alk)

# Split on the PD1 field: "No" marks chemo-only patients. filter() drops rows
# with a missing PD1 from both subsets.
metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo <- metadata.lusc.luad.no.egfr.alk %>% filter(PD1 != "No")
metadata.lusc.luad.no.egfr.alk.chemo.only <- metadata.lusc.luad.no.egfr.alk %>% filter(PD1 == "No")

# use this as the standard number of all followed patients with ICI+chemo
nrow(metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo)

# use this as the standard number of all followed patients with chemo-only
nrow(metadata.lusc.luad.no.egfr.alk.chemo.only)

# Normal-approximation confidence interval for a proportion.
#   proportion: observed proportion on the 0-1 scale.
#   n:          number of observations the proportion was computed from.
#   conf.level: confidence level; the default of 0.95 gives the interval this
#               script has always reported.
# Returns c(lower, upper). The bounds are not clipped to [0, 1].
confidence.interval <- function(proportion, n, conf.level = 0.95) {
  z <- qnorm(1 - (1 - conf.level) / 2)
  proportion + c(-1, 1) * z * sqrt(proportion * (1 - proportion) / n)
}
```


## ICI+chemo
```{r}
# --------------------------- overall response waterfall plot -----------------------
overall.RVT.plot.immunotherapy.lusc.and.luad <- ggbarplot(
  # Some patients have no specific RVT value; they must be removed for the
  # plot to be drawn.
  metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>%
    filter(!is.na(RVT)),
  "Tumor_Sample_Barcode", "RVT",
  sort.val = "desc",
  xlab = FALSE,
  palette = c("#077E64", "#878586"),
  color = "isMPR",
  fill = "isMPR",
  width = 0.6,
  sort.by.groups = FALSE,
  # main = "All patients(n=240)",
  font.y = 15,
  font.legend = 15,
  font.tickslab = 15) +
  theme(
    legend.position = "right",
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    plot.title = element_text(size = 20, hjust = 0.5)) +
  ylab("Change in primary tumor area with\n viable tumor cells(%)") +
  scale_y_continuous(
    expand = c(0, 0),
    breaks = c(0, -10, -20, -30, -40, -50, -60, -70, -80, -90, -100)
  ) +
  # Dashed reference line at -90.
  geom_hline(yintercept = -90, color = "black", size = 0.5, linetype = "dashed")

overall.RVT.plot.immunotherapy.lusc.and.luad

# ggsave() needs the output directory to exist.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)
ggsave("plots/overall.RVT.plot.immunotherapy.lusc.and.luad.pdf", overall.RVT.plot.immunotherapy.lusc.and.luad, width = 12, height = 5)
```
```{r}
metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo
```



```{r}
# calculate MPR & pCR rate in the ICI+chemo group, not distinguishing histology
# (do not use "isMPR": it is derived from RVT and shows minor inconsistencies.)
# Patients with a recorded pathological response. This single table feeds both
# the rates and the confidence-interval denominators so the two always agree.
valid.response.ICI.plus.chemo <- metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>%
  filter(`Pathological Response` %in% c("MPR", "non-MPR", "pCR"))

MPR.pCR.Rate.ICI.plus.chemo <- valid.response.ICI.plus.chemo %>%
  count(`Pathological Response`) %>%
  summarise(
    pCR_rate = sum(n[`Pathological Response` == "pCR"]) / sum(n) * 100,
    # pCR patients are counted towards the MPR rate as well
    MPR_rate = sum(n[`Pathological Response` %in% c("MPR", "pCR")]) / sum(n) * 100
  )

MPR.pCR.Rate.ICI.plus.chemo

# confidence.interval() is defined earlier in this document; it is called with
# a proportion (0-1) and the number of patients behind that proportion.
N.sample <- nrow(valid.response.ICI.plus.chemo)
MPR.confidence.interval <- confidence.interval(MPR.pCR.Rate.ICI.plus.chemo$MPR_rate * 0.01, N.sample)
MPR.confidence.interval
pCR.confidence.interval <- confidence.interval(MPR.pCR.Rate.ICI.plus.chemo$pCR_rate * 0.01, N.sample)
pCR.confidence.interval
```


```{r}
# calculate by.LUSC/LUAD MPR.pCR rates

by.LUSC.LUAD.MPR.pCR.Rate.ICI.plus.chemo <- valid.response.ICI.plus.chemo %>%
  count(`Pathological Response`, pathology) %>%
  group_by(pathology) %>%
  summarise(
    pCR_rate = sum(n[`Pathological Response` == "pCR"]) / sum(n) * 100,
    MPR_rate = sum(n[`Pathological Response` %in% c("MPR", "pCR")]) / sum(n) * 100
  ) %>%
  mutate(pathology = factor(pathology, levels = c("LUSC", "LUAD")))

# Per-histology denominators. Counted on the SAME filtered table as the rates
# above; counting the unfiltered metadata would inflate n (and narrow the CI)
# whenever a patient lacks a pathological response.
N.LUSC.LUAD <- valid.response.ICI.plus.chemo %>% count(pathology)
N.LUSC.LUAD
by.LUSC.LUAD.MPR.pCR.Rate.ICI.plus.chemo


# calculate by.LUSC/LUAD pPR rates
# (uses the four-level `response` column rather than `Pathological Response`)

by.LUSC.LUAD.MPR.pPR.Rate.ICI.plus.chemo <- metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>%
  filter(response %in% c("MPR", "pPR", "nPR", "pCR")) %>% # drop patients without pathological response info
  count(response, pathology) %>%
  group_by(pathology) %>%
  summarise(
    pPR_rate = sum(n[response == "pPR"]) / sum(n) * 100
  ) %>%
  mutate(pathology = factor(pathology, levels = c("LUSC", "LUAD")))

by.LUSC.LUAD.MPR.pPR.Rate.ICI.plus.chemo


##====LUSC
N.sample <- N.LUSC.LUAD %>% filter(pathology == "LUSC") %>% pull(n)
temp.table <- by.LUSC.LUAD.MPR.pCR.Rate.ICI.plus.chemo %>% filter(pathology == "LUSC")
MPR.confidence.interval <- confidence.interval(temp.table$MPR_rate * 0.01, N.sample)
MPR.confidence.interval
pCR.confidence.interval <- confidence.interval(temp.table$pCR_rate * 0.01, N.sample)
pCR.confidence.interval

##====LUAD
N.sample <- N.LUSC.LUAD %>% filter(pathology == "LUAD") %>% pull(n)
temp.table <- by.LUSC.LUAD.MPR.pCR.Rate.ICI.plus.chemo %>% filter(pathology == "LUAD")
MPR.confidence.interval <- confidence.interval(temp.table$MPR_rate * 0.01, N.sample)
MPR.confidence.interval
pCR.confidence.interval <- confidence.interval(temp.table$pCR_rate * 0.01, N.sample)
pCR.confidence.interval
```


```{r}
# calculate by.LUSC/LUAD MPR.pCR rates, further split by number of cycles (2, 3 or 4)

by.LUSC.LUAD.sub.by.cycles.MPR.pCR.Rate.ICI.plus.chemo <- metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>%
  filter(`Pathological Response` %in% c("MPR", "non-MPR", "pCR"),
         Cycles %in% c(2, 3, 4)) %>% # drop patients without pathological response info
  count(`Pathological Response`, pathology, Cycles) %>%
  group_by(pathology, Cycles) %>%
  summarise(
    pCR_rate = sum(n[`Pathological Response` == "pCR"]) / sum(n) * 100,
    MPR_rate = sum(n[`Pathological Response` %in% c("MPR", "pCR")]) / sum(n) * 100
  ) %>%
  # summarise() only peels off the last grouping level, so the result is still
  # grouped by pathology; ungroup before recoding that column.
  ungroup() %>%
  mutate(pathology = factor(pathology, levels = c("LUSC", "LUAD")))

by.LUSC.LUAD.sub.by.cycles.MPR.pCR.Rate.ICI.plus.chemo
```


```{r}
#===== MPR & pCR rate difference
# plot ICI.plus.chemo

# Bracket label: absolute LUSC-vs-LUAD gap of `rate.column`, in percentage
# points, taken from the plotted rate table so the annotation cannot drift
# from the bars. Expects exactly one LUSC row and one LUAD row.
rate.difference.label <- function(rate.table, rate.column) {
  lusc <- rate.table[[rate.column]][rate.table$pathology == "LUSC"]
  luad <- rate.table[[rate.column]][rate.table$pathology == "LUAD"]
  sprintf("Difference = %.1f%%", abs(lusc - luad))
}

# One bar per histology for `rate.column`, with a bracket showing the gap.
# `bracket.y` is the height (in % units) at which the bracket is drawn.
make.rate.difference.barplot <- function(rate.table, rate.column, y.label, bracket.y) {
  ggbarplot(rate.table, "pathology", rate.column,
    palette = c("LUSC" = "#50B8C3", "LUAD" = "#EDAE7B"),
    fill = "pathology",
    xlab = FALSE,
    ylab = y.label,
    label = TRUE,
    legend = "none",
    lab.nb.digits = 1,
    lab.size = 6,
    font.y = 20,
    font.tickslab = 15
  ) +
    geom_bracket(
      xmin = "LUSC", xmax = "LUAD", y.position = bracket.y,
      label = rate.difference.label(rate.table, rate.column),
      label.size = 6, tip.length = c(0.2, 0.9)
    ) +
    ylim(0, 100)
}

# Population for the pCR / MPR tests: the same patients the rates are based on.
chisq.data.ICI.plus.chemo <- metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>%
  filter(`Pathological Response` %in% c("MPR", "non-MPR", "pCR"))

p_pCR <- make.rate.difference.barplot(
  by.LUSC.LUAD.MPR.pCR.Rate.ICI.plus.chemo, "pCR_rate", "pCR rate(%)", 60
)

p_pCR
ggsave("plots/pCR_difference_between_LUSC_LUAD.pdf", plot = p_pCR, width = 4.5, height = 8)

#==== calculate chi-square (histology x pCR / non-pCR)
# Only the response column is recoded; a whole-data-frame replacement would
# also rewrite any other column that happens to contain the same strings.
temp.table <- table(
  chisq.data.ICI.plus.chemo$pathology,
  ifelse(chisq.data.ICI.plus.chemo$`Pathological Response` == "pCR", "pCR", "non-PCR")
)
temp.table
chisq.test(temp.table)



#============

p_MPR <- make.rate.difference.barplot(
  by.LUSC.LUAD.MPR.pCR.Rate.ICI.plus.chemo, "MPR_rate", "MPR rate(%)", 75
)
p_MPR
ggsave("plots/MPR_difference_between_LUSC_LUAD.pdf", plot = p_MPR, width = 4.5, height = 8)

#==== calculate chi-square (histology x MPR / non-MPR; pCR counts as MPR)
temp.table <- table(
  chisq.data.ICI.plus.chemo$pathology,
  ifelse(chisq.data.ICI.plus.chemo$`Pathological Response` %in% c("MPR", "pCR"), "MPR", "non-MPR")
)
chisq.test(temp.table)


#============

p_pPR <- make.rate.difference.barplot(
  by.LUSC.LUAD.MPR.pPR.Rate.ICI.plus.chemo, "pPR_rate", "pPR rate(%)", 75
)
p_pPR
ggsave("plots/pPR_difference_between_LUSC_LUAD.pdf", plot = p_pPR, width = 4.5, height = 8)

#==== calculate chi-square (histology x pPR / non-pPR)
# pPR only exists in the four-level `response` column, so the test has to be
# built from that column. (`data.frame %in% ...` compares whole columns, not
# cells, and therefore recodes nothing.)
pPR.data.ICI.plus.chemo <- metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>%
  filter(response %in% c("MPR", "pPR", "nPR", "pCR"))
temp.table <- table(
  pPR.data.ICI.plus.chemo$pathology,
  ifelse(pPR.data.ICI.plus.chemo$response == "pPR", "pPR", "non-pPR")
)
chisq.test(temp.table)

#==== plot response percentage
temp.table <- pPR.data.ICI.plus.chemo %>% count(response, pathology)
temp.table$response <- factor(temp.table$response, levels = c("nPR", "pPR", "MPR", "pCR"))
ggplot(temp.table, aes(fill = response, y = n, x = pathology)) +
  geom_bar(position = "fill", stat = "identity")



```


```{r}
# --------------------------- LUSC response waterfall plot -----------------------
overall.RVT.plot.immunotherapy.lusc <- ggbarplot(
  metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>%
    filter(pathology == "LUSC", !is.na(RVT)), # bars need a numeric RVT
  "Tumor_Sample_Barcode", "RVT",
  sort.val = "desc",
  xlab = FALSE,
  palette = c("#077E64", "#878586"),
  color = "isMPR",
  fill = "isMPR",
  width = 0.6,
  sort.by.groups = FALSE,
  # main = "All patients(n=240)",
  font.y = 15,
  font.legend = 15,
  font.tickslab = 15) +
  theme(
legend.position = "right", 274 | axis.ticks.x = element_blank(), 275 | axis.text.x = element_blank(), 276 | plot.title = element_text(size = 20, hjust = 0.5)) + 277 | ylab("Change in primary tumor area with\n viable tumor cells(%)") + 278 | scale_y_continuous( 279 | expand = c(0, 0), 280 | breaks = c(0, -10, -20, -30, -40, -50, -60, -70, -80, -90, -100) 281 | ) + 282 | geom_hline(yintercept = -90, color = "black", size = 0.5, linetype = "dashed") 283 | 284 | overall.RVT.plot.immunotherapy.lusc 285 | 286 | ggsave("plots/overall.RVT.plot.immunotherapy.lusc.pdf", overall.RVT.plot.immunotherapy.lusc, width = 12, height = 5) 287 | 288 | # --------------------------- luad response waterfall plot ----------------------- 289 | overall.RVT.plot.immunotherapy.luad<- ggbarplot(metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>% filter(pathology == "LUAD", is.na(RVT) == F), "Tumor_Sample_Barcode", "RVT", 290 | sort.val = "desc", 291 | xlab = FALSE, 292 | palette = c("#077E64", "#878586"), 293 | color = "isMPR", 294 | fill = "isMPR", 295 | width = 0.6, 296 | sort.by.groups = F, 297 | # main = "All patients(n=240)", 298 | font.y = 15, 299 | font.legend = 15, 300 | font.tickslab = 15) + 301 | theme( 302 | legend.position = "right", 303 | axis.ticks.x = element_blank(), 304 | axis.text.x = element_blank(), 305 | plot.title = element_text(size = 20, hjust = 0.5)) + 306 | ylab("Change in primary tumor area with\n viable tumor cells(%)") + 307 | scale_y_continuous( 308 | expand = c(0, 0), 309 | breaks = c(0, -10, -20, -30, -40, -50, -60, -70, -80, -90, -100) 310 | ) + 311 | geom_hline(yintercept = -90, color = "black", size = 0.5, linetype = "dashed") 312 | 313 | overall.RVT.plot.immunotherapy.luad 314 | 315 | ggsave("plots/overall.RVT.plot.immunotherapy.luad.pdf", overall.RVT.plot.immunotherapy.luad, width = 12, height = 5) 316 | 317 | 318 | #======= 319 | metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>% count(pathology) 320 | 321 | ``` 322 | 323 | 324 | ## chemo 325 | ```{r} 326 | # 
--------------------------- chemo overall response waterfall plot ----------------------- 327 | overall.RVT.plot.chemo.lusc.and.luad <- ggbarplot( 328 | metadata.lusc.luad.no.egfr.alk.chemo.only %>% 329 | filter(is.na(RVT) == F), # note, some patients has no specific value of RVT, so has to filter out for successful RVT plot. 330 | "Tumor_Sample_Barcode", "RVT", 331 | sort.val = "desc", 332 | xlab = FALSE, 333 | palette = c("#077E64", "#878586"), 334 | color = "isMPR", 335 | fill = "isMPR", 336 | width = 0.6, 337 | sort.by.groups = F, 338 | # main = "All patients(n=240)", 339 | font.y = 15, 340 | font.legend = 15, 341 | font.tickslab = 15) + 342 | theme( 343 | legend.position = "right", 344 | axis.ticks.x = element_blank(), 345 | axis.text.x = element_blank(), 346 | plot.title = element_text(size = 20, hjust = 0.5)) + 347 | ylab("Change in primary tumor area with\n viable tumor cells(%)") + 348 | scale_y_continuous( 349 | expand = c(0, 0), 350 | breaks = c(0, -10, -20, -30, -40, -50, -60, -70, -80, -90, -100) 351 | ) + 352 | geom_hline(yintercept = -90, color = "black", size = 0.5, linetype = "dashed") 353 | 354 | overall.RVT.plot.chemo.lusc.and.luad 355 | 356 | ggsave("plots/overall.RVT.plot.chemo.lusc.and.luad.pdf", overall.RVT.plot.chemo.lusc.and.luad, width = 12, height = 5) 357 | ``` 358 | 359 | 360 | ```{r} 361 | # calculate MPR & pCR rate in the chemo group, not distinguishing histology 362 | # do not use the "isMPR" since it is derived from RVT, minor inconsistency.) 
363 | 364 | MPR.pCR.Rate.chemo <- metadata.lusc.luad.no.egfr.alk.chemo.only %>% 365 | filter(`Pathological Response` %in% c("MPR", "non-MPR", "pCR")) %>% # filter patients without pathological response info 366 | count(`Pathological Response`) %>% 367 | summarise( 368 | pCR_rate = sum(n[`Pathological Response` == "pCR"])/sum(n) * 100, 369 | MPR_rate = sum(n[`Pathological Response` %in% c("MPR", "pCR")])/sum(n) * 100) 370 | 371 | metadata.lusc.luad.no.egfr.alk.chemo.only %>% group_by(pathology) %>% count() 372 | MPR.pCR.Rate.chemo 373 | 374 | 375 | N.sample <- metadata.lusc.luad.no.egfr.alk.chemo.only %>% 376 | filter(`Pathological Response` %in% c("MPR", "non-MPR", "pCR")) 377 | N.sample <- nrow(N.sample) 378 | MPR.confidence.interval <- confidence.interval(MPR.pCR.Rate.chemo$MPR_rate*0.01, N.sample) 379 | MPR.confidence.interval 380 | pCR.confidence.interval <- confidence.interval(MPR.pCR.Rate.chemo$pCR_rate*0.01, N.sample) 381 | pCR.confidence.interval 382 | ``` 383 | 384 | 385 | ```{r} 386 | # calculate chemo by.LUSC/LUAD MPR.pCR rates 387 | 388 | by.LUSC.LUAD.MPR.pCR.Rate.chemo<- metadata.lusc.luad.no.egfr.alk.chemo.only %>% 389 | filter(`Pathological Response` %in% c("MPR", "non-MPR", "pCR")) %>% # filter patients without pathological response info 390 | count(`Pathological Response`, pathology) %>% 391 | group_by(pathology) %>% 392 | summarise( 393 | pCR_rate = sum(n[`Pathological Response` == "pCR"])/sum(n) * 100, 394 | MPR_rate = sum(n[`Pathological Response` %in% c("MPR", "pCR")])/sum(n) * 100) %>% 395 | mutate(pathology = factor(pathology, levels = c("LUSC", "LUAD"))) 396 | 397 | by.LUSC.LUAD.MPR.pCR.Rate.chemo 398 | ``` 399 | 400 | ```{r} 401 | # --------------------------- LUSC response waterfall plot ----------------------- 402 | overall.RVT.plot.chemo.lusc<- ggbarplot(metadata.lusc.luad.no.egfr.alk.chemo.only %>% filter(pathology == "LUSC", is.na(RVT) == F), "Tumor_Sample_Barcode", "RVT", 403 | sort.val = "desc", 404 | xlab = FALSE, 405 | palette = 
c("#077E64", "#878586"), 406 | color = "isMPR", 407 | fill = "isMPR", 408 | width = 0.6, 409 | sort.by.groups = F, 410 | # main = "All patients(n=240)", 411 | font.y = 15, 412 | font.legend = 15, 413 | font.tickslab = 15) + 414 | theme( 415 | legend.position = "right", 416 | axis.ticks.x = element_blank(), 417 | axis.text.x = element_blank(), 418 | plot.title = element_text(size = 20, hjust = 0.5)) + 419 | ylab("Change in primary tumor area with\n viable tumor cells(%)") + 420 | scale_y_continuous( 421 | expand = c(0, 0), 422 | breaks = c(0, -10, -20, -30, -40, -50, -60, -70, -80, -90, -100) 423 | ) + 424 | geom_hline(yintercept = -90, color = "black", size = 0.5, linetype = "dashed") 425 | 426 | overall.RVT.plot.chemo.lusc 427 | 428 | ggsave("plots/overall.RVT.plot.chemo.lusc.pdf", overall.RVT.plot.chemo.lusc, width = 12, height = 5) 429 | 430 | 431 | 432 | # --------------------------- luad response waterfall plot ----------------------- 433 | overall.RVT.plot.chemo.luad<- ggbarplot(metadata.lusc.luad.no.egfr.alk.chemo.only %>% filter(pathology == "LUAD", is.na(RVT) == F), "Tumor_Sample_Barcode", "RVT", 434 | sort.val = "desc", 435 | xlab = FALSE, 436 | palette = c("#077E64", "#878586"), 437 | color = "isMPR", 438 | fill = "isMPR", 439 | width = 0.6, 440 | sort.by.groups = F, 441 | # main = "All patients(n=240)", 442 | font.y = 15, 443 | font.legend = 15, 444 | font.tickslab = 15) + 445 | theme( 446 | legend.position = "right", 447 | axis.ticks.x = element_blank(), 448 | axis.text.x = element_blank(), 449 | plot.title = element_text(size = 20, hjust = 0.5)) + 450 | ylab("Change in primary tumor area with\n viable tumor cells(%)") + 451 | scale_y_continuous( 452 | expand = c(0, 0), 453 | breaks = c(0, -10, -20, -30, -40, -50, -60, -70, -80, -90, -100) 454 | ) + 455 | geom_hline(yintercept = -90, color = "black", size = 0.5, linetype = "dashed") 456 | 457 | overall.RVT.plot.chemo.luad 458 | 459 | ggsave("plots/overall.RVT.plot.chemo.luad.pdf", 
overall.RVT.plot.chemo.luad, width = 12, height = 5) 460 | 461 | 462 | #======= 463 | metadata.lusc.luad.no.egfr.alk.chemo.only %>% count(pathology) 464 | 465 | 466 | ``` 467 | 468 | 469 | -------------------------------------------------------------------------------- /main_figure/figure1_and_related_supplemental_figure/pathological_assessment.Rmd: -------------------------------------------------------------------------------- 1 | ```{r} 2 | library(reshape2) 3 | library(RColorBrewer) 4 | assess <- read.csv("assessment.csv", sep=',', row.names = 1) 5 | ``` 6 | 7 | 8 | ```{r} 9 | p1 <- ggplot(assess, aes(x=SPH, y=CICAMS)) + 10 | geom_point() + 11 | stat_cor(data=assess, method = "pearson", na.rm = TRUE) + 12 | geom_abline(intercept = 0, slope = 1) + 13 | geom_hline(yintercept = 90, linetype="dotted") + 14 | geom_vline(xintercept = 90, linetype="dotted") + 15 | # geom_text(aes(SPH,CICAMS,label=rownames(assess))) + 16 | coord_fixed() + theme_bw() + 17 | xlab("SPH") + 18 | ylab("CICAMS") + 19 | ggtitle("Pathological assessment CICAMS vs SPH") 20 | 21 | 22 | 23 | p2 <- ggplot(assess, aes(x=CICAMS, y=GDPH)) + 24 | geom_point() + 25 | stat_cor(data=assess, method = "pearson", na.rm = TRUE) + 26 | geom_abline(intercept = 0, slope = 1) + 27 | geom_hline(yintercept = 90, linetype="dotted") + 28 | geom_vline(xintercept = 90, linetype="dotted") + 29 | # geom_text(aes(CICAMS,GDPH,label=rownames(assess))) + 30 | coord_fixed() + theme_bw() + 31 | xlab("CICAMS") + 32 | ylab("GDPH") + 33 | ggtitle("Pathological assessment GDPH vs CICAMS") 34 | 35 | 36 | p3 <- ggplot(assess, aes(x=GDPH, y=SPH)) + 37 | geom_point() + 38 | stat_cor(data=assess, method = "pearson", na.rm = TRUE) + 39 | geom_abline(intercept = 0, slope = 1) + 40 | geom_hline(yintercept = 90, linetype="dotted") + 41 | geom_vline(xintercept = 90, linetype="dotted") + 42 | # geom_text(aes(GDPH,SPH,label=rownames(assess))) + 43 | coord_fixed() + theme_bw() + 44 | xlab("GDPH") + 45 | ylab("SPH") + 46 | 
ggtitle("Pathological assessment SPH vs GDPH")


# Side-by-side panel of the three pairwise centre comparisons
p <- p1 + p2 + p3
ggsave(filename = "assessment_new.png", plot = p, height = 10, width = 30)
```

```{r}
# Turn pathological response scores into the two RVT classes used in the tile
# plot: a score >= 90 is labelled "RVT≤10%", anything lower "RVT>10%", and a
# missing score stays NA.
# Vectorised; values that already carry one of the two labels are passed
# through unchanged so the chunk can safely be re-run.
type_prr <- function(prr) {
  rvt_labels <- c("RVT≤10%", "RVT>10%")
  prr_chr <- as.character(prr)
  score <- suppressWarnings(as.numeric(prr_chr))

  out <- rep(NA_character_, length(prr_chr))
  already_labelled <- prr_chr %in% rvt_labels
  out[already_labelled] <- prr_chr[already_labelled]
  out[!is.na(score) & score >= 90] <- "RVT≤10%"
  out[!is.na(score) & score < 90] <- "RVT>10%"
  out
}
# assess <- assess %>% add_column(CICAMS = NA, SPH = NA, GDPH = NA)
centre_columns <- c("CICAMS", "SPH", "GDPH")
assess[centre_columns] <- lapply(assess[centre_columns], type_prr)
```

```{r}
# Rows of `tbl` regrouped so that all LUAD patients precede all LUSC patients
# (order within each histology is kept).
order_by_histology <- function(tbl) {
  rbind(tbl[which(tbl$histology == "LUAD"), ], tbl[which(tbl$histology == "LUSC"), ])
}

# Long (patient, variable, value) table; factor levels fix the plotting order.
assessment_long <- function(assess_prr) {
  long <- melt(assess_prr %>% add_column(patient = row.names(assess_prr)), id = "patient")
  long$patient <- factor(long$patient, levels = row.names(assess_prr))
  long$variable <- factor(long$variable, levels = c("histology", "CICAMS", "SPH", "GDPH"))
  long
}

# One tile per patient x (histology | centre), coloured by the cell value.
assessment_tile_plot <- function(assess_long) {
  assess_long %>% ggplot(aes(x = patient, y = variable)) +
    geom_tile(aes(fill = value), color = "white", size = 1) +
    scale_x_discrete("", expand = c(0, 0)) +
    scale_y_discrete("", expand = c(0, 0)) +
    xlab("Patient") +
    theme(# axis.text.x.bottom = element_text(size=10,angle=-45,hjust=0,vjust=0.5),
      axis.text.y.left = element_text(size = 10),
      axis.text.x = element_blank(),
      axis.title.x = element_text(size = 10),
      axis.ticks.x = element_blank()
      #axis.text.x = element_text(size=2)
    ) +
    scale_fill_brewer(palette = "Paired")
}

# Patients sorted by the number in their ID ("P12" -> 12).
# Named `patient_order` rather than `c` so base::c is not masked.
patient_order <- order(as.numeric(gsub("P", "", row.names(assess))))
assess_sorted <- assess[patient_order, ]

# First four columns are kept — presumably histology, CICAMS, SPH, GDPH
# (the names used below); TODO confirm against assessment.csv.
assess_prr <- order_by_histology(assess_sorted[, c(1:4)])
assess2 <- assessment_long(assess_prr)
head(assess2)
#mycolors <-
p4 <- assessment_tile_plot(assess2)
p4
ggsave("tile_new.png", plot = p4, width = 30, height = 8, units = "cm")
```


```{r}
# Filter bad stainings
# Keep only patients with an empty `notes` field. The filter is evaluated on
# the SORTED table it is applied to; positions computed on the unsorted
# `assess` would pick the wrong rows after reordering.
patient_order <- order(as.numeric(gsub("P", "", row.names(assess))))
assess_sorted <- assess[patient_order, ]
assess_prr <- order_by_histology(assess_sorted[which(assess_sorted$notes == ""), c(1:4)])
assess2 <- assessment_long(assess_prr)
head(assess2)
#mycolors <-
p5 <- assessment_tile_plot(assess2)
p5
ggsave("tile_new_filtered.png", plot = p5, width = 30, height = 8, units = "cm")
```

--------------------------------------------------------------------------------
/main_figure/figure2_and_related_supplemental_figure/NMF.R:
--------------------------------------------------------------------------------
library(NMF)
library(ComplexHeatmap)
library(reshape2)
library(tidyverse)
library(dplyr)
library(readxl)
###################################################################
# One row per cell: the sample it came from and its annotated sub cell type
info <- read.csv("all_sub_cell_type.csv")
head(info)
length(unique(info$sampleID))

# Per-sample composition: each cell type's share of that sample's cells
df <- table(info$sampleID, info$sub_cell_type)
ratio <- as.data.frame(df / rowSums(df))
head(ratio)
colnames(ratio) <- c("sampleID", "cell.type", "Freq")
head(ratio)

sample.info <- as.data.frame(read_excel("sample.xlsx"))
head(sample.info)
# keep only samples that have cells in `info`
sample.info <- sample.info[sample.info$sampleID
%in% info$sampleID,] 21 | 22 | 23 | pathological_response_level <- c() 24 | for(each in sample.info$pathological_response){ 25 | if(each %in% c("MPR","pCR")){ 26 | pathological_response_level <- c(pathological_response_level, "MPR") 27 | }else{ 28 | pathological_response_level <- c(pathological_response_level, "non-MPR") 29 | } 30 | } 31 | 32 | sample.info$pathological_response_level <- pathological_response_level 33 | 34 | response.meta <- sample.info[, c("sampleID","smoking_history","cancer_type","pre_treatment_staging", 35 | "PDL1_TPS","PD1","chemotherapy","targeted_therapy","cycles", 36 | "pathological_response","pathological_response_level", 37 | "pathological_response_rate","radiological_response", 38 | "RVT_pre_dominant_histology")] 39 | response.meta <- response.meta %>% distinct(sampleID, .keep_all = TRUE) 40 | head(response.meta) 41 | length(unique(response.meta$sampleID)) 42 | 43 | ratio <- dcast(ratio, sampleID ~ ratio$cell.type, value.var = "Freq") 44 | 45 | 46 | merge.version <- merge(ratio, response.meta, by = "sampleID", all.x = TRUE) 47 | head(merge.version) 48 | dim(merge.version) 49 | 50 | rownames(ratio) <- ratio$sampleID 51 | ratio <- ratio[merge.version$sampleID,] 52 | dim(ratio) 53 | 54 | ratio <- ratio[,-1] 55 | head(ratio) 56 | ratio[is.na(ratio)] <- 0 57 | 58 | 59 | #normalization 60 | scale_ratio <- apply(ratio, MARGIN = 2, function(x) (x-min(x))/(max(x)-min(x))) 61 | head(scale_ratio) 62 | scale_ratio <- as.data.frame(scale_ratio) 63 | head(scale_ratio) 64 | scale_ratio <- t(scale_ratio) 65 | head(scale_ratio) 66 | dim(scale_ratio) 67 | 68 | ranks <- 2:10 69 | estim.coad <- nmf(scale_ratio, ranks, nrun=100,method = "lee") 70 | plot(estim.coad) 71 | 72 | #再次NMF,rank=5 73 | seed = 2020820 74 | 75 | nmf.rank5 <- nmf(scale_ratio, 76 | rank = 5, 77 | nrun=200, 78 | seed = seed, 79 | method = "lee") 80 | 81 | index <- extractFeatures(nmf.rank5,"max") 82 | 83 | #change the order of the index 84 | new.index <- list() 85 | new.index[[1]] <- 
index[[1]] 86 | new.index[[2]] <- index[[2]] 87 | new.index[[3]] <- index[[4]] 88 | new.index[[4]] <- index[[3]] 89 | new.index[[5]] <- index[[5]] 90 | 91 | sig.order <- unlist(new.index) 92 | NMF.Exp.rank5 <- scale_ratio[sig.order,] 93 | NMF.Exp.rank5 <- na.omit(NMF.Exp.rank5) 94 | dim(NMF.Exp.rank5) 95 | 96 | group <- predict(nmf.rank5) 97 | 98 | #adjust the position of the module 99 | new.group <- c() 100 | for(each in group){ 101 | if(each %in% c("1")){ 102 | new.group <- c(new.group, "1") 103 | } 104 | if(each %in% c("2")){ 105 | new.group <- c(new.group, "2") 106 | } 107 | if(each %in% c("4")){ 108 | new.group <- c(new.group, "3") 109 | } 110 | if(each %in% c("3")){ 111 | new.group <- c(new.group, "4") 112 | } 113 | if(each %in% c("5")){ 114 | new.group <- c(new.group, "5") 115 | } 116 | } 117 | new.group <- factor(new.group, levels = c("1","2","3","4","5")) 118 | 119 | 120 | #z_score 121 | z_ratio <- scale(ratio)/4 122 | head(z_ratio) 123 | z_ratio <- as.data.frame(z_ratio) 124 | head(z_ratio) 125 | z_ratio <- t(z_ratio) 126 | 127 | plot_matrix <- z_ratio[sig.order,] 128 | plot_matrix <- na.omit(plot_matrix) 129 | dim(plot_matrix) 130 | 131 | 132 | info.matrix <- as.data.frame(t(NMF.Exp.rank5)) 133 | head(info.matrix) 134 | info.matrix$sampleID <- rownames(info.matrix) 135 | info.matrix$group <- new.group 136 | info.matrix <- merge(info.matrix, response.meta, by = "sampleID", all.x = TRUE) 137 | head(info.matrix) 138 | 139 | gene.group <- c() 140 | for(each in rownames(NMF.Exp.rank5)){ 141 | if(each %in% rownames(scale_ratio)[new.index[[1]]]){ 142 | gene.group <- c(gene.group, "module1") 143 | }else if(each %in% rownames(scale_ratio)[new.index[[2]]]){ 144 | gene.group <- c(gene.group, "module2") 145 | }else if(each %in% rownames(scale_ratio)[new.index[[3]]]){ 146 | gene.group <- c(gene.group, "module3") 147 | }else if(each %in% rownames(scale_ratio)[new.index[[4]]]){ 148 | gene.group <- c(gene.group, "module4") 149 | }else if(each %in% 
rownames(scale_ratio)[new.index[[5]]]){ 150 | gene.group <- c(gene.group, "module5") 151 | } 152 | } 153 | 154 | PDL1_TPS_group <- c() 155 | for(each in info.matrix$PDL1_TPS){ 156 | if (each %in% c("<1%","0")){ 157 | PDL1_TPS_group <- c(PDL1_TPS_group,"<1%") 158 | }else if (each %in% c("0.01","0.02","0.05","0.08","0.03","0.3","0.2","0.35","0.4")){ 159 | PDL1_TPS_group <- c(PDL1_TPS_group,"1%-49%") 160 | }else if (each %in% c("0.7","0.6","0.9","0.8","0.85","1","0.55","0.75","0.65","0.5")){ 161 | PDL1_TPS_group <- c(PDL1_TPS_group,">=50%") 162 | }else{ 163 | PDL1_TPS_group <- c(PDL1_TPS_group,"Not tested") 164 | } 165 | } 166 | info.matrix$PDL1_TPS_group <- PDL1_TPS_group 167 | 168 | 169 | info.matrix[is.na(info.matrix)] <- "unknown" 170 | ha = HeatmapAnnotation(smokingHistory = factor(info.matrix$smoking_history, levels = c("Y","N","unknown")), 171 | cycles = factor(info.matrix$cycles, levels = c("unknown","2","3","4","5","6")), 172 | PDL1_TPS = factor(info.matrix$PDL1_TPS_group, levels = c("Not tested","<1%","1%-49%",">=50%")), 173 | histology = factor(info.matrix$cancer_type, levels = c("LUSC","LUAD")), 174 | pathologicalResponse = factor(info.matrix$pathological_response, levels = c("nPR","pPR","MPR","pCR")), 175 | pathologicalResponseLevel = factor(info.matrix$pathological_response_level, levels = c("MPR","non-MPR")), 176 | group = factor(info.matrix$group, levels = c("1","2","3","4","5")), 177 | col = list(smokingHistory = c("Y" = "#F6E382","N" = "#B8DCC5","unknown" = "#82B0D2"), 178 | cycles = c("unknown" = "#E8E8D0","2" = "#DEDEBE","3" = "#CDCD9A","4"="#B9B973","5"="#AFAF61","6"="#949449"), 179 | pathologicalResponseLevel = c("MPR" = "#2868A6", "non-MPR" = "#B1161C"), 180 | pathologicalResponse = c("nPR" = "#E84445","pPR" = "#F39DA0","MPR" = "#95BCE5","pCR" = "#1999B2"), 181 | PDL1_TPS = c("Not tested" = "#D1E9E9","<1%"="#B3D9D9","1%-49%"="#6FB7B7", 182 | ">=50%"="#4F9D9D"), 183 | histology = c("LUSC" = "#E97777", "LUAD" = "#88AB8E"), 184 | RFS.group = 
c("notAvailable" = "#E8F3F1","not recurred <= 0.5y"= "#F2F1EB","not recurred 0.5-1y" = "#EEE7DA", 185 | "not recurred 1-2y" = "#AFC8AD","not recurred > 2y" = "#88AB8E", 186 | "recurred <= 0.5y" = "#E97777","recurred 0.5-1y"= "#FF9F9F","recurred 1-2y"="#FCDDB0"), 187 | group = c("1"="#E64B35B2","2"="#4DBBD5B2", 188 | "3"="#00A087B2","4"="#3C5488B2", 189 | "5"="#F39B7FB2")), 190 | simple_anno_size = unit(0.5, "cm")) 191 | 192 | 193 | a <- Heatmap(plot_matrix, name = "ratio", 194 | top_annotation = ha, 195 | row_split = gene.group, 196 | column_split = new.group, 197 | row_gap = unit(2, "mm"), 198 | column_gap = unit(2, "mm"), 199 | cluster_rows = FALSE, 200 | cluster_columns = FALSE, 201 | column_order = order(factor(info.matrix$pathological_response, levels = c("non-MPR","nPR","pPR","MPR","pCR"))), 202 | row_names_gp = grid::gpar(fontsize = 10), 203 | column_names_gp = grid::gpar(fontsize = 5)) 204 | a 205 | 206 | 207 | head(info.matrix) 208 | group <- info.matrix[,c("sampleID","group")] 209 | head(group) 210 | write.csv(group,"NMF_all_group_5.csv") 211 | -------------------------------------------------------------------------------- /main_figure/figure2_and_related_supplemental_figure/README.md: -------------------------------------------------------------------------------- 1 | # file description 2 | 3 | ## NMF.R 4 | used to generate the NMF figrues in Figure 2A 5 | 6 | ## robustness_of_NMF.R 7 | used to generate the figures in Supplemental Figure S3 and S4 to validate the robustness of NMF 8 | 9 | ## proportion_plot.R 10 | used to plot the proportion of each module or each cell type in all CD45+ immune cells, and some basic information like PD-L1,radiological_response and pathological response 11 | -------------------------------------------------------------------------------- /main_figure/figure2_and_related_supplemental_figure/proportion_plot.R: -------------------------------------------------------------------------------- 1 | library(reshape2) 2 | 
library(tidyverse) 3 | library(dplyr) 4 | library(readxl) 5 | ################################################################### 6 | info <- read.csv("all_sub_cell_type.csv") 7 | head(info) 8 | length(unique(info$sampleID)) 9 | 10 | cluster.info <- read.csv("NMF_all_group_5.csv") 11 | cluster.info <- cluster.info[,-1] 12 | head(cluster.info) 13 | dim(cluster.info) 14 | 15 | 16 | info <- info[info$sampleID %in% cluster.info$sampleID,] 17 | length(unique(info$sampleID)) 18 | 19 | 20 | df <- table(info$sampleID,info$sub_cell_type) 21 | ratio <- as.data.frame(df / rowSums(df)) 22 | head(ratio) 23 | colnames(ratio) <- c("sampleID","cell.type","Freq") 24 | head(ratio) 25 | 26 | ratio <- dcast(ratio, sampleID ~ ratio$cell.type, value.var = "Freq") 27 | rownames(ratio) <- ratio$sampleID 28 | head(ratio) 29 | 30 | ratio$group1_module <- ratio$`CD8T_NK-like_FGFBP2` + ratio$NK_CD16hi_FGFBP2 + ratio$CD4T_Tm_ANXA1 + ratio$Mφ_FCGR3A 31 | ratio$group2_module <- ratio$Bm_TNFSF9 + ratio$Bm_FCRL4 + ratio$Bm_PDE4D + ratio$Bm_CD74 + ratio$ILC3_KIT + ratio$Bm_TNF + ratio$Bn_TCL1A 32 | ratio$group3_module <- ratio$`CD8T_Tem_GZMK+GZMH+` + ratio$CD8T_Trm_ZNF683 + ratio$`CD8T_Tem_GZMK+NR4A1+` + ratio$CD8T_Tm_IL7R + ratio$CD8T_MAIT_KLRB1 33 | ratio$group4_module <- ratio$CD4T_Treg_FOXP3 + ratio$CD4T_Treg_CCR8 + ratio$CD4T_Tfh_CXCL13 + ratio$`CD4T_Th1-like_CXCL13` + 34 | ratio$CD4T_Treg_MKI67 + ratio$CD8T_ISG15 + ratio$CD8T_terminal_Tex_LAYN + ratio$CD8T_Tex_CXCL13 35 | ratio$group5_module <- ratio$Mφ_VCAN + ratio$Mφ_FOLR2 + ratio$cDC2_CD1C + ratio$Mφ_CXCL2 + 36 | ratio$Mφ_DNAJB1 + ratio$Mφ_ISG15 + ratio$mDC_LAMP3 + ratio$Mφ_MARCO + ratio$Mφ_CXCL10 + ratio$pDC_LILRA4 + ratio$Mφ_MMP9 + ratio$cDC1_CLEC9A 37 | 38 | ratio <- merge(ratio,cluster.info,by = "sampleID") 39 | head(ratio) 40 | ratio$group <- paste0("group",ratio$group) 41 | ratio$group <- factor(ratio$group, levels = c("group1","group2","group3","group4","group5")) 42 | 43 | compaired <- list(c("group1","group2"), 44 | 
c("group1","group3"), 45 | c("group1","group4"), 46 | c("group1","group5")) 47 | ggboxplot(ratio, x = "group", y = "`CD8T_NK-like_FGFBP2`", 48 | color = "group",add="jitter",add.params=list(size=0.5), 49 | x.text.angle=0) + labs(y= 'CD8T_NK-like_FGFBP2 / CD45+') + 50 | theme(legend.position="none") + 51 | scale_color_manual(values=c("group1"="#E84C35","group2"="#4FBAD6", 52 | "group3"="#00A289","group4"="#3C5487", 53 | "group5"="#F29B80")) + 54 | geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) 55 | 56 | 57 | 58 | 59 | sample.info <- as.data.frame(read_excel("sample.xlsx")) 60 | head(sample.info) 61 | pathological_response <- c() 62 | for(each in sample.info$pathological_response){ 63 | if(each %in% c("MPR","pCR")){ 64 | pathological_response <- c(pathological_response, "MPR") 65 | }else{ 66 | pathological_response <- c(pathological_response, "non-MPR") 67 | } 68 | } 69 | 70 | sample.info$pathological_response <- pathological_response 71 | 72 | response.meta <- sample.info[, c("sampleID","smoking_history","cancer_type","pre_treatment_staging", 73 | "PDL1_TPS","PD1","chemotherapy","targeted_therapy","cycles", 74 | "pathological_response","pathological_response_rate","radiological_response", 75 | "RVT_pre_dominant_histology")] 76 | response.meta <- response.meta %>% distinct(sampleID, .keep_all = TRUE) 77 | head(response.meta) 78 | 79 | ratio <- merge(ratio, response.meta, by = "sampleID", all.x = TRUE) 80 | head(ratio) 81 | 82 | ratio$pathological_response <- factor(ratio$pathological_response, levels = c("MPR","non-MPR")) 83 | 84 | a <- table(ratio$pathological_response,ratio$group) 85 | a <- as.data.frame(a / rowSums(a)) 86 | colnames(a) <- c("response","group","Freq") 87 | head(a) 88 | ggbarplot(a, x="response", y="Freq", fill = "group", 89 | x.text.angle=90) + theme(legend.position = "right") + 90 | scale_fill_manual(values=c("group1"="#E84C35","group2"="#4FBAD6", 91 | "group3"="#00A289","group4"="#3C5487", 92 | 
"group5"="#F29B80")) 93 | 94 | LUSC <- ratio[ratio$cancer_type %in% c("LUSC"),] 95 | a <- table(LUSC$pathological_response,LUSC$group) 96 | a <- as.data.frame(a / rowSums(a)) 97 | colnames(a) <- c("response","group","Freq") 98 | head(a) 99 | ggbarplot(a, x="response", y="Freq", fill = "group", 100 | x.text.angle=90) + theme(legend.position = "right") + 101 | scale_fill_manual(values=c("group1"="#E84C35","group2"="#4FBAD6", 102 | "group3"="#00A289","group4"="#3C5487", 103 | "group5"="#F29B80")) 104 | 105 | LUAD <- ratio[ratio$cancer_type %in% c("LUAD"),] 106 | a <- table(LUAD$pathological_response,LUAD$group) 107 | a <- as.data.frame(a / rowSums(a)) 108 | colnames(a) <- c("response","group","Freq") 109 | head(a) 110 | ggbarplot(a, x="response", y="Freq", fill = "group", 111 | x.text.angle=90) + theme(legend.position = "right") + 112 | scale_fill_manual(values=c("group1"="#E84C35","group2"="#4FBAD6", 113 | "group3"="#00A289","group4"="#3C5487", 114 | "group5"="#F29B80")) 115 | 116 | 117 | 118 | #PD1 119 | PDL1_TPS_group <- c() 120 | for(each in ratio$PDL1_TPS){ 121 | if (each %in% c("<1%","0")){ 122 | PDL1_TPS_group <- c(PDL1_TPS_group,"<1%") 123 | }else if (each %in% c("0.02","0.05","0.08","0.03","0.01","0.3","0.2","0.35","0.4")){ 124 | PDL1_TPS_group <- c(PDL1_TPS_group,"1%-49%") 125 | }else if (each %in% c("0.5","0.7","0.6","0.9","0.8","0.85","1","0.55","0.75","0.65")){ 126 | PDL1_TPS_group <- c(PDL1_TPS_group,">=50%") 127 | }else{ 128 | PDL1_TPS_group <- c(PDL1_TPS_group,"Not tested") 129 | } 130 | } 131 | ratio$PDL1_TPS_group <- PDL1_TPS_group 132 | 133 | ratio.part <- ratio[ratio$PDL1_TPS_group != "Not tested",] 134 | ratio.part$PDL1_TPS_group <- factor(as.vector(ratio.part$PDL1_TPS_group), levels = c("<1%","1%-49%",">=50%")) 135 | ratio.part$cluster <- paste0(ratio.part$group,"_",ratio.part$pathological_response) 136 | 137 | a <- table(ratio.part$cluster,ratio.part$PDL1_TPS_group) 138 | df <- as.data.frame(a) 139 | head(df) 140 | colnames(df) <- 
c("cluster","PDL1_TPS","number") 141 | head(df) 142 | df$cluster <- factor(df$cluster, 143 | levels = c("group1_MPR","group1_non-MPR", 144 | "group2_MPR","group2_non-MPR", 145 | "group3_MPR","group3_non-MPR", 146 | "group4_MPR","group4_non-MPR", 147 | "group5_MPR","group5_non-MPR")) 148 | 149 | ggbarplot(df, x="cluster", y="number", fill = "PDL1_TPS", 150 | x.text.angle=90) + theme(legend.position = "right") + 151 | scale_fill_manual(values=c("<1%"="#B3D9D9","1%-49%"="#4F9D9D", 152 | ">=50%"="#3D7878")) 153 | 154 | 155 | 156 | 157 | group <- c() 158 | response <- c() 159 | for(each in df$cluster){ 160 | group <- c(group, str_split(each, "_")[[1]][1]) 161 | response <- c(response, str_split(each, "_")[[1]][2]) 162 | } 163 | df$group <- group 164 | df$response <- response 165 | 166 | 167 | ggbarplot(df, x="PDL1_TPS", y="number", fill = "group", 168 | x.text.angle=90,facet.by = "response") + theme(legend.position = "right") + 169 | scale_fill_manual(values=c("group1"="#E84C35","group2"="#4FBAD6", 170 | "group3"="#00A289","group4"="#3C5487", 171 | "group5"="#F29B80")) 172 | ggsave("/home/zhangwj/data_yi/neoadjuvant/revision2/figure/PDL1_2.pdf",width = 5, height = 4) 173 | 174 | #alluvial 175 | library(readxl) 176 | sample.info <- as.data.frame(read_excel("sample.xlsx")) 177 | head(sample.info) 178 | 179 | cluster.info <- read.csv("NMF_all_group_5.csv") 180 | cluster.info <- cluster.info[,-1] 181 | head(cluster.info) 182 | 183 | sample.info <- merge(sample.info, cluster.info, by = "sampleID", all.x = TRUE) 184 | head(sample.info) 185 | sample.info <- sample.info[sample.info$group %in% c("1","2","3","4","5"),] 186 | head(sample.info) 187 | 188 | sample.info$group <- paste0("group",sample.info$group) 189 | 190 | pathological_response <- c() 191 | for(each in sample.info$pathological_response){ 192 | if(each %in% c("MPR","pCR")){ 193 | pathological_response <- c(pathological_response, "MPR") 194 | }else{ 195 | pathological_response <- c(pathological_response, "non-MPR") 
196 | } 197 | } 198 | 199 | sample.info$pathological_response <- pathological_response 200 | sample.info$cluster <- paste0(sample.info$group,"_",sample.info$pathological_response) 201 | 202 | #PD1 203 | PDL1_TPS_group <- c() 204 | for(each in sample.info$PDL1_TPS){ 205 | if (each %in% c("<1%","0")){ 206 | PDL1_TPS_group <- c(PDL1_TPS_group,"<1%") 207 | }else if (each %in% c("0.01","0.02","0.05","0.08","0.03","0.3","0.2","0.35","0.4")){ 208 | PDL1_TPS_group <- c(PDL1_TPS_group,"1%-49%") 209 | }else if (each %in% c("0.5","0.7","0.6","0.9","0.8","0.85","1","0.55","0.75","0.65")){ 210 | PDL1_TPS_group <- c(PDL1_TPS_group,">=50%") 211 | }else{ 212 | PDL1_TPS_group <- c(PDL1_TPS_group,"Not tested") 213 | } 214 | } 215 | sample.info$PDL1_TPS_group <- PDL1_TPS_group 216 | 217 | sample.info <- sample.info[sample.info$PDL1_TPS_group != "Not tested",] 218 | sample.info$PDL1_TPS_group <- factor(as.vector(sample.info$PDL1_TPS_group), levels = c("<1%","1%-49%",">=50%")) 219 | a <- table(sample.info$cluster,sample.info$PDL1_TPS_group) 220 | df <- as.data.frame(a) 221 | head(df) 222 | colnames(df) <- c("cluster","PDL1_TPS","number") 223 | head(df) 224 | df$cluster <- factor(df$cluster, 225 | levels = c("group1_MPR","group1_non-MPR", 226 | "group2_MPR","group2_non-MPR", 227 | "group3_MPR","group3_non-MPR", 228 | "group4_MPR","group4_non-MPR", 229 | "group5_MPR","group5_non-MPR")) 230 | 231 | 232 | df$group <- sapply(as.vector(df$cluster), function(x) strsplit(x,"_")[[1]][1]) 233 | head(df) 234 | df$pathological_response <- sapply(as.vector(df$cluster), function(x) strsplit(x,"_")[[1]][2]) 235 | head(df) 236 | df <- df[!df$number %in% c(0),] 237 | ggplot(data = df, 238 | aes(axis1 = PDL1_TPS, # First variable on the X-axis 239 | axis2 = group, # Third variable on the X-axis 240 | y = number)) + 241 | geom_alluvium(aes(fill = pathological_response,order = pathological_response)) + 242 | geom_stratum() + 243 | geom_text(stat = "stratum", 244 | aes(label = after_stat(stratum))) + 245 | 
theme_void() + 246 | scale_fill_manual(values=c("MPR" = "#2868A6", "non-MPR" = "#B1161C")) 247 | 248 | library(ggalluvial) 249 | cluster.info <- read.csv("NMF_all_group_5.csv") 250 | cluster.info <- cluster.info[,-1] 251 | head(cluster.info) 252 | 253 | sample.info <- as.data.frame(read_excel("sample.xlsx")) 254 | head(sample.info) 255 | sample.info <- sample.info[sample.info$sampleID %in% cluster.info$sampleID,] 256 | sample.info <- sample.info[,c("sampleID","pathological_response","radiological_response")] 257 | 258 | pathological_response_level <- c() 259 | for(each in sample.info$pathological_response){ 260 | if(each %in% c("MPR","pCR")){ 261 | pathological_response_level <- c(pathological_response_level, "MPR") 262 | }else{ 263 | pathological_response_level <- c(pathological_response_level, "non-MPR") 264 | } 265 | } 266 | sample.info$pathological_response_level <- pathological_response_level 267 | rownames(sample.info) <- sample.info$sampleID 268 | head(sample.info) 269 | sample.info <- sample.info[cluster.info$sampleID,] 270 | sample.info$group <- cluster.info$group 271 | sample.info$group <- paste0("group",sample.info$group) 272 | sample.info$sub.group <- paste0(sample.info$group,"_",sample.info$pathological_response_level) 273 | sample.info$sub.group <- factor(sample.info$sub.group, 274 | levels = c("group1_MPR","group1_non-MPR", 275 | "group2_MPR","group2_non-MPR", 276 | "group3_MPR","group3_non-MPR", 277 | "group4_MPR","group4_non-MPR", 278 | "group5_MPR","group5_non-MPR")) 279 | head(sample.info) 280 | 281 | sample.info <- sample.info[sample.info$radiological_response %in% c("SD","PR","CR","PD"),] 282 | head(sample.info) 283 | mm <- as.data.frame(table(sample.info$sub.group,sample.info$radiological_response)) 284 | colnames(mm) <- c("sub.group","radiological_response","number") 285 | mm$radiological_response <- factor(mm$radiological_response,levels = c("CR","PR","SD","PD")) 286 | head(mm) 287 | 288 | ggplot(data = mm, 289 | aes(axis1 = sub.group, # 
First variable on the X-axis 290 | axis2 = radiological_response, # Third variable on the X-axis 291 | y = number)) + 292 | geom_alluvium(aes(fill = radiological_response,order = radiological_response)) + 293 | geom_stratum() + 294 | geom_text(stat = "stratum", 295 | aes(label = after_stat(stratum))) + 296 | theme_void() + 297 | scale_fill_manual(values=c("CR"="#E6A4B4","PR"="#FFD9C0","SD"="#8CC0DE","PD"="#0B60B0")) 298 | 299 | 
--------------------------------------------------------------------------------
/main_figure/figure2_and_related_supplemental_figure/robustness_of_NMF.R:
--------------------------------------------------------------------------------
library(NMF)
library(ComplexHeatmap)
library(reshape2)
library(tidyverse)
library(dplyr)
library(readxl)
library(viridis)
###################################################################
# Robustness of the NMF modules: repeatedly drop a random subset of samples,
# re-run NMF for every rank, collect the feature modules, and finally count how
# often two cell types fall into the same module.

n.iterations <- 200   # number of subsampling rounds
n.dropped <- 45       # samples removed per round (described as 20% of the samples)
nmf.ranks <- 2:10     # factorisation ranks fitted in every round
seed <- 2020820       # seed handed to nmf()

# Count, for every pair of features, the number of modules containing both.
#   modules  : list of character vectors (feature names of one module each)
#   features : character vector naming the rows/columns of the result
# Returns a square numeric matrix; the diagonal holds the number of modules
# that contain the feature itself.
cooccurrence_matrix <- function(modules, features) {
  membership <- vapply(modules, function(m) features %in% m,
                       logical(length(features)))
  # Force a features x modules matrix even for 0 or 1 features/modules.
  membership <- matrix(membership, nrow = length(features))
  co <- tcrossprod(membership * 1)
  dimnames(co) <- list(features, features)
  co
}

# The input tables never change, so read them once instead of once per round.
info <- read.csv("all_sub_cell_type.csv")
group <- read.csv("NMF_all_group_5.csv")

count <- 1
module_merge <- vector("list", n.iterations * sum(nmf.ranks))

for (i in seq_len(n.iterations)) {
  # remove 20% samples randomly
  random.samples <- sample(group$sampleID, n.dropped)
  print(random.samples)
  kept.samples <- setdiff(group$sampleID, random.samples)
  info.sub <- info[info$sampleID %in% kept.samples, ]
  print(length(unique(info.sub$sampleID)))

  # Per-sample proportion of every cell type (samples x cell types).
  counts <- table(info.sub$sampleID, info.sub$sub_cell_type)
  ratio <- unclass(prop.table(counts, margin = 1))

  # normalization: min-max scale every cell type, then put cell types in rows
  scale_ratio <- t(apply(ratio, MARGIN = 2,
                         function(x) (x - min(x)) / (max(x) - min(x))))

  for (rk in nmf.ranks) {
    nmf.fit <- nmf(scale_ratio,
                   rank = rk,
                   nrun = 200,
                   seed = seed,
                   method = "lee")

    index <- extractFeatures(nmf.fit, "max")
    for (j in seq_len(rk)) {
      # Look the names up directly: subsetting the matrix with a single index
      # drops it to a vector, whose rownames() are NULL, which used to lose
      # every one-feature module. NA marks a basis without any feature.
      feature.idx <- index[[j]]
      module_merge[[count]] <- rownames(scale_ratio)[feature.idx[!is.na(feature.idx)]]
      count <- count + 1
    }
  }
}

module_merge

# Use the cell types of the full table so that a type missing from the last
# subsample is still represented.
cell.types <- unique(info$sub_cell_type)
Mat <- cooccurrence_matrix(module_merge, cell.types)
head(Mat)

custom_magma <- c(colorRampPalette(c("white", rev(magma(323, begin = 0.15))[1]))(10),
                  rev(magma(323, begin = 0.18)))
pheatmap(as.matrix(Mat), cluster_cols = TRUE, cluster_rows = TRUE,
         clustering_distance_rows = "euclidean", color = custom_magma,
         fontsize = 12, treeheight_row = 0, treeheight_col = 30,
         cellheight = 7, cellwidth = 7, show_rownames = TRUE,
         show_colnames = FALSE, clustering_method = "ward.D2", border_color = NA)

--------------------------------------------------------------------------------
/main_figure/figure3_and_related_supplemental_figure/TNBC_zyy.R:
--------------------------------------------------------------------------------
# Packages whose functions this script calls.
library(Seurat)
library(harmony)
library(dplyr)
library(ggplot2)
library(ggpubr)
library(ggsignif)  # geom_signif()

#downloaded data from GSE169246
TNBC <- readRDS("/home/zhangwj/data_yi/neoadjuvant/data/other_data/zhangyy/zyy_TNBC.rds")

dim(TNBC)
Idents(TNBC) <- "cellType_in_paper"
DimPlot(TNBC, reduction = "umap", label = FALSE, pt.size = 0.1) + NoLegend()

FeaturePlot(TNBC, features = c("GBP1"), cols = c("lightgrey", "#FD3131"), pt.size = 0.1)
FeaturePlot(TNBC, features = c("NKG7"), cols = c("lightgrey", "#FD3131"))

# Keep only the T / ILC populations listed on the following lines.
NKT.part <-
subset(TNBC, cellType_in_paper %in% c(
  "t_Tn-LEF1", "t_ILC1-IL32", "t_CD8_Tem-GZMK",
  "t_CD4-CXCL13", "t_ILC1-GZMK",
  "t_CD8_MAIT-KLRB1",
  "t_CD4_Treg-FOXP3",
  "t_CD8_Trm-ZNF683", "t_CD8_Teff-GNLY", "t_CD4_Tcm-LMNA",
  "t_ILC3-AREG",
  "t_CD8-CXCL13", "t_ILC1-IFNG",
  "t_ILC1-FGFBP2", "t_Tact-IFI6",
  "t_ILC1-ZNF683",
  "t_ILC1-CD160", "t_ILC3-IL7R", "t_ILC1-CX3CR1",
  "t_ILC1-SELL", "t_CD4_Tact-XIST", "t_ILC2-SPON2",
  "t_ILC1-CNOT2", "t_Tprf-MKI67",
  "t_ILC1-VCAM1"
))
dim(NKT.part)
NKT.part <- NormalizeData(NKT.part, normalization.method = "LogNormalize", scale.factor = 10000)

NKT.part <- FindVariableFeatures(NKT.part, selection.method = "vst", nfeatures = 1000)
# Remove immunoglobulin (IG K/H/L) genes and genes starting with "MT" from the
# variable features. A logical mask is used instead of x[-which(...)], which
# returns an EMPTY vector whenever nothing matches.
# NOTE(review): "^MT" also matches genes such as MTOR or MT1X; "^MT-" looks like
# the intended mitochondrial pattern. Left unchanged because the hard-coded
# cluster labels below depend on the exact feature set - confirm before changing.
var.features <- VariableFeatures(NKT.part, assay = "RNA")
var.features <- var.features[!grepl("^IG[KHL]", var.features)]
var.features <- var.features[!grepl("^MT", var.features)]
#var.features <- var.features[!grepl("^RP[LS]", var.features)]
VariableFeatures(NKT.part, assay = "RNA") <- var.features
length(VariableFeatures(NKT.part, assay = "RNA"))

all.genes <- rownames(NKT.part)
NKT.part <- ScaleData(NKT.part, features = all.genes)
NKT.part <- RunPCA(NKT.part, features = VariableFeatures(object = NKT.part))

ElbowPlot(NKT.part)

#NKT.part <- subset(NKT.part, sample %in% c("medial 2","distal 2",
#                                           "distal 1a","proximal 3",
#                                           "distal 3"))
#remove batch effect
NKT.part <- RunHarmony(NKT.part, c("sampleID"))

NKT.part <- NKT.part %>%
  FindNeighbors(reduction = "harmony", dims = 1:15) %>%
  FindClusters(resolution = 0.5) %>%
  RunUMAP(reduction = "harmony", dims = 1:15) %>%
  identity()

# Final clustering used for the annotation below.
NKT.part <- FindClusters(NKT.part, resolution = 1)

DimPlot(NKT.part, reduction = "umap", label = TRUE, pt.size = 1)
table(NKT.part$cellType_in_paper)

# The original FeaturePlot(features = c("")) placeholder always failed (an empty
# string is not a feature); put a gene symbol here to inspect a marker:
# FeaturePlot(NKT.part, features = c("<gene>"), pt.size = 1, cols = c("lightgrey", "#FD3131"))

NKT.markers <- FindAllMarkers(NKT.part, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
# Despite its name, NKT.top5 keeps the 10 markers with the highest log2FC per cluster.
NKT.top5 <- NKT.markers %>%
  group_by(cluster) %>%
  slice_max(n = 10, order_by = avg_log2FC)

FeaturePlot(NKT.part, features = c("FCGR3A"), cols = c("lightgrey", "#FD3131"))
FeaturePlot(NKT.part, features = c("FGFBP2"), cols = c("lightgrey", "#FD3131"))

DimPlot(NKT.part, reduction = "umap", label = TRUE, pt.size = 0.1) + NoLegend()

# Relabel the clusters: the 10th and 17th identity levels are annotated as
# NK_CD16hi_FGFBP2, all others as "other".
Idents(NKT.part) <- "seurat_clusters"
new.cluster.ids <- rep("other", 18)
new.cluster.ids[c(10, 17)] <- "NK_CD16hi_FGFBP2"
# The labels are positional, so a different number of clusters must not pass silently.
stopifnot(length(levels(NKT.part)) == length(new.cluster.ids))
names(new.cluster.ids) <- levels(NKT.part)
NKT.part <- RenameIdents(NKT.part, new.cluster.ids)
DimPlot(NKT.part, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend() +
  scale_color_manual(values = c("#8ECFC9", "#FA7F6F"))
NKT.part$new.cell.type <- Idents(NKT.part)

# Fraction of each relabelled cell type per sample.
a <- table(NKT.part$sampleID, NKT.part$new.cell.type)
a <- as.data.frame(a / rowSums(a))
colnames(a) <- c("sampleID", "cell.type", "Freq")
head(a)

# One metadata row per sample.
obs <- NKT.part@meta.data
obs <- obs[, c("sampleID", "patientID", "tissue", "treatment_status",
               "ICB_treatment", "treatment", "treatment_response")]
obs <- obs %>% distinct(sampleID, .keep_all = TRUE)
head(obs)

df <- merge(a, obs, by = "sampleID", all.x = TRUE)
head(df)
# Values outside PD/SD/PR become NA here.
df$treatment_response <- factor(df$treatment_response, levels = c("PD", "SD", "PR"))

# Post-treatment, chemotherapy-only samples; NK_CD16hi_FGFBP2 fraction only.
df <- df[df$cell.type == "NK_CD16hi_FGFBP2", ]
df <- df[df$treatment %in% c("Chemo"), ]
df <- df[df$treatment_status %in% c("Post-treatment"), ]

treatment.response <-
c()
# Collapse PD and SD into one class; everything else is labelled "PR".
# Missing responses also land in "PR", so make that visible instead of silent.
response.raw <- as.character(df$treatment_response)
if (anyNA(response.raw)) {
  warning(sum(is.na(response.raw)),
          " sample(s) without a PD/SD/PR response are labelled \"PR\"",
          call. = FALSE)
}
treatment.response <- ifelse(response.raw %in% c("PD", "SD"), "PD/SD", "PR")
df$treatment.response <- treatment.response
compaired <- list(c("PD/SD", "PR"))

ggboxplot(df, x = "treatment.response", y = "Freq",
          color = "treatment.response", add = "jitter",
          x.text.angle = 0, size = 0.5, pt.size = 1, facet.by = "treatment") +
  geom_signif(comparisons = compaired, step_increase = 0.1, map_signif_level = FALSE, test = wilcox.test) +
  scale_color_manual(values = c("PD/SD" = "#88AB8E", "PR" = "#E97777")) + theme(legend.position = "none")

--------------------------------------------------------------------------------
/main_figure/figure3_and_related_supplemental_figure/analysis_of_chemo_only_NSCLC.R:
--------------------------------------------------------------------------------
library(ggplot2)
library(ggpubr)
library(reshape2)
library(tidyverse)
library(dplyr)

# Per-sample fraction of every sub cell type (long format).
info <- read.csv("all_sub_cell_type.csv")
head(info)
df <- table(info$sampleID, info$sub_cell_type)
ratio <- as.data.frame(df / rowSums(df))
head(ratio)
colnames(ratio) <- c("sampleID", "cell.type", "Freq")
head(ratio)

library(readxl)
sample.info <- as.data.frame(read_excel("sample.xlsx"))
head(sample.info)
# "MPR" and "pCR" count as MPR; every other value (including NA) as non-MPR.
pathological_response <- ifelse(sample.info$pathological_response %in% c("MPR", "pCR"),
                                "MPR", "non-MPR")

sample.info$pathological_response <- pathological_response

response.meta <- sample.info[, c("sampleID", "smoking_history", "cancer_type", "pre_treatment_staging",
                                 "PDL1_TPS", "PD1", "chemotherapy", "targeted_therapy", "cycles",
                                 "pathological_response", "pathological_response_rate", "radiological_response",
                                 "RVT_pre_dominant_histology")]
response.meta <- response.meta %>% distinct(sampleID, .keep_all = TRUE)
head(response.meta)

# Wide format: one row per sample, one column per cell type.
ratio <- dcast(ratio, sampleID ~ cell.type, value.var = "Freq")

head(ratio)
merge.version <- merge(ratio, response.meta, by = "sampleID", all.x = TRUE)
head(merge.version)
#patients with only chemotherapy
merge.version <- merge.version[merge.version$PD1 %in% c("No"), ]
head(merge.version)


ggboxplot(merge.version, x = "pathological_response", y = "`CD8T_NK-like_FGFBP2`",
          color = "pathological_response", add = "jitter",
          x.text.angle = 0, size = 0.5, pt.size = 1) +
  stat_compare_means(aes(group = pathological_response)) +
  scale_color_manual(values = c("MPR" = "#2868A6", "non-MPR" = "#B1161C")) +
  theme(legend.position = "none")

--------------------------------------------------------------------------------
/main_figure/figure3_and_related_supplemental_figure/number_B_aggrates.R:
--------------------------------------------------------------------------------
library(readxl)
library(ggplot2)
library(ggpubr)    # ggboxplot()
library(ggsignif)  # geom_signif()
# Only the T1 counts were kept (translated from the original comment).
B.info <- as.data.frame(read_excel("B_cell_aggregates.xlsx"))
head(B.info)

cluster.info <- read.csv("NMF_all_group_5.csv")
cluster.info <- cluster.info[, -1]
head(cluster.info)

df <- merge(cluster.info, B.info, by = "sampleID", all.x = TRUE)
head(df)
df <- df[df$group %in% c("1", "2", "3", "4", "5"), ]
head(df)
# Drop samples without a count (missing, or entered as "unknown") without
# touching the other columns.
n.aggregates <- df$number_of_B_cell_aggregates
df <- df[!is.na(n.aggregates) & n.aggregates != "unknown", ]
dim(df)
df$number_of_B_cell_aggregates <- as.numeric(df$number_of_B_cell_aggregates)
# NOTE(review): the factor 4 presumably rescales the counts to "per cm2" (the
# unit used in the axis label) - confirm the counted area.
df$number_of_B_cell_aggregates <- 4 * df$number_of_B_cell_aggregates
df$group <- paste0("group", df$group)
df$group <- factor(as.vector(df$group), levels = c("group1", "group2", "group3",
                                                   "group4", "group5"))

# group2 against each of the other groups
compaired <- list(c("group1", "group2"), c("group2", "group3"),
                  c("group2", "group4"), c("group2", "group5"))

ggboxplot(df, x = "group", y = "number_of_B_cell_aggregates",
          color = "group", add = "jitter", add.params = list(size = 0.7),
          x.text.angle = 0) + labs(x = 'group', y = 'number_of_B_cell_aggregates / cm2') +
  theme(legend.position = "none") +
  scale_color_manual(values = c("group1" = "#E84C35", "group2" = "#4FBAD6",
                                "group3" = "#00A289", "group4" = "#3C5487",
                                "group5" = "#F29B80"), guide = "none") +
  geom_signif(comparisons = compaired, step_increase = 0.1, map_signif_level = FALSE, test = wilcox.test)


# group2 against all other groups pooled
new_group <- ifelse(df$group %in% c("group1", "group3", "group4", "group5"),
                    "other_group", "group2")

df$new_group <- new_group
df$new_group <- factor(df$new_group, levels = c("group2", "other_group"))

compaired <- list(c("group2", "other_group"))
ggboxplot(df, x = "new_group", y = "number_of_B_cell_aggregates",
          color = "new_group", add = "jitter", add.params = list(size = 0.7),
          x.text.angle = 0) + labs(x = 'group', y = 'number_of_B_cell_aggregates / cm2') +
  theme(legend.position = "none") +
  scale_color_manual(values = c("other_group" = "#8491B4CC", "group2" = "#4FBAD6"), guide = "none") +
  geom_signif(comparisons = compaired, step_increase = 0.1, map_signif_level = FALSE, test = wilcox.test)


--------------------------------------------------------------------------------
/main_figure/figure3_and_related_supplemental_figure/startrac.R:
--------------------------------------------------------------------------------
library(Startrac)
library(ggpubr)
library(ggplot2)
library(circlize)
library(ggpmisc)
library(ggsci)

####################################################################
TCR.data <- read.csv("T_with_TCR.csv")
# TCR.data must include the
# clone type, expansion state and the cluster
head(TCR.data)
dim(TCR.data)

#only include CD8 T cells
# ("your sub cell types" is a placeholder for the clusters to analyse)
keep.cells <- TCR.data$sub_cell_type %in% c("your sub cell types")
TCR.data <- TCR.data[keep.cells, ]
head(TCR.data)

head(TCR.data)


# Columns handed to Startrac.run(): new name = column in TCR.data
startrac.columns <- c(patient = "sampleID", Cell_Name = "cellID",
                      clone.id = "clonetype", clone.status = "expansion",
                      majorCluster = "sub_cell_type")
in.dat <- TCR.data[, unname(startrac.columns)]
head(in.dat)

colnames(in.dat) <- names(startrac.columns)
head(in.dat)

# every cell gets the same location label
in.dat$loc <- "T"

head(in.dat)

out <- Startrac.run(in.dat, proj = "NSCLC", verbose = FALSE)

# per-cluster table; its "expa" column is plotted below
expan <- out@cluster.data
head(expan)
expan$majorCluster <- factor(expan$majorCluster, levels = c("your sub cell types"))

# placeholder: fill in the pairs of clusters to compare
compaired <- list(c(), c())
ggboxplot(
  expan,
  x = "majorCluster", y = "expa",
  color = "majorCluster",
  add = "jitter", add.params = list(size = 0.5),
  x.text.angle = 45
) +
  labs(x = 'cell.type', y = 'expa') +
  theme(legend.position = "none") +
  geom_signif(comparisons = compaired, step_increase = 0.1,
              map_signif_level = FALSE, test = wilcox.test)

--------------------------------------------------------------------------------
/main_figure/figure3_and_related_supplemental_figure/visualization_of_NKT_clones.Rmd:
--------------------------------------------------------------------------------
# P107 plot
coerced.cell.types <- c(
  "CD8T_Tem_GZMK+GZMH+", "CD4T_Tfh_CXCL13", "CD4T_Tm_XCL1",
  "CD4T_Tem_GZMA", "ILC3_KIT", "CD8T_Trm_ZNF683", "CD8T_prf_MKI67",
  "CD4T_Tm_ANXA1", "CD8T_MAIT_KLRB1", "CD4T_Tn_CCR7",
  "CD4T_Treg_MKI67", "CD8T_Tem_GZMK+NR4A1+", "CD8T_Tm_IL7R",
  "T_gdT_TRDV2", "T_gdT_TRDV1", "NK_CD16hi_FGFBP2",
  "CD4T_Th1-like_CXCL13"
)

# fill colour of every T_new_name category
color.value <- c(
  "expanded_terminal_Tex" = "#004949",
  "Expanded CCR8+Treg" = "#A50021",
  "other" = "#BCBCBC",
  "CD8Texp" = "#418849",
  "CD8T_NK-like_FGFBP2" = "#6551CC"
)

tcr.table <- filter(expanded.tcr.table, sampleID == "P107")
# Stacked bar chart of the expanded clonotypes of one sample.
#   sample.id : value of sampleID to plot (also used as the plot title)
#   tcr       : table with sampleID, clonetype and T_new_name columns
#   colors    : named fill colours for the T_new_name categories
# Clonotypes are ordered on the x axis by decreasing number of cells; each bar
# is split by T_new_name. Returns the ggplot object.
plot_clonal_composition <- function(sample.id,
                                    tcr = expanded.tcr.table,
                                    colors = color.value) {
  cell.type.levels <- c("expanded_terminal_Tex", "CD8Texp", "Expanded CCR8+Treg",
                        "CD8T_NK-like_FGFBP2", "other")

  tcr.table <- tcr %>% filter(sampleID == sample.id)

  tcr.count.table <- as.data.frame(table(tcr.table$clonetype)) %>% arrange(desc(Freq))
  x.orders <- as.vector(tcr.count.table$Var1)

  by.cell.type.count.table <- tcr.table %>%
    group_by(clonetype, T_new_name) %>%
    summarise(n = n(), .groups = "drop")
  by.cell.type.count.table$T_new_name <- factor(by.cell.type.count.table$T_new_name,
                                                levels = cell.type.levels)

  ggplot(by.cell.type.count.table,
         aes(x = factor(clonetype, levels = x.orders), fill = T_new_name, y = n)) +
    theme_classic() +
    scale_fill_manual(values = colors) +
    geom_bar(position = "stack", stat = "identity") +
    theme(axis.text.x = element_text(angle = 90)) +
    ggtitle(sample.id)
}

# Every listed sample is plotted; only the ones in samples.to.save are written
# to figures/.
samples.to.plot <- c("P107", "P471", "P325", "P23", "P258", "P365", "P412", "P59")
samples.to.save <- c("P107", "P258", "P412")

for (sample.id in samples.to.plot) {
  p <- plot_clonal_composition(sample.id)
  print(p)
  if (sample.id %in% samples.to.save) {
    ggsave(sprintf("figures/%s_clonal_composition.pdf", sample.id), p, width = 10, height = 5)
  }
}

--------------------------------------------------------------------------------
/main_figure/figure4_and_related_supplemental_figure/CCR8.IF.Rmd:
--------------------------------------------------------------------------------
ccr8.ihc <- read_csv("CCR8_IF.csv") %>% mutate(group = as.character(group))

compaired <- list(c("3", "4"))

# Box plot of one column of CCR8_IF.csv, group 3 vs group 4, with a Wilcoxon
# test bracket.
#   measure : name of the column plotted on the y axis
#   y.label : y axis title
ccr8_if_boxplot <- function(measure, y.label) {
  ggboxplot(ccr8.ihc, x = "group", y = measure,
            color = "group", add = "jitter", add.params = list(size = 0.5),
            x.text.angle = 0) +
    labs(x = 'group', y = y.label) +
    theme(legend.position = "none") +
    geom_signif(comparisons = compaired, step_increase = 0.1, map_signif_level = FALSE, test = wilcox.test) +
    scale_color_manual(values = c("3" = "#00A289", "4" = "#3C5487"))
}

ccr8_if_boxplot("FOXP3.area", 'number of FOXP3+ Cells per mm2')
ggsave("number.FOXP3.pdf", width = 2.5, height = 3.5)

# Must stay the last plot created here: the ggsave() that follows saves the
# most recent plot.
ccr8_if_boxplot("CCR8.area", 'number of FOXP3+CCR8+ Cells per mm2')
| ggsave("number.CCR8.pdf", width = 2.5, height = 3.5) 20 | 21 | ggboxplot(ccr8.ihc, x = "group", y = "CCR8.FOXP3", 22 | color = "group",add="jitter",add.params=list(size=0.5), 23 | x.text.angle=0) + labs(x='group', y= 'percentage of FOXP3+CCR8+ Cells in FOXP3+ cells') + 24 | theme(legend.position="none") + 25 | geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) + 26 | scale_color_manual(values=c("3"="#00A289","4"="#3C5487")) 27 | ggsave("CCR8.FOXP3.pdf", width = 2.5, height = 3.5) 28 | -------------------------------------------------------------------------------- /main_figure/figure4_and_related_supplemental_figure/CCR8_IF.csv: -------------------------------------------------------------------------------- 1 | group,sampleID,FOXP3,CCR8,area,FOXP3.area,CCR8.area,CCR8.FOXP3 2 | 3,P46,19,2,69619,273,29,0.11 3 | 3,P46,16,2,69619,230,29,0.13 4 | 3,P46,22,1,69619,316,14,0.05 5 | 3,P46,22,2,69619,316,29,0.09 6 | 3,P46,27,6,69619,388,86,0.22 7 | 3,P31,17,3,61634,276,49,0.18 8 | 3,P31,22,4,61634,357,65,0.18 9 | 3,P31,21,6,61634,341,97,0.29 10 | 3,P31,17,5,61634,276,81,0.29 11 | 3,P31,20,4,61634,324,65,0.2 12 | 3,P57,22,6,54563,403,110,0.27 13 | 3,P57,17,1,54563,312,18,0.06 14 | 3,P57,23,3,54563,422,55,0.13 15 | 3,P57,23,4,54563,422,73,0.17 16 | 3,P57,20,7,54563,367,128,0.35 17 | 4,P22,31,11,37335,830,295,0.35 18 | 4,P22,26,14,37335,696,375,0.54 19 | 4,P22,32,16,37335,857,429,0.5 20 | 4,P22,18,10,37335,482,268,0.56 21 | 4,P22,21,9,37335,562,241,0.43 22 | 4,P32,16,7,35817,447,195,0.44 23 | 4,P32,22,7,35817,614,195,0.32 24 | 4,P32,28,8,35817,782,223,0.29 25 | 4,P32,25,9,35817,698,251,0.36 26 | 4,P32,31,8,35817,866,223,0.26 27 | 4,P106,18,6,19437,926,309,0.33 28 | 4,P106,16,7,19437,823,360,0.44 29 | 4,P106,21,7,19437,1080,360,0.33 30 | 4,P106,8,3,19437,412,154,0.38 31 | 4,P106,8,2,19437,412,103,0.25 32 | -------------------------------------------------------------------------------- 
/main_figure/figure4_and_related_supplemental_figure/CCR8_Treg_in_all_Treg.R:
--------------------------------------------------------------------------------
#############################################################################################################
# Fraction of CCR8+ Tregs among all Tregs per sample, compared between
# group3 and group4.
library(ggplot2)
library(ggpubr)    # ggboxplot()
library(ggsignif)  # geom_signif()

info <- read.csv("all_sub_cell_type.csv")
head(info)
length(unique(info$sampleID))

# Keep the three Treg subsets only, so the fractions below are relative to all Tregs.
info <- info[info$sub_cell_type %in% c("CD4T_Treg_CCR8", "CD4T_Treg_FOXP3", "CD4T_Treg_MKI67"), ]

cluster.info <- read.csv("NMF_all_group_5.csv")
cluster.info <- cluster.info[, -1]
head(cluster.info)
dim(cluster.info)


# Restrict to samples that have a group assignment.
info <- info[info$sampleID %in% cluster.info$sampleID, ]
length(unique(info$sampleID))


df <- table(info$sampleID, info$sub_cell_type)
ratio <- as.data.frame(df / rowSums(df))
head(ratio)
colnames(ratio) <- c("sampleID", "cell.type", "Freq")
head(ratio)

ratio <- merge(ratio, cluster.info, by = "sampleID")
head(ratio)
ratio$group <- paste0("group", ratio$group)
ratio$group <- factor(ratio$group, levels = c("group1", "group2", "group3", "group4", "group5"))
head(ratio)

CCR8 <- ratio[ratio$cell.type %in% c("CD4T_Treg_CCR8"), ]

compaired <- list(c("group3", "group4"))

# The stray `"group3"="#00A289","group4"="#3C5487",` fragment that followed this
# plot was a parse error and has been removed.
ggboxplot(CCR8[CCR8$group %in% c("group3", "group4"), ], x = "group", y = "Freq",
          color = "group", add = "jitter", add.params = list(size = 0.5),
          x.text.angle = 0) + labs(x = 'group', y = 'CCR8+ Treg in all Tregs') +
  theme(legend.position = "none") +
  geom_signif(comparisons = compaired, step_increase = 0.1, map_signif_level = FALSE, test = wilcox.test) +
  scale_color_manual(values = c("group3" = "#00A289", "group4" = "#3C5487"))
-------------------------------------------------------------------------------- /main_figure/figure4_and_related_supplemental_figure/DEG_volcano_plot.R: -------------------------------------------------------------------------------- 1 | 
######################################################################################## 2 | #load library 3 | ######################################################################################### 4 | suppressMessages(library(Seurat)) 5 | suppressMessages(library(dplyr)) 6 | suppressMessages(library(ggplot2)) 7 | suppressMessages(library(readr)) 8 | suppressMessages(library(harmony)) 9 | suppressMessages(library(ggpubr)) 10 | suppressMessages(library(ggpmisc)) 11 | suppressMessages(library(ggrepel)) 12 | suppressMessages(library(readxl)) 13 | ############################################################################################## 14 | #B cell 15 | ########################################################################## 16 | scRNA <- readRDS("Treg_count.rds") 17 | dim(scRNA) 18 | Idents(scRNA) <- "sub_cell_type" 19 | 20 | scRNA <- NormalizeData(scRNA, normalization.method = "LogNormalize", scale.factor = 10000) 21 | 22 | scRNA <- FindVariableFeatures(scRNA, selection.method = "vst",nfeatures = 1000) 23 | length(scRNA@assays$RNA@var.features) 24 | 25 | all.genes <- rownames(scRNA) 26 | scRNA <- ScaleData(scRNA, features = all.genes) 27 | scRNA <- RunPCA(scRNA, features = VariableFeatures(object = scRNA)) 28 | 29 | ElbowPlot(scRNA) 30 | 31 | Idents(scRNA) <- "sub_cell_type" 32 | cluster1.markers <- FindMarkers(scRNA, ident.1 = "CD4T_Treg_CCR8",min.pct = 0.5, 33 | test.use = "wilcox_limma",slot = "counts") 34 | head(cluster1.markers, n = 5) 35 | 36 | data.markers <- cluster1.markers 37 | 38 | data.markers$symbol <- rownames(data.markers) 39 | data.markers$logP <- -log10(data.markers$p_val_adj + 1e-100) 40 | dim(data.markers) 41 | data.markers$Group = "not-significant" 42 | data.markers$Group[which((data.markers$p_val_adj < 0.05) & (data.markers$avg_log2FC > 1.4))] = "CD4T_Treg_CCR8" 43 | data.markers$Group[which((data.markers$p_val_adj < 0.05) & (data.markers$avg_log2FC < -0.9))] = "CD4T_Treg_FOXP3" 44 | table(data.markers$Group) 45 | 46 | 
data.markers$label = "" 47 | # order the marker table by avg_log2FC, largest first (not by p-value) 48 | data.markers <- data.markers[order(data.markers$avg_log2FC, decreasing = TRUE),] 49 | # label the first 13 genes of the CD4T_Treg_CCR8 group, i.e. those with the highest avg_log2FC 50 | up.genes <- head(data.markers$symbol[which(data.markers$Group == "CD4T_Treg_CCR8")], 13) 51 | # label the last 7 genes of the CD4T_Treg_FOXP3 group, i.e. those with the lowest avg_log2FC 52 | down.genes <- tail(data.markers$symbol[which(data.markers$Group == "CD4T_Treg_FOXP3")], 7) 53 | # combine up.genes and down.genes and write them into the label column; all other rows keep "" 54 | data.top10.genes <- c(as.character(up.genes), as.character(down.genes)) 55 | data.markers$label[match(data.top10.genes, data.markers$symbol)] <- data.top10.genes 56 | ggscatter(data.markers, x = "avg_log2FC", y = "logP", color = "Group", 57 | palette = c("#CC0000","#2f5688","#BBBBBB"), 58 | size = 1, font.label = 18, 59 | repel = T, xlab = "log2FoldChange", 60 | ylab = "-log10(Adjust P-value)") + 61 | geom_text_repel(size=3,point.padding = NA,label = data.markers$label, max.overlaps = 1000) 62 | -------------------------------------------------------------------------------- /main_figure/figure4_and_related_supplemental_figure/T_cell_clonal_composition_in_individual_patients.Rmd: -------------------------------------------------------------------------------- 1 | # select expanded TCRs 2 | all.tcr.table <- read_csv("T_with_TCR_obs_V3.csv") 3 | all.tcr.table 4 | 5 | unique.tcr <- unique(all.tcr.table$clonetype) 6 | tcr.frequency.table <- all.tcr.table %>% group_by(clonetype) %>% summarise(n=n()) 7 | tcr.frequency.table 8 | TCRs.expansion.cutoff <- 10 9 | expanded.TCR.frequency.table <- tcr.frequency.table %>% filter(n>=TCRs.expansion.cutoff) 10 | expanded.TCR.frequency.table 11 | expanded.TCRs <- as.vector(expanded.TCR.frequency.table$clonetype) 12 | 13 | expanded.tcr.table <- all.tcr.table %>% filter(clonetype %in% expanded.TCRs) 14 | 15 | color.value <- c( 16 | "expanded_terminal_Tex"="#004949", 17 | "Expanded CCR8+Treg"="#A50021", 18 | "other"="#BCBCBC", 19 | "CD8Texp"="#418849" 20 | ) 21 | 22 | 23 | tcr.table <- expanded.tcr.table %>%
filter(sampleID=="P9") 24 | 25 | tcr.count.table <- as.data.frame(table(tcr.table$clonetype)) %>% arrange(desc(Freq)) 26 | x.orders <- as.vector(tcr.count.table$Var1) 27 | by.cell.type.count.table <- tcr.table %>% group_by(clonetype, T_new_name) %>% summarise(n=n()) 28 | by.cell.type.count.table$T_new_name <- factor(by.cell.type.count.table$T_new_name, levels = c("expanded_terminal_Tex", "CD8Texp", "Expanded CCR8+Treg", "other")) 29 | p <- ggplot(by.cell.type.count.table, 30 | aes(x=factor(clonetype, level=x.orders), fill=T_new_name, y=n)) + 31 | theme_classic() + 32 | scale_fill_manual(values = color.value) + 33 | geom_bar(position="stack", stat="identity") + theme(axis.text.x = element_text(angle = 90)) + 34 | ggtitle("P9") 35 | p 36 | ggsave("figures/P9_clonal_composition.pdf", p, width = 10, height =5) 37 | -------------------------------------------------------------------------------- /main_figure/figure5_and_related_supplemental_figure/CCR8IHC.xlsm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zwj-tina/neoadjuvant-treatment-of-NSCLC/63ba67130a8e4c11e0bb7f5fac31b12ffcb2fab4/main_figure/figure5_and_related_supplemental_figure/CCR8IHC.xlsm -------------------------------------------------------------------------------- /main_figure/figure5_and_related_supplemental_figure/CCR8_IHC_non-MPR_subtypes.Rmd: -------------------------------------------------------------------------------- 1 | ```{r} 2 | library(dplyr) 3 | library(tidyverse) 4 | library(readxl) 5 | library(ggpubr) 6 | library(survival) 7 | library(ggsurvfit) 8 | library(survminer) 9 | ``` 10 | 11 | ```{r} 12 | nmf.data <- read_csv("clean_nmf_data.csv") 13 | ccr8.data <- read_excel("CCR8IHC.xlsm") %>% filter(SampleID != "P189") %>% left_join(nmf.data, by="SampleID") %>% filter((MPR=="non-MPR"), !is.na(numberTregClone)) %>% mutate(TCR.classification=ifelse(numberTregClone>=9, "type I non-MPR", "type II non-MPR")) 14 | ccr8.data 15 | 16 
| table(ccr8.data$TCR.classification) 17 | ``` 18 | 19 | ```{r} 20 | ccr8.data %>% group_by(TCR.classification) %>% count(group) 21 | ``` 22 | 23 | 24 | 25 | ```{r} 26 | compaired <- list(c("type I non-MPR", "type II non-MPR")) 27 | 28 | ggboxplot(ccr8.data, x = "TCR.classification", y = "number.CCR8.cells", 29 | color = "TCR.classification",add="jitter",add.params=list(size=0.5), 30 | x.text.angle=0) + labs(x='TCR.classification', y= 'number of CCR8+ Cells per mm2') + 31 | theme(legend.position="none") + 32 | geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) + 33 | scale_color_manual(values=c("type I non-MPR"="#8CB4A3","type II non-MPR"="#7998AD")) + theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1)) 34 | ggsave("number.CCR8.cells.pdf", width = 2.5, height = 3.5) 35 | 36 | ggboxplot(ccr8.data, x = "TCR.classification", y = "number.IC", 37 | color = "TCR.classification",add="jitter",add.params=list(size=0.5), 38 | x.text.angle=0) + labs(x='TCR.classification', y= 'number of immune Cells per mm2') + 39 | theme(legend.position="none") + 40 | geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) + 41 | scale_color_manual(values=c("type I non-MPR"="#8CB4A3","type II non-MPR"="#7998AD")) + theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1)) 42 | ggsave("number.immune.cells.pdf", width = 2.5, height = 3.5) 43 | 44 | ggboxplot(ccr8.data, x = "TCR.classification", y = "percentage.CCR8.IC", 45 | color = "TCR.classification",add="jitter",add.params=list(size=0.5), 46 | x.text.angle=0) + labs(x='TCR.classification', y= 'proportion of CCR8+ cells in immune Cells') + 47 | theme(legend.position="none") + 48 | geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) + 49 | scale_color_manual(values=c("type I non-MPR"="#8CB4A3","type II non-MPR"="#7998AD")) + theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1)) 50 | 
ggsave("proportion.CCR8.cells.pdf", width = 2.5, height = 3.5) 51 | # NOTE(review): the boxplot below repeats the percentage.CCR8.IC plot saved just above and is itself never saved - confirm it is intentional 52 | ggboxplot(ccr8.data, x = "TCR.classification", y = "percentage.CCR8.IC", 53 | color = "TCR.classification",add="jitter",add.params=list(size=0.5), 54 | x.text.angle=0) + labs(x='TCR.classification', y= 'proportion of CCR8+ cells in immune Cells') + 55 | theme(legend.position="none") + 56 | geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) + 57 | scale_color_manual(values=c("type I non-MPR"="#8CB4A3","type II non-MPR"="#7998AD")) + theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1)) 58 | 59 | # stacked bar: every row of ccr8.data contributes height 1, filled by group, so bar heights are row counts per TCR.classification 60 | ggplot(ccr8.data %>% mutate(value=1), aes(fill=as.factor(group), y=value, x=TCR.classification)) + 61 | geom_bar(position="stack", stat="identity") + scale_fill_manual(values=c("#4DBBD5", "#00A087", "#3C5488", "#F39B7F")) + theme_classic() 62 | 63 | ggsave("CCR8.IHC.patient.number.pdf", width = 3, height = 4) 64 | ``` 65 | 66 | -------------------------------------------------------------------------------- /main_figure/figure5_and_related_supplemental_figure/Tex_relevant_clononumber_6group.R: -------------------------------------------------------------------------------- 1 | Tex_relevant <- read.csv("CD8Tex_relevant_clonotype_number_over2.csv") 2 | dim(Tex_relevant) 3 | Tex_relevant <- Tex_relevant[,-1] 4 | colnames(Tex_relevant) <- c("sampleID","number") 5 | rownames(Tex_relevant) <- Tex_relevant$sampleID 6 | head(Tex_relevant) 7 | 8 | 9 | cluster.info <- read.csv("NMF_all_group_5.csv") 10 | dim(cluster.info) 11 | cluster.info <- cluster.info[,-1] 12 | rownames(cluster.info) <- cluster.info$sampleID 13 | head(cluster.info) 14 | 15 | #include samples with TCR data and in our clustering data 16 | samples <- intersect(Tex_relevant$sampleID, cluster.info$sampleID) 17 | length(samples) 18 | 19 | cluster.info <- cluster.info[samples,] 20 | Tex_relevant <- Tex_relevant[samples,] 21 | 22 | Tex_relevant$group <- paste0("group",cluster.info$group) 23 |
Tex_relevant$group <- factor(Tex_relevant$group,levels = c("group1","group2","group3","group4","group5")) 24 | head(Tex_relevant) 25 | 26 | 27 | 28 | library(readxl) 29 | sample.info <- as.data.frame(read_excel("neoadjuvant/data/other/sample.xlsx")) 30 | sample.info <- sample.info[,c("sampleID","pathological_response","cancer_type")] 31 | pathology <- c() 32 | for(each in sample.info$pathological_response){ 33 | if(each %in% c("MPR","pCR")){ 34 | pathology <- c(pathology, "MPR") 35 | }else{ 36 | pathology <- c(pathology, "non-MPR") 37 | } 38 | } 39 | sample.info$pathology <- pathology 40 | rownames(sample.info) <- sample.info$sampleID 41 | sample.info <- sample.info[sample.info$sampleID %in% c(Tex_relevant$sampleID),] 42 | dim(sample.info) 43 | head(sample.info) 44 | 45 | Tex_relevant <- merge(Tex_relevant,sample.info,by = "sampleID") 46 | head(Tex_relevant) 47 | 48 | new_group <- c() 49 | for(each in 1:length(Tex_relevant$sampleID)){ 50 | print(Tex_relevant$group[[each]]) 51 | if(Tex_relevant$group[each] %in% c("group3")){ 52 | new_group <- c(new_group, paste0(Tex_relevant$group[[each]],"_",Tex_relevant$cancer_type[[each]])) 53 | }else{ 54 | new_group <- c(new_group,as.vector(Tex_relevant$group)[[each]]) 55 | } 56 | } 57 | Tex_relevant$new_group <- new_group 58 | head(Tex_relevant) 59 | Tex_relevant$new_group <- factor(Tex_relevant$new_group,levels = c("group1", 60 | "group2", 61 | "group3_LUSC", 62 | "group3_LUAD", 63 | "group4", 64 | "group5")) 65 | ggboxplot(Tex_relevant, x = "new_group", y = "number", 66 | color = "pathology",add="jitter",add.params=list(size=0.5), 67 | x.text.angle=90) + labs(x='group', y= 'clonotype number of Tex-relevent cells') + 68 | scale_color_manual(values=c("MPR" = "#2868A6", "non-MPR" = "#B1161C")) + 69 | geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) 70 | 71 | Tex_relevant$cluster <- paste0(Tex_relevant$new_group,"_",Tex_relevant$pathology) 72 | Tex_relevant$cluster <- 
factor(Tex_relevant$cluster,levels = c("group1_MPR","group1_non-MPR", 73 | "group2_MPR","group2_non-MPR", 74 | "group3_LUSC_MPR","group3_LUSC_non-MPR", 75 | "group3_LUAD_MPR","group3_LUAD_non-MPR", 76 | "group4_MPR","group4_non-MPR", 77 | "group5_MPR","group5_non-MPR")) 78 | 79 | compaired <- list(c("group2_MPR","group2_non-MPR"), 80 | c("group3_LUSC_MPR","group3_LUSC_non-MPR"), 81 | c("group3_LUAD_MPR","group3_LUAD_non-MPR"), 82 | c("group5_MPR","group5_non-MPR"), 83 | c("group4_non-MPR","group3_LUSC_non-MPR"), 84 | c("group4_non-MPR","group3_LUAD_non-MPR"), 85 | c("group3_LUSC_non-MPR","group3_LUAD_non-MPR"), 86 | c("group4_non-MPR","group3_LUSC_MPR"), 87 | c("group4_non-MPR","group3_LUAD_MPR")) 88 | 89 | ggboxplot(Tex_relevant, x = "cluster", y = "number", 90 | color = "cluster",add="jitter",add.params=list(size=0.5), 91 | x.text.angle=90) + labs(x='group', y= 'clonotype number of Tex-relevent cells') + 92 | theme(legend.position="none") + 93 | geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) 94 | -------------------------------------------------------------------------------- /main_figure/figure5_and_related_supplemental_figure/Treg_clonenumber_6group.R: -------------------------------------------------------------------------------- 1 | Treg_clone <- read.csv("expanded_CD4Treg_clonotype_number_over2.csv") 2 | dim(Treg_clone) 3 | Treg_clone <- Treg_clone[,-1] 4 | colnames(Treg_clone) <- c("sampleID","number") 5 | rownames(Treg_clone) <- Treg_clone$sampleID 6 | head(Treg_clone) 7 | 8 | 9 | cluster.info <- read.csv("NMF_all_group_5.csv") 10 | dim(cluster.info) 11 | cluster.info <- cluster.info[,-1] 12 | rownames(cluster.info) <- cluster.info$sampleID 13 | head(cluster.info) 14 | 15 | #include samples with TCR data and in our clustering data 16 | samples <- intersect(Treg_clone$sampleID, cluster.info$sampleID) 17 | length(samples) 18 | 19 | cluster.info <- cluster.info[samples,] 20 | Treg_clone <- 
Treg_clone[samples,] 21 | 22 | Treg_clone$group <- paste0("group",cluster.info$group) 23 | Treg_clone$group <- factor(Treg_clone$group,levels = c("group1","group2","group3","group4","group5")) 24 | head(Treg_clone) 25 | 26 | 27 | # collapse pathological_response into MPR (MPR or pCR) versus non-MPR 28 | library(readxl) 29 | sample.info <- as.data.frame(read_excel("sample.xlsx")) 30 | sample.info <- sample.info[,c("sampleID","pathological_response","cancer_type")] 31 | pathology <- c() 32 | for(each in sample.info$pathological_response){ 33 | if(each %in% c("MPR","pCR")){ 34 | pathology <- c(pathology, "MPR") 35 | }else{ 36 | pathology <- c(pathology, "non-MPR") 37 | } 38 | } 39 | sample.info$pathology <- pathology 40 | rownames(sample.info) <- sample.info$sampleID 41 | sample.info <- sample.info[sample.info$sampleID %in% c(Treg_clone$sampleID),] 42 | dim(sample.info) 43 | head(sample.info) 44 | 45 | Treg_clone <- merge(Treg_clone,sample.info,by = "sampleID") 46 | head(Treg_clone) 47 | # split group3 by cancer_type; every other group keeps its label 48 | new_group <- c() 49 | for(each in 1:length(Treg_clone$sampleID)){ 50 | print(Treg_clone$group[[each]]) 51 | if(Treg_clone$group[each] %in% c("group3")){ 52 | new_group <- c(new_group, paste0(Treg_clone$group[[each]],"_",Treg_clone$cancer_type[[each]])) 53 | }else{ 54 | new_group <- c(new_group,as.vector(Treg_clone$group)[[each]]) 55 | } 56 | } 57 | Treg_clone$new_group <- new_group 58 | head(Treg_clone) 59 | 60 | head(Treg_clone) 61 | # NOTE(review): the first boxplot below passes `compaired` to geom_signif before it is assigned further down, and those pairs name cluster levels rather than new_group levels - confirm the intended comparisons 62 | Treg_clone$new_group <- factor(Treg_clone$new_group,levels = c("group1", 63 | "group2", 64 | "group3_LUSC", 65 | "group3_LUAD", 66 | "group4", 67 | "group5")) 68 | ggboxplot(Treg_clone, x = "new_group", y = "number", 69 | color = "pathology",add="jitter",add.params=list(size=0.5), 70 | x.text.angle=90) + labs(x='group', y= 'clonotype number of expanded CD4 Treg cells') + 71 | scale_color_manual(values=c("MPR" = "#2868A6", "non-MPR" = "#B1161C")) + 72 | geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) 73 | 74 | Treg_clone$cluster <-
paste0(Treg_clone$new_group,"_",Treg_clone$pathology) 75 | Treg_clone$cluster <- factor(Treg_clone$cluster,levels = c("group1_MPR","group1_non-MPR", 76 | "group2_MPR","group2_non-MPR", 77 | "group3_LUSC_MPR","group3_LUSC_non-MPR", 78 | "group3_LUAD_MPR","group3_LUAD_non-MPR", 79 | "group4_MPR","group4_non-MPR", 80 | "group5_MPR","group5_non-MPR")) 81 | # pairs of cluster levels tested with wilcox.test in the plot below 82 | compaired <- list(c("group2_MPR","group2_non-MPR"), 83 | c("group3_LUSC_MPR","group3_LUSC_non-MPR"), 84 | c("group3_LUAD_MPR","group3_LUAD_non-MPR"), 85 | c("group5_MPR","group5_non-MPR"), 86 | c("group3_LUSC_non-MPR","group3_LUAD_non-MPR"), 87 | c("group4_non-MPR","group3_LUSC_non-MPR"), 88 | c("group4_non-MPR","group3_LUAD_non-MPR"), 89 | c("group4_non-MPR","group3_LUSC_MPR"), 90 | c("group4_non-MPR","group3_LUAD_MPR")) 91 | 92 | ggboxplot(Treg_clone, x = "cluster", y = "number", 93 | color = "cluster",add="jitter",add.params=list(size=0.5), 94 | x.text.angle=90) + labs(x='group', y= 'clonotype number of expanded CD4 Treg cells') + 95 | theme(legend.position="none") + 96 | geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) 97 | -------------------------------------------------------------------------------- /main_figure/figure5_and_related_supplemental_figure/fig5D_Tex_Treg_clone_number_scatter.R: -------------------------------------------------------------------------------- 1 | Tex_relevant <- read.csv("CD8Tex_relevant_clonotype_number_over2.csv") 2 | Tex_relevant <- Tex_relevant[,-1] 3 | colnames(Tex_relevant) <- c("sampleID","number") 4 | rownames(Tex_relevant) <- Tex_relevant$sampleID 5 | 6 | all.group <- read.csv("/home/zhangwj/data_yi/neoadjuvant/revision2/data/NMF_all_group_5.csv") 7 | all.group <- all.group[,-1] 8 | rownames(all.group) <- all.group$sampleID 9 | 10 | samples <- intersect(Tex_relevant$sampleID, all.group$sampleID) 11 | length(samples) 12 | 13 | Tex_relevant <- Tex_relevant[samples,] 14 | all.group <- all.group[samples,] 15 | 16 |
Tex_relevant$group <- all.group$group 17 | head(Tex_relevant) 18 | 19 | library(readxl) 20 | sample.info <- as.data.frame(read_excel("sample.xlsx")) 21 | sample.info <- sample.info[,c("sampleID","pathological_response","cancer_type")] 22 | head(sample.info) 23 | pathology <- c() 24 | for(each in sample.info$pathological_response){ 25 | if(each %in% c("MPR","pCR")){ 26 | pathology <- c(pathology, "MPR") 27 | }else{ 28 | pathology <- c(pathology, "non-MPR") 29 | } 30 | } 31 | sample.info$pathology <- pathology 32 | rownames(sample.info) <- sample.info$sampleID 33 | sample.info <- sample.info[rownames(Tex_relevant),] 34 | 35 | Tex_relevant <- merge(Tex_relevant,sample.info,by = "sampleID") 36 | head(Tex_relevant) 37 | Tex_relevant$group <- paste0("group",Tex_relevant$group) 38 | colnames(Tex_relevant) <- c("sampleID","Tex_number","group","pathological_response_detail","cancer_type","pathological_response") 39 | 40 | Treg <- read.csv("expanded_CD4Treg_clonotype_number_over2.csv") 41 | Treg <- Treg[,-1] 42 | colnames(Treg) <- c("sampleID","number") 43 | rownames(Treg) <- Treg$sampleID 44 | head(Treg) 45 | dim(Treg) 46 | Treg <- Treg[Tex_relevant$sampleID,] 47 | head(Treg) 48 | 49 | Tex_relevant$Treg_number <- Treg$number 50 | head(Tex_relevant) 51 | 52 | ggscatter(Tex_relevant[Tex_relevant$pathological_response %in% c("non-MPR"),], 53 | x = "Tex_number", 54 | y = "Treg_number", color = "group",shape = "cancer_type")+ 55 | geom_hline(yintercept = 9,linetype = "dashed") + 56 | scale_color_manual(values=c("group1"="#E84C35","group2"="#4FBAD6", 57 | "group3"="#00A289","group4"="#3C5487", 58 | "group5"="#F29B80")) + geom_abline(intercept = 0, slope = 1,linetype = "dashed") + 59 | xlim(0,100) +ylim(0,100) 60 | 61 | 62 | ggscatter(Tex_relevant[Tex_relevant$pathological_response %in% c("MPR"),], 63 | x = "Tex_number", 64 | y = "Treg_number", color = "group",shape = "cancer_type")+ 65 | geom_hline(yintercept = 9,linetype = "dashed") + 66 | 
scale_color_manual(values=c("group1"="#E84C35","group2"="#4FBAD6", 67 | "group3"="#00A289","group4"="#3C5487", 68 | "group5"="#F29B80")) + geom_abline(intercept = 0, slope = 1,linetype = "dashed") + 69 | xlim(0,100) +ylim(0,100) 70 | -------------------------------------------------------------------------------- /main_figure/figure5_and_related_supplemental_figure/figure5E_alluvium.R: -------------------------------------------------------------------------------- 1 | library(ggalluvial) 2 | cluster.info <- read.csv("/home/zhangwj/data_yi/neoadjuvant/revision2/data/NMF_all_group_5.csv") 3 | cluster.info <- cluster.info[,-1] 4 | head(cluster.info) 5 | 6 | sample.info <- as.data.frame(read_excel("/home/zhangwj/data_yi/neoadjuvant/data/other/sample.xlsx")) 7 | head(sample.info) 8 | sample.info <- sample.info[sample.info$sampleID %in% cluster.info$sampleID,] 9 | sample.info <- sample.info[,c("sampleID","pathological_response","cancer_type")] 10 | 11 | pathological_response_level <- c() 12 | for(each in sample.info$pathological_response){ 13 | if(each %in% c("MPR","pCR")){ 14 | pathological_response_level <- c(pathological_response_level, "MPR") 15 | }else{ 16 | pathological_response_level <- c(pathological_response_level, "non-MPR") 17 | } 18 | } 19 | sample.info$pathological_response_level <- pathological_response_level 20 | rownames(sample.info) <- sample.info$sampleID 21 | head(sample.info) 22 | sample.info <- sample.info[cluster.info$sampleID,] 23 | sample.info$group <- cluster.info$group 24 | sample.info$group <- paste0("group",sample.info$group) 25 | sample.info$sub.group <- paste0(sample.info$group,"_",sample.info$pathological_response_level) 26 | sample.info$sub.group <- factor(sample.info$sub.group, 27 | levels = c("group1_MPR","group1_non-MPR", 28 | "group2_MPR","group2_non-MPR", 29 | "group3_MPR","group3_non-MPR", 30 | "group4_MPR","group4_non-MPR", 31 | "group5_MPR","group5_non-MPR")) 32 | head(sample.info) 33 | 34 | 35 | Treg_clone <- 
read.csv("expanded_CD4Treg_clonotype_number_over2.csv") 36 | dim(Treg_clone) 37 | Treg_clone <- Treg_clone[,-1] 38 | colnames(Treg_clone) <- c("sampleID","number") 39 | rownames(Treg_clone) <- Treg_clone$sampleID 40 | head(Treg_clone) 41 | 42 | Treg_clone <- Treg_clone[Treg_clone$sampleID %in% cluster.info$sampleID,] 43 | dim(Treg_clone) 44 | Treg_level <- c() 45 | for(each in Treg_clone$number){ 46 | print(each) 47 | if(each >= 9){ 48 | Treg_level <- c(Treg_level,"high") 49 | }else{ 50 | Treg_level <- c(Treg_level,"low") 51 | } 52 | } 53 | Treg_clone$Treg_level <- Treg_level 54 | colnames(Treg_clone) <- c("sampleID","Treg_number","Treg_level") 55 | head(Treg_clone) 56 | dim(Treg_clone) 57 | 58 | head(sample.info) 59 | 60 | sample.info <- merge(sample.info, Treg_clone,by = "sampleID") 61 | head(sample.info) 62 | 63 | cluster <- c() 64 | for(i in 1:length(sample.info$sampleID)){ 65 | if(sample.info$pathological_response_level[[i]] %in% c("MPR")){ 66 | cluster <- c(cluster, "MPR") 67 | }else{ 68 | cluster <- c(cluster, paste0(sample.info$pathological_response_level[[i]],"_",sample.info$Treg_level[[i]])) 69 | } 70 | } 71 | unique(cluster) 72 | 73 | sample.info$cluster <- cluster 74 | sample.info$cluster <- factor(sample.info$cluster, levels = c("MPR","non-MPR_high","non-MPR_low")) 75 | head(sample.info) 76 | 77 | LUSC <- sample.info[sample.info$cancer_type %in% c("LUSC"),] 78 | 79 | mm <- as.data.frame(table(sample.info$sub.group,sample.info$cluster)) 80 | colnames(mm) <- c("sub.group","tolerance","number") 81 | mm$tolerance <- factor(mm$tolerance,levels = c("MPR","non-MPR_high", 82 | "non-MPR_low")) 83 | head(mm) 84 | 85 | ggplot(data = mm, 86 | aes(axis1 = sub.group, # First variable on the X-axis 87 | axis2 = tolerance, # Third variable on the X-axis 88 | y = number)) + 89 | geom_alluvium(aes(fill = tolerance,order = tolerance)) + 90 | geom_stratum() + 91 | geom_text(stat = "stratum", 92 | aes(label = after_stat(stratum))) + 93 | theme_void() + 94 | 
scale_fill_manual(values=c("MPR"="#D9BFAE", 95 | "non-MPR_high"="#8CB4A3", 96 | "non-MPR_low"="#7998AD")) 97 | -------------------------------------------------------------------------------- /main_figure/figure6_and_related_supplemental_figure/HNSCC.analysis.Rmd: -------------------------------------------------------------------------------- 1 | ```{r} 2 | library(dplyr) 3 | library(tidyverse) 4 | library(readxl) 5 | library(ggpubr) 6 | library(survival) 7 | library(ggsurvfit) 8 | library(survminer) 9 | ``` 10 | 11 | ```{r} 12 | HNSCC.meta <- read_csv("HNSCC.meta.csv") 13 | 14 | HNSCC.data <- read_csv("HNSCC.T_with_TCR_harmony.csv") 15 | HNSCC.data 16 | HNSCC.post <- HNSCC.data %>% filter(state=="post-Tx") %>% left_join(HNSCC.meta, by="patientID") 17 | HNSCC.post 18 | ``` 19 | 20 | ```{r} 21 | HNSCC.post.summary <- HNSCC.post %>% filter(T_new_name %in% c("Texp", "expanded terminal Tex")) %>% group_by(patientID, T_new_name) %>% summarise(n=n()) %>% 22 | mutate(freq = n / sum(n)) %>% filter(T_new_name=="Texp") %>% left_join(HNSCC.meta, by="patientID") 23 | HNSCC.post.summary 24 | compaired <- list(c("yes", "no")) # the two RFS_event levels tested below; must be defined here because no earlier chunk assigns it 25 | ggboxplot(HNSCC.post.summary, x = "RFS_event", y ="Pathological response %" , 26 | color = "RFS_event",add="jitter",add.params=list(size=0.5), 27 | x.text.angle=0) + labs(x='RFS_event', y= 'PRR') + 28 | theme(legend.position="none") + 29 | geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) + scale_color_manual(values=c("yes"="#C6595A","no"="#878586")) + theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1)) 30 | 31 | ggsave("HNSCC.RFS.PRR.pdf", width = 2.5, height = 3.5) 32 | ``` 33 | 34 | 35 | ```{r} 36 | # plot texp in association with RFS 37 | HNSCC.post.summary <- HNSCC.post %>% filter(T_new_name %in% c("Texp", "expanded terminal Tex")) %>% group_by(patientID, T_new_name) %>% summarise(n=n()) %>% 38 | mutate(freq = n / sum(n)) %>% filter(T_new_name=="Texp") %>% left_join(HNSCC.meta, by="patientID") 39 | HNSCC.post.summary 40 |
41 | compaired <- list(c("yes", "no")) 42 | 43 | ggboxplot(HNSCC.post.summary, x = "RFS_event", y = "freq", 44 | color = "RFS_event",add="jitter",add.params=list(size=0.5), 45 | x.text.angle=0) + labs(x='RFS_event', y= 'Texp in tex-relevant cells') + 46 | theme(legend.position="none") + 47 | geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) + scale_color_manual(values=c("yes"="#C6595A","no"="#878586")) + theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1)) 48 | ggsave("HNSCC.RFS.Texp.pdf", width = 2.5, height = 3.5) 49 | 50 | 51 | ``` 52 | 53 | 54 | 55 | ```{r} 56 | # plot treg in association with RFS: freq is the CD4T_Treg_CCR8 share among the three Treg sub-clusters per patient 57 | HNSCC.post.summary <- HNSCC.post %>% filter(majority_voting %in% c("CD4T_Treg_CCR8", "CD4T_Treg_FOXP3", "CD4T_Treg_MKI67")) %>% group_by(patientID, majority_voting) %>% summarise(n=n()) %>% 58 | mutate(freq = n / sum(n)) %>% filter(majority_voting=="CD4T_Treg_CCR8") %>% left_join(HNSCC.meta, by="patientID") 59 | HNSCC.post.summary 60 | 61 | compaired <- list(c("yes", "no")) 62 | 63 | ggboxplot(HNSCC.post.summary, x = "RFS_event", y = "freq", 64 | color = "RFS_event",add="jitter",add.params=list(size=0.5), 65 | x.text.angle=0) + labs(x='RFS_event', y= 'CCR8 Treg in all Tregs') + 66 | theme(legend.position="none") + 67 | geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) + scale_color_manual(values=c("yes"="#C6595A","no"="#878586")) + theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1)) 68 | #ggsave("HNSCC.RFS.Texp.pdf", width = 2.5, height = 3.5) 69 | ``` 70 | 71 | ```{r} 72 | # plot treg in association with MPR; NOTE(review): the plot in this chunk uses x = "PRR.cat" while its comparisons and colours are yes/no - confirm whether x should be "MPR" 73 | HNSCC.post.summary <- HNSCC.post %>% filter(majority_voting %in% c("CD4T_Treg_CCR8", "CD4T_Treg_FOXP3", "CD4T_Treg_MKI67")) %>% group_by(patientID, majority_voting) %>% summarise(n=n()) %>% 74 | mutate(freq = n / sum(n)) %>% filter(majority_voting=="CD4T_Treg_CCR8") %>% left_join(HNSCC.meta, by="patientID") 75 | HNSCC.post.summary 76 | 77
| compaired <- list(c("yes", "no")) 78 | 79 | ggboxplot(HNSCC.post.summary, x = "PRR.cat", y = "freq", 80 | color = "PRR.cat",add="jitter",add.params=list(size=0.5), 81 | x.text.angle=0) + labs(x='PRR.cat', y= 'CCR8 Treg in all Tregs') + 82 | theme(legend.position="none") + 83 | geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) + scale_color_manual(values=c("yes"="#C6595A","no"="#878586")) + theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1)) 84 | #ggsave("HNSCC.RFS.Texp.pdf", width = 2.5, height = 3.5) 85 | ``` 86 | 87 | ```{r} 88 | # plot treg in correlation with terminal Tex 89 | HNSCC.post.summary <- HNSCC.post %>% filter(majority_voting %in% c("CD8T_terminal_Tex_LAYN", "CD4T_Treg_CCR8"), clononumber>=3) %>% group_by(patientID, majority_voting) %>% summarise(n=n_distinct(clonotype)) 90 | HNSCC.post.summary 91 | #ggsave("HNSCC.RFS.Texp.pdf", width = 2.5, height = 3.5) 92 | 93 | HNSCC.Treg.summary <- HNSCC.post.summary %>% filter(majority_voting=="CD4T_Treg_CCR8") 94 | colnames(HNSCC.Treg.summary)[3] <- "numberTregClone" 95 | HNSCC.Treg.summary 96 | HNSCC.Tex.summary <- HNSCC.post.summary %>% filter(majority_voting=="CD8T_terminal_Tex_LAYN") 97 | colnames(HNSCC.Tex.summary)[3] <- "numberTexClone" 98 | HNSCC.Clone.summary <- HNSCC.Tex.summary %>% left_join(HNSCC.Treg.summary %>% dplyr::select(patientID, numberTregClone), by="patientID") %>% mutate(numberTregClone=ifelse(is.na(numberTregClone), 0, numberTregClone)) %>% left_join(HNSCC.meta, by="patientID") 99 | HNSCC.Clone.summary 100 | 101 | 102 | ggplot(HNSCC.Clone.summary, aes(x=numberTexClone, y=numberTregClone, color=MPR)) + geom_point() + theme_classic() + coord_equal() 103 | ``` 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /main_figure/figure6_and_related_supplemental_figure/HNSCC.meta.csv: 
-------------------------------------------------------------------------------- 1 | patientID,Cohort,Cycles delivered,RFS_event,OS,Died ,Clinical to pathologic downstaging,RECIST response excluding non measurable,Volumetric response,Viable Tumor % post,Pathological response %,PRR.cat,MPR,Any response >=10% (90% or less viable tumor) ,Comment 2 | P01,Mono,1,no,yes,no,NA,stable/progress,yes,NA,NA,NA,NA,NA,"P01 did not have tumor resection following treatment and thus tumor regression score, pathological response, and downstaging could not be assessed" 3 | P02,Mono,2,NA,NA,NA,no,stable/progress,no,90,10,Medium,no,yes,"P02 was originally enrolled but later deemed to have been ineligible and therefore excluded from response analyses, although sample collection was performed" 4 | P04,Mono,2,no,yes,no,yes,not measurable,no,90,10,Medium,no,yes,P03 ID was skipped because consented patient was excluded upon screening and not treated 5 | P05,Combo,2,no,yes,no,yes,not measurable,yes,5,95,High,yes,yes, 6 | P06,Combo,2,no,yes,no,yes,not measurable,no,70,30,Medium,no,yes, 7 | P07,Combo,2,no,yes,no,no,not measurable,no,90,10,Medium,no,yes, 8 | P08,Mono,2,no,yes,no,no,not measurable,no,95,5,Low,no,no, 9 | P09,Mono,2,no,yes,no,yes,not measurable,yes,20,80,High,no,yes, 10 | P10,Combo,2,yes,no,yes,no,stable/progress,no,85,15,Medium,no,yes, 11 | P12,Combo,2,no,yes,no,no,not measurable,yes,95,5,Low,no,no,P11 ID was skipped because consented patient was excluded upon screening and not treated 12 | P13,Combo,2,no,yes,no,yes,response,yes,0,100,High,yes,yes, 13 | P14,Mono,2,yes,yes,no,yes,stable/progress,yes,70,30,Medium,no,yes, 14 | P15,Mono,2,no,yes,no,yes,stable/progress,no,95,5,Low,no,no, 15 | P16,Combo,2,no,yes,no,no,not measurable,yes,95,5,Low,no,no, 16 | P17,Combo,2,no,yes,no,yes,response,yes,5,95,High,yes,yes, 17 | P18,Combo,1,no,yes,no,yes,not measurable,no,90,10,Medium,no,yes, 18 | P19,Mono,1,no,yes,no,yes,not measurable,yes,5,95,High,yes,yes, 19 | P20,Mono,2,no,yes,no,no,not 
measurable,no,95,5,Low,no,no, 20 | P21,Combo,1,yes,no,no,no,stable/progress,no,30,70,High,no,yes, 21 | P22,Mono,2,yes,yes,no,yes,not measurable,yes,80,20,Medium,no,yes, 22 | P23,Mono,2,no,yes,no,yes,stable/progress,no,90,10,Medium,no,yes, 23 | P24,Combo,2,no,yes,no,no,stable/progress,no,95,5,Low,no,no, 24 | P25,Combo,1,no,yes,no,no,stable/progress,no,95,5,Low,no,no, 25 | P26,Combo,2,yes,no,yes,yes,response,yes,90,10,Medium,no,yes, 26 | P27,Mono,2,no,yes,no,yes,stable/progress,no,95,5,Low,no,no, 27 | P28,Mono,2,no,yes,no,no,response,yes,85,15,Medium,no,yes, 28 | P29,Mono,2,no,yes,no,yes,stable/progress,yes,95,5,Low,no,no, 29 | P30,Combo,2,no,yes,no,yes,not measurable,yes,80,20,Medium,no,yes, 30 | P31,Mono,2,yes,no,yes,no,stable/progress,no,95,5,Low,no,no, 31 | P32,Combo,1,no,yes,no,yes,stable/progress,yes,40,60,High,no,yes, 32 | ,,,,,,,,,,,,,, 33 | ,,,,,,,,,,,,,, 34 | -------------------------------------------------------------------------------- /main_figure/figure6_and_related_supplemental_figure/README.md: -------------------------------------------------------------------------------- 1 | # description of file 2 | 3 | ## celltypist.ipynb 4 | used to analyze the HNSCC data mentioned in Supplemental Figure S8 5 | 6 | ## survival_analysis.Rmd 7 | used to generate figures in Figure 6 and Supplemental Figure S8.
8 | involves data generated by this study 9 | 10 | ## HNSCC.analysis.Rmd 11 | used to generate HNSCC-related figure in Supplemental Figure S8 12 | 13 | ## analysis_of_bulk_RNAseq_survival.Rmd 14 | used to generate bulk RNA-seq survival figure in Supplemental Figure S8 15 | -------------------------------------------------------------------------------- /main_figure/figure6_and_related_supplemental_figure/analysis_of_bulk_RNAseq_survival.Rmd: -------------------------------------------------------------------------------- 1 | ```{r} 2 | library(matrixStats) 3 | library(tidyverse) 4 | library(readxl) 5 | library(ggpubr) 6 | library(survival) 7 | #library(ggsurvfit) 8 | library(survminer) 9 | ``` 10 | 11 | 12 | 13 | # OAK study 14 | ```{r} 15 | OAK.meta <- read.csv("Genetech/EGAF00005797822/go28915_anon_subsetted_BYN_n699.csv") 16 | #dim(OAK.meta) 17 | #head(OAK.meta) 18 | 19 | OAK <- read.csv("Genetech/EGAF00005797824/anonymized_OAK-TPMs2.csv") 20 | dim(OAK) 21 | colnames(OAK) <- sapply(colnames(OAK), function(x) str_split(x, "[.]")[[1]][2]) 22 | OAK[1:10,1:10] 23 | 24 | OAK.meta$sample <- colnames(OAK) 25 | 26 | OAK <- t(t(OAK) / rowSums(t(OAK)) * 1000000) 27 | #OAK 28 | #colSums(OAK) 29 | 30 | OAK.meta.ICI <- OAK.meta %>% filter(ACTARM=="MPDL3280A" & HIST=="NON-SQUAMOUS") 31 | ICI.sampleID <- OAK.meta.ICI$sample 32 | OAK.ICI <- OAK[, ICI.sampleID] 33 | 34 | OAK.ICI.z.scored <- (OAK.ICI - rowMeans(OAK.ICI) ) / rowSds(OAK.ICI) 35 | 36 | ``` 37 | ## validating PD1 vs chemo 38 | ```{r} 39 | fit.data <- OAK.meta 40 | fit.cox <- coxph(Surv(fit.data$OS_MONTHS, fit.data$OS_CENSOR) ~ fit.data$ACTARM, data = fit.data) 41 | summary(fit.cox) 42 | 43 | fit <- survfit(Surv(fit.data$OS_MONTHS, fit.data$OS_CENSOR) ~ fit.data$ACTARM, data = fit.data) 44 | print(fit) 45 | 46 | p <- ggsurvplot(fit, 47 | pval = TRUE, conf.int = FALSE, 48 | risk.table = TRUE, # Add risk table 49 | risk.table.col = "strata", # Change risk table color by groups 50 | linetype = "strata", # Change line type 
by groups 51 | #surv.median.line = "hv", # Specify median survival 52 | ggtheme = theme_classic(), # Change ggplot2 theme 53 | risk.table.pos = "in", 54 | xlab = "Months from surgery", 55 | title = "", 56 | palette = c("#00C598", "#C9EFE8","#9FDC98", "#24A07E") 57 | ) 58 | p 59 | 60 | 61 | ## PFS 62 | fit.cox <- coxph(Surv(fit.data$PFS_MONTHS, fit.data$PFS_CENSOR) ~ fit.data$ACTARM, data = fit.data) # NOTE(review): PFS_CENSOR was paired with OS_MONTHS; assumes OAK.meta has a PFS_MONTHS column - confirm 63 | summary(fit.cox) 64 | 65 | fit <- survfit(Surv(fit.data$PFS_MONTHS, fit.data$PFS_CENSOR) ~ fit.data$ACTARM, data = fit.data) # same PFS time variable as the Cox fit above 66 | print(fit) 67 | 68 | p <- ggsurvplot(fit, 69 | pval = TRUE, conf.int = FALSE, 70 | risk.table = TRUE, # Add risk table 71 | risk.table.col = "strata", # Change risk table color by groups 72 | linetype = "strata", # Change line type by groups 73 | #surv.median.line = "hv", # Specify median survival 74 | ggtheme = theme_classic(), # Change ggplot2 theme 75 | risk.table.pos = "in", 76 | xlab = "Months from surgery", 77 | title = "", 78 | palette = c("#00C598", "#C9EFE8","#9FDC98", "#24A07E") 79 | ) 80 | p 81 | 82 | ``` 83 | 84 | 85 | ## signature approach 86 | ```{r} 87 | b.cell.signature <- c("CD79A", "FCRL4") 88 | treg.signature <- c("FOXP3", "CCR8") 89 | texp.signature <- c("GZMK", "IL7R") 90 | 91 | fit.data <- OAK.meta.ICI %>% mutate( 92 | b.cell.signature=colMeans2(OAK.ICI.z.scored[b.cell.signature,]), 93 | texp.signature=colMeans2(OAK.ICI.z.scored[texp.signature,]), 94 | ) %>% mutate( 95 | b.cell.signature=ifelse(b.cell.signature\n", 288 | "\n", 301 | "\n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " 
\n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | "
predicted_labelsover_clusteringmajority_voting
P304_ACGAGCCGTGTGCCTG_1MANA specific117MANA specific
P64_TACTTACCAGGTCTCG_1MANA specific307Viral specific
P481_GTGCATAGTAAATGAC_1Viral specific42MANA specific
P435_CGATCGGGTTATCGGT_1MANA specific15MANA specific
P182_GTGCGGTTCCAACCAA_1Viral specific6MANA specific
............
P469_GGTGTTATCAGGCAAG_1MANA specific309MANA specific
P454_GGGTCTGCAGACGTAG_1Viral specific41Viral specific
P53_ACTGCTCTCCAGATCA_1Viral specific155Viral specific
P44_CTGATAGGTTCGTCTC_1MANA specific1MANA specific
P45_GCTCCTACACAGGTTT_1Viral specific66MANA specific
\n", 379 | "

144162 rows × 3 columns

\n", 380 | "" 381 | ], 382 | "text/plain": [ 383 | " predicted_labels over_clustering majority_voting\n", 384 | "P304_ACGAGCCGTGTGCCTG_1 MANA specific 117 MANA specific\n", 385 | "P64_TACTTACCAGGTCTCG_1 MANA specific 307 Viral specific\n", 386 | "P481_GTGCATAGTAAATGAC_1 Viral specific 42 MANA specific\n", 387 | "P435_CGATCGGGTTATCGGT_1 MANA specific 15 MANA specific\n", 388 | "P182_GTGCGGTTCCAACCAA_1 Viral specific 6 MANA specific\n", 389 | "... ... ... ...\n", 390 | "P469_GGTGTTATCAGGCAAG_1 MANA specific 309 MANA specific\n", 391 | "P454_GGGTCTGCAGACGTAG_1 Viral specific 41 Viral specific\n", 392 | "P53_ACTGCTCTCCAGATCA_1 Viral specific 155 Viral specific\n", 393 | "P44_CTGATAGGTTCGTCTC_1 MANA specific 1 MANA specific\n", 394 | "P45_GCTCCTACACAGGTTT_1 Viral specific 66 MANA specific\n", 395 | "\n", 396 | "[144162 rows x 3 columns]" 397 | ] 398 | }, 399 | "execution_count": 79, 400 | "metadata": {}, 401 | "output_type": "execute_result" 402 | } 403 | ], 404 | "source": [ 405 | "predictions.predicted_labels" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "id": "f7c3b5b1", 412 | "metadata": {}, 413 | "outputs": [], 414 | "source": [] 415 | } 416 | ], 417 | "metadata": { 418 | "kernelspec": { 419 | "display_name": "Python 3 (ipykernel)", 420 | "language": "python", 421 | "name": "python3" 422 | }, 423 | "language_info": { 424 | "codemirror_mode": { 425 | "name": "ipython", 426 | "version": 3 427 | }, 428 | "file_extension": ".py", 429 | "mimetype": "text/x-python", 430 | "name": "python", 431 | "nbconvert_exporter": "python", 432 | "pygments_lexer": "ipython3", 433 | "version": "3.9.11" 434 | } 435 | }, 436 | "nbformat": 4, 437 | "nbformat_minor": 5 438 | } 439 | -------------------------------------------------------------------------------- /main_figure/figure6_and_related_supplemental_figure/survival_analysis.Rmd: -------------------------------------------------------------------------------- 1 | ```{r} 2 | 
library(tidyverse) 3 | library(dplyr) 4 | library(readxl) 5 | library(ggpubr) 6 | library(survival) 7 | library(ggsurvfit) 8 | library(survminer) 9 | library(risksetROC) 10 | library(forestplot) 11 | ``` 12 | 13 | ```{r} 14 | survival.metadata.clean <- read_csv("survival.metadata.final.csv") %>% mutate(MPR_pPR_nPR=ifelse(PRR_group=="pCR", "MPR", PRR_group)) 15 | survival.metadata.clean 16 | ``` 17 | 18 | 19 | ```{r} 20 | survival.metadata.clean %>% dplyr::group_by(histology) %>% count(MPR) 21 | ``` 22 | 23 | ```{r} 24 | survival.metadata.clean %>% group_by(TCR.classification) %>% count(RFS_status) 25 | survival.metadata.clean %>% group_by(TCR.classification) %>% count(is.na(numberTexClone)) 26 | survival.metadata.clean %>% group_by(TCR.classification) %>% count(numberTexClone==0) 27 | survival.metadata.clean %>% group_by(TCR.classification) %>% count(is.na(Texp.in.Tex.relevant)) 28 | ``` 29 | 30 | 31 | ### global analysis of risk factors 32 | ```{r} 33 | fit.data <- survival.metadata.clean 34 | 35 | #fit.data <- fit.data %>% mutate(filter=ifelse(new_group==1 & MPR=="MPR", TRUE, FALSE)) %>% filter(filter==FALSE) 36 | 37 | covariates <- c("histology", "PRR", "numberTexClone", "numberTregClone", "Texp.in.Tex.relevant", "Treg_CCR8" ) 38 | univ_formulas <- sapply(covariates, 39 | function(x) as.formula(paste('Surv(RFS_months_new, RFS_status_new)~', x))) 40 | 41 | univ_models <- lapply( univ_formulas, function(x){coxph(x, data = fit.data)}) 42 | # Extract data 43 | univ_results <- lapply(univ_models, 44 | function(x){ 45 | x <- summary(x) 46 | p.value<-signif(x$wald["pvalue"], digits=2) 47 | #wald.test<-signif(x$wald["test"], digits=2) 48 | #beta<-signif(x$coef[1], digits=2);#coeficient beta 49 | HR <-signif(x$coef[2], digits=2);#exp(beta) 50 | HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2) 51 | HR.confint.upper <- signif(x$conf.int[,"upper .95"],2) 52 | # HR <- paste0(HR, " (", 53 | # HR.confint.lower, "-", HR.confint.upper, ")") 54 | res<-c(HR, 
HR.confint.lower, HR.confint.upper, p.value) 55 | names(res)<-c("HR", "HR.lower", "HR.upper", 56 | "p.value") 57 | return(res) 58 | #return(exp(cbind(coef(x),confint(x)))) 59 | }) 60 | univ_results 61 | result <- as.data.frame(univ_results) 62 | result <- as.data.frame(t(result)) 63 | result #%>% filter(p.value <0.05) 64 | 65 | tabletext1<-as.character(rownames(result)) 66 | tabletext2<-as.numeric(result[,"p.value"]) 67 | tabletext<-cbind(tabletext1,tabletext2) 68 | 69 | 70 | pdf("final_plots/all.patients.HR.pdf", # File name 71 | width = 8, height = 5, # Width and height in inches 72 | bg = "white", # Background color 73 | colormodel = "RGB", # Color model (cmyk is required for most publications) 74 | paper = "A4") # Paper size 75 | 76 | # Draw the forest plot into the PDF device opened above (closed by dev.off() below) 77 | 78 | forestplot(labeltext=tabletext, # label text (covariate name and p-value columns) 79 | mean = result[,'HR'],## hazard ratio 80 | lower = result[,"HR.lower"],## lower bound of the 95% CI 81 | upper = result[,"HR.upper"],# upper bound of the 95% CI 82 | boxsize = 0.1,## box size 83 | graph.pos=3,# column position of the graph within the table 84 | graphwidth = unit(0.4,"npc"),# width of the graph relative to the table 85 | #fn.ci_norm="fpDrawDiamondCI",# box type: diamond; fpDrawNormalCI, fpDrawCircleCI etc. can be used instead 86 | col=fpColors(box="steelblue", lines="black", zero = "black"),# colors 87 | lwd.ci=2,ci.vertices.height = 0.1,ci.vertices=TRUE,# CI line width, vertex height, draw vertices 88 | zero=1,# x position of the reference line 89 | lwd.zero=2,# reference line width 90 | grid=T, 91 | lwd.xaxis=2,# x-axis line width 92 | title="Hazard Ratio", 93 | xlab="",# x-axis label 94 | clip=c(-Inf,4),# clipping bounds 95 | colgap = unit(0.5,"cm") 96 | ) 97 | dev.off() 98 | ``` 99 | 100 | 101 | ```{r} 102 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$PRR + fit.data$Texp.in.Tex.relevant + fit.data$Treg_CCR8, data = fit.data) 103 | summary(fit.cox) 104 | ``` 105 | 106 | 107 | ```{r} 108 | pdf("final_plots/all.patients.ROC.pdf", # File name 109 | width = 5, height = 5, # Width and height in inches 110 | bg = "white", # Background color 111 | paper = "A4") 112 | fit.data <- fit.data %>% filter(!is.na(Texp.in.Tex.relevant)) 
113 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Texp.in.Tex.relevant, data = fit.data) 114 | PI<-fit.cox$linear.predictor 115 | tROC.hf.texp <-risksetROC(Stime=fit.data$RFS_months_new,status=fit.data$RFS_status_new,marker=PI, predict.time = median(fit.data$RFS_months_new), method="Cox",col="#418849",lty=1,lwd=1, xlab="FP:1-Specificity",ylab="TP:sensitivity",main="Time Dependent ROC") # predict.time uses RFS_months_new, the same time variable as Stime and the Cox fit 116 | tROC.hf.texp$AUC 117 | 118 | 119 | fit.data <- fit.data %>% filter(!is.na(Texp.in.Tex.relevant)) 120 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$PRR, data = fit.data) 121 | PI<-fit.cox$linear.predictor 122 | tROC.hf.prr <-risksetROC(Stime=fit.data$RFS_months_new,status=fit.data$RFS_status_new,marker=PI, predict.time = median(fit.data$RFS_months_new), method="Cox",col="blue",lty=1,lwd=1, xlab="FP:1-Specificity",ylab="TP:sensitivity",main="Time Dependent ROC") # same evaluation time as the Texp curve so the two AUCs are comparable 123 | tROC.hf.prr$AUC 124 | 125 | 126 | lines(tROC.hf.texp$FP, tROC.hf.texp$TP, type="l",col="#418849",xlim=c(0,1), ylim=c(0,1)) 127 | lines(tROC.hf.prr$FP, tROC.hf.prr$TP, type="l",col="blue",xlim=c(0,1), ylim=c(0,1)) 128 | legend("bottomright",c(paste("AUC of texp: ",round(tROC.hf.texp$AUC,3)), paste("AUC of PRR : ",round(tROC.hf.prr$AUC,3))),col=c("#418849","blue"),lty=1,lwd=2,bty = "n") 129 | dev.off() 130 | ``` 131 | 132 | 133 | ```{r} 134 | fit.data <- survival.metadata.clean %>% mutate(numberTregClone.bin= ifelse( 135 | numberTregClone = top25%" # the quantile of survival analysis cohort and nmf cohort are almost the same quantile. 
136 | )) 137 | 138 | fit <- survfit(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$numberTregClone.bin, data = fit.data) 139 | 140 | p <- ggsurvplot(fit, 141 | pval = TRUE, conf.int = FALSE, 142 | risk.table = TRUE, # Add risk table 143 | risk.table.col = "strata", # Change risk table color by groups 144 | linetype = "solid", # Change line type by groups 145 | #surv.median.line = "hv", # Specify median survival 146 | ggtheme = theme_classic(), # Change ggplot2 theme 147 | risk.table.pos = "in", 148 | xlab = "Months from surgery", 149 | title = "all.patients.numberTregClone", 150 | palette = c("#077E64", "#C6595A") 151 | ) 152 | p 153 | ggsave("final_plots/all.patients.numberTregClone.pdf", width = 8, height = 6) 154 | 155 | # 0.235771 0.291866 0.342278 0.520000 156 | #=============== 157 | fit.data <- survival.metadata.clean %>% mutate(Treg_CCR8.bin= ifelse( 158 | Treg_CCR8 = top75%" # the quantile of survival analysis cohort and nmf cohort are almost the same 161 | )) 162 | 163 | fit <- survfit(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Treg_CCR8.bin, data = fit.data) 164 | 165 | p <- ggsurvplot(fit, 166 | pval = TRUE, conf.int = FALSE, 167 | risk.table = TRUE, # Add risk table 168 | risk.table.col = "strata", # Change risk table color by groups 169 | linetype = "solid", # Change line type by groups 170 | #surv.median.line = "hv", # Specify median survival 171 | ggtheme = theme_classic(), # Change ggplot2 theme 172 | risk.table.pos = "in", 173 | xlab = "Months from surgery", 174 | title = "all.patients.CCR8Treg.in.allTreg", 175 | palette = c("#077E64", "#C6595A") 176 | ) 177 | p 178 | ggsave("final_plots/all.patients.CCR8Treg.in.allTreg.pdf", width = 8, height = 6) 179 | #============ 180 | 181 | 182 | #========== 183 | fit.data <- survival.metadata.clean %>% mutate(Texp.in.Tex.relevant.bin= ifelse( 184 | Texp.in.Tex.relevant = top75%" # the quantile of survival analysis cohort and nmf cohort are almost the same 187 | )) 
188 | 189 | fit <- survfit(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Texp.in.Tex.relevant.bin, data = fit.data) 190 | 191 | p <- ggsurvplot(fit, 192 | pval = TRUE, conf.int = FALSE, 193 | risk.table = TRUE, # Add risk table 194 | risk.table.col = "strata", # Change risk table color by groups 195 | linetype = "solid", # Change line type by groups 196 | #surv.median.line = "hv", # Specify median survival 197 | ggtheme = theme_classic(), # Change ggplot2 theme 198 | risk.table.pos = "in", 199 | xlab = "Months from surgery", 200 | title = "all.patients.Texp", 201 | palette = c( "#C6595A","#077E64") 202 | ) 203 | p 204 | ggsave("final_plots/all.patients.Texp.pdf", width = 8, height = 6) 205 | #============ 206 | 207 | #======== 208 | fit.data <- survival.metadata.clean 209 | fit <- survfit(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$MPR_pPR_nPR, data = fit.data) 210 | 211 | p <- ggsurvplot(fit, 212 | pval = TRUE, conf.int = FALSE, 213 | risk.table = TRUE, # Add risk table 214 | risk.table.col = "strata", # Change risk table color by groups 215 | linetype = "solid", # Change line type by groups 216 | #surv.median.line = "hv", # Specify median survival 217 | ggtheme = theme_classic(), # Change ggplot2 theme 218 | risk.table.pos = "in", 219 | xlab = "Months from surgery", 220 | title = "all_patients_by_PRR", 221 | palette = c("#077E64", "#C6595A", "#878586") 222 | ) 223 | p 224 | ggsave("final_plots/all.patients.PRR.pdf", width = 8, height = 6) 225 | #============ 226 | 227 | #====== 228 | fit.data <- survival.metadata.clean 229 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ as.factor(fit.data$new_group), data = fit.data) 230 | summary(fit.cox) 231 | 232 | fit <- survfit(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ as.factor(fit.data$new_group), data = fit.data) 233 | print(fit) 234 | 235 | p <- ggsurvplot(fit, 236 | pval = TRUE, conf.int = FALSE, 237 | risk.table = TRUE, # Add risk 
table 238 | risk.table.col = "strata", # Change risk table color by groups 239 | linetype = "solid", # Change line type by groups 240 | #surv.median.line = "hv", # Specify median survival 241 | ggtheme = theme_classic(), # Change ggplot2 theme 242 | risk.table.pos = "in", 243 | xlab = "Months from surgery", 244 | title = "all_patients_by_TIME", 245 | palette = c("#E64B35", "#4DBBD5", "#00A087", "#3C5488", "#F39B7F") 246 | ) 247 | p 248 | ggsave("final_plots/all.patients.TIME.pdf", width = 8, height = 6) 249 | ``` 250 | 251 | 252 | ### focusing on non-MPR patients 253 | ```{r} 254 | fit.data <- survival.metadata.clean 255 | 256 | fit <- survfit(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$TCR.classification, data = fit.data) 257 | 258 | p <- ggsurvplot(fit, 259 | pval = TRUE, conf.int = FALSE, 260 | risk.table = TRUE, # Add risk table 261 | risk.table.col = "strata", # Change risk table color by groups 262 | linetype = "solid", # Change line type by groups 263 | #surv.median.line = "hv", # Specify median survival 264 | ggtheme = theme_classic(), # Change ggplot2 theme 265 | risk.table.pos = "in", 266 | xlab = "Months from surgery", 267 | title = "all_patients_by_non-MPR_subtype", 268 | palette = c("#D9BFAE", "#8CB4A3", "#7998AD") 269 | ) 270 | p 271 | 272 | ggsave("final_plots/all.patients.non-MPR.subtype.pdf", width = 8, height = 6) 273 | ``` 274 | 275 | 276 | ### type I patients 277 | ```{r} 278 | fit.data <- survival.metadata.clean %>% filter(TCR.classification=="type I") 279 | 280 | #fit.data <- fit.data %>% mutate(filter=ifelse(new_group==1 & MPR=="MPR", TRUE, FALSE)) %>% filter(filter==FALSE) 281 | 282 | covariates <- c("histology","PRR","numberTexClone", "numberTregClone", "Texp.in.Tex.relevant", "Treg_CCR8") 283 | univ_formulas <- sapply(covariates, 284 | function(x) as.formula(paste('Surv(RFS_months_new, RFS_status_new)~', x))) 285 | 286 | univ_models <- lapply( univ_formulas, function(x){coxph(x, data = fit.data)}) 287 | # Extract data 
288 | univ_results <- lapply(univ_models, 289 | function(x){ 290 | x <- summary(x) 291 | p.value<-signif(x$wald["pvalue"], digits=2) 292 | #wald.test<-signif(x$wald["test"], digits=2) 293 | #beta<-signif(x$coef[1], digits=2);#coefficient beta 294 | HR <-signif(x$coef[2], digits=2);#exp(beta) 295 | HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2) 296 | HR.confint.upper <- signif(x$conf.int[,"upper .95"],2) 297 | # HR <- paste0(HR, " (", 298 | # HR.confint.lower, "-", HR.confint.upper, ")") 299 | res<-c(HR, HR.confint.lower, HR.confint.upper, p.value) 300 | names(res)<-c("HR", "HR.lower", "HR.upper", 301 | "p.value") 302 | return(res) 303 | #return(exp(cbind(coef(x),confint(x)))) 304 | }) 305 | univ_results 306 | result <- as.data.frame(univ_results) 307 | result <- as.data.frame(t(result)) 308 | result #%>% filter(p.value <0.05) 309 | 310 | tabletext1<-as.character(rownames(result)) 311 | tabletext2<-as.numeric(result[,"p.value"]) 312 | tabletext<-cbind(tabletext1,tabletext2) 313 | 314 | pdf("final_plots/typeI.patients.HR.pdf", # File name 315 | width = 8, height = 5, # Width and height in inches 316 | bg = "white", # Background color 317 | colormodel = "RGB", # Color model (cmyk is required for most publications) 318 | paper = "A4") # Paper size 319 | forestplot(labeltext=tabletext, # label text (covariate name and p-value columns) 320 | mean = result[,'HR'],## hazard ratio 321 | lower = result[,"HR.lower"],## lower bound of the 95% CI 322 | upper = result[,"HR.upper"],# upper bound of the 95% CI (stray empty argument removed) 323 | boxsize = 0.1,## box size 324 | graph.pos=3,# column position of the graph within the table 325 | graphwidth = unit(0.4,"npc"),# width of the graph relative to the table 326 | #fn.ci_norm="fpDrawDiamondCI",# box type: diamond; fpDrawNormalCI, fpDrawCircleCI etc. can be used instead 327 | col=fpColors(box="steelblue", lines="black", zero = "black"),# colors 328 | lwd.ci=2,ci.vertices.height = 0.1,ci.vertices=TRUE,# CI line width, vertex height, draw vertices 329 | zero=1,# x position of the reference line 330 | lwd.zero=2,# reference line width 331 | grid=TRUE, 332 | lwd.xaxis=2,# x-axis line width 333 | title="Hazard Ratio", 334 | xlab="",# x-axis label 335 | clip=c(-Inf,4),# clipping bounds 336 | colgap = unit(0.5,"cm") 337 | ) 338 | dev.off() 339 | 340 | 341 | fit.cox 
<- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Surgery_N, data = fit.data) 342 | summary(fit.cox) 343 | 344 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Surgery_N + fit.data$Texp.in.Tex.relevant, data = fit.data) 345 | summary(fit.cox) 346 | ``` 347 | 348 | ```{r} 349 | fit.data <- survival.metadata.clean %>% filter(TCR.classification=="type I") %>% mutate(Texp.in.Tex.relevant.bin= ifelse( 350 | #Texp.in.Tex.relevant < 0.7592234, 351 | Texp.in.Tex.relevant < as.vector(quantile(Texp.in.Tex.relevant,na.rm=TRUE)[3]), # NOTE(review): element 3 of the default quantile() output is the median, while the labels below say 25% - confirm the intended cut-off 352 | "< top25%", ">= top25%" # the quantile of survival analysis cohort and nmf cohort are almost the same 353 | )) 354 | 355 | fit <- survfit(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Texp.in.Tex.relevant.bin, data = fit.data) 356 | 357 | p <- ggsurvplot(fit, 358 | pval = TRUE, conf.int = FALSE, 359 | risk.table = TRUE, # Add risk table 360 | risk.table.col = "strata", # Change risk table color by groups 361 | linetype = "solid", # Change line type by groups 362 | #surv.median.line = "hv", # Specify median survival 363 | ggtheme = theme_classic(), # Change ggplot2 theme 364 | risk.table.pos = "in", 365 | xlab = "Months from surgery", 366 | title = "typeI.non-MPR.by.Texp", 367 | palette = c("#C6595A", "#077E64") 368 | ) 369 | p 370 | ggsave("final_plots/typeI.non-MPR.by.Texp.pdf", width = 8, height = 6) 371 | ``` 372 | 373 | 374 | ### type II non-MPR patients 375 | ```{r} 376 | fit.data <- survival.metadata.clean %>% filter(TCR.classification=="type II") 377 | 378 | #fit.data <- fit.data %>% mutate(filter=ifelse(new_group==1 & MPR=="MPR", TRUE, FALSE)) %>% filter(filter==FALSE) 379 | 380 | covariates <- c("histology","PRR","numberTexClone", "numberTregClone", "Texp.in.Tex.relevant", "Treg_CCR8") 381 | univ_formulas <- sapply(covariates, 382 | function(x) as.formula(paste('Surv(RFS_months_new, RFS_status_new)~', x))) 383 | 384 | univ_models <- lapply( univ_formulas, 
function(x){coxph(x, data = fit.data)}) 385 | # Extract data 386 | univ_results <- lapply(univ_models, 387 | function(x){ 388 | x <- summary(x) 389 | p.value<-signif(x$wald["pvalue"], digits=2) 390 | #wald.test<-signif(x$wald["test"], digits=2) 391 | #beta<-signif(x$coef[1], digits=2);#coefficient beta 392 | HR <-signif(x$coef[2], digits=2);#exp(beta) 393 | HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2) 394 | HR.confint.upper <- signif(x$conf.int[,"upper .95"],2) 395 | # HR <- paste0(HR, " (", 396 | # HR.confint.lower, "-", HR.confint.upper, ")") 397 | res<-c(HR, HR.confint.lower, HR.confint.upper, p.value) 398 | names(res)<-c("HR", "HR.lower", "HR.upper", 399 | "p.value") 400 | return(res) 401 | #return(exp(cbind(coef(x),confint(x)))) 402 | }) 403 | univ_results 404 | result <- as.data.frame(univ_results) 405 | result <- as.data.frame(t(result)) 406 | result #%>% filter(p.value <0.05) 407 | 408 | tabletext1<-as.character(rownames(result)) 409 | tabletext2<-as.numeric(result[,"p.value"]) 410 | tabletext<-cbind(tabletext1,tabletext2) 411 | 412 | pdf("final_plots/typeII.patients.HR.pdf", # File name 413 | width = 8, height = 5, # Width and height in inches 414 | bg = "white", # Background color 415 | colormodel = "RGB", # Color model (cmyk is required for most publications) 416 | paper = "A4") # Paper size 417 | forestplot(labeltext=tabletext, # label text (covariate name and p-value columns) 418 | mean = result[,'HR'],## hazard ratio 419 | lower = result[,"HR.lower"],## lower bound of the 95% CI 420 | upper = result[,"HR.upper"],# upper bound of the 95% CI (stray empty argument removed) 421 | boxsize = 0.1,## box size 422 | graph.pos=3,# column position of the graph within the table 423 | graphwidth = unit(0.4,"npc"),# width of the graph relative to the table 424 | #fn.ci_norm="fpDrawDiamondCI",# box type: diamond; fpDrawNormalCI, fpDrawCircleCI etc. can be used instead 425 | col=fpColors(box="steelblue", lines="black", zero = "black"),# colors 426 | lwd.ci=2,ci.vertices.height = 0.1,ci.vertices=TRUE,# CI line width, vertex height, draw vertices 427 | zero=1,# x position of the reference line 428 | lwd.zero=2,# reference line width 429 | grid=TRUE, 430 | lwd.xaxis=2,# x-axis line width 431 | title="Hazard Ratio", 432 | xlab="",# x-axis label 433 | clip=c(-Inf,4),# clipping bounds 434 | colgap = 
unit(0.5,"cm") 435 | ) 436 | dev.off() 437 | 438 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Surgery_N, data = fit.data) 439 | summary(fit.cox) 440 | 441 | 442 | ``` 443 | 444 | ```{r} 445 | fit.data <- survival.metadata.clean %>% filter(TCR.classification=="type II") %>% mutate(Texp.in.Tex.relevant.bin= ifelse( 446 | #Texp.in.Tex.relevant < 0.7592234, 447 | Texp.in.Tex.relevant < as.vector(quantile(Texp.in.Tex.relevant,na.rm=TRUE)[3]), # NOTE(review): element 3 of the default quantile() output is the median, while the labels below say 25% - confirm the intended cut-off 448 | "< top25%", ">= top25%" # the quantile of survival analysis cohort and nmf cohort are almost the same 449 | )) 450 | 451 | fit <- survfit(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Texp.in.Tex.relevant.bin, data = fit.data) 452 | 453 | p <- ggsurvplot(fit, 454 | pval = TRUE, conf.int = FALSE, 455 | risk.table = TRUE, # Add risk table 456 | risk.table.col = "strata", # Change risk table color by groups 457 | linetype = "solid", # Change line type by groups 458 | #surv.median.line = "hv", # Specify median survival 459 | ggtheme = theme_classic(), # Change ggplot2 theme 460 | risk.table.pos = "in", 461 | xlab = "Months from surgery", 462 | title = "typeII.non-MPR.by.subtypes", 463 | palette = c("#C6595A", "#077E64") 464 | ) 465 | p 466 | ggsave("final_plots/typeII.non-MPR.by.Texp.pdf", width = 8, height = 6) 467 | ``` 468 | 469 | 470 | ### MPR patients 471 | ```{r} 472 | fit.data <- survival.metadata.clean %>% filter(TCR.classification=="MPR") 473 | 474 | #fit.data <- fit.data %>% mutate(filter=ifelse(new_group==1 & MPR=="MPR", TRUE, FALSE)) %>% filter(filter==FALSE) 475 | 476 | covariates <- c("numberTexClone", "numberTregClone", "Texp.in.Tex.relevant", "Treg_CCR8", "PRR", "histology") 477 | univ_formulas <- sapply(covariates, 478 | function(x) as.formula(paste('Surv(RFS_months_new, RFS_status_new)~', x))) 479 | 480 | univ_models <- lapply( univ_formulas, function(x){coxph(x, data = fit.data)}) 481 | # Extract data 482 | univ_results <- lapply(univ_models, 483 | function(x){ 484 | 
x <- summary(x) 485 | p.value<-signif(x$wald["pvalue"], digits=2) 486 | wald.test<-signif(x$wald["test"], digits=2) 487 | beta<-signif(x$coef[1], digits=2);#coeficient beta 488 | HR <-signif(x$coef[2], digits=2);#exp(beta) 489 | HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2) 490 | HR.confint.upper <- signif(x$conf.int[,"upper .95"],2) 491 | HR <- paste0(HR, " (", 492 | HR.confint.lower, "-", HR.confint.upper, ")") 493 | res<-c(beta, HR, wald.test, p.value) 494 | names(res)<-c("beta", "HR (95% CI for HR)", "wald.test", 495 | "p.value") 496 | return(res) 497 | #return(exp(cbind(coef(x),confint(x)))) 498 | }) 499 | univ_results 500 | result <- as.data.frame(univ_results) 501 | result <- as.data.frame(t(result)) 502 | result #%>% filter(p.value <0.05) 503 | 504 | 505 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Surgery_N, data = fit.data) 506 | summary(fit.cox) 507 | ``` 508 | 509 | ### all non-MPR patients 510 | ```{r} 511 | fit.data <- survival.metadata.clean %>% filter(TCR.classification %in% c("type I", "type II")) 512 | 513 | #fit.data <- fit.data %>% mutate(filter=ifelse(new_group==1 & MPR=="MPR", TRUE, FALSE)) %>% filter(filter==FALSE) 514 | 515 | covariates <- c("numberTexClone", "numberTregClone", "Texp.in.Tex.relevant", "Treg_CCR8", "PRR", "histology") 516 | univ_formulas <- sapply(covariates, 517 | function(x) as.formula(paste('Surv(RFS_months_new, RFS_status_new)~', x))) 518 | 519 | univ_models <- lapply( univ_formulas, function(x){coxph(x, data = fit.data)}) 520 | # Extract data 521 | univ_results <- lapply(univ_models, 522 | function(x){ 523 | x <- summary(x) 524 | p.value<-signif(x$wald["pvalue"], digits=2) 525 | #wald.test<-signif(x$wald["test"], digits=2) 526 | #beta<-signif(x$coef[1], digits=2);#coeficient beta 527 | HR <-signif(x$coef[2], digits=2);#exp(beta) 528 | HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2) 529 | HR.confint.upper <- signif(x$conf.int[,"upper .95"],2) 530 | # HR <- 
paste0(HR, " (", 531 | # HR.confint.lower, "-", HR.confint.upper, ")") 532 | res<-c(HR, HR.confint.lower, HR.confint.upper, p.value) 533 | names(res)<-c("HR", "HR.lower", "HR.upper", 534 | "p.value") 535 | return(res) 536 | #return(exp(cbind(coef(x),confint(x)))) 537 | }) 538 | univ_results 539 | result <- as.data.frame(univ_results) 540 | result <- as.data.frame(t(result)) 541 | result #%>% filter(p.value <0.05) 542 | 543 | tabletext1<-as.character(rownames(result)) 544 | tabletext2<-as.numeric(result[,"p.value"]) 545 | tabletext<-cbind(tabletext1,tabletext2) 546 | 547 | 548 | forestplot(labeltext=tabletext, # label text (covariate name and p-value columns) 549 | mean = result[,'HR'],## hazard ratio 550 | lower = result[,"HR.lower"],## lower bound of the 95% CI 551 | upper = result[,"HR.upper"],# upper bound of the 95% CI (stray empty argument removed) 552 | boxsize = 0.1,## box size 553 | graph.pos=3,# column position of the graph within the table 554 | graphwidth = unit(0.4,"npc"),# width of the graph relative to the table 555 | #fn.ci_norm="fpDrawDiamondCI",# box type: diamond; fpDrawNormalCI, fpDrawCircleCI etc. can be used instead 556 | col=fpColors(box="steelblue", lines="black", zero = "black"),# colors 557 | lwd.ci=2,ci.vertices.height = 0.1,ci.vertices=TRUE,# CI line width, vertex height, draw vertices 558 | zero=1,# x position of the reference line 559 | lwd.zero=2,# reference line width 560 | grid=TRUE, 561 | lwd.xaxis=2,# x-axis line width 562 | title="Hazard Ratio", 563 | xlab="",# x-axis label 564 | clip=c(-Inf,4),# clipping bounds 565 | colgap = unit(0.5,"cm") 566 | ) 567 | 568 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$TCR.classification, data = fit.data) 569 | summary(fit.cox) 570 | 571 | 572 | ``` 573 | 574 | ```{r} 575 | ggplot(survival.metadata.clean %>% filter(MPR=="MPR"), aes(x=Texp.in.Tex.relevant)) + geom_histogram() 576 | quantile(survival.metadata.clean$Texp.in.Tex.relevant, na.rm = TRUE) 577 | ``` 578 | 579 | 580 | ```{r} 581 | temp.data <- survival.metadata.clean %>% filter(MPR=="non-MPR") 582 | fit.data <- survival.metadata.clean %>% mutate(Texp.in.Tex.relevant.bin= ifelse(MPR=="MPR", "MPR", 583 | ifelse( 584 | Texp.in.Tex.relevant < as.vector(quantile(temp.data$Texp.in.Tex.relevant,na.rm=TRUE)[3]), 585 | "< top50%", ">= top50%" # the quantile of 
survival analysis cohort and nmf cohort are almost the same 586 | ))) 587 | 588 | fit.data <- fit.data 589 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ factor(fit.data$Texp.in.Tex.relevant.bin, levels=c("MPR", ">= top50%", "% group_by(Texp.in.Tex.relevant.bin) %>% count(PRR_group) 610 | ``` 611 | 612 | 613 | ```{r} 614 | plot.data <- fit.data %>% filter(Texp.in.Tex.relevant.bin %in% c("< top50%", ">= top50%")) %>% mutate(value=1, PRR_group=factor(PRR_group, levels = c("pPR", "nPR"))) # keep only the two non-MPR Texp bins (the "MPR" bin is dropped) 615 | plot.data 616 | ggplot(plot.data, aes(fill=PRR_group, y=value, x=Texp.in.Tex.relevant.bin)) + 617 | geom_bar(position="stack", stat="identity") + scale_fill_manual(values=c("#878586","#C6595A")) + theme_classic() 618 | 619 | ggsave("final_plots/pPRnPR_Texp.pdf", width = 3, height = 4) 620 | 621 | 622 | plot.data <- fit.data %>% filter(Texp.in.Tex.relevant.bin %in% c("< top50%", ">= top50%")) %>% mutate(value=1, histology=factor(histology, levels = c("LUSC", "LUAD"))) # same two non-MPR bins, split by histology 623 | plot.data 624 | ggplot(plot.data, aes(fill=histology, y=value, x=Texp.in.Tex.relevant.bin)) + 625 | geom_bar(position="stack", stat="identity") + scale_fill_manual(values=c("#E97777","#88AB8E")) + theme_classic() 626 | 627 | ggsave("final_plots/LUSCLUAD_Texp.pdf", width = 3, height = 4) 628 | ``` 629 | 630 | 631 | ```{r} 632 | pdf("final_plots/non-MPR.patients.ROC.pdf", # File name 633 | width = 5, height = 5, # Width and height in inches 634 | bg = "white", # Background color 635 | paper = "A4") 636 | fit.data <- fit.data %>% filter(!is.na(Texp.in.Tex.relevant),MPR=="non-MPR") 637 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Texp.in.Tex.relevant, data = fit.data) 638 | PI<-fit.cox$linear.predictor 639 | tROC.hf.texp <-risksetROC(Stime=fit.data$RFS_months_new,status=fit.data$RFS_status_new,marker=PI, predict.time = median(fit.data$RFS_months_new), method="Cox",col="#418849",lty=1,lwd=1, 
xlab="FP:1-Specificity",ylab="TP:sensitivity",main="Time Dependent ROC") 640 | #tROC.hf.texp$AUC 641 | 642 | 643 | fit.data <- fit.data %>% filter(!is.na(Texp.in.Tex.relevant),MPR=="non-MPR") 644 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$PRR, data = fit.data) 645 | PI<-fit.cox$linear.predictor 646 | tROC.hf.prr <-risksetROC(Stime=fit.data$RFS_months_new,status=fit.data$RFS_status_new,marker=PI, predict.time = median(fit.data$RFS_months), method="Cox",col="blue",lty=1,lwd=1, xlab="FP:1-Specificity",ylab="TP:sensitivity",main="Time Dependent ROC") 647 | #tROC.hf.prr$AUC 648 | 649 | 650 | lines(tROC.hf.texp$FP, tROC.hf.texp$TP, type="l",col="#418849",xlim=c(0,1), ylim=c(0,1)) 651 | lines(tROC.hf.prr$FP, tROC.hf.prr$TP, type="l",col="blue",xlim=c(0,1), ylim=c(0,1)) 652 | legend("bottomright",c(paste("AUC of texp: ",round(tROC.hf.texp$AUC,3)), paste("AUC of PRR : ",round(tROC.hf.prr$AUC,3))),col=c("#418849","blue"),lty=1,lwd=2,bty = "n") 653 | dev.off() 654 | ``` 655 | 656 | 657 | ```{r} 658 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Surgery_N, data = fit.data) 659 | summary(fit.cox) 660 | 661 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Surgery_N + fit.data$Texp.in.Tex.relevant, data = fit.data) 662 | summary(fit.cox) 663 | 664 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$PRR + fit.data$Texp.in.Tex.relevant, data = fit.data) 665 | summary(fit.cox) 666 | 667 | 668 | fit.data <- fit.data %>% filter(!is.na(Texp.in.Tex.relevant)) 669 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Texp.in.Tex.relevant, data = fit.data) 670 | PI<-fit.cox$linear.predictor 671 | tROC.hf <-risksetROC(Stime=fit.data$RFS_months_new,status=fit.data$RFS_status_new,marker=PI, predict.time = median(fit.data$RFS_months_new), method="Cox",col="green",lty=1,lwd=1, 
xlab="FP:1-Specificity",ylab="TP:sensitivity",main="Time Dependent ROC") 672 | tROC.hf$AUC 673 | 674 | 675 | fit.data <- fit.data %>% filter(!is.na(Texp.in.Tex.relevant)) 676 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$PRR, data = fit.data) 677 | PI<-fit.cox$linear.predictor 678 | tROC.hf <-risksetROC(Stime=fit.data$RFS_months_new,status=fit.data$RFS_status_new,marker=PI, predict.time = median(fit.data$RFS_months_new), method="Cox",col="green",lty=1,lwd=1, xlab="FP:1-Specificity",ylab="TP:sensitivity",main="Time Dependent ROC") 679 | tROC.hf$AUC 680 | 681 | fit.data <- fit.data %>% filter(!is.na(Texp.in.Tex.relevant)) 682 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Texp.in.Tex.relevant + fit.data$Surgery_N, data = fit.data) 683 | PI<-fit.cox$linear.predictor 684 | tROC.hf <-risksetROC(Stime=fit.data$RFS_months_new,status=fit.data$RFS_status_new,marker=PI, predict.time = median(fit.data$RFS_months_new), method="Cox",col="green",lty=1,lwd=1, xlab="FP:1-Specificity",ylab="TP:sensitivity",main="Time Dependent ROC") 685 | tROC.hf$AUC 686 | 687 | fit.data <- fit.data %>% filter(!is.na(Texp.in.Tex.relevant)) 688 | fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Surgery_N, data = fit.data) 689 | PI<-fit.cox$linear.predictor 690 | tROC.hf <-risksetROC(Stime=fit.data$RFS_months_new,status=fit.data$RFS_status_new,marker=PI, predict.time = median(fit.data$RFS_months_new), method="Cox",col="green",lty=1,lwd=1, xlab="FP:1-Specificity",ylab="TP:sensitivity",main="Time Dependent ROC") 691 | tROC.hf$AUC 692 | ``` 693 | 694 | 695 | 696 | 697 | 698 | 699 | 700 | 701 | 702 | --------------------------------------------------------------------------------