├── .gitignore
├── README.md
├── main_data
    ├── B_cell_aggregates.xlsx
    ├── CD8Tex_relevant_clonotype_number_over2.csv
    ├── GSE200996_HNSCC_T_with_TCR_annotation.csv
    ├── NMF_all_group_5.csv
    ├── expanded_CD4Treg_clonotype_number_over2.csv
    └── survival.metadata.final.csv
└── main_figure
    ├── figure1_and_related_supplemental_figure
        ├── README.md
        ├── assessment.csv
        ├── logistic_regression_analysis_of_clinical_metadata_in_association_with_MPR_rate.Rmd
        ├── metadata_analysis.Rmd
        └── pathological_assessment.Rmd
    ├── figure2_and_related_supplemental_figure
        ├── NMF.R
        ├── README.md
        ├── proportion_plot.R
        └── robustness_of_NMF.R
    ├── figure3_and_related_supplemental_figure
        ├── TNBC_zyy.R
        ├── analysis_of_chemo_only_NSCLC.R
        ├── number_B_aggrates.R
        ├── startrac.R
        └── visualization_of_NKT_clones.Rmd
    ├── figure4_and_related_supplemental_figure
        ├── CCR8.IF.Rmd
        ├── CCR8_IF.csv
        ├── CCR8_Treg_in_all_Treg.R
        ├── DEG_volcano_plot.R
        └── T_cell_clonal_composition_in_individual_patients.Rmd
    ├── figure5_and_related_supplemental_figure
        ├── CCR8IHC.xlsm
        ├── CCR8_IHC_non-MPR_subtypes.Rmd
        ├── Tex_relevant_clononumber_6group.R
        ├── Treg_clonenumber_6group.R
        ├── fig5D_Tex_Treg_clone_number_scatter.R
        └── figure5E_alluvium.R
    └── figure6_and_related_supplemental_figure
        ├── HNSCC.analysis.Rmd
        ├── HNSCC.meta.csv
        ├── README.md
        ├── analysis_of_bulk_RNAseq_survival.Rmd
        ├── celltypist.ipynb
        └── survival_analysis.Rmd


/.gitignore:
--------------------------------------------------------------------------------
 1 | # History files
 2 | .Rhistory
 3 | .Rapp.history
 4 | 
 5 | # Session Data files
 6 | .RData
 7 | 
 8 | # User-specific files
 9 | .Ruserdata
10 | 
11 | # Example code in package build process
12 | *-Ex.R
13 | 
14 | # Output files from R CMD build
15 | /*.tar.gz
16 | 
17 | # Output files from R CMD check
18 | /*.Rcheck/
19 | 
20 | # RStudio files
21 | .Rproj.user/
22 | 
23 | # produced vignettes
24 | vignettes/*.html
25 | vignettes/*.pdf
26 | 
27 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
28 | .httr-oauth
29 | 
30 | # knitr and R markdown default cache directories
31 | *_cache/
32 | /cache/
33 | 
34 | # Temporary files created by R markdown
35 | *.utf8.md
36 | *.knit.md
37 | 
38 | # R Environment Variables
39 | .Renviron
40 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # neoadjuvant treatment of NSCLC
2 | 
3 | This code is used to produce most of the figure panels.
4 | contact: liuzedaosk@163.com
5 | 


--------------------------------------------------------------------------------
/main_data/B_cell_aggregates.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zwj-tina/neoadjuvant-treatment-of-NSCLC/63ba67130a8e4c11e0bb7f5fac31b12ffcb2fab4/main_data/B_cell_aggregates.xlsx


--------------------------------------------------------------------------------
/main_data/CD8Tex_relevant_clonotype_number_over2.csv:
--------------------------------------------------------------------------------
  1 | "","samples","number"
  2 | "1","P1","24"
  3 | "2","P102","51"
  4 | "3","P103","28"
  5 | "4","P105","19"
  6 | "5","P106","17"
  7 | "6","P107","1"
  8 | "7","P113","10"
  9 | "8","P114","3"
 10 | "9","P118","10"
 11 | "10","P119","28"
 12 | "11","P121","21"
 13 | "12","P122","5"
 14 | "13","P124","30"
 15 | "14","P125","13"
 16 | "15","P127","9"
 17 | "16","P130","8"
 18 | "17","P131","31"
 19 | "18","P137","12"
 20 | "19","P142","12"
 21 | "20","P146","21"
 22 | "21","P150","4"
 23 | "22","P156","4"
 24 | "23","P158","10"
 25 | "24","P16","8"
 26 | "25","P164","1"
 27 | "26","P166","3"
 28 | "27","P172","1"
 29 | "28","P174","9"
 30 | "29","P179","7"
 31 | "30","P18","5"
 32 | "31","P180","46"
 33 | "32","P182","35"
 34 | "33","P184","5"
 35 | "34","P189","13"
 36 | "35","P190","12"
 37 | "36","P192","5"
 38 | "37","P199","8"
 39 | "38","P2","9"
 40 | "39","P20","21"
 41 | "40","P200","14"
 42 | "41","P201","15"
 43 | "42","P204","8"
 44 | "43","P205","22"
 45 | "44","P209","5"
 46 | "45","P213","12"
 47 | "46","P218","14"
 48 | "47","P221","7"
 49 | "48","P226","14"
 50 | "49","P227","6"
 51 | "50","P23","5"
 52 | "51","P24","3"
 53 | "52","P243","8"
 54 | "53","P247","2"
 55 | "54","P248","2"
 56 | "55","P249","3"
 57 | "56","P258","1"
 58 | "57","P259","11"
 59 | "58","P26","24"
 60 | "59","P261","8"
 61 | "60","P264","3"
 62 | "61","P265","1"
 63 | "62","P266","22"
 64 | "63","P267","3"
 65 | "64","P268","8"
 66 | "65","P270","5"
 67 | "66","P273","13"
 68 | "67","P277","3"
 69 | "68","P278","3"
 70 | "69","P280","3"
 71 | "70","P281","6"
 72 | "71","P282","1"
 73 | "72","P285","7"
 74 | "73","P286","8"
 75 | "74","P287","13"
 76 | "75","P288","2"
 77 | "76","P289","94"
 78 | "77","P291","6"
 79 | "78","P298","7"
 80 | "79","P3","3"
 81 | "80","P304","8"
 82 | "81","P309","10"
 83 | "82","P31","45"
 84 | "83","P312","25"
 85 | "84","P32","10"
 86 | "85","P321","7"
 87 | "86","P322","10"
 88 | "87","P323","12"
 89 | "88","P328","2"
 90 | "89","P329","7"
 91 | "90","P33","17"
 92 | "91","P334","3"
 93 | "92","P337","13"
 94 | "93","P339","1"
 95 | "94","P34","27"
 96 | "95","P340","1"
 97 | "96","P346","2"
 98 | "97","P347","7"
 99 | "98","P349","9"
100 | "99","P35","8"
101 | "100","P36","6"
102 | "101","P365","3"
103 | "102","P367","3"
104 | "103","P368","4"
105 | "104","P372","9"
106 | "105","P380","1"
107 | "106","P387","6"
108 | "107","P389","29"
109 | "108","P39","3"
110 | "109","P390","6"
111 | "110","P391","30"
112 | "111","P394","3"
113 | "112","P395","18"
114 | "113","P399","21"
115 | "114","P4","5"
116 | "115","P402","1"
117 | "116","P403","5"
118 | "117","P404","6"
119 | "118","P405","2"
120 | "119","P407","3"
121 | "120","P412","2"
122 | "121","P414","10"
123 | "122","P416","8"
124 | "123","P417","3"
125 | "124","P418","9"
126 | "125","P420","3"
127 | "126","P423","25"
128 | "127","P427","1"
129 | "128","P428","18"
130 | "129","P43","30"
131 | "130","P432","6"
132 | "131","P44","25"
133 | "132","P445","16"
134 | "133","P45","13"
135 | "134","P453","15"
136 | "135","P454","15"
137 | "136","P457","3"
138 | "137","P459","2"
139 | "138","P46","18"
140 | "139","P462","11"
141 | "140","P463","1"
142 | "141","P464","6"
143 | "142","P467","2"
144 | "143","P469","15"
145 | "144","P47","34"
146 | "145","P470","25"
147 | "146","P471","3"
148 | "147","P472","14"
149 | "148","P473","19"
150 | "149","P477","5"
151 | "150","P481","27"
152 | "151","P482","31"
153 | "152","P485","15"
154 | "153","P486","4"
155 | "154","P491","18"
156 | "155","P494","1"
157 | "156","P498","1"
158 | "157","P5","41"
159 | "158","P509","11"
160 | "159","P510","18"
161 | "160","P511","14"
162 | "161","P520","26"
163 | "162","P523","23"
164 | "163","P524","6"
165 | "164","P525","5"
166 | "165","P527","3"
167 | "166","P528","28"
168 | "167","P53","25"
169 | "168","P533","3"
170 | "169","P540","21"
171 | "170","P550","12"
172 | "171","P567","13"
173 | "172","P57","22"
174 | "173","P574","5"
175 | "174","P579","15"
176 | "175","P58","6"
177 | "176","P587","22"
178 | "177","P592","7"
179 | "178","P595","3"
180 | "179","P6","4"
181 | "180","P63","66"
182 | "181","P64","9"
183 | "182","P66","3"
184 | "183","P69","13"
185 | "184","P70","12"
186 | "185","P71","2"
187 | "186","P73","4"
188 | "187","P74","2"
189 | "188","P9","15"
190 | "189","P90","5"
191 | "190","P92","4"
192 | "191","P93","3"
193 | "192","P96","30"
194 | "193","P115","0"
195 | "194","P161","0"
196 | "195","P168","0"
197 | "196","P185","0"
198 | "197","P206","0"
199 | "198","P207","0"
200 | "199","P233","0"
201 | "200","P234","0"
202 | "201","P235","0"
203 | "202","P257","0"
204 | "203","P279","0"
205 | "204","P296","0"
206 | "205","P325","0"
207 | "206","P496","0"
208 | "207","P502","0"
209 | "208","P59","0"
210 | "209","P67","0"
211 | "210","P68","0"
212 | "211","P160","0"
213 | "212","P292","0"
214 | "213","P293","0"
215 | "214","P483","0"
216 | 


--------------------------------------------------------------------------------
/main_data/NMF_all_group_5.csv:
--------------------------------------------------------------------------------
  1 | "","sampleID","group"
  2 | "1","P1","4"
  3 | "2","P102","1"
  4 | "3","P103","3"
  5 | "4","P105","3"
  6 | "5","P106","4"
  7 | "6","P107","1"
  8 | "7","P111","2"
  9 | "8","P113","3"
 10 | "9","P114","2"
 11 | "10","P115","5"
 12 | "11","P118","3"
 13 | "12","P119","3"
 14 | "13","P121","3"
 15 | "14","P122","2"
 16 | "15","P124","3"
 17 | "16","P125","3"
 18 | "17","P127","2"
 19 | "18","P130","2"
 20 | "19","P131","4"
 21 | "20","P137","3"
 22 | "21","P142","3"
 23 | "22","P146","3"
 24 | "23","P150","1"
 25 | "24","P156","4"
 26 | "25","P158","2"
 27 | "26","P16","3"
 28 | "27","P160","3"
 29 | "28","P161","5"
 30 | "29","P164","5"
 31 | "30","P166","2"
 32 | "31","P168","5"
 33 | "32","P172","5"
 34 | "33","P174","2"
 35 | "34","P179","2"
 36 | "35","P18","1"
 37 | "36","P180","3"
 38 | "37","P182","4"
 39 | "38","P184","3"
 40 | "39","P185","2"
 41 | "40","P189","1"
 42 | "41","P190","3"
 43 | "42","P192","3"
 44 | "43","P199","2"
 45 | "44","P2","3"
 46 | "45","P20","3"
 47 | "46","P200","2"
 48 | "47","P201","3"
 49 | "48","P204","3"
 50 | "49","P205","3"
 51 | "50","P206","5"
 52 | "51","P207","2"
 53 | "52","P209","2"
 54 | "53","P213","3"
 55 | "54","P218","3"
 56 | "55","P22","4"
 57 | "56","P221","4"
 58 | "57","P223","4"
 59 | "58","P226","3"
 60 | "59","P227","1"
 61 | "60","P23","1"
 62 | "61","P233","3"
 63 | "62","P234","1"
 64 | "63","P235","3"
 65 | "64","P24","1"
 66 | "65","P243","3"
 67 | "66","P247","5"
 68 | "67","P248","2"
 69 | "68","P249","2"
 70 | "69","P257","1"
 71 | "70","P258","1"
 72 | "71","P259","4"
 73 | "72","P26","4"
 74 | "73","P261","3"
 75 | "74","P264","3"
 76 | "75","P265","5"
 77 | "76","P266","5"
 78 | "77","P267","2"
 79 | "78","P268","2"
 80 | "79","P270","3"
 81 | "80","P273","3"
 82 | "81","P277","1"
 83 | "82","P278","5"
 84 | "83","P279","2"
 85 | "84","P280","2"
 86 | "85","P281","4"
 87 | "86","P282","5"
 88 | "87","P285","2"
 89 | "88","P286","1"
 90 | "89","P287","1"
 91 | "90","P288","3"
 92 | "91","P289","4"
 93 | "92","P29","3"
 94 | "93","P291","2"
 95 | "94","P292","2"
 96 | "95","P293","5"
 97 | "96","P296","1"
 98 | "97","P298","3"
 99 | "98","P3","2"
100 | "99","P304","3"
101 | "100","P309","2"
102 | "101","P31","3"
103 | "102","P312","3"
104 | "103","P32","4"
105 | "104","P321","5"
106 | "105","P322","2"
107 | "106","P323","4"
108 | "107","P325","1"
109 | "108","P328","3"
110 | "109","P329","3"
111 | "110","P33","3"
112 | "111","P334","3"
113 | "112","P337","3"
114 | "113","P339","2"
115 | "114","P34","3"
116 | "115","P340","5"
117 | "116","P346","5"
118 | "117","P347","2"
119 | "118","P349","2"
120 | "119","P35","3"
121 | "120","P36","3"
122 | "121","P365","1"
123 | "122","P367","1"
124 | "123","P368","2"
125 | "124","P372","2"
126 | "125","P380","2"
127 | "126","P387","3"
128 | "127","P389","3"
129 | "128","P39","5"
130 | "129","P390","1"
131 | "130","P391","4"
132 | "131","P394","2"
133 | "132","P395","4"
134 | "133","P399","2"
135 | "134","P4","3"
136 | "135","P402","5"
137 | "136","P403","2"
138 | "137","P404","4"
139 | "138","P405","5"
140 | "139","P407","3"
141 | "140","P412","1"
142 | "141","P414","3"
143 | "142","P416","4"
144 | "143","P417","1"
145 | "144","P418","3"
146 | "145","P420","3"
147 | "146","P423","3"
148 | "147","P427","3"
149 | "148","P428","4"
150 | "149","P43","2"
151 | "150","P432","4"
152 | "151","P44","2"
153 | "152","P445","2"
154 | "153","P45","3"
155 | "154","P453","3"
156 | "155","P454","4"
157 | "156","P457","1"
158 | "157","P459","5"
159 | "158","P46","3"
160 | "159","P462","4"
161 | "160","P463","2"
162 | "161","P464","3"
163 | "162","P467","4"
164 | "163","P469","3"
165 | "164","P47","3"
166 | "165","P470","3"
167 | "166","P471","1"
168 | "167","P472","3"
169 | "168","P473","2"
170 | "169","P477","5"
171 | "170","P481","3"
172 | "171","P482","4"
173 | "172","P483","5"
174 | "173","P485","3"
175 | "174","P486","3"
176 | "175","P491","4"
177 | "176","P494","3"
178 | "177","P496","5"
179 | "178","P498","3"
180 | "179","P5","3"
181 | "180","P502","5"
182 | "181","P509","4"
183 | "182","P510","2"
184 | "183","P511","2"
185 | "184","P520","3"
186 | "185","P523","3"
187 | "186","P524","2"
188 | "187","P525","2"
189 | "188","P527","3"
190 | "189","P528","5"
191 | "190","P53","3"
192 | "191","P533","2"
193 | "192","P540","4"
194 | "193","P550","2"
195 | "194","P567","4"
196 | "195","P57","3"
197 | "196","P574","3"
198 | "197","P579","3"
199 | "198","P58","3"
200 | "199","P587","3"
201 | "200","P59","1"
202 | "201","P592","3"
203 | "202","P595","5"
204 | "203","P6","3"
205 | "204","P62","4"
206 | "205","P63","4"
207 | "206","P64","3"
208 | "207","P66","3"
209 | "208","P67","2"
210 | "209","P68","2"
211 | "210","P69","4"
212 | "211","P70","2"
213 | "212","P71","1"
214 | "213","P73","1"
215 | "214","P74","1"
216 | "215","P84","2"
217 | "216","P9","3"
218 | "217","P90","3"
219 | "218","P92","1"
220 | "219","P93","3"
221 | "220","P94","1"
222 | "221","P96","3"
223 | "222","P97","2"
224 | 


--------------------------------------------------------------------------------
/main_data/expanded_CD4Treg_clonotype_number_over2.csv:
--------------------------------------------------------------------------------
  1 | "","sampleID","number"
  2 | "1","P1","45"
  3 | "2","P102","9"
  4 | "3","P103","21"
  5 | "4","P105","10"
  6 | "5","P106","45"
  7 | "6","P113","1"
  8 | "7","P118","8"
  9 | "8","P119","2"
 10 | "9","P121","34"
 11 | "10","P122","1"
 12 | "11","P124","9"
 13 | "12","P127","1"
 14 | "13","P130","3"
 15 | "14","P131","56"
 16 | "15","P137","3"
 17 | "16","P142","1"
 18 | "17","P150","1"
 19 | "18","P156","5"
 20 | "19","P164","4"
 21 | "20","P168","1"
 22 | "21","P179","6"
 23 | "22","P18","1"
 24 | "23","P180","10"
 25 | "24","P182","15"
 26 | "25","P184","1"
 27 | "26","P189","6"
 28 | "27","P190","1"
 29 | "28","P20","9"
 30 | "29","P201","14"
 31 | "30","P204","25"
 32 | "31","P205","4"
 33 | "32","P209","4"
 34 | "33","P218","5"
 35 | "34","P221","37"
 36 | "35","P226","10"
 37 | "36","P234","2"
 38 | "37","P249","1"
 39 | "38","P259","29"
 40 | "39","P26","75"
 41 | "40","P261","2"
 42 | "41","P264","2"
 43 | "42","P265","2"
 44 | "43","P266","2"
 45 | "44","P273","1"
 46 | "45","P279","5"
 47 | "46","P281","9"
 48 | "47","P282","1"
 49 | "48","P288","2"
 50 | "49","P289","19"
 51 | "50","P291","1"
 52 | "51","P293","3"
 53 | "52","P298","3"
 54 | "53","P3","3"
 55 | "54","P304","4"
 56 | "55","P309","7"
 57 | "56","P31","7"
 58 | "57","P312","5"
 59 | "58","P32","54"
 60 | "59","P321","2"
 61 | "60","P323","10"
 62 | "61","P325","1"
 63 | "62","P328","1"
 64 | "63","P329","14"
 65 | "64","P33","59"
 66 | "65","P334","1"
 67 | "66","P337","6"
 68 | "67","P34","10"
 69 | "68","P340","4"
 70 | "69","P346","1"
 71 | "70","P347","2"
 72 | "71","P349","3"
 73 | "72","P35","9"
 74 | "73","P36","28"
 75 | "74","P367","8"
 76 | "75","P368","1"
 77 | "76","P372","2"
 78 | "77","P380","1"
 79 | "78","P387","1"
 80 | "79","P389","12"
 81 | "80","P39","1"
 82 | "81","P390","5"
 83 | "82","P391","37"
 84 | "83","P395","7"
 85 | "84","P399","3"
 86 | "85","P4","1"
 87 | "86","P403","13"
 88 | "87","P404","22"
 89 | "88","P407","3"
 90 | "89","P414","8"
 91 | "90","P416","6"
 92 | "91","P418","6"
 93 | "92","P423","1"
 94 | "93","P427","4"
 95 | "94","P428","43"
 96 | "95","P43","2"
 97 | "96","P432","21"
 98 | "97","P445","3"
 99 | "98","P45","34"
100 | "99","P453","18"
101 | "100","P454","53"
102 | "101","P457","3"
103 | "102","P459","4"
104 | "103","P462","25"
105 | "104","P464","2"
106 | "105","P467","7"
107 | "106","P469","9"
108 | "107","P47","6"
109 | "108","P470","1"
110 | "109","P471","1"
111 | "110","P472","17"
112 | "111","P481","7"
113 | "112","P482","17"
114 | "113","P483","13"
115 | "114","P486","8"
116 | "115","P491","20"
117 | "116","P5","14"
118 | "117","P509","36"
119 | "118","P510","18"
120 | "119","P511","64"
121 | "120","P520","14"
122 | "121","P523","21"
123 | "122","P525","6"
124 | "123","P527","16"
125 | "124","P528","33"
126 | "125","P53","2"
127 | "126","P540","28"
128 | "127","P550","1"
129 | "128","P567","14"
130 | "129","P57","3"
131 | "130","P574","4"
132 | "131","P579","5"
133 | "132","P58","2"
134 | "133","P587","8"
135 | "134","P592","3"
136 | "135","P595","6"
137 | "136","P6","1"
138 | "137","P63","35"
139 | "138","P64","1"
140 | "139","P66","4"
141 | "140","P67","4"
142 | "141","P69","19"
143 | "142","P70","21"
144 | "143","P73","15"
145 | "144","P9","8"
146 | "145","P90","3"
147 | "146","P92","2"
148 | "147","P93","4"
149 | "148","P96","3"
150 | "149","P107","0"
151 | "150","P114","0"
152 | "151","P115","0"
153 | "152","P125","0"
154 | "153","P146","0"
155 | "154","P158","0"
156 | "155","P16","0"
157 | "156","P161","0"
158 | "157","P166","0"
159 | "158","P174","0"
160 | "159","P185","0"
161 | "160","P192","0"
162 | "161","P199","0"
163 | "162","P2","0"
164 | "163","P200","0"
165 | "164","P206","0"
166 | "165","P207","0"
167 | "166","P227","0"
168 | "167","P23","0"
169 | "168","P233","0"
170 | "169","P235","0"
171 | "170","P24","0"
172 | "171","P243","0"
173 | "172","P247","0"
174 | "173","P248","0"
175 | "174","P257","0"
176 | "175","P258","0"
177 | "176","P267","0"
178 | "177","P268","0"
179 | "178","P277","0"
180 | "179","P278","0"
181 | "180","P280","0"
182 | "181","P285","0"
183 | "182","P286","0"
184 | "183","P287","0"
185 | "184","P296","0"
186 | "185","P322","0"
187 | "186","P339","0"
188 | "187","P365","0"
189 | "188","P405","0"
190 | "189","P412","0"
191 | "190","P417","0"
192 | "191","P420","0"
193 | "192","P44","0"
194 | "193","P46","0"
195 | "194","P463","0"
196 | "195","P473","0"
197 | "196","P477","0"
198 | "197","P485","0"
199 | "198","P494","0"
200 | "199","P496","0"
201 | "200","P502","0"
202 | "201","P524","0"
203 | "202","P59","0"
204 | "203","P68","0"
205 | "204","P74","0"
206 | "205","P160","0"
207 | "206","P172","0"
208 | "207","P213","0"
209 | "208","P270","0"
210 | "209","P292","0"
211 | "210","P394","0"
212 | "211","P402","0"
213 | "212","P498","0"
214 | "213","P533","0"
215 | "214","P71","0"
216 | 


--------------------------------------------------------------------------------
/main_figure/figure1_and_related_supplemental_figure/README.md:
--------------------------------------------------------------------------------
 1 | # description of the code
 2 | 
 3 | ## logistic regression analysis
 4 | used to generate the forest plot in Supplemental Figure S1
 5 | 
 6 | ## pathological assessment
 7 | used to generate the figure for consistency assessment of MPR in Supplemental Figure S1
 8 | 
 9 | ## metadata analysis
10 | used to generate the waterfall plot in Figure 1 and to calculate the pCR and MPR rates.
11 | Note: the "clean_metadata_all_surgical_sample_lusc_luad_only_excluded_egfr_alk.csv" file contains the same data as Supplemental Table S1, so Supplemental Table S1 can be used in its place.
12 | 


--------------------------------------------------------------------------------
/main_figure/figure1_and_related_supplemental_figure/assessment.csv:
--------------------------------------------------------------------------------
 1 | ﻿patient_number,histology,CICAMS,SPH,GDPH,notes,
 2 | P10,LUAD,50,30,65,,
 3 | P102,LUSC,100,100,100,,
 4 | P105,LUAD,90,90,97,,
 5 | P106,LUSC,40,20,50,,
 6 | P110,LUSC/LUAD,20,40,60,LUSC/LUAD,
 7 | P12,LUSC,30,30,50,,
 8 | P122,LUAD,100,100,100,,
 9 | P124,LUSC,100,100,100,tumor bed,
10 | P130,LUAD,80,100,100,,
11 | P131,LUSC,25,30,25,,
12 | P136,LUAD (Large cell lung cancer),100,100,100,bad staining,
13 | P137,LUSC,100,100,100,,
14 | P141,LUAD,80,50,95,,
15 | P142,LUSC,100,100,100,,
16 | P145,LUAD,60,50,85,,
17 | P146,LUSC,100,100,100,,
18 | P149,LUAD,60,65,92,,
19 | P163,LUAD,100,99,100,bad staining,
20 | P166,LUSC,90,40,100,LUAD?,
21 | P173,LUAD,5,10,20,,
22 | P175,LUAD,95,30,20,controversial viable or not,
23 | P176,LUAD,70,70,90,,
24 | P18,LUAD,95,90,100,,
25 | P182,LUAD,85,40,70,bad staining,
26 | P188-1,LUSC,60,,50,,
27 | P188-2,LUSC,70,20,15,,
28 | P19,LUAD,70,50,80,,
29 | P209,LUAD,95,99,100,,
30 | P22,LUSC,30,30,45,,
31 | P25,LUSC,100,70,100,bad staining,bad tissue
32 | P26,LUSC,40,40,50,,
33 | P3,LUSC,10,20,35,,
34 | P32,LUSC,15,20,30,,
35 | P33,LUAD,100,100,100,,
36 | P35,LUSC,60,50,85,,
37 | P36,LUAD,100,99,100,,
38 | P38,LUAD,90,98,50,,
39 | P4,LUSC,100,100,100,,
40 | P43,LUAD,100,100,100,,
41 | P44,LUSC,100,100,100,tumor bed,
42 | P46,LUSC,100,100,100,,
43 | P47,LUSC,100,100,100,bad imaging,
44 | P50,LUAD,100,100,100,,
45 | P52,LUAD,50,55,60,,
46 | P53,LUSC,100,99,100,,
47 | P62,LUSC,50,50,60,,
48 | P63,LUSC,90,65,75,,
49 | P64,LUAD,100,100,100,,
50 | P66,LUSC,100,98,100,,
51 | P69,LUSC,100,100,100,,
52 | P70,LUAD,5,10,65,,
53 | P71,LUAD,100,100,100,normal tissue,
54 | P73,LUSC,100,100,100,,
55 | P75,LUAD,40,25,70,,
56 | P84,LUSC,100,100,100,,
57 | P88,LUAD,95,98,99,,
58 | P90,LUSC,100,100,100,,
59 | P92,LUSC,100,100,100,,
60 | P96,LUAD,100,100,100,,
61 | P97,LUSC,60,65,80,,
62 | 


--------------------------------------------------------------------------------
/main_figure/figure1_and_related_supplemental_figure/logistic_regression_analysis_of_clinical_metadata_in_association_with_MPR_rate.Rmd:
--------------------------------------------------------------------------------
  1 | ```{r}
  2 | library(tidyverse)
  3 | library(ggpubr)
  4 | library(ggplot2)
  5 | 
  6 | # Load the cleaned clinical metadata; the table holds both ICI+chemo and chemo-only patients.
  7 | metadata.lusc.luad.no.egfr.alk <- read_csv("clean_metadata_all_surgical_sample_lusc_luad_only_excluded_egfr_alk.csv")
  8 | # Print how many rows were read.
  9 | nrow(metadata.lusc.luad.no.egfr.alk)
 10 | # Keep rows with a recorded pathological response (MPR / non-MPR / pCR) and a non-missing PD1 value.
 11 | table.for.clinical.metadata.summary <- filter(
 12 |   metadata.lusc.luad.no.egfr.alk,
 13 |   `Pathological Response` %in% c("MPR", "non-MPR", "pCR"), !is.na(PD1))
 14 | 
 15 | table.for.clinical.metadata.summary
 16 | ```
 17 | 
 18 | ```{r}
 19 | table.for.clinical.metadata.regression <- table.for.clinical.metadata.summary[c(  # keep only the columns used below
 20 |   "Age", "Gender", "issmoke", "pathology", "Platinum","Cycles", "Pathological Response",
 21 |   "center", "PD1", "Chemotherapy", "before_N", "PD-L1TPS", "Pre-treatment Staging",
 22 |   "grouped_staging")]
 23 | table.for.clinical.metadata.regression <- table.for.clinical.metadata.regression %>% filter(!PD1=="No")  # PD1 == "No" marks chemo-only patients; drop them
 24 | # Recode each predictor into a small set of categories. ifelse() returns NA where its test is NA, so missing inputs stay NA.
 25 | table.for.clinical.metadata.regression <- table.for.clinical.metadata.regression %>% mutate(
 26 |   `Pathological Response`=ifelse(`Pathological Response` %in% c("MPR", "pCR"), "MPR", "non-MPR"),  # pCR is counted as MPR
 27 |   Age=ifelse(Age<=65, "<=65", ">65"),
 28 |   Platinum=ifelse(Platinum=="Carboplatin", "Carboplatin", 
 29 |                   ifelse(Platinum=="Cisplatin", "Cisplatin", "others")),
 30 |   Cycles=ifelse(Cycles=="2", "2 cycles",
 31 |                 ifelse(Cycles=="3", "3 cycles",
 32 |                        ifelse(Cycles=="4", "4 cycles", "others"))),
 33 |   PD1=ifelse(grepl("Pembrolizumab", PD1), "Pembrolizumab",  # first matching drug name wins
 34 |              ifelse(grepl("Nivolumab", PD1), "Nivolumab", 
 35 |                     ifelse(grepl("Sintilimab", PD1), "Sintilimab", 
 36 |                            ifelse(grepl("Tislelizumab", PD1), "Tislelizumab", 
 37 |                                   ifelse(grepl("Camrelizumab", PD1), "Camrelizumab", "others"))))),
 38 |   second_chemotherapy=ifelse(Chemotherapy=="No", "No",  # a missing Chemotherapy gives NA here; those rows are removed by the filters below
 39 |                              ifelse(grepl("Paclitaxel", Chemotherapy), "Paclitaxel",
 40 |                                     ifelse(grepl("Abraxane", Chemotherapy), "Paclitaxel",  # Abraxane and Docetaxel are labelled "Paclitaxel" too
 41 |                                            ifelse(grepl("Docetaxel", Chemotherapy), "Paclitaxel",
 42 |                                                   ifelse(grepl("Gemcitabine", Chemotherapy), "Gemcitabine",
 43 |                                                          ifelse( grepl("Pemetrexed", Chemotherapy), "Pemetrexed",
 44 |                                                                 "others")))))),  # was ifelse(is.na(Chemotherapy), "others", "others"): both branches were identical
 45 |   `PD-L1TPS`=ifelse(`PD-L1TPS`<0.01, "<1%",  # NOTE(review): thresholds assume PD-L1TPS is stored as a fraction (0-1) - confirm
 46 |                     ifelse(`PD-L1TPS`>=0.5, ">=50%", "1~50%"))
 47 | )
 48 | # Drop the catch-all / uninformative categories (rows where a tested column is NA are dropped as well).
 49 | table.for.clinical.metadata.regression <- table.for.clinical.metadata.regression %>% 
 50 |   filter(!PD1=="others") %>% 
 51 |   filter(!Cycles=="others") %>% 
 52 |   filter(!second_chemotherapy=="No") %>%
 53 |   filter(!second_chemotherapy=="others") %>%
 54 |   filter(!grouped_staging=="not available")
 55 | 
 56 | # Final column set: the raw Chemotherapy and "Pre-treatment Staging" columns are left out.
 57 | table.for.clinical.metadata.regression <- table.for.clinical.metadata.regression[c(
 58 |   "Age", "Gender", "issmoke", "pathology", "Platinum","Cycles",
 59 |   "center", "PD1", "before_N", "PD-L1TPS", "Pathological Response", "second_chemotherapy",
 60 |   "grouped_staging")]
 61 | table.for.clinical.metadata.regression
 62 | # write.csv() also writes the row names as an unnamed first column (its default).
 63 | write.csv(table.for.clinical.metadata.regression, "metadata_table_cleaned_for_regression_analysis.csv")
 64 | ```
 65 | 
 66 | ```{r}
 67 | library("autoReg")
 68 | library("dplyr")
 69 | library("ggplot2")
 70 | library("ggpubr")
 71 | library("ggsci")
 72 | # Reload the cleaned table and turn the response into a 0/1 outcome (1 = "MPR", 0 = anything else).
 73 | regression_table <- read.csv("metadata_table_cleaned_for_regression_analysis.csv")
 74 | regression_table$Pathological.Response <- ifelse(regression_table$Pathological.Response=="MPR", 1, 0)
 75 | regression_table
 76 | ```
 77 | 
 78 | ```{r}
 79 | LUSC.regression_table <- filter(regression_table, pathology == "LUSC")
 80 | LUAD.regression_table <- filter(regression_table, pathology == "LUAD")
 81 | # For each histology subset, print the row count and the number of missing values per column.
 82 | nrow(LUSC.regression_table)
 83 | vapply(LUSC.regression_table, function(column) sum(is.na(column)), integer(1))
 84 | 
 85 | nrow(LUAD.regression_table)
 86 | vapply(LUAD.regression_table, function(column) sum(is.na(column)), integer(1))
 87 | ```
 88 | 
 89 | 
 90 | ```{r}
 91 | LUSC.LUAD.overall.log <- glm(Pathological.Response ~ Age + Gender + issmoke + Platinum +
 92 |                                            Cycles + center + pathology + PD1 + before_N + second_chemotherapy +
 93 |                                            grouped_staging, data=regression_table, family=binomial) 
 94 | # ^ Binomial GLM (logistic regression) on the pooled LUSC+LUAD table; `pathology` is a covariate
 95 | #   here and PD.L1TPS is not part of this formula.
 96 | summary(LUSC.LUAD.overall.log)
 97 | #LUSC.LUAD.result<-autoReg(LUSC.LUAD.overall.log,uni=TRUE,multi=TRUE, threshold=0.01)
 98 | #LUSC.LUAD.result %>% myft()
 99 | # (commented-out variant: table with multi=TRUE and a plot recoloured with the NEJM colour scales)
100 | #myplot <- modelPlot(LUSC.LUAD.overall.log, uni=TRUE, show.ref = TRUE, threshold=0.01,   change.pointsize = T)
101 | #myplot$p <- myplot$p + scale_fill_nejm() + scale_color_nejm()
102 | #myplot
103 | 
104 | # Active version: autoReg table requested with uni=TRUE, multi=FALSE, rendered through myft().
105 | #summary(LUSC.LUAD.overall.log)
106 | LUSC.LUAD.result<-autoReg(LUSC.LUAD.overall.log,uni=TRUE,multi=FALSE, threshold=0.01)
107 | LUSC.LUAD.result %>% myft()
108 | # Plot of the same model with the same uni/multi/threshold options; evaluating `myplot` displays it.
109 | myplot <- modelPlot(LUSC.LUAD.overall.log, uni=TRUE, multi=FALSE, show.ref = TRUE, threshold=0.01,   change.pointsize = T)
110 | myplot
111 | #myplot$p <- myplot$p + scale_fill_nejm() + scale_color_nejm()
112 | #myplot
113 | # ggsave() gets no plot argument, so it saves ggplot2's last plot. NOTE(review): confirm that this is the modelPlot output.
114 | ggsave("metadata_regression_analysis.pdf")
115 | ```
116 | 
117 | 
118 | ```{r}
119 | #======== LUSC only: same predictors as the pooled model, without `pathology`
120 | LUSC.overall.log <- glm(Pathological.Response ~ Age + Gender + issmoke + Platinum +
121 |                                            Cycles + center + PD1 + before_N + second_chemotherapy +
122 |                                            grouped_staging, data=LUSC.regression_table, family=binomial) 
123 | 
124 | # Print the model summary, then the autoReg table requested with uni=TRUE, multi=TRUE, threshold=0.1.
125 | summary(LUSC.overall.log)
126 | LUSC.result<-autoReg(LUSC.overall.log,uni=TRUE,multi=TRUE,threshold=0.1)
127 | LUSC.result
128 | ```
129 | 
130 | 
131 | ```{r}
132 | #===== LUAD only: same predictors as the pooled model, without `pathology`
133 | 
134 | LUAD.overall.log <- glm(Pathological.Response ~ Age + Gender + issmoke + Platinum +
135 |                                            Cycles + center + PD1 + before_N + second_chemotherapy +
136 |                                            grouped_staging, data=LUAD.regression_table, family=binomial) 
137 | # Print the model summary, then the autoReg table requested with uni=TRUE, multi=TRUE, threshold=0.1.
138 | summary(LUAD.overall.log)
139 | LUAD.result<-autoReg(LUAD.overall.log,uni=TRUE,multi=TRUE,threshold=0.1)
140 | LUAD.result
141 | ```
142 | 
143 | ## Analyze PD-L1 only
144 | ```{r}
145 | LUSC.PDL1.log <- glm(Pathological.Response ~ PD.L1TPS, data=LUSC.regression_table, family=binomial) 
146 | # ^ Logistic regression of the 0/1 response on the PD-L1 TPS category alone, LUSC rows only.
147 | #   PD.L1TPS is the "PD-L1TPS" column as renamed by read.csv() when the cleaned table was reloaded.
148 | summary(LUSC.PDL1.log)
149 | LUSC.PDL1.result<-autoReg(LUSC.PDL1.log,uni=TRUE, multi=FALSE, threshold=1)
150 | LUSC.PDL1.result
151 | # Plot the model; ggsave() below has no plot argument and saves ggplot2's last plot (NOTE(review): confirm it is `myplot`).
152 | myplot <- modelPlot(LUSC.PDL1.log, uni=TRUE, show.ref = TRUE, multi=FALSE, threshold=1,   change.pointsize = T)
153 | myplot
154 | ggsave("LUSC_PD-L1_regression_analysis.pdf")
155 | ```
156 | 
157 | 
158 | ```{r}
159 | LUAD.PDL1.log <- glm(Pathological.Response ~ PD.L1TPS, data=LUAD.regression_table, family=binomial) 
160 | # ^ Logistic regression of the 0/1 response on the PD-L1 TPS category alone, LUAD rows only.
161 | summary(LUAD.PDL1.log)
162 | LUAD.PDL1.result<-autoReg(LUAD.PDL1.log,uni=TRUE, multi=FALSE, threshold=1)
163 | LUAD.PDL1.result
164 | # Plot the model; ggsave() below has no plot argument and saves ggplot2's last plot (NOTE(review): confirm it is `myplot`).
165 | myplot <- modelPlot(LUAD.PDL1.log, uni=TRUE, show.ref = TRUE, multi=FALSE, threshold=1,   change.pointsize = T)
166 | myplot
167 | ggsave("LUAD_PD-L1_regression.pdf", height = 5, width = 15)
168 | ```
169 | 
170 | 


--------------------------------------------------------------------------------
/main_figure/figure1_and_related_supplemental_figure/metadata_analysis.Rmd:
--------------------------------------------------------------------------------
  1 | ```{r}
  2 | # do not use renv or any project lib here
  3 | 
  4 | library(tidyverse)
  5 | library(ggpubr)
  6 | library(ggplot2)
  7 | 
  8 | # The metadata table mixes ICI+chemo patients with chemo-only patients.
  9 | metadata.lusc.luad.no.egfr.alk <- read_csv("clean_metadata_all_surgical_sample_lusc_luad_only_excluded_egfr_alk.csv")
 10 | 
 11 | nrow(metadata.lusc.luad.no.egfr.alk)
 12 | # Split on PD1: "No" is chemo-only, any other non-missing value is ICI+chemo (a missing PD1 lands in neither subset).
 13 | metadata.lusc.luad.no.egfr.alk.chemo.only <- filter(metadata.lusc.luad.no.egfr.alk, PD1=="No")
 14 | metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo <- filter(metadata.lusc.luad.no.egfr.alk, PD1!="No")
 15 | 
 16 | # reference count of all followed patients treated with ICI+chemo
 17 | nrow(metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo)
 18 | 
 19 | # reference count of all followed patients treated with chemo only
 20 | nrow(metadata.lusc.luad.no.egfr.alk.chemo.only)
 21 | # Wald (normal-approximation) confidence interval for a proportion. `proportion` is a fraction (not a percentage), `n` the sample size, `alpha` the significance level (default 0.05, i.e. the 95% interval that used to be hard-coded). Returns c(lower, upper); the bounds are not clamped to [0, 1].
 22 | confidence.interval <- function(proportion, n, alpha = 0.05){
 23 |   return(proportion + c(-1,1) * qnorm(1-alpha/2) * sqrt(proportion*(1-proportion)/n))
 24 | }
 25 | ```
 26 | 
 27 | 
 28 | ## ICI+chemo
 29 | ```{r}
 30 | # --------------------------- overall response waterfall plot -----------------------
 31 | overall.RVT.plot.immunotherapy.lusc.and.luad <- ggbarplot(
 32 |   metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>% 
 33 |     filter(!is.na(RVT)), # some patients have no RVT value; they are filtered out so the RVT plot can be drawn
 34 |   "Tumor_Sample_Barcode", "RVT", # x = sample barcode, y = RVT
 35 |         sort.val = "desc", 
 36 |         xlab = FALSE,
 37 |         palette = c("#077E64", "#878586"),
 38 |         color = "isMPR",
 39 |         fill = "isMPR",
 40 |         width = 0.6, 
 41 |         sort.by.groups = FALSE, # spelled out: `F` is an ordinary variable that can be reassigned
 42 |         # main = "All patients(n=240)",
 43 |         font.y = 15, 
 44 |         font.legend = 15, 
 45 |         font.tickslab = 15) +
 46 |     theme( 
 47 |         legend.position = "right",
 48 |         axis.ticks.x = element_blank(), # one bar per sample, so x ticks and labels are hidden
 49 |         axis.text.x = element_blank(), 
 50 |         plot.title = element_text(size = 20, hjust = 0.5)) +
 51 |     ylab("Change in primary tumor area with\n viable tumor cells(%)") +
 52 |     scale_y_continuous(
 53 |         expand = c(0, 0),
 54 |         breaks = c(0, -10, -20, -30, -40, -50, -60, -70, -80, -90, -100)
 55 |     ) +
 56 |     geom_hline(yintercept = -90, color = "black", size = 0.5, linetype = "dashed") # dashed reference line at -90; NOTE(review): presumably the MPR cut-off - confirm
 57 | 
 58 | overall.RVT.plot.immunotherapy.lusc.and.luad
 59 | dir.create("plots", showWarnings = FALSE) # make sure the output directory exists before ggsave() writes into it
 60 | ggsave("plots/overall.RVT.plot.immunotherapy.lusc.and.luad.pdf", overall.RVT.plot.immunotherapy.lusc.and.luad, width = 12, height = 5)
 61 | ```
 62 | ```{r}
 63 | metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo
 64 | ```
 65 | 
 66 | 
 67 | 
 68 | ```{r}
 69 | # Overall MPR and pCR rates (in %) for the ICI+chemo group, not distinguishing histology.
 70 | # `Pathological Response` is used instead of "isMPR", because isMPR is derived from RVT (minor inconsistency).
 71 | # Denominator: patients whose `Pathological Response` is MPR, non-MPR or pCR.
 72 | MPR.pCR.Rate.ICI.plus.chemo <- metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>% 
 73 |   filter(`Pathological Response` %in% c("MPR", "non-MPR", "pCR")) %>% # filter patients without pathological response info
 74 |   count(`Pathological Response`) %>%
 75 |   summarise(
 76 |     pCR_rate = sum(n[`Pathological Response` == "pCR"])/sum(n) * 100, 
 77 |     MPR_rate = sum(n[`Pathological Response` %in% c("MPR", "pCR")])/sum(n) * 100) # pCR is counted as MPR
 78 | 
 79 | MPR.pCR.Rate.ICI.plus.chemo
 80 | # 95% confidence intervals; the sample size is the same filtered patient count used as the rate denominator.
 81 | N.sample <- metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>% 
 82 |   filter(`Pathological Response` %in% c("MPR", "non-MPR", "pCR"))
 83 | N.sample <- nrow(N.sample)
 84 | MPR.confidence.interval <- confidence.interval(MPR.pCR.Rate.ICI.plus.chemo$MPR_rate*0.01, N.sample) # *0.01 converts percent to a fraction
 85 | MPR.confidence.interval
 86 | pCR.confidence.interval <- confidence.interval(MPR.pCR.Rate.ICI.plus.chemo$pCR_rate*0.01, N.sample)
 87 | pCR.confidence.interval
 88 | ```
 89 | 
 90 | 
 91 | ```{r}
 92 | # calculate by.LUSC/LUAD MPR.pCR rates
 93 | # One row per histology; rates are percentages over patients with MPR / non-MPR / pCR recorded.
 94 | by.LUSC.LUAD.MPR.pCR.Rate.ICI.plus.chemo <- metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>% 
 95 |   filter(`Pathological Response` %in% c("MPR", "non-MPR", "pCR")) %>% # filter patients without pathological response info
 96 |   count(`Pathological Response`, pathology) %>% 
 97 |   group_by(pathology) %>% 
 98 |   summarise(
 99 |     pCR_rate = sum(n[`Pathological Response` == "pCR"])/sum(n) * 100, 
100 |     MPR_rate = sum(n[`Pathological Response` %in% c("MPR", "pCR")])/sum(n) * 100) %>% # pCR is counted as MPR
101 |   mutate(pathology = factor(pathology, levels = c("LUSC", "LUAD"))) # factor level order: LUSC, then LUAD
102 | # Per-histology sample sizes for the confidence intervals: count only patients with MPR / non-MPR / pCR recorded, i.e. the same patients that form the rate denominators.
103 | N.LUSC.LUAD <- metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>% filter(`Pathological Response` %in% c("MPR", "non-MPR", "pCR")) %>% group_by(pathology) %>% count()
104 | N.LUSC.LUAD
105 | by.LUSC.LUAD.MPR.pCR.Rate.ICI.plus.chemo
106 | 
107 | 
108 | # calculate by.LUSC/LUAD pPR rates
109 | # Based on the four-level `response` column (MPR / pPR / nPR / pCR), not on `Pathological Response`.
110 | by.LUSC.LUAD.MPR.pPR.Rate.ICI.plus.chemo <- metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>% 
111 |   filter(response %in% c("MPR", "pPR", "nPR", "pCR")) %>% # filter patients without pathological response info
112 |   count(response, pathology) %>% 
113 |   group_by(pathology) %>% 
114 |   summarise(
115 |     pPR_rate = sum(n[response == "pPR"])/sum(n) * 100) %>% # percent of pPR among the four response levels
116 |   mutate(pathology = factor(pathology, levels = c("LUSC", "LUAD")))
117 | 
118 | by.LUSC.LUAD.MPR.pPR.Rate.ICI.plus.chemo
119 | 
120 | 
121 | ##====LUSC: 95% confidence intervals of the MPR and pCR rates
122 | N.sample <- N.LUSC.LUAD %>% filter(pathology=="LUSC")
123 | N.sample <- N.sample$n # per-histology patient count taken from N.LUSC.LUAD
124 | temp.table <- by.LUSC.LUAD.MPR.pCR.Rate.ICI.plus.chemo %>% filter(pathology=="LUSC")
125 | MPR.confidence.interval <- confidence.interval(temp.table$MPR_rate*0.01, N.sample) # *0.01 converts percent to a fraction
126 | MPR.confidence.interval
127 | pCR.confidence.interval <- confidence.interval(temp.table$pCR_rate*0.01, N.sample)
128 | pCR.confidence.interval
129 | 
130 | ##====LUAD: same calculation; N.sample, temp.table and both interval variables are overwritten
131 | N.sample <- N.LUSC.LUAD %>% filter(pathology=="LUAD")
132 | N.sample <- N.sample$n
133 | temp.table <- by.LUSC.LUAD.MPR.pCR.Rate.ICI.plus.chemo %>% filter(pathology=="LUAD")
134 | MPR.confidence.interval <- confidence.interval(temp.table$MPR_rate*0.01, N.sample)
135 | MPR.confidence.interval
136 | pCR.confidence.interval <- confidence.interval(temp.table$pCR_rate*0.01, N.sample)
137 | pCR.confidence.interval
138 | ```
139 | 
140 | 
141 | ```{r}
142 | # calculate MPR & pCR rates (in %) by histology (LUSC/LUAD) and by value of `Cycles` (2, 3 or 4)
143 | 
144 | by.LUSC.LUAD.sub.by.cycles.MPR.pCR.Rate.ICI.plus.chemo <- metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>% 
145 |   filter(`Pathological Response` %in% c("MPR", "non-MPR", "pCR"), # filter patients without pathological response info
146 |          Cycles %in% c(2, 3, 4)) %>% 
147 |   count(`Pathological Response`, pathology, Cycles) %>% 
148 |   group_by(pathology, Cycles) %>% 
149 |   summarise(
150 |     pCR_rate = sum(n[`Pathological Response` == "pCR"])/sum(n) * 100, 
151 |     MPR_rate = sum(n[`Pathological Response` %in% c("MPR", "pCR")])/sum(n) * 100) %>% # pCR is counted as MPR
152 |   ungroup() %>% # summarise() only drops the last grouping level, so the result is still grouped by pathology; mutate() cannot modify a grouping column
153 |   mutate(pathology = factor(pathology, levels = c("LUSC", "LUAD")))
154 | by.LUSC.LUAD.sub.by.cycles.MPR.pCR.Rate.ICI.plus.chemo
155 | ```
156 | 
157 | 
158 | ```{r}
159 | #===== MPR & pCR rate difference plot ICI.plus.chemo
160 | # Bar plot of the pCR rate per histology, with a bracket between the LUSC and LUAD bars.
161 | p_pCR <- ggbarplot(by.LUSC.LUAD.MPR.pCR.Rate.ICI.plus.chemo, "pathology", "pCR_rate",
162 |     palette = c("LUSC"="#50B8C3", "LUAD"="#EDAE7B"),
163 |     fill = "pathology",
164 |     xlab = FALSE,
165 |     ylab = "pCR rate(%)",
166 |     label = T,
167 |     legend = "none",
168 |     lab.nb.digits = 1,
169 |     lab.size = 6,
170 |     font.y = 20, 
171 |     font.tickslab = 15,
172 | ) +
173 |   geom_bracket(
174 |     xmin = "LUSC", xmax = "LUAD", y.position = 60,
175 |     label = "Difference = 25.0%", label.size = 6, tip.length = c(0.2, 0.9) # the label text is hard-coded, not computed from the rate table
176 |   ) +
177 |   ylim(0,100)
178 | 
179 | p_pCR
180 | ggsave("plots/pCR_difference_between_LUSC_LUAD.pdf", width = 4.5, height = 8) # no plot argument is given, so ggsave() writes the last plot
181 | 
182 | #==== calculate chi-square: pCR vs. non-pCR by histology
183 | temp.table <- metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>%
184 |   filter(`Pathological Response` %in% c("MPR", "non-MPR", "pCR")) %>% # same evaluable patients as in the rate calculation
185 |   mutate(`Pathological Response` = ifelse(`Pathological Response` == "pCR", "pCR", "non-PCR")) # recode this column only; a whole-data-frame replacement would also rewrite matching cells in other columns
186 | temp.table <- table(temp.table$pathology,temp.table$`Pathological Response`)
187 | temp.table
188 | chisq.test(temp.table)
189 | 
190 | 
191 | 
192 | #============
193 | # Bar plot of the MPR rate per histology, with a bracket between the LUSC and LUAD bars.
194 | p_MPR <- ggbarplot(by.LUSC.LUAD.MPR.pCR.Rate.ICI.plus.chemo, "pathology", "MPR_rate",
195 |                    palette = c("LUSC"="#50B8C3", "LUAD"="#EDAE7B"),
196 |                    fill = "pathology",
197 |     xlab = FALSE,
198 |     ylab = "MPR rate(%)",
199 |     label = T,
200 |     legend = "none",
201 |     lab.nb.digits = 1,
202 |     lab.size = 6,
203 |     font.y = 20, 
204 |     font.tickslab = 15,
205 | ) +
206 |   geom_bracket(
207 |     xmin = "LUSC", xmax = "LUAD", y.position = 75,
208 |     label = "Difference = 25.9%", label.size = 6, tip.length = c(0.2, 0.9) # the label text is hard-coded, not computed from the rate table
209 |   ) + ylim(0,100)
210 | p_MPR
211 | ggsave("plots/MPR_difference_between_LUSC_LUAD.pdf", width = 4.5, height = 8) # no plot argument is given, so ggsave() writes the last plot
212 | 
213 | #==== calculate chi-square: MPR (including pCR) vs. non-MPR by histology
214 | temp.table <- metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>% filter(`Pathological Response` %in% c("MPR", "non-MPR", "pCR")) %>% # same evaluable patients as in the rate calculation
215 |   mutate(`Pathological Response` = ifelse(`Pathological Response` == "pCR", "MPR", as.character(`Pathological Response`))) # recode this column only, leaving all other columns untouched
216 | temp.table <- table(temp.table$pathology,temp.table$`Pathological Response`)
217 | chisq.test(temp.table)
218 | 
219 | 
220 | #============ pPR rate per histology, with a bracket between the LUSC and LUAD bars
221 | # The bracket label is computed from the plotted rate table so that it always matches the bars.
222 | p_pPR <- ggbarplot(by.LUSC.LUAD.MPR.pPR.Rate.ICI.plus.chemo, "pathology", "pPR_rate",
223 |                    palette = c("LUSC"="#50B8C3", "LUAD"="#EDAE7B"),
224 |                    fill = "pathology",
225 |     xlab = FALSE,
226 |     ylab = "pPR rate(%)",
227 |     label = T,
228 |     legend = "none",
229 |     lab.nb.digits = 1,
230 |     lab.size = 6,
231 |     font.y = 20, 
232 |     font.tickslab = 15,
233 | ) +
234 |   geom_bracket(
235 |     xmin = "LUSC", xmax = "LUAD", y.position = 75,
236 |     label = sprintf("Difference = %.1f%%", with(by.LUSC.LUAD.MPR.pPR.Rate.ICI.plus.chemo, abs(pPR_rate[pathology == "LUSC"] - pPR_rate[pathology == "LUAD"]))), label.size = 6, tip.length = c(0.2, 0.9)
237 |   ) + ylim(0,100)
238 | p_pPR
239 | ggsave("plots/pPR_difference_between_LUSC_LUAD.pdf", p_pPR, width = 4.5, height = 8) # plot passed explicitly instead of relying on the last plot
240 | 
241 | #==== calculate chi-square: pPR vs. non-pPR by histology
242 | temp.table <- metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>% filter(response %in% c("MPR", "pPR", "nPR", "pCR")) %>%
243 |   mutate(response = ifelse(response == "pPR", "pPR", "non-pPR")) # `%in%` applied to a whole data frame matches columns, not cells, so the column is recoded directly
244 | temp.table <- table(temp.table$pathology,temp.table$response) # tabulate the recoded `response` column, which is the one that carries pPR
245 | chisq.test(temp.table)
246 | 
247 | #==== stacked bars: share of each response level within each histology
248 | temp.table <- count(filter(metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo, response %in% c("MPR", "pPR", "nPR", "pCR")), response, pathology)
249 | temp.table$response <- factor(temp.table$response, levels = c("nPR", "pPR", "MPR", "pCR")) # fixes the order of the response levels
250 | ggplot(temp.table, aes(x=pathology, y=n, fill=response)) +
251 |     geom_col(position="fill") # same as geom_bar(stat="identity"); "fill" rescales every bar to height 1
252 | 
253 | 
254 | 
255 | ```
256 | 
257 | 
258 | ```{r}
259 | # ---- Waterfall plot of RVT: LUSC patients, ICI+chemo ----
260 | overall.RVT.plot.immunotherapy.lusc <- ggbarplot(
261 |   data = filter(metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo, pathology == "LUSC", !is.na(RVT)), # rows without RVT cannot be drawn
262 |   x = "Tumor_Sample_Barcode",
263 |   y = "RVT",
264 |   fill = "isMPR",
265 |   color = "isMPR",
266 |   palette = c("#077E64", "#878586"),
267 |   width = 0.6,
268 |   sort.val = "desc",
269 |   sort.by.groups = FALSE, # sort over all patients, not within each isMPR group
270 |   xlab = FALSE,
271 |   font.y = 15, font.legend = 15, font.tickslab = 15
272 | ) +
273 |   theme(
274 |     plot.title = element_text(size = 20, hjust = 0.5),
275 |     legend.position = "right",
276 |     axis.text.x = element_blank(),
277 |     axis.ticks.x = element_blank()
278 |   ) +
279 |   ylab("Change in primary tumor area with\n viable tumor cells(%)") +
280 |   scale_y_continuous(breaks = seq(0, -100, by = -10), expand = c(0, 0)) +
281 |   # dashed horizontal reference line at -90
282 |   geom_hline(yintercept = -90, linetype = "dashed", color = "black", size = 0.5)
283 | 
284 | overall.RVT.plot.immunotherapy.lusc
285 | 
286 | ggsave(filename = "plots/overall.RVT.plot.immunotherapy.lusc.pdf", plot = overall.RVT.plot.immunotherapy.lusc, width = 12, height = 5)
287 | 
288 | # ---- Waterfall plot of RVT: LUAD patients, ICI+chemo ----
289 | overall.RVT.plot.immunotherapy.luad <- ggbarplot(
290 |   data = filter(metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo, pathology == "LUAD", !is.na(RVT)), # rows without RVT cannot be drawn
291 |   x = "Tumor_Sample_Barcode",
292 |   y = "RVT",
293 |   fill = "isMPR",
294 |   color = "isMPR",
295 |   palette = c("#077E64", "#878586"),
296 |   width = 0.6,
297 |   sort.val = "desc",
298 |   sort.by.groups = FALSE, # sort over all patients, not within each isMPR group
299 |   xlab = FALSE,
300 |   font.y = 15, font.legend = 15, font.tickslab = 15
301 | ) +
302 |   theme(
303 |     plot.title = element_text(size = 20, hjust = 0.5),
304 |     legend.position = "right",
305 |     axis.text.x = element_blank(),
306 |     axis.ticks.x = element_blank()
307 |   ) +
308 |   ylab("Change in primary tumor area with\n viable tumor cells(%)") +
309 |   scale_y_continuous(breaks = seq(0, -100, by = -10), expand = c(0, 0)) +
310 |   # dashed horizontal reference line at -90
311 |   geom_hline(yintercept = -90, linetype = "dashed", color = "black", size = 0.5)
312 | 
313 | overall.RVT.plot.immunotherapy.luad
314 | 
315 | ggsave(filename = "plots/overall.RVT.plot.immunotherapy.luad.pdf", plot = overall.RVT.plot.immunotherapy.luad, width = 12, height = 5)
316 | 
317 | 
318 | #=======
319 | metadata.lusc.luad.no.egfr.alk.ICI.plus.chemo %>% count(pathology)
320 | 
321 | ```
322 | 
323 | 
324 | ## chemo 
325 | ```{r}
326 | # ---- Waterfall plot of RVT: all chemo-only patients (LUSC and LUAD together) ----
327 | # Patients without a specific RVT value cannot be drawn as bars, so they are removed first.
328 | overall.RVT.plot.chemo.lusc.and.luad <- ggbarplot(
329 |   data = filter(metadata.lusc.luad.no.egfr.alk.chemo.only, !is.na(RVT)),
330 |   x = "Tumor_Sample_Barcode",
331 |   y = "RVT",
332 |   fill = "isMPR",
333 |   color = "isMPR",
334 |   palette = c("#077E64", "#878586"),
335 |   width = 0.6,
336 |   sort.val = "desc",       # bars sorted by decreasing RVT ...
337 |   sort.by.groups = FALSE,  # ... over all patients, not within each isMPR group
338 |   xlab = FALSE,
339 |   font.y = 15,
340 |   font.legend = 15,
341 |   font.tickslab = 15
342 | ) +
343 |   theme(
344 |     plot.title = element_text(size = 20, hjust = 0.5),
345 |     legend.position = "right",
346 |     axis.text.x = element_blank(),   # one bar per sample; the sample labels are hidden
347 |     axis.ticks.x = element_blank()
348 |   ) +
349 |   ylab("Change in primary tumor area with\n viable tumor cells(%)") +
350 |   scale_y_continuous(breaks = seq(0, -100, by = -10), expand = c(0, 0)) +
351 |   # dashed horizontal reference line at -90
352 |   geom_hline(yintercept = -90, linetype = "dashed", color = "black", size = 0.5)
353 | 
354 | overall.RVT.plot.chemo.lusc.and.luad
355 | 
356 | ggsave(filename = "plots/overall.RVT.plot.chemo.lusc.and.luad.pdf", plot = overall.RVT.plot.chemo.lusc.and.luad, width = 12, height = 5)
357 | ```
358 | 
359 | 
360 | ```{r}
361 | # Overall MPR and pCR rates (in %) for the chemo-only group, not distinguishing histology.
362 | # `Pathological Response` is used instead of "isMPR", because isMPR is derived from RVT (minor inconsistency).
363 | # Denominator: patients whose `Pathological Response` is MPR, non-MPR or pCR.
364 | MPR.pCR.Rate.chemo <- metadata.lusc.luad.no.egfr.alk.chemo.only %>% 
365 |   filter(`Pathological Response` %in% c("MPR", "non-MPR", "pCR")) %>% # filter patients without pathological response info
366 |   count(`Pathological Response`) %>%
367 |   summarise(
368 |     pCR_rate = sum(n[`Pathological Response` == "pCR"])/sum(n) * 100, 
369 |     MPR_rate = sum(n[`Pathological Response` %in% c("MPR", "pCR")])/sum(n) * 100) # pCR is counted as MPR
370 | 
371 | metadata.lusc.luad.no.egfr.alk.chemo.only %>% group_by(pathology) %>% count() # number of chemo-only patients per histology (no response filter)
372 | MPR.pCR.Rate.chemo
373 | 
374 | # 95% confidence intervals; the sample size is the same filtered patient count used as the rate denominator.
375 | N.sample <- metadata.lusc.luad.no.egfr.alk.chemo.only %>% 
376 |   filter(`Pathological Response` %in% c("MPR", "non-MPR", "pCR"))
377 | N.sample <- nrow(N.sample)
378 | MPR.confidence.interval <- confidence.interval(MPR.pCR.Rate.chemo$MPR_rate*0.01, N.sample) # *0.01 converts percent to a fraction
379 | MPR.confidence.interval
380 | pCR.confidence.interval <- confidence.interval(MPR.pCR.Rate.chemo$pCR_rate*0.01, N.sample)
381 | pCR.confidence.interval
382 | ```
383 | 
384 | 
385 | ```{r}
386 | # calculate chemo by.LUSC/LUAD MPR.pCR rates
387 | # One row per histology; rates are percentages over patients with MPR / non-MPR / pCR recorded.
388 | by.LUSC.LUAD.MPR.pCR.Rate.chemo<- metadata.lusc.luad.no.egfr.alk.chemo.only %>% 
389 |   filter(`Pathological Response` %in% c("MPR", "non-MPR", "pCR")) %>% # filter patients without pathological response info
390 |   count(`Pathological Response`, pathology) %>% 
391 |   group_by(pathology) %>% 
392 |   summarise(
393 |     pCR_rate = sum(n[`Pathological Response` == "pCR"])/sum(n) * 100, 
394 |     MPR_rate = sum(n[`Pathological Response` %in% c("MPR", "pCR")])/sum(n) * 100) %>% # pCR is counted as MPR
395 |   mutate(pathology = factor(pathology, levels = c("LUSC", "LUAD"))) # factor level order: LUSC, then LUAD
396 | 
397 | by.LUSC.LUAD.MPR.pCR.Rate.chemo
398 | ```
399 | 
400 | ```{r}
401 | # ---- Waterfall plot of RVT: LUSC patients, chemo only ----
402 | overall.RVT.plot.chemo.lusc <- ggbarplot(
403 |   data = filter(metadata.lusc.luad.no.egfr.alk.chemo.only, pathology == "LUSC", !is.na(RVT)), # rows without RVT cannot be drawn
404 |   x = "Tumor_Sample_Barcode",
405 |   y = "RVT",
406 |   fill = "isMPR",
407 |   color = "isMPR",
408 |   palette = c("#077E64", "#878586"),
409 |   width = 0.6,
410 |   sort.val = "desc",
411 |   sort.by.groups = FALSE, # sort over all patients, not within each isMPR group
412 |   xlab = FALSE,
413 |   font.y = 15, font.legend = 15, font.tickslab = 15
414 | ) +
415 |   theme(
416 |     plot.title = element_text(size = 20, hjust = 0.5),
417 |     legend.position = "right",
418 |     axis.text.x = element_blank(),
419 |     axis.ticks.x = element_blank()
420 |   ) +
421 |   ylab("Change in primary tumor area with\n viable tumor cells(%)") +
422 |   scale_y_continuous(breaks = seq(0, -100, by = -10), expand = c(0, 0)) +
423 |   # dashed horizontal reference line at -90
424 |   geom_hline(yintercept = -90, linetype = "dashed", color = "black", size = 0.5)
425 | 
426 | overall.RVT.plot.chemo.lusc
427 | 
428 | ggsave(filename = "plots/overall.RVT.plot.chemo.lusc.pdf", plot = overall.RVT.plot.chemo.lusc, width = 12, height = 5)
429 | 
430 | 
431 | 
432 | # ---- Waterfall plot of RVT: LUAD patients, chemo only ----
433 | overall.RVT.plot.chemo.luad <- ggbarplot(
434 |   data = filter(metadata.lusc.luad.no.egfr.alk.chemo.only, pathology == "LUAD", !is.na(RVT)), # rows without RVT cannot be drawn
435 |   x = "Tumor_Sample_Barcode",
436 |   y = "RVT",
437 |   fill = "isMPR",
438 |   color = "isMPR",
439 |   palette = c("#077E64", "#878586"),
440 |   width = 0.6,
441 |   sort.val = "desc",
442 |   sort.by.groups = FALSE, # sort over all patients, not within each isMPR group
443 |   xlab = FALSE,
444 |   font.y = 15, font.legend = 15, font.tickslab = 15
445 | ) +
446 |   theme(
447 |     plot.title = element_text(size = 20, hjust = 0.5),
448 |     legend.position = "right",
449 |     axis.text.x = element_blank(),
450 |     axis.ticks.x = element_blank()
451 |   ) +
452 |   ylab("Change in primary tumor area with\n viable tumor cells(%)") +
453 |   scale_y_continuous(breaks = seq(0, -100, by = -10), expand = c(0, 0)) +
454 |   # dashed horizontal reference line at -90
455 |   geom_hline(yintercept = -90, linetype = "dashed", color = "black", size = 0.5)
456 | 
457 | overall.RVT.plot.chemo.luad
458 | 
459 | ggsave(filename = "plots/overall.RVT.plot.chemo.luad.pdf", plot = overall.RVT.plot.chemo.luad, width = 12, height = 5)
460 | 
461 | 
462 | #=======
463 | metadata.lusc.luad.no.egfr.alk.chemo.only %>% count(pathology)
464 | 
465 | 
466 | ```
467 | 
468 | 
469 | 


--------------------------------------------------------------------------------
/main_figure/figure1_and_related_supplemental_figure/pathological_assessment.Rmd:
--------------------------------------------------------------------------------
  1 | ```{r}
  2 | library(reshape2); library(RColorBrewer)
  3 | library(tidyverse); library(ggpubr); library(patchwork) # used below but previously never loaded: ggplot2 / add_column() / %>% (tidyverse), stat_cor() (ggpubr), `p1+p2+p3` plot composition (patchwork)
  4 | assess <- read.csv("assessment.csv", sep=',', row.names = 1) # the first CSV column becomes the row names
  5 | ```
  6 | 
  7 | 
  8 | ```{r}
  9 | p1 <- ggplot(assess, aes(x=SPH, y=CICAMS)) + # scatter of the CICAMS column against the SPH column
 10 |   geom_point() + 
 11 |   stat_cor(data=assess, method = "pearson", na.rm = TRUE) + 
 12 |   geom_abline(intercept = 0, slope = 1) + # identity line: points on it have equal values in both columns
 13 |   geom_hline(yintercept = 90, linetype="dotted") + 
 14 |   geom_vline(xintercept = 90, linetype="dotted") + # dotted guides at 90 on both axes
 15 | #  geom_text(aes(SPH,CICAMS,label=rownames(assess))) +
 16 |   coord_fixed() + theme_bw() +
 17 |   xlab("SPH") +
 18 |   ylab("CICAMS") +
 19 |   ggtitle("Pathological assessment CICAMS vs SPH")
 20 | 
 21 | 
 22 | # Same plot for GDPH against CICAMS.
 23 | p2 <- ggplot(assess, aes(x=CICAMS, y=GDPH)) +
 24 |   geom_point() + 
 25 |   stat_cor(data=assess, method = "pearson", na.rm = TRUE) + 
 26 |   geom_abline(intercept = 0, slope = 1) +
 27 |   geom_hline(yintercept = 90, linetype="dotted") + 
 28 |   geom_vline(xintercept = 90, linetype="dotted") + 
 29 | #  geom_text(aes(CICAMS,GDPH,label=rownames(assess))) + 
 30 |   coord_fixed() + theme_bw() +
 31 |   xlab("CICAMS") +
 32 |   ylab("GDPH") +
 33 |   ggtitle("Pathological assessment GDPH vs CICAMS")
 34 | 
 35 | # Same plot for SPH against GDPH.
 36 | p3 <- ggplot(assess, aes(x=GDPH, y=SPH)) +
 37 |   geom_point() + 
 38 |   stat_cor(data=assess, method = "pearson", na.rm = TRUE) + 
 39 |   geom_abline(intercept = 0, slope = 1) +
 40 |   geom_hline(yintercept = 90, linetype="dotted") + 
 41 |   geom_vline(xintercept = 90, linetype="dotted") + 
 42 | #  geom_text(aes(GDPH,SPH,label=rownames(assess))) + 
 43 |   coord_fixed() + theme_bw() +
 44 |   xlab("GDPH") +
 45 |   ylab("SPH") +
 46 |   ggtitle("Pathological assessment SPH vs GDPH")
 47 | 
 48 | # Combine the three panels into one figure and save it.
 49 | p <- p1+p2+p3
 50 | ggsave(p,filename = "assessment_new.png",height = 10,width = 30)
 51 | ```
 52 | 
 53 | ```{r}
 54 | type_prr <- function(prr){
 55 |   # Turn one assessment value into an RVT category label.
 56 |   # NA stays NA; a value of at least 90 maps to "RVT≤10%", anything lower to "RVT>10%".
 57 |   if (is.na(prr)) return (NA)
 58 |   at.least.90 <- as.numeric(prr) >= 90
 59 |   if (at.least.90) {
 60 |     "RVT≤10%"
 61 |   } else {
 62 |     "RVT>10%"
 63 |   }
 64 | }
 65 | # assess <- assess %>% add_column(CICAMS = NA, SPH = NA, GDPH = NA)
 66 | for (name in row.names(assess)){ # row by row, overwrite the three assessment values with their type_prr() category
 67 |   assess[name,]['CICAMS'] <- type_prr(assess[name,]$CICAMS)
 68 |   assess[name,]['SPH'] <- type_prr(assess[name,]$SPH)
 69 |   assess[name,]['GDPH'] <- type_prr(assess[name,]$GDPH)
 70 | } # after this loop the three columns hold category labels instead of numbers
 71 | ```
 72 | 
 73 | ```{r}
 74 | c = order(as.numeric(gsub("P","",row.names(assess)))) # row order by the number left after removing "P" from the row names
 75 | assess_prr <- assess[c,][,c(1:4)] # NOTE(review): columns 1:4 are presumably histology, CICAMS, SPH, GDPH (the levels used below) — confirm against assessment.csv
 76 | assess_prr <- rbind(assess_prr[which(assess_prr$histology=="LUAD"),],assess_prr[which(assess_prr$histology=="LUSC"),]) # LUAD rows first, then LUSC
 77 | assess2 <- melt(assess_prr %>% add_column(patient = row.names(assess_prr)),id="patient") # long format: one row per patient and variable
 78 | head(assess2)
 79 | assess2$patient=factor(assess2$patient, levels=row.names(assess_prr)) # keeps the patient order established above on the x axis
 80 | assess2$variable=factor(assess2$variable,levels = c("histology","CICAMS","SPH","GDPH"))
 81 | #mycolors <- 
 82 | p4 <- assess2%>%ggplot(aes(x=patient,y=variable))+
 83 |   geom_tile(aes(fill=value),color="white",size=1)+
 84 |   scale_x_discrete("",expand = c(0,0))+ 
 85 |   scale_y_discrete("",expand = c(0,0))+
 86 |   xlab("Patient")+
 87 |   theme(# axis.text.x.bottom = element_text(size=10,angle=-45,hjust=0,vjust=0.5),
 88 |         axis.text.y.left = element_text(size=10),
 89 |         axis.text.x=element_blank(), 
 90 |         axis.title.x=element_text(size=10), 
 91 |         axis.ticks.x=element_blank()
 92 |         #axis.text.x = element_text(size=2)
 93 |         )+
 94 |   scale_fill_brewer(palette = "Paired")
 95 | p4
 96 | ggsave("tile_new.png",width = 30,height = 8,units = "cm") # no plot argument is given, so ggsave() writes the last plot
 97 | ```
 98 | 
 99 | 
100 | ```{r}
101 | # Filter bad stainings: keep only patients whose `notes` field is empty, then redraw the tile plot.
102 | c = order(as.numeric(gsub("P","",row.names(assess)))) # row order by the number left after removing "P" from the row names
103 | assess_prr <- assess[c,][which(assess$notes[c]==""),][,c(1:4)] # notes must be reordered with the same index, otherwise which() positions point at different patients
104 | assess_prr <- rbind(assess_prr[which(assess_prr$histology=="LUAD"),],assess_prr[which(assess_prr$histology=="LUSC"),]) # LUAD rows first, then LUSC
105 | assess2 <- melt(assess_prr %>% add_column(patient = row.names(assess_prr)),id="patient") # long format: one row per patient and variable
106 | head(assess2)
107 | assess2$patient=factor(assess2$patient, levels=row.names(assess_prr)) # keeps the patient order established above on the x axis
108 | assess2$variable=factor(assess2$variable,levels = c("histology","CICAMS","SPH","GDPH"))
109 | #mycolors <- 
110 | p5 <- assess2%>%ggplot(aes(x=patient,y=variable))+
111 |   geom_tile(aes(fill=value),color="white",size=1)+
112 |   scale_x_discrete("",expand = c(0,0))+ 
113 |   scale_y_discrete("",expand = c(0,0))+
114 |   xlab("Patient")+
115 |   theme(# axis.text.x.bottom = element_text(size=10,angle=-45,hjust=0,vjust=0.5),
116 |         axis.text.y.left = element_text(size=10),
117 |         axis.text.x=element_blank(), 
118 |         axis.title.x=element_text(size=10), 
119 |         axis.ticks.x=element_blank()
120 |         #axis.text.x = element_text(size=2)
121 |         )+
122 |   scale_fill_brewer(palette = "Paired")
123 | p5
124 | ggsave("tile_new_filtered.png", p5, width = 30,height = 8,units = "cm") # plot passed explicitly instead of relying on the last plot
125 | ```
126 | 
127 | 


--------------------------------------------------------------------------------
/main_figure/figure2_and_related_supplemental_figure/NMF.R:
--------------------------------------------------------------------------------
  1 | library(NMF)
  2 | library(ComplexHeatmap)
  3 | library(reshape2)
  4 | library(tidyverse)
  5 | library(dplyr)
  6 | library(readxl)
  7 | ###################################################################
  8 | info <- read.csv("all_sub_cell_type.csv") # the columns used below are sampleID and sub_cell_type
  9 | head(info)
 10 | length(unique(info$sampleID))
 11 | # Count table (sample x sub cell type), divided by the row totals: fraction of each sub cell type within each sample.
 12 | df <- table(info$sampleID,info$sub_cell_type)
 13 | ratio <- as.data.frame(df / rowSums(df)) # long format: one row per (sample, cell type) pair
 14 | head(ratio)
 15 | colnames(ratio) <- c("sampleID","cell.type","Freq")
 16 | head(ratio)
 17 | 
 18 | sample.info <- as.data.frame(read_excel("sample.xlsx"))
 19 | head(sample.info)
 20 | sample.info <- sample.info[sample.info$sampleID %in% info$sampleID,]
 21 | 
 22 | # Collapse the detailed pathological response into two levels:
 23 | # "MPR" for MPR or pCR, "non-MPR" for every other value (missing values included).
 24 | pathological_response_level <- ifelse(
 25 |   sample.info$pathological_response %in% c("MPR","pCR"),
 26 |   "MPR",
 27 |   "non-MPR"
 28 | )
 29 | 
 30 | 
 31 | 
 32 | sample.info$pathological_response_level <- pathological_response_level
 33 | # Keep the clinical columns used later; for a duplicated sampleID the first row is kept.
 34 | response.meta <- sample.info[, c("sampleID", "smoking_history", "cancer_type",
 35 |                                  "pre_treatment_staging", "PDL1_TPS", "PD1",
 36 |                                  "chemotherapy", "targeted_therapy", "cycles",
 37 |                                  "pathological_response", "pathological_response_level",
 38 |                                  "pathological_response_rate", "radiological_response", "RVT_pre_dominant_histology")]
 39 | response.meta <- distinct(response.meta, sampleID, .keep_all = TRUE)
 40 | head(response.meta)
 41 | length(unique(response.meta$sampleID))
 42 | 
 43 | ratio <- dcast(ratio, sampleID ~ ratio$cell.type, value.var = "Freq") # wide format: one row per sample, one column per cell type
 44 | 
 45 | # Left-join the clinical metadata, then put `ratio` into the row order of the merged table.
 46 | merge.version <- merge(ratio, response.meta, by = "sampleID", all.x = TRUE)
 47 | head(merge.version)
 48 | dim(merge.version)
 49 | 
 50 | rownames(ratio) <- ratio$sampleID
 51 | ratio <- ratio[merge.version$sampleID,]
 52 | dim(ratio)
 53 | 
 54 | ratio <- ratio[,-1] # drop the sampleID column; the sample IDs remain as row names
 55 | head(ratio)
 56 | ratio[is.na(ratio)] <- 0
 57 | 
 58 | 
 59 | #normalization: min-max scaling of every column (cell type) to [0, 1]
 60 | scale_ratio <- apply(ratio, MARGIN = 2, function(x) (x-min(x))/(max(x)-min(x))) # NOTE(review): a constant column divides by zero here — confirm that none exists
 61 | head(scale_ratio)
 62 | scale_ratio <- as.data.frame(scale_ratio)
 63 | head(scale_ratio)
 64 | scale_ratio <- t(scale_ratio) # transposed: rows = cell types, columns = samples
 65 | head(scale_ratio)
 66 | dim(scale_ratio)
 67 | 
 68 | ranks <- 2:10
 69 | estim.coad <- nmf(scale_ratio, ranks, nrun=100,method = "lee") # rank survey over 2..10, 100 runs per rank
 70 | plot(estim.coad)
 71 | 
 72 | # run NMF again with rank = 5
 73 | seed = 2020820
 74 | 
 75 | nmf.rank5 <- nmf(scale_ratio, 
 76 |                  rank = 5, 
 77 |                  nrun=200,
 78 |                  seed = seed, 
 79 |                  method = "lee")
 80 | 
 81 | index <- extractFeatures(nmf.rank5,"max") # list with one vector of row indices per component (method "max")
 82 | 
 83 | #change the order of the index: components 3 and 4 are swapped
 84 | new.index <- list()
 85 | new.index[[1]] <- index[[1]]
 86 | new.index[[2]] <- index[[2]]
 87 | new.index[[3]] <- index[[4]]
 88 | new.index[[4]] <- index[[3]]
 89 | new.index[[5]] <- index[[5]]
 90 | 
 91 | sig.order <- unlist(new.index) # row indices of the selected cell types, component by component
 92 | NMF.Exp.rank5 <- scale_ratio[sig.order,]
 93 | NMF.Exp.rank5 <- na.omit(NMF.Exp.rank5) # drops rows containing NA
 94 | dim(NMF.Exp.rank5)
 95 | 
 96 | group <- predict(nmf.rank5) # cluster label per sample; used below as the column split of the heatmap
 97 | 
 98 | #adjust the position of the module: labels 3 and 4 are swapped, matching the swap made in new.index
 99 | new.group <- c()
100 | for(each in group){
101 |   if(each %in% c("1")){
102 |     new.group <- c(new.group, "1")
103 |   }
104 |   if(each %in% c("2")){
105 |     new.group <- c(new.group, "2")
106 |   }
107 |   if(each %in% c("4")){ # original label 4 becomes 3
108 |     new.group <- c(new.group, "3")
109 |   }
110 |   if(each %in% c("3")){ # original label 3 becomes 4
111 |     new.group <- c(new.group, "4")
112 |   }
113 |   if(each %in% c("5")){
114 |     new.group <- c(new.group, "5")
115 |   }
116 | }
117 | new.group <- factor(new.group, levels = c("1","2","3","4","5"))
118 | 
119 | 
120 | #z_score: column-wise scale() of the cell-type fractions, divided by 4
121 | z_ratio <- scale(ratio)/4
122 | head(z_ratio)
123 | z_ratio <- as.data.frame(z_ratio)
124 | head(z_ratio)
125 | z_ratio <- t(z_ratio) # transposed: rows = cell types, columns = samples
126 | 
127 | plot_matrix <- z_ratio[sig.order,] # same row selection and order as NMF.Exp.rank5
128 | plot_matrix <- na.omit(plot_matrix)
129 | dim(plot_matrix)
130 | 
131 | # Per-sample table: scaled fractions of the selected cell types, the NMF group, and the clinical metadata.
132 | info.matrix <- as.data.frame(t(NMF.Exp.rank5))
133 | head(info.matrix)
134 | info.matrix$sampleID <- rownames(info.matrix)
135 | info.matrix$group <- new.group
136 | info.matrix <- merge(info.matrix, response.meta, by = "sampleID", all.x = TRUE) # NOTE(review): merge() may reorder rows by sampleID; later code assumes the rows of info.matrix are in the column order of plot_matrix — confirm
137 | head(info.matrix)
138 | # Module label for every row of NMF.Exp.rank5, according to which entry of new.index contains that row.
139 | gene.group <- c()
140 | for(each in rownames(NMF.Exp.rank5)){
141 |   if(each %in% rownames(scale_ratio)[new.index[[1]]]){
142 |     gene.group <- c(gene.group, "module1")
143 |   }else if(each %in% rownames(scale_ratio)[new.index[[2]]]){
144 |     gene.group <- c(gene.group, "module2")
145 |   }else if(each %in% rownames(scale_ratio)[new.index[[3]]]){
146 |     gene.group <- c(gene.group, "module3")
147 |   }else if(each %in% rownames(scale_ratio)[new.index[[4]]]){
148 |     gene.group <- c(gene.group, "module4")
149 |   }else if(each %in% rownames(scale_ratio)[new.index[[5]]]){
150 |     gene.group <- c(gene.group, "module5")
151 |   }
152 | }
153 | 
154 | # Bin PD-L1 TPS into "<1%", "1%-49%", ">=50%" or "Not tested".
155 | # Values are fractions (0.5 = 50%) or the literal "<1%". Numeric thresholds are used instead of a fixed
156 | # list of strings, so that an unlisted value such as 0.45 is binned correctly instead of falling into
157 | # "Not tested". Missing, non-numeric or out-of-range (< 0 or > 1) entries stay "Not tested".
158 | tps.raw <- as.character(info.matrix$PDL1_TPS)
159 | tps.value <- suppressWarnings(as.numeric(tps.raw)) # NA for "<1%", missing and non-numeric entries
160 | PDL1_TPS_group <- rep("Not tested", length(tps.raw))
161 | PDL1_TPS_group[tps.raw %in% "<1%" | (!is.na(tps.value) & tps.value >= 0 & tps.value < 0.01)] <- "<1%"
162 | PDL1_TPS_group[!is.na(tps.value) & tps.value >= 0.01 & tps.value < 0.5] <- "1%-49%"
163 | PDL1_TPS_group[!is.na(tps.value) & tps.value >= 0.5 & tps.value <= 1] <- ">=50%"
164 | 
165 | 
166 | info.matrix$PDL1_TPS_group <- PDL1_TPS_group
167 | 
168 | 
169 | info.matrix[is.na(info.matrix)] <- "unknown" # missing metadata is shown as the level "unknown"
170 | ha = HeatmapAnnotation(smokingHistory = factor(info.matrix$smoking_history, levels = c("Y","N","unknown")),
171 |                        cycles = factor(info.matrix$cycles, levels = c("unknown","2","3","4","5","6")),
172 |                        PDL1_TPS = factor(info.matrix$PDL1_TPS_group, levels = c("Not tested","<1%","1%-49%",">=50%")),
173 |                        histology = factor(info.matrix$cancer_type, levels = c("LUSC","LUAD")),
174 |                        pathologicalResponse = factor(info.matrix$pathological_response, levels = c("nPR","pPR","MPR","pCR")),
175 |                        pathologicalResponseLevel = factor(info.matrix$pathological_response_level, levels = c("MPR","non-MPR")),
176 |                        group = factor(info.matrix$group, levels = c("1","2","3","4","5")),
177 |                        col = list(smokingHistory = c("Y" = "#F6E382","N" = "#B8DCC5","unknown" = "#82B0D2"),
178 |                                   cycles = c("unknown" = "#E8E8D0","2" = "#DEDEBE","3" = "#CDCD9A","4"="#B9B973","5"="#AFAF61","6"="#949449"),
179 |                                   pathologicalResponseLevel = c("MPR" = "#2868A6", "non-MPR" = "#B1161C"),
180 |                                   pathologicalResponse = c("nPR" = "#E84445","pPR" = "#F39DA0","MPR" = "#95BCE5","pCR" = "#1999B2"),
181 |                                   PDL1_TPS = c("Not tested" = "#D1E9E9","<1%"="#B3D9D9","1%-49%"="#6FB7B7",
182 |                                                ">=50%"="#4F9D9D"),
183 |                                   histology = c("LUSC" = "#E97777", "LUAD" = "#88AB8E"),
184 |                                   RFS.group = c("notAvailable" = "#E8F3F1","not recurred <= 0.5y"= "#F2F1EB","not recurred 0.5-1y" = "#EEE7DA", # no RFS.group annotation is defined above; this colour map has no matching track
185 |                                                 "not recurred 1-2y" = "#AFC8AD","not recurred > 2y" = "#88AB8E",
186 |                                                 "recurred <= 0.5y" = "#E97777","recurred 0.5-1y"= "#FF9F9F","recurred 1-2y"="#FCDDB0"),
187 |                                   group = c("1"="#E64B35B2","2"="#4DBBD5B2",
188 |                                             "3"="#00A087B2","4"="#3C5488B2",
189 |                                             "5"="#F39B7FB2")),
190 |                        simple_anno_size = unit(0.5, "cm"))
191 | 
192 | # Heatmap of the scaled z-scores: rows split by module, columns split by NMF group, no clustering.
193 | a <- Heatmap(plot_matrix, name = "ratio",
194 |              top_annotation = ha,
195 |              row_split = gene.group,
196 |              column_split = new.group,
197 |              row_gap = unit(2, "mm"),
198 |              column_gap = unit(2, "mm"),
199 |              cluster_rows = FALSE,
200 |              cluster_columns = FALSE,
201 |              column_order = order(factor(info.matrix$pathological_response, levels = c("non-MPR","nPR","pPR","MPR","pCR"))), # columns ordered by pathological response level
202 |              row_names_gp = grid::gpar(fontsize = 10),
203 |              column_names_gp = grid::gpar(fontsize = 5)) 
204 | a
205 | 
206 | # Export the NMF group of every sample.
207 | head(info.matrix)
208 | group <- info.matrix[,c("sampleID","group")] # overwrites the earlier `group` variable
209 | head(group)
210 | write.csv(group,"NMF_all_group_5.csv")
211 | 


--------------------------------------------------------------------------------
/main_figure/figure2_and_related_supplemental_figure/README.md:
--------------------------------------------------------------------------------
 1 | # file description
 2 | 
 3 | ## NMF.R
 4 | used to generate the NMF figures in Figure 2A
 5 | 
 6 | ## robustness_of_NMF.R
 7 | used to generate the figures in Supplemental Figure S3 and S4 to validate the robustness of NMF
 8 | 
 9 | ## proportion_plot.R
10 | used to plot the proportion of each module or each cell type among all CD45+ immune cells, together with sample-level information such as PD-L1, radiological response and pathological response
11 | 


--------------------------------------------------------------------------------
/main_figure/figure2_and_related_supplemental_figure/proportion_plot.R:
--------------------------------------------------------------------------------
  1 | library(reshape2)
  2 | library(tidyverse)
  3 | library(dplyr)
  4 | library(readxl)
  5 | ###################################################################
  6 | info <- read.csv("all_sub_cell_type.csv")
  7 | head(info)
  8 | length(unique(info$sampleID))
  9 | 
 10 | cluster.info <- read.csv("NMF_all_group_5.csv")
 11 | cluster.info <- cluster.info[,-1]
 12 | head(cluster.info)
 13 | dim(cluster.info)
 14 | 
 15 | 
 16 | info <- info[info$sampleID %in% cluster.info$sampleID,]
 17 | length(unique(info$sampleID))
 18 | 
 19 | 
 20 | df <- table(info$sampleID,info$sub_cell_type)
 21 | ratio <- as.data.frame(df / rowSums(df))
 22 | head(ratio)
 23 | colnames(ratio) <- c("sampleID","cell.type","Freq")
 24 | head(ratio)
 25 | 
 26 | ratio <- dcast(ratio, sampleID ~ ratio$cell.type, value.var = "Freq")
 27 | rownames(ratio) <- ratio$sampleID
 28 | head(ratio)
 29 | 
 30 | ratio$group1_module <- ratio$`CD8T_NK-like_FGFBP2` + ratio$NK_CD16hi_FGFBP2 + ratio$CD4T_Tm_ANXA1 + ratio$Mφ_FCGR3A
 31 | ratio$group2_module <- ratio$Bm_TNFSF9 + ratio$Bm_FCRL4 + ratio$Bm_PDE4D + ratio$Bm_CD74 + ratio$ILC3_KIT + ratio$Bm_TNF + ratio$Bn_TCL1A
 32 | ratio$group3_module <- ratio$`CD8T_Tem_GZMK+GZMH+` + ratio$CD8T_Trm_ZNF683 + ratio$`CD8T_Tem_GZMK+NR4A1+` + ratio$CD8T_Tm_IL7R + ratio$CD8T_MAIT_KLRB1
 33 | ratio$group4_module <- ratio$CD4T_Treg_FOXP3 + ratio$CD4T_Treg_CCR8 + ratio$CD4T_Tfh_CXCL13 + ratio$`CD4T_Th1-like_CXCL13` + 
 34 |   ratio$CD4T_Treg_MKI67 + ratio$CD8T_ISG15 + ratio$CD8T_terminal_Tex_LAYN + ratio$CD8T_Tex_CXCL13
 35 | ratio$group5_module <- ratio$Mφ_VCAN + ratio$Mφ_FOLR2 + ratio$cDC2_CD1C + ratio$Mφ_CXCL2 + 
 36 |   ratio$Mφ_DNAJB1 + ratio$Mφ_ISG15 + ratio$mDC_LAMP3 + ratio$Mφ_MARCO + ratio$Mφ_CXCL10 + ratio$pDC_LILRA4 + ratio$Mφ_MMP9 + ratio$cDC1_CLEC9A
 37 | 
 38 | ratio <- merge(ratio,cluster.info,by = "sampleID")
 39 | head(ratio)
 40 | ratio$group <- paste0("group",ratio$group)
 41 | ratio$group <- factor(ratio$group, levels = c("group1","group2","group3","group4","group5"))
 42 | library(ggpubr); library(ggsignif)  # ggboxplot()/ggbarplot() and geom_signif() are used below but neither package is attached at the top of this script
 43 | compaired <- list(c("group1","group2"),
 44 |                   c("group1","group3"),
 45 |                   c("group1","group4"),
 46 |                   c("group1","group5"))
 47 | ggboxplot(ratio, x = "group", y = "`CD8T_NK-like_FGFBP2`",
 48 |           color = "group",add="jitter",add.params=list(size=0.5),
 49 |           x.text.angle=0) + labs(y= 'CD8T_NK-like_FGFBP2 / CD45+') + 
 50 |   theme(legend.position="none") + 
 51 |   scale_color_manual(values=c("group1"="#E84C35","group2"="#4FBAD6",
 52 |                               "group3"="#00A289","group4"="#3C5487",
 53 |                               "group5"="#F29B80")) +
 54 |   geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test)
 55 | 
 56 | 
 57 | 
 58 | 
 59 | sample.info <- as.data.frame(read_excel("sample.xlsx"))
 60 | head(sample.info)
 61 | pathological_response <- c()
 62 | for(each in sample.info$pathological_response){
 63 |   if(each %in% c("MPR","pCR")){
 64 |     pathological_response <- c(pathological_response, "MPR")
 65 |   }else{
 66 |     pathological_response <- c(pathological_response, "non-MPR")
 67 |   }
 68 | }
 69 | 
 70 | sample.info$pathological_response <- pathological_response
 71 | 
 72 | response.meta <- sample.info[, c("sampleID","smoking_history","cancer_type","pre_treatment_staging",
 73 |                                  "PDL1_TPS","PD1","chemotherapy","targeted_therapy","cycles",
 74 |                                  "pathological_response","pathological_response_rate","radiological_response",
 75 |                                  "RVT_pre_dominant_histology")]
 76 | response.meta <- response.meta %>% distinct(sampleID, .keep_all = TRUE)
 77 | head(response.meta)
 78 | 
 79 | ratio <- merge(ratio, response.meta, by = "sampleID", all.x = TRUE)
 80 | head(ratio)
 81 | 
 82 | ratio$pathological_response <- factor(ratio$pathological_response, levels = c("MPR","non-MPR"))
 83 | 
 84 | a <- table(ratio$pathological_response,ratio$group)
 85 | a <- as.data.frame(a / rowSums(a))
 86 | colnames(a) <- c("response","group","Freq")
 87 | head(a)
 88 | ggbarplot(a, x="response", y="Freq", fill = "group",
 89 |           x.text.angle=90) + theme(legend.position = "right") + 
 90 |   scale_fill_manual(values=c("group1"="#E84C35","group2"="#4FBAD6",
 91 |                              "group3"="#00A289","group4"="#3C5487",
 92 |                              "group5"="#F29B80")) 
 93 | 
 94 | LUSC <- ratio[ratio$cancer_type %in% c("LUSC"),]
 95 | a <- table(LUSC$pathological_response,LUSC$group)
 96 | a <- as.data.frame(a / rowSums(a))
 97 | colnames(a) <- c("response","group","Freq")
 98 | head(a)
 99 | ggbarplot(a, x="response", y="Freq", fill = "group",
100 |           x.text.angle=90) + theme(legend.position = "right") + 
101 |   scale_fill_manual(values=c("group1"="#E84C35","group2"="#4FBAD6",
102 |                              "group3"="#00A289","group4"="#3C5487",
103 |                              "group5"="#F29B80")) 
104 | 
105 | LUAD <- ratio[ratio$cancer_type %in% c("LUAD"),]
106 | a <- table(LUAD$pathological_response,LUAD$group)
107 | a <- as.data.frame(a / rowSums(a))
108 | colnames(a) <- c("response","group","Freq")
109 | head(a)
110 | ggbarplot(a, x="response", y="Freq", fill = "group",
111 |           x.text.angle=90) + theme(legend.position = "right") + 
112 |   scale_fill_manual(values=c("group1"="#E84C35","group2"="#4FBAD6",
113 |                              "group3"="#00A289","group4"="#3C5487",
114 |                              "group5"="#F29B80")) 
115 | 
116 | 
117 | 
118 | #PD-L1 TPS
119 | PDL1_TPS_group <- c()
120 | for(each in ratio$PDL1_TPS){
121 |   if (each %in% c("<1%","0")){
122 |     PDL1_TPS_group <- c(PDL1_TPS_group,"<1%") 
123 |   }else if (each %in% c("0.02","0.05","0.08","0.03","0.01","0.3","0.2","0.35","0.4")){
124 |     PDL1_TPS_group <- c(PDL1_TPS_group,"1%-49%") 
125 |   }else if (each %in% c("0.5","0.7","0.6","0.9","0.8","0.85","1","0.55","0.75","0.65")){
126 |     PDL1_TPS_group <- c(PDL1_TPS_group,">=50%") 
127 |   }else{
128 |     PDL1_TPS_group <- c(PDL1_TPS_group,"Not tested") 
129 |   }
130 | }
131 | ratio$PDL1_TPS_group <- PDL1_TPS_group
132 | 
133 | ratio.part <- ratio[ratio$PDL1_TPS_group != "Not tested",]
134 | ratio.part$PDL1_TPS_group <- factor(as.vector(ratio.part$PDL1_TPS_group), levels = c("<1%","1%-49%",">=50%"))
135 | ratio.part$cluster <- paste0(ratio.part$group,"_",ratio.part$pathological_response)
136 | 
137 | a <- table(ratio.part$cluster,ratio.part$PDL1_TPS_group)
138 | df <- as.data.frame(a)
139 | head(df)
140 | colnames(df) <- c("cluster","PDL1_TPS","number")
141 | head(df)
142 | df$cluster <- factor(df$cluster,
143 |                      levels = c("group1_MPR","group1_non-MPR",
144 |                                 "group2_MPR","group2_non-MPR",
145 |                                 "group3_MPR","group3_non-MPR",
146 |                                 "group4_MPR","group4_non-MPR",
147 |                                 "group5_MPR","group5_non-MPR"))
148 | 
149 | ggbarplot(df, x="cluster", y="number", fill = "PDL1_TPS",
150 |           x.text.angle=90) + theme(legend.position = "right") + 
151 |   scale_fill_manual(values=c("<1%"="#B3D9D9","1%-49%"="#4F9D9D",
152 |                              ">=50%"="#3D7878"))
153 | 
154 | 
155 | 
156 | 
157 | group <- c()
158 | response <- c()
159 | for(each in df$cluster){
160 |   group <- c(group, str_split(each, "_")[[1]][1])
161 |   response <- c(response, str_split(each, "_")[[1]][2])
162 | }
163 | df$group <- group
164 | df$response <- response
165 | 
166 | 
167 | ggbarplot(df, x="PDL1_TPS", y="number", fill = "group",
168 |           x.text.angle=90,facet.by = "response") + theme(legend.position = "right") + 
169 |   scale_fill_manual(values=c("group1"="#E84C35","group2"="#4FBAD6",
170 |                              "group3"="#00A289","group4"="#3C5487",
171 |                              "group5"="#F29B80"))
172 | ggsave("PDL1_2.pdf",width = 5, height = 4)  # relative path, like every input this script reads; the former absolute /home/... path only exists on the author's machine
173 | #alluvial
174 | library(ggalluvial)  # attach before geom_alluvium()/geom_stratum() are called in the plots below
175 | library(readxl)
176 | sample.info <- as.data.frame(read_excel("sample.xlsx"))
177 | head(sample.info)
178 | 
179 | cluster.info <- read.csv("NMF_all_group_5.csv")
180 | cluster.info <- cluster.info[,-1]
181 | head(cluster.info)
182 | 
183 | sample.info <- merge(sample.info, cluster.info, by = "sampleID", all.x = TRUE)
184 | head(sample.info)
185 | sample.info <- sample.info[sample.info$group %in% c("1","2","3","4","5"),]
186 | head(sample.info)
187 | 
188 | sample.info$group <- paste0("group",sample.info$group)
189 | 
190 | pathological_response <- c()
191 | for(each in sample.info$pathological_response){
192 |   if(each %in% c("MPR","pCR")){
193 |     pathological_response <- c(pathological_response, "MPR")
194 |   }else{
195 |     pathological_response <- c(pathological_response, "non-MPR")
196 |   }
197 | }
198 | 
199 | sample.info$pathological_response <- pathological_response
200 | sample.info$cluster <- paste0(sample.info$group,"_",sample.info$pathological_response)
201 | 
202 | #PD-L1 TPS
203 | PDL1_TPS_group <- c()
204 | for(each in sample.info$PDL1_TPS){
205 |   if (each %in% c("<1%","0")){
206 |     PDL1_TPS_group <- c(PDL1_TPS_group,"<1%") 
207 |   }else if (each %in% c("0.01","0.02","0.05","0.08","0.03","0.3","0.2","0.35","0.4")){
208 |     PDL1_TPS_group <- c(PDL1_TPS_group,"1%-49%") 
209 |   }else if (each %in% c("0.5","0.7","0.6","0.9","0.8","0.85","1","0.55","0.75","0.65")){
210 |     PDL1_TPS_group <- c(PDL1_TPS_group,">=50%") 
211 |   }else{
212 |     PDL1_TPS_group <- c(PDL1_TPS_group,"Not tested") 
213 |   }
214 | }
215 | sample.info$PDL1_TPS_group <- PDL1_TPS_group
216 | 
217 | sample.info <- sample.info[sample.info$PDL1_TPS_group != "Not tested",]
218 | sample.info$PDL1_TPS_group <- factor(as.vector(sample.info$PDL1_TPS_group), levels = c("<1%","1%-49%",">=50%"))
219 | a <- table(sample.info$cluster,sample.info$PDL1_TPS_group)
220 | df <- as.data.frame(a)
221 | head(df)
222 | colnames(df) <- c("cluster","PDL1_TPS","number")
223 | head(df)
224 | df$cluster <- factor(df$cluster,
225 |                      levels = c("group1_MPR","group1_non-MPR",
226 |                                 "group2_MPR","group2_non-MPR",
227 |                                 "group3_MPR","group3_non-MPR",
228 |                                 "group4_MPR","group4_non-MPR",
229 |                                 "group5_MPR","group5_non-MPR"))
230 | 
231 | 
232 | df$group <- sapply(as.vector(df$cluster), function(x) strsplit(x,"_")[[1]][1])
233 | head(df)
234 | df$pathological_response <- sapply(as.vector(df$cluster), function(x) strsplit(x,"_")[[1]][2])
235 | head(df)
236 | df <- df[!df$number %in% c(0),]
237 | ggplot(data = df,
238 |        aes(axis1 = PDL1_TPS,   # First variable on the X-axis
239 |            axis2 = group,   # Third variable on the X-axis
240 |            y = number)) +
241 |   geom_alluvium(aes(fill = pathological_response,order = pathological_response)) +
242 |   geom_stratum() +
243 |   geom_text(stat = "stratum",
244 |             aes(label = after_stat(stratum))) +
245 |   theme_void() +
246 |   scale_fill_manual(values=c("MPR" = "#2868A6", "non-MPR" = "#B1161C"))
247 | 
248 | library(ggalluvial)
249 | cluster.info <- read.csv("NMF_all_group_5.csv")
250 | cluster.info <- cluster.info[,-1]
251 | head(cluster.info)
252 | 
253 | sample.info <- as.data.frame(read_excel("sample.xlsx"))
254 | head(sample.info)
255 | sample.info <- sample.info[sample.info$sampleID %in% cluster.info$sampleID,]
256 | sample.info <- sample.info[,c("sampleID","pathological_response","radiological_response")]
257 | 
258 | pathological_response_level <- c()
259 | for(each in sample.info$pathological_response){
260 |   if(each %in% c("MPR","pCR")){
261 |     pathological_response_level <- c(pathological_response_level, "MPR")
262 |   }else{
263 |     pathological_response_level <- c(pathological_response_level, "non-MPR")
264 |   }
265 | }
266 | sample.info$pathological_response_level <- pathological_response_level
267 | rownames(sample.info) <- sample.info$sampleID
268 | head(sample.info)
269 | sample.info <- sample.info[cluster.info$sampleID,]
270 | sample.info$group <- cluster.info$group
271 | sample.info$group <- paste0("group",sample.info$group)
272 | sample.info$sub.group <- paste0(sample.info$group,"_",sample.info$pathological_response_level)
273 | sample.info$sub.group <- factor(sample.info$sub.group,
274 |                                 levels = c("group1_MPR","group1_non-MPR",
275 |                                            "group2_MPR","group2_non-MPR",
276 |                                            "group3_MPR","group3_non-MPR",
277 |                                            "group4_MPR","group4_non-MPR",
278 |                                            "group5_MPR","group5_non-MPR"))
279 | head(sample.info)
280 | 
281 | sample.info <- sample.info[sample.info$radiological_response %in% c("SD","PR","CR","PD"),]
282 | head(sample.info)
283 | mm <- as.data.frame(table(sample.info$sub.group,sample.info$radiological_response))
284 | colnames(mm) <- c("sub.group","radiological_response","number")
285 | mm$radiological_response <- factor(mm$radiological_response,levels = c("CR","PR","SD","PD"))
286 | head(mm)
287 | 
288 | ggplot(data = mm,
289 |        aes(axis1 = sub.group,   # First variable on the X-axis
290 |            axis2 = radiological_response,   # Third variable on the X-axis
291 |            y = number)) +
292 |   geom_alluvium(aes(fill = radiological_response,order = radiological_response)) +
293 |   geom_stratum() +
294 |   geom_text(stat = "stratum",
295 |             aes(label = after_stat(stratum))) +
296 |   theme_void() +
297 |   scale_fill_manual(values=c("CR"="#E6A4B4","PR"="#FFD9C0","SD"="#8CC0DE","PD"="#0B60B0"))
298 | 
299 | 


--------------------------------------------------------------------------------
/main_figure/figure2_and_related_supplemental_figure/robustness_of_NMF.R:
--------------------------------------------------------------------------------
 1 | library(NMF)
 2 | library(ComplexHeatmap)
 3 | library(reshape2)
 4 | library(tidyverse)
 5 | library(dplyr)
 6 | library(readxl)
 7 | library(viridis)
 8 | ###################################################################
 9 | count <- 1
10 | module_merge <- list()
11 | # Read the inputs once. `info` stays the full table, so the matrix built after the loop covers every cell type.
12 | info <- read.csv("all_sub_cell_type.csv")
13 | group <- read.csv("NMF_all_group_5.csv")
14 | 
15 | for(i in 1:200){
16 |   #remove 20% samples randomly
17 |   random.samples <- sample(group$sampleID, 45)
18 |   print(random.samples)
19 |   kept.samples <- group$sampleID[!group$sampleID %in% random.samples]
20 |   info.sub <- info[info$sampleID %in% kept.samples,]
21 |   print(length(unique(info.sub$sampleID)))
22 |   
23 |   df <- table(info.sub$sampleID,info.sub$sub_cell_type)
24 |   ratio <- as.data.frame(df / rowSums(df))
25 |   colnames(ratio) <- c("sampleID","cell.type","Freq")
26 |   print(length(unique(ratio$sampleID)))
27 |   
28 |   ratio <- dcast(ratio, sampleID ~ ratio$cell.type, value.var = "Freq")
29 |   rownames(ratio) <- ratio$sampleID
30 |   ratio <- ratio[,-1]
31 |   ratio[is.na(ratio)] <- 0
32 |   # a cell type that is constant across the kept samples would give 0/0 = NaN in the scaling below, so leave it out of this round
33 |   ratio <- ratio[, sapply(ratio, function(x) max(x) > min(x)), drop = FALSE]
34 |   
35 |   #normalization
36 |   scale_ratio <- apply(ratio, MARGIN = 2, function(x) (x-min(x))/(max(x)-min(x)))
37 |   scale_ratio <- as.data.frame(scale_ratio)
38 |   scale_ratio <- t(scale_ratio)
39 |   
40 |   seed = 2020820
41 |   for(rk in 2:10){
42 |     nmf.rank5 <- nmf(scale_ratio, 
43 |                      rank = rk, 
44 |                      nrun=200,
45 |                      seed = seed, 
46 |                      method = "lee")
47 |     
48 |     index <- extractFeatures(nmf.rank5,"max") 
49 |     for(j in 1:rk){
50 |       # take the row names directly: subsetting the matrix drops to a plain vector for a single-feature module, losing its name
51 |       module_merge[[count]] <- rownames(scale_ratio)[index[[j]]]
52 |       count <- count + 1
53 |     }
54 |   }
55 | }
56 | 
57 | module_merge
58 | 
59 | # Co-occurrence matrix: Mat[i,j] counts the entries of module_merge that contain both cell type i and cell type j.
60 | Mat <- matrix(0, ncol = length(unique(info$sub_cell_type)), nrow = length(unique(info$sub_cell_type)))
61 | rownames(Mat) <- unique(info$sub_cell_type)
62 | colnames(Mat) <- unique(info$sub_cell_type)
63 | head(Mat)
64 | 
65 | for (i in 1:length(unique(info$sub_cell_type))) {
66 |   for (j in 1:length(unique(info$sub_cell_type))) {
67 |     number <- 0
68 |     for(m in 1:length(module_merge)){
69 |       if((rownames(Mat)[i] %in% module_merge[[m]]) & (rownames(Mat)[j] %in% module_merge[[m]])){
70 |         number <- number + 1
71 |       }
72 |     }
73 |     Mat[i,j] <- number
74 |   }
75 | }
76 | 
77 | custom_magma <- c(colorRampPalette(c("white", rev(magma(323, begin = 0.15))[1]))(10), rev(magma(323, begin = 0.18)))
78 | pheatmap(as.matrix(Mat), cluster_cols=T, cluster_rows=T, 
79 |          clustering_distance_rows="euclidean", color=custom_magma, 
80 |          fontsize=12,treeheight_row=0,treeheight_col=30, 
81 |          cellheight = 7,cellwidth = 7,show_rownames=T, 
82 |          show_colnames=F,clustering_method = "ward.D2", border_color = NA)
83 | 
84 | 


--------------------------------------------------------------------------------
/main_figure/figure3_and_related_supplemental_figure/TNBC_zyy.R:
--------------------------------------------------------------------------------
  1 | library(Seurat); library(harmony); library(dplyr)  # Seurat object functions, RunHarmony(), and %>%/group_by()/distinct() used below
  2 | library(ggpubr); library(ggsignif)                 # ggboxplot() and geom_signif() for the final box plot
  3 | #downloaded data from GSE169246
  4 | TNBC <- readRDS("/home/zhangwj/data_yi/neoadjuvant/data/other_data/zhangyy/zyy_TNBC.rds")
  5 | dim(TNBC)
  6 | Idents(TNBC) <- "cellType_in_paper"
  7 | DimPlot(TNBC, reduction = "umap", label = FALSE,pt.size = 0.1) +NoLegend()
  8 | FeaturePlot(TNBC, features = c("GBP1"), cols = c("lightgrey" ,"#FD3131"),pt.size = 0.1) 
  9 | FeaturePlot(TNBC, features = c("NKG7"), cols = c("lightgrey" ,"#FD3131"))
 10 | 
 11 | NKT.part <- subset(TNBC, cellType_in_paper %in% c("t_Tn-LEF1","t_ILC1-IL32","t_CD8_Tem-GZMK",
 12 |                                                   "t_CD4-CXCL13","t_ILC1-GZMK",
 13 |                                                   "t_CD8_MAIT-KLRB1",
 14 |                                                   "t_CD4_Treg-FOXP3",
 15 |                                                   "t_CD8_Trm-ZNF683","t_CD8_Teff-GNLY","t_CD4_Tcm-LMNA",   
 16 |                                                   "t_ILC3-AREG",     
 17 |                                                   "t_CD8-CXCL13","t_ILC1-IFNG",      
 18 |                                                   "t_ILC1-FGFBP2","t_Tact-IFI6",     
 19 |                                                   "t_ILC1-ZNF683",   
 20 |                                                   "t_ILC1-CD160","t_ILC3-IL7R","t_ILC1-CX3CR1",    
 21 |                                                   "t_ILC1-SELL","t_CD4_Tact-XIST","t_ILC2-SPON2",    
 22 |                                                   "t_ILC1-CNOT2","t_Tprf-MKI67",   
 23 |                                                   "t_ILC1-VCAM1"))
 24 | dim(NKT.part)
 25 | NKT.part <- NormalizeData(NKT.part, normalization.method = "LogNormalize", scale.factor = 10000)
 26 | 
 27 | NKT.part <- FindVariableFeatures(NKT.part, selection.method = "vst",nfeatures = 1000)
 28 | # Drop immunoglobulin (^IG[KHL]) and ^MT genes from the variable features. A logical mask is used because x[-which(cond)] returns an empty vector when nothing matches.
 29 | NKT.part@assays$RNA@var.features <- NKT.part@assays$RNA@var.features[!grepl("^IG[KHL]",NKT.part@assays$RNA@var.features)]
 30 | NKT.part@assays$RNA@var.features <- NKT.part@assays$RNA@var.features[!grepl("^MT",NKT.part@assays$RNA@var.features)]  # NOTE(review): ^MT matches any symbol starting with MT, not only the MT- prefix; confirm this is intended
 31 | #NKT.part@assays$RNA@var.features <- NKT.part@assays$RNA@var.features[!grepl("^RP[LS]",NKT.part@assays$RNA@var.features)]
 32 | length(NKT.part@assays$RNA@var.features)
 33 | 
 34 | all.genes <- rownames(NKT.part)
 35 | NKT.part <- ScaleData(NKT.part, features = all.genes)
 36 | NKT.part <- RunPCA(NKT.part, features = VariableFeatures(object = NKT.part))
 37 | 
 38 | ElbowPlot(NKT.part)
 39 | 
 40 | #NKT.part <- subset(NKT.part, sample %in% c("medial 2","distal 2",
 41 | #                                           "distal 1a","proximal 3",
 42 | #                                           "distal 3"))
 43 | #remove batch effect
 44 | NKT.part <- RunHarmony(NKT.part, c("sampleID"))
 45 | 
 46 | NKT.part <- NKT.part %>% 
 47 |   FindNeighbors(reduction = "harmony", dims = 1:15) %>% 
 48 |   FindClusters(resolution = 0.5) %>% 
 49 |   RunUMAP(reduction = "harmony", dims = 1:15) %>% 
 50 |   identity()
 51 | 
 52 | NKT.part <- FindClusters(NKT.part,resolution = 1)
 53 | 
 54 | DimPlot(NKT.part, reduction = "umap", label = TRUE,pt.size = 1) 
 55 | NKT.part$cellType_in_paper
 56 | 
 57 | FeaturePlot(NKT.part, features = c(""),pt.size = 1, cols = c("lightgrey" ,"#FD3131")) 
 58 | 
 59 | NKT.markers <- FindAllMarkers(NKT.part, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
 60 | NKT.markers %>%
 61 |   group_by(cluster) %>%
 62 |   slice_max(n = 10, order_by = avg_log2FC) -> NKT.top5
 63 | 
 64 | FeaturePlot(NKT.part, features = c("FCGR3A"), cols = c("lightgrey" ,"#FD3131")) 
 65 | FeaturePlot(NKT.part, features = c("FGFBP2"), cols = c("lightgrey" ,"#FD3131"))
 66 | 
 67 | DimPlot(NKT.part, reduction = "umap", label = TRUE,pt.size = 0.1) + NoLegend()
 68 | 
 69 | Idents(NKT.part) <- "seurat_clusters"
 70 | new.cluster.ids <- c("other","other","other","other","other","other",
 71 |                      "other","other","other","NK_CD16hi_FGFBP2","other",
 72 |                      "other","other","other","other","other","NK_CD16hi_FGFBP2",
 73 |                      "other")
 74 | names(new.cluster.ids) <- levels(NKT.part)
 75 | NKT.part <- RenameIdents(NKT.part, new.cluster.ids)
 76 | DimPlot(NKT.part, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend() +
 77 |   scale_color_manual(values = c("#8ECFC9","#FA7F6F"))
 78 | NKT.part$new.cell.type <- Idents(NKT.part)
 79 | 
 80 | a <- table(NKT.part$sampleID,NKT.part$new.cell.type)
 81 | a <- as.data.frame(a / rowSums(a))
 82 | colnames(a) <- c("sampleID","cell.type","Freq")
 83 | head(a)
 84 | 
 85 | obs <- NKT.part@meta.data
 86 | obs <- obs[,c("sampleID","patientID","tissue","treatment_status",
 87 |                     "ICB_treatment","treatment","treatment_response")]
 88 | obs <- obs %>% distinct(sampleID, .keep_all = TRUE)
 89 | head(obs)
 90 | 
 91 | df <- merge(a, obs, by = "sampleID", all.x = TRUE)
 92 | head(df)
 93 | df$treatment_response <- factor(df$treatment_response, levels = c("PD","SD","PR"))
 94 | 
 95 | df <- df[df$cell.type == "NK_CD16hi_FGFBP2",]
 96 | df <- df[df$treatment %in% c("Chemo"),]
 97 | df <- df[df$treatment_status %in% c("Post-treatment"),]
 98 | 
 99 | treatment.response <- c()
100 | for(each in df$treatment_response){
101 |   if(each %in% c("PD","SD")){
102 |     treatment.response <- c(treatment.response,"PD/SD")
103 |   }else{
104 |     treatment.response <- c(treatment.response,"PR")
105 |   }
106 | }
107 | df$treatment.response <- treatment.response
108 | compaired <- list(c("PD/SD","PR"))
109 | 
110 | ggboxplot(df, x = "treatment.response", y = "Freq",
111 |           color = "treatment.response",add = "jitter",
112 |           x.text.angle=0,size = 0.5,pt.size = 1, facet.by = "treatment") +
113 |   geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) +
114 |   scale_color_manual(values = c("PD/SD" = "#88AB8E","PR"="#E97777")) + theme(legend.position="none")
115 | 


--------------------------------------------------------------------------------
/main_figure/figure3_and_related_supplemental_figure/analysis_of_chemo_only_NSCLC.R:
--------------------------------------------------------------------------------
 1 | library(ggplot2)
 2 | library(ggpubr)
 3 | library(reshape2)
 4 | library(tidyverse)
 5 | library(dplyr)
 6 | 
 7 | info <- read.csv("all_sub_cell_type.csv")
 8 | head(info)
 9 | df <- table(info$sampleID,info$sub_cell_type)
10 | ratio <- as.data.frame(df / rowSums(df))
11 | head(ratio)
12 | colnames(ratio) <- c("sampleID","cell.type","Freq")
13 | head(ratio)
14 | 
15 | library(readxl)
16 | sample.info <- as.data.frame(read_excel("sample.xlsx"))
17 | head(sample.info)
18 | pathological_response <- c()
19 | for(each in sample.info$pathological_response){
20 |   if(each %in% c("MPR","pCR")){
21 |     pathological_response <- c(pathological_response, "MPR")
22 |   }else{
23 |     pathological_response <- c(pathological_response, "non-MPR")
24 |   }
25 | }
26 | 
27 | sample.info$pathological_response <- pathological_response
28 | 
29 | response.meta <- sample.info[, c("sampleID","smoking_history","cancer_type","pre_treatment_staging",
30 |                                  "PDL1_TPS","PD1","chemotherapy","targeted_therapy","cycles",
31 |                                  "pathological_response","pathological_response_rate","radiological_response",
32 |                                  "RVT_pre_dominant_histology")]
33 | response.meta <- response.meta %>% distinct(sampleID, .keep_all = TRUE)
34 | head(response.meta)
35 | 
36 | ratio <- dcast(ratio, sampleID ~ ratio$cell.type, value.var = "Freq")
37 | 
38 | head(ratio)
39 | merge.version <- merge(ratio, response.meta, by = "sampleID", all.x = TRUE)
40 | head(merge.version)
41 | #patients with only chemotherapy
42 | merge.version <- merge.version[merge.version$PD1 %in% c("No"),]
43 | head(merge.version)
44 | 
45 | 
46 | ggboxplot(merge.version, x = "pathological_response", y = "`CD8T_NK-like_FGFBP2`",
47 |           color = "pathological_response",add = "jitter",
48 |           x.text.angle=0,size = 0.5,pt.size = 1) +
49 |   stat_compare_means(aes(group = pathological_response)) + 
50 |   scale_color_manual(values = c("MPR" = "#2868A6", "non-MPR" = "#B1161C")) + 
51 |   theme(legend.position="none")
52 | 


--------------------------------------------------------------------------------
/main_figure/figure3_and_related_supplemental_figure/number_B_aggrates.R:
--------------------------------------------------------------------------------
 1 | library(readxl)
 2 | #only the T1 counts were kept
 3 | B.info <- as.data.frame(read_excel("B_cell_aggregates.xlsx"))
 4 | head(B.info)
 5 | 
 6 | cluster.info <- read.csv("NMF_all_group_5.csv")
 7 | cluster.info <- cluster.info[,-1]
 8 | head(cluster.info)
 9 | 
10 | df <- merge(cluster.info, B.info, by = "sampleID", all.x = TRUE)
11 | head(df)
12 | df <- df[df$group %in% c("1","2","3","4","5"),]
13 | head(df)
14 | df[is.na(df)] <- "unknown"
15 | head(df)
16 | df <- df[df$number_of_B_cell_aggregates != "unknown",]
17 | dim(df)
18 | df$number_of_B_cell_aggregates <- as.numeric(df$number_of_B_cell_aggregates)
19 | df$number_of_B_cell_aggregates <- 4 * df$number_of_B_cell_aggregates
20 | df$group <- paste0("group", df$group)
21 | df$group <- factor(as.vector(df$group), levels = c("group1","group2","group3",
22 |                                                    "group4","group5"))
23 | library(ggpubr); library(ggsignif)  # ggboxplot() and geom_signif() are used below but this script only attaches readxl
24 | compaired <- list(c("group1", "group2"),c("group2", "group3"),
25 |                   c("group2", "group4"),c("group2", "group5"))
26 | 
27 | ggboxplot(df, x = "group", y = "number_of_B_cell_aggregates",
28 |           color = "group",add="jitter",add.params=list(size=0.7),
29 |           x.text.angle=0) + labs(x='group', y= 'number_of_B_cell_aggregates / cm2') + 
30 |   theme(legend.position="none") +
31 |   scale_color_manual(values=c("group1"="#E84C35","group2"="#4FBAD6",
32 |                               "group3"="#00A289","group4"="#3C5487",
33 |                               "group5"="#F29B80"),guide = "none") +  
34 |   geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) 
35 | 
36 | 
37 | new_group <- c()
38 | for(each in df$group){
39 |   if(each %in% c("group1","group3","group4","group5")){
40 |     new_group <- c(new_group, "other_group")
41 |   }else{
42 |     new_group <- c(new_group, "group2")
43 |   }
44 | }
45 | 
46 | df$new_group <- new_group
47 | df$new_group <- factor(df$new_group, levels = c("group2","other_group"))
48 | 
49 | compaired <- list(c("group2","other_group"))
50 | ggboxplot(df, x = "new_group", y = "number_of_B_cell_aggregates",
51 |           color = "new_group",add="jitter",add.params=list(size=0.7),
52 |           x.text.angle=0) + labs(x='group', y= 'number_of_B_cell_aggregates / cm2') + 
53 |   theme(legend.position="none") +
54 |   scale_color_manual(values=c("other_group"="#8491B4CC","group2"="#4FBAD6"),guide = "none") +  
55 |   geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) 
56 | 
57 | 
58 | 


--------------------------------------------------------------------------------
/main_figure/figure3_and_related_supplemental_figure/startrac.R:
--------------------------------------------------------------------------------
 1 | library(Startrac)
 2 | library(ggpubr)
 3 | library(ggplot2)
 4 | library(circlize)
 5 | library(ggpmisc)
 6 | library(ggsci)
 7 | 
 8 | ####################################################################
 9 | TCR.data <- read.csv("T_with_TCR.csv")
10 | # TCR.data must include the clone type, expansion state and the cluster
11 | head(TCR.data)
12 | dim(TCR.data)
13 | 
14 | #only include CD8 T cells
15 | TCR.data <- TCR.data[TCR.data$sub_cell_type %in% c("your sub cell types"),]
16 | head(TCR.data)
17 | 
18 | head(TCR.data)
19 | 
20 | 
21 | in.dat <- TCR.data[,c("sampleID","cellID","clonetype","expansion",
22 |                       "sub_cell_type")]
23 | head(in.dat)
24 | 
25 | colnames(in.dat) <- c("patient","Cell_Name","clone.id","clone.status",
26 |                       "majorCluster")
27 | head(in.dat)
28 | 
29 | in.dat$loc = "T"
30 | 
31 | head(in.dat)
32 | 
33 | out <- Startrac.run(in.dat, proj="NSCLC",verbose=F)
34 | 
35 | expan <- out@cluster.data
36 | head(expan)
37 | expan$majorCluster <- factor(expan$majorCluster, levels = c("your sub cell types"))
38 | 
39 | compaired <- list(c(),c())
40 | ggboxplot(expan, x = "majorCluster", y = "expa",
41 |           color = "majorCluster",add="jitter",add.params=list(size=0.5),
42 |           x.text.angle=45) + labs(x='cell.type', y= 'expa') + 
43 |   theme(legend.position="none")  +
44 |   geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test)
45 | 
46 | 


--------------------------------------------------------------------------------
/main_figure/figure3_and_related_supplemental_figure/visualization_of_NKT_clones.Rmd:
--------------------------------------------------------------------------------
  1 | # P107 plot
  2 | coerced.cell.types <- c("CD8T_Tem_GZMK+GZMH+", "CD4T_Tfh_CXCL13", "CD4T_Tm_XCL1",
  3 |                         "CD4T_Tem_GZMA", "ILC3_KIT", "CD8T_Trm_ZNF683", "CD8T_prf_MKI67",
  4 |                         "CD4T_Tm_ANXA1", "CD8T_MAIT_KLRB1", "CD4T_Tn_CCR7", 
  5 |                         "CD4T_Treg_MKI67", "CD8T_Tem_GZMK+NR4A1+", "CD8T_Tm_IL7R",
  6 |                         "T_gdT_TRDV2", "T_gdT_TRDV1", "NK_CD16hi_FGFBP2",
  7 |                         "CD4T_Th1-like_CXCL13")
  8 | 
  9 | color.value <- c(
 10 |   "expanded_terminal_Tex"="#004949",
 11 |   "Expanded CCR8+Treg"="#A50021",
 12 |   "other"="#BCBCBC",
 13 |   "CD8Texp"="#418849",
 14 |   "CD8T_NK-like_FGFBP2"="#6551CC"
 15 | )
 16 | 
 17 | tcr.table <- expanded.tcr.table %>% filter(sampleID=="P107")
 18 | # clonotypes sorted by cell count, largest first; x.orders fixes the x-axis order of the stacked bars
 19 | tcr.count.table <- as.data.frame(table(tcr.table$clonetype)) %>% arrange(desc(Freq))
 20 | x.orders <- as.vector(tcr.count.table$Var1)
 21 | by.cell.type.count.table <- tcr.table %>% group_by(clonetype, T_new_name) %>% summarise(n=n())
 22 | by.cell.type.count.table$T_new_name <- factor(by.cell.type.count.table$T_new_name, levels = c("expanded_terminal_Tex", "CD8Texp", "Expanded CCR8+Treg", "CD8T_NK-like_FGFBP2",  "other"))
 23 | p <- ggplot(by.cell.type.count.table, 
 24 |             aes(x=factor(clonetype, levels=x.orders), fill=T_new_name, y=n)) + 
 25 |   theme_classic() + 
 26 |   scale_fill_manual(values = color.value) +
 27 |   geom_bar(position="stack", stat="identity") + theme(axis.text.x = element_text(angle = 90)) +
 28 |   ggtitle("P107")
 29 | p
 30 | ggsave("figures/P107_clonal_composition.pdf", p, width = 10, height =5)
 31 | 
 32 | 
 33 | tcr.table <- expanded.tcr.table %>% filter(sampleID=="P471")
 34 | 
 35 | tcr.count.table <- as.data.frame(table(tcr.table$clonetype)) %>% arrange(desc(Freq))
 36 |   x.orders <- as.vector(tcr.count.table$Var1)
 37 |   by.cell.type.count.table <- tcr.table %>% group_by(clonetype, T_new_name) %>% summarise(n=n())
 38 |   by.cell.type.count.table$T_new_name <- factor(by.cell.type.count.table$T_new_name, levels = c("expanded_terminal_Tex", "CD8Texp", "Expanded CCR8+Treg", "CD8T_NK-like_FGFBP2",  "other"))
 39 |   p <- ggplot(by.cell.type.count.table, 
 40 |               aes(x=factor(clonetype, level=x.orders), fill=T_new_name, y=n)) + 
 41 |     theme_classic() + 
 42 |     scale_fill_manual(values = color.value) +
 43 |     geom_bar(position="stack", stat="identity") + theme(axis.text.x = element_text(angle = 90)) +
 44 |     ggtitle("P471")
 45 | p
 46 | 
 47 | tcr.table <- expanded.tcr.table %>% filter(sampleID=="P325")
 48 | 
 49 | tcr.count.table <- as.data.frame(table(tcr.table$clonetype)) %>% arrange(desc(Freq))
 50 |   x.orders <- as.vector(tcr.count.table$Var1)
 51 |   by.cell.type.count.table <- tcr.table %>% group_by(clonetype, T_new_name) %>% summarise(n=n())
 52 |   by.cell.type.count.table$T_new_name <- factor(by.cell.type.count.table$T_new_name, levels = c("expanded_terminal_Tex", "CD8Texp", "Expanded CCR8+Treg", "CD8T_NK-like_FGFBP2",  "other"))
 53 |   p <- ggplot(by.cell.type.count.table, 
 54 |               aes(x=factor(clonetype, level=x.orders), fill=T_new_name, y=n)) + 
 55 |     theme_classic() + 
 56 |     scale_fill_manual(values = color.value) +
 57 |     geom_bar(position="stack", stat="identity") + theme(axis.text.x = element_text(angle = 90)) +
 58 |     ggtitle("P325")
 59 | p
 60 | 
 61 | 
 62 | tcr.table <- expanded.tcr.table %>% filter(sampleID=="P23")
 63 | 
 64 | tcr.count.table <- as.data.frame(table(tcr.table$clonetype)) %>% arrange(desc(Freq))
 65 |   x.orders <- as.vector(tcr.count.table$Var1)
 66 |   by.cell.type.count.table <- tcr.table %>% group_by(clonetype, T_new_name) %>% summarise(n=n())
 67 |   by.cell.type.count.table$T_new_name <- factor(by.cell.type.count.table$T_new_name, levels = c("expanded_terminal_Tex", "CD8Texp", "Expanded CCR8+Treg", "CD8T_NK-like_FGFBP2",  "other"))
 68 |   p <- ggplot(by.cell.type.count.table, 
 69 |               aes(x=factor(clonetype, level=x.orders), fill=T_new_name, y=n)) + 
 70 |     theme_classic() + 
 71 |     scale_fill_manual(values = color.value) +
 72 |     geom_bar(position="stack", stat="identity") + theme(axis.text.x = element_text(angle = 90)) +
 73 |     ggtitle("P23")
 74 | p
 75 | 
 76 | 
 77 | tcr.table <- expanded.tcr.table %>% filter(sampleID=="P258")
 78 | 
 79 | tcr.count.table <- as.data.frame(table(tcr.table$clonetype)) %>% arrange(desc(Freq))
 80 |   x.orders <- as.vector(tcr.count.table$Var1)
 81 |   by.cell.type.count.table <- tcr.table %>% group_by(clonetype, T_new_name) %>% summarise(n=n())
 82 |   by.cell.type.count.table$T_new_name <- factor(by.cell.type.count.table$T_new_name, levels = c("expanded_terminal_Tex", "CD8Texp", "Expanded CCR8+Treg", "CD8T_NK-like_FGFBP2",  "other"))
 83 |   p <- ggplot(by.cell.type.count.table, 
 84 |               aes(x=factor(clonetype, level=x.orders), fill=T_new_name, y=n)) + 
 85 |     theme_classic() + 
 86 |     scale_fill_manual(values = color.value) +
 87 |     geom_bar(position="stack", stat="identity") + theme(axis.text.x = element_text(angle = 90)) +
 88 |     ggtitle("P258")
 89 | p
 90 | ggsave("figures/P258_clonal_composition.pdf", p, width = 10, height =5)
 91 | 
 92 | 
 93 | tcr.table <- expanded.tcr.table %>% filter(sampleID=="P365")
 94 | 
 95 | tcr.count.table <- as.data.frame(table(tcr.table$clonetype)) %>% arrange(desc(Freq))
 96 |   x.orders <- as.vector(tcr.count.table$Var1)
 97 |   by.cell.type.count.table <- tcr.table %>% group_by(clonetype, T_new_name) %>% summarise(n=n())
 98 |   by.cell.type.count.table$T_new_name <- factor(by.cell.type.count.table$T_new_name, levels = c("expanded_terminal_Tex", "CD8Texp", "Expanded CCR8+Treg", "CD8T_NK-like_FGFBP2",  "other"))
 99 |   p <- ggplot(by.cell.type.count.table, 
100 |               aes(x=factor(clonetype, level=x.orders), fill=T_new_name, y=n)) + 
101 |     theme_classic() + 
102 |     scale_fill_manual(values = color.value) +
103 |     geom_bar(position="stack", stat="identity") + theme(axis.text.x = element_text(angle = 90)) +
104 |     ggtitle("P365")
105 | p
106 | 
107 | tcr.table <- expanded.tcr.table %>% filter(sampleID=="P412")
108 | 
109 | tcr.count.table <- as.data.frame(table(tcr.table$clonetype)) %>% arrange(desc(Freq))
110 |   x.orders <- as.vector(tcr.count.table$Var1)
111 |   by.cell.type.count.table <- tcr.table %>% group_by(clonetype, T_new_name) %>% summarise(n=n())
112 |   by.cell.type.count.table$T_new_name <- factor(by.cell.type.count.table$T_new_name, levels = c("expanded_terminal_Tex", "CD8Texp", "Expanded CCR8+Treg", "CD8T_NK-like_FGFBP2",  "other"))
113 |   p <- ggplot(by.cell.type.count.table, 
114 |               aes(x=factor(clonetype, level=x.orders), fill=T_new_name, y=n)) + 
115 |     theme_classic() + 
116 |     scale_fill_manual(values = color.value) +
117 |     geom_bar(position="stack", stat="identity") + theme(axis.text.x = element_text(angle = 90)) +
118 |     ggtitle("P412")
119 | p
120 | 
121 | ggsave("figures/P412_clonal_composition.pdf", p, width = 10, height =5)
122 | 
123 | tcr.table <- expanded.tcr.table %>% filter(sampleID=="P59")
124 | 
125 | tcr.count.table <- as.data.frame(table(tcr.table$clonetype)) %>% arrange(desc(Freq))
126 |   x.orders <- as.vector(tcr.count.table$Var1)
127 |   by.cell.type.count.table <- tcr.table %>% group_by(clonetype, T_new_name) %>% summarise(n=n())
128 |   by.cell.type.count.table$T_new_name <- factor(by.cell.type.count.table$T_new_name, levels = c("expanded_terminal_Tex", "CD8Texp", "Expanded CCR8+Treg", "CD8T_NK-like_FGFBP2",  "other"))
129 |   p <- ggplot(by.cell.type.count.table, 
130 |               aes(x=factor(clonetype, level=x.orders), fill=T_new_name, y=n)) + 
131 |     theme_classic() + 
132 |     scale_fill_manual(values = color.value) +
133 |     geom_bar(position="stack", stat="identity") + theme(axis.text.x = element_text(angle = 90)) +
134 |     ggtitle("P59")
135 | p
136 | 


--------------------------------------------------------------------------------
/main_figure/figure4_and_related_supplemental_figure/CCR8.IF.Rmd:
--------------------------------------------------------------------------------
 1 | # Group 3 vs group 4 comparison of three measurements stored in CCR8_IF.csv.
 2 | # `group` is converted to character so it is treated as a discrete variable.
 3 | ccr8.ihc <- read_csv("CCR8_IF.csv") %>% mutate(group=as.character(group))
 4 | 
 5 | compaired <- list(c("3", "4"))
 6 | 
 7 | # Draw one boxplot of `measure` by group (jittered points plus a wilcox.test
 8 | # bracket for the pair in `compaired`), display it and write it to `pdf.name`.
 9 | save.group.boxplot <- function(measure, axis.title, pdf.name) {
10 |   panel <- ggboxplot(ccr8.ihc, x = "group", y = measure,
11 |                      color = "group", add = "jitter", add.params = list(size = 0.5),
12 |                      x.text.angle = 0)
13 |   panel <- panel + labs(x = 'group', y = axis.title)
14 |   panel <- panel + theme(legend.position = "none")
15 |   panel <- panel + geom_signif(comparisons = compaired, step_increase = 0.1,
16 |                                map_signif_level = F, test = wilcox.test)
17 |   panel <- panel + scale_color_manual(values = c("3" = "#00A289", "4" = "#3C5487"))
18 |   print(panel)
19 |   ggsave(pdf.name, plot = panel, width = 2.5, height = 3.5)
20 |   invisible(NULL)
21 | }
22 | 
23 | save.group.boxplot("FOXP3.area", 'number of FOXP3+ Cells per mm2', "number.FOXP3.pdf")
24 | save.group.boxplot("CCR8.area", 'number of FOXP3+CCR8+ Cells per mm2', "number.CCR8.pdf")
25 | save.group.boxplot("CCR8.FOXP3", 'percentage of FOXP3+CCR8+ Cells in FOXP3+ cells', "CCR8.FOXP3.pdf")
26 | 


--------------------------------------------------------------------------------
/main_figure/figure4_and_related_supplemental_figure/CCR8_IF.csv:
--------------------------------------------------------------------------------
 1 | group,sampleID,FOXP3,CCR8,area,FOXP3.area,CCR8.area,CCR8.FOXP3
 2 | 3,P46,19,2,69619,273,29,0.11
 3 | 3,P46,16,2,69619,230,29,0.13
 4 | 3,P46,22,1,69619,316,14,0.05
 5 | 3,P46,22,2,69619,316,29,0.09
 6 | 3,P46,27,6,69619,388,86,0.22
 7 | 3,P31,17,3,61634,276,49,0.18
 8 | 3,P31,22,4,61634,357,65,0.18
 9 | 3,P31,21,6,61634,341,97,0.29
10 | 3,P31,17,5,61634,276,81,0.29
11 | 3,P31,20,4,61634,324,65,0.2
12 | 3,P57,22,6,54563,403,110,0.27
13 | 3,P57,17,1,54563,312,18,0.06
14 | 3,P57,23,3,54563,422,55,0.13
15 | 3,P57,23,4,54563,422,73,0.17
16 | 3,P57,20,7,54563,367,128,0.35
17 | 4,P22,31,11,37335,830,295,0.35
18 | 4,P22,26,14,37335,696,375,0.54
19 | 4,P22,32,16,37335,857,429,0.5
20 | 4,P22,18,10,37335,482,268,0.56
21 | 4,P22,21,9,37335,562,241,0.43
22 | 4,P32,16,7,35817,447,195,0.44
23 | 4,P32,22,7,35817,614,195,0.32
24 | 4,P32,28,8,35817,782,223,0.29
25 | 4,P32,25,9,35817,698,251,0.36
26 | 4,P32,31,8,35817,866,223,0.26
27 | 4,P106,18,6,19437,926,309,0.33
28 | 4,P106,16,7,19437,823,360,0.44
29 | 4,P106,21,7,19437,1080,360,0.33
30 | 4,P106,8,3,19437,412,154,0.38
31 | 4,P106,8,2,19437,412,103,0.25
32 | 


--------------------------------------------------------------------------------
/main_figure/figure4_and_related_supplemental_figure/CCR8_Treg_in_all_Treg.R:
--------------------------------------------------------------------------------
 1 | #############################################################################################################
 2 | # Per-sample fraction of each Treg subtype among all Tregs, then a group3 vs group4
 3 | # boxplot of the CD4T_Treg_CCR8 fraction.
 4 | # NOTE(review): ggboxplot(), geom_signif() and scale_color_manual() are called without
 5 | # any library() line in this script; the plotting packages must already be attached.
 6 | info <- read.csv("all_sub_cell_type.csv")
 7 | head(info)
 8 | length(unique(info$sampleID))
 9 | 
10 | # keep only rows whose sub_cell_type is one of the three Treg subtypes
11 | info <- info[info$sub_cell_type %in% c("CD4T_Treg_CCR8","CD4T_Treg_FOXP3","CD4T_Treg_MKI67"),]
12 | 
13 | cluster.info <- read.csv("NMF_all_group_5.csv")
14 | # drop the first column (presumably a row index written with the CSV -- confirm)
15 | cluster.info <- cluster.info[,-1]
16 | head(cluster.info)
17 | dim(cluster.info)
18 | 
19 | 
20 | # restrict to samples that appear in the group table
21 | info <- info[info$sampleID %in% cluster.info$sampleID,]
22 | length(unique(info$sampleID))
23 | 
24 | 
25 | # sample x subtype counts divided by the row totals -> within-sample fractions
26 | df <- table(info$sampleID,info$sub_cell_type)
27 | ratio <- as.data.frame(df / rowSums(df))
28 | head(ratio)
29 | colnames(ratio) <- c("sampleID","cell.type","Freq")
30 | head(ratio)
31 | 
32 | # no `by` is given, so merge() joins on the column names the two tables share
33 | ratio <- merge(ratio,cluster.info)
34 | head(ratio)
35 | ratio$group <- paste0("group",ratio$group)
36 | ratio$group <- factor(ratio$group, levels = c("group1","group2","group3","group4","group5"))
37 | head(ratio)
38 | 
39 | CCR8 <- ratio[ratio$cell.type %in% c("CD4T_Treg_CCR8"),]
40 | 
41 | compaired <- list(c("group3","group4"))
42 | 
43 | ggboxplot(CCR8[CCR8$group %in% c("group3","group4"),], x = "group", y = "Freq",
44 |           color = "group",add="jitter",add.params=list(size=0.5),
45 |           x.text.angle=0) + labs(x='group', y= 'CCR8+ Treg in all Tregs') +
46 |   theme(legend.position="none") +
47 |   geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) + 
48 |   scale_color_manual(values=c("group3"="#00A289","group4"="#3C5487"))
49 | 


--------------------------------------------------------------------------------
/main_figure/figure4_and_related_supplemental_figure/DEG_volcano_plot.R:
--------------------------------------------------------------------------------
 1 | ########################################################################################
 2 | #load library
 3 | #########################################################################################
 4 | suppressMessages(library(Seurat))
 5 | suppressMessages(library(dplyr))
 6 | suppressMessages(library(ggplot2))
 7 | suppressMessages(library(readr))
 8 | suppressMessages(library(harmony))
 9 | suppressMessages(library(ggpubr))
10 | suppressMessages(library(ggpmisc))
11 | suppressMessages(library(ggrepel))
12 | suppressMessages(library(readxl))
13 | ##############################################################################################
14 | #Treg cells: CD4T_Treg_CCR8 against the other cells in Treg_count.rds
15 | ##########################################################################
16 | scRNA <- readRDS("Treg_count.rds")
17 | dim(scRNA)
18 | Idents(scRNA) <- "sub_cell_type"
19 | 
20 | scRNA <- NormalizeData(scRNA, normalization.method = "LogNormalize", scale.factor = 10000)
21 | 
22 | scRNA <- FindVariableFeatures(scRNA, selection.method = "vst",nfeatures = 1000)
23 | # use the accessor: a var.features slot is not present on every assay class
24 | length(VariableFeatures(scRNA))
25 | 
26 | all.genes <- rownames(scRNA)
27 | scRNA <- ScaleData(scRNA, features = all.genes)
28 | scRNA <- RunPCA(scRNA, features = VariableFeatures(object = scRNA))
29 | 
30 | ElbowPlot(scRNA)
31 | 
32 | # no ident.2 is given: CD4T_Treg_CCR8 is tested against every other identity in the object
33 | Idents(scRNA) <- "sub_cell_type"
34 | cluster1.markers <- FindMarkers(scRNA, ident.1 = "CD4T_Treg_CCR8",min.pct = 0.5,
35 |                                 test.use = "wilcox_limma",slot = "counts")
36 | head(cluster1.markers, n = 5)
37 | 
38 | data.markers <- cluster1.markers
39 | 
40 | data.markers$symbol <- rownames(data.markers)
41 | # the 1e-100 offset keeps -log10() finite when p_val_adj is 0
42 | data.markers$logP <- -log10(data.markers$p_val_adj + 1e-100)
43 | dim(data.markers)
44 | # NOTE(review): the down side is labelled CD4T_Treg_FOXP3 although the test above has no
45 | # ident.2 -- confirm the object only holds these two subtypes, or set ident.2.
46 | data.markers$Group = "not-significant"
47 | data.markers$Group[which((data.markers$p_val_adj < 0.05) & (data.markers$avg_log2FC > 1.4))] = "CD4T_Treg_CCR8"
48 | data.markers$Group[which((data.markers$p_val_adj < 0.05) & (data.markers$avg_log2FC < -0.9))] = "CD4T_Treg_FOXP3"
49 | table(data.markers$Group)
50 | 
51 | data.markers$label = ""
52 | #sort the genes by avg_log2FC, largest first
53 | data.markers <- data.markers[order(data.markers$avg_log2FC, decreasing = TRUE),]
54 | #among the up-regulated genes take the 13 with the largest avg_log2FC
55 | up.genes <- head(data.markers$symbol[which(data.markers$Group == "CD4T_Treg_CCR8")], 13)
56 | #among the down-regulated genes take the 7 with the most negative avg_log2FC
57 | down.genes <- tail(data.markers$symbol[which(data.markers$Group == "CD4T_Treg_FOXP3")], 7)
58 | #combine up.genes and down.genes and write them into the label column
59 | data.top10.genes <- c(as.character(up.genes), as.character(down.genes))
60 | data.markers$label[match(data.top10.genes, data.markers$symbol)] <- data.top10.genes
61 | ggscatter(data.markers, x = "avg_log2FC", y = "logP", color = "Group",
62 |           palette = c("#CC0000","#2f5688","#BBBBBB"),
63 |           size = 1, font.label = 18,
64 |           repel = T, xlab = "log2FoldChange",
65 |           ylab = "-log10(Adjust P-value)") + 
66 |   geom_text_repel(size=3,point.padding = NA,label = data.markers$label, max.overlaps = 1000)
67 | 


--------------------------------------------------------------------------------
/main_figure/figure4_and_related_supplemental_figure/T_cell_clonal_composition_in_individual_patients.Rmd:
--------------------------------------------------------------------------------
 1 | # Expanded clonotypes: those with at least TCRs.expansion.cutoff rows in the TCR table
 2 | all.tcr.table <- read_csv("T_with_TCR_obs_V3.csv")
 3 | all.tcr.table 
 4 | 
 5 | unique.tcr <- unique(all.tcr.table$clonetype)
 6 | # number of rows per clonotype
 7 | tcr.frequency.table <- summarise(group_by(all.tcr.table, clonetype), n=n())
 8 | tcr.frequency.table
 9 | TCRs.expansion.cutoff <- 10
10 | expanded.TCR.frequency.table <- filter(tcr.frequency.table, n>=TCRs.expansion.cutoff)
11 | expanded.TCR.frequency.table
12 | expanded.TCRs <- as.vector(expanded.TCR.frequency.table$clonetype)
13 | 
14 | # every row whose clonotype passed the cutoff
15 | expanded.tcr.table <- filter(all.tcr.table, clonetype %in% expanded.TCRs)
16 | 
17 | # fill colour per T_new_name category
18 | color.value <- c(
19 |   "expanded_terminal_Tex"="#004949",
20 |   "Expanded CCR8+Treg"="#A50021",
21 |   "other"="#BCBCBC",
22 |   "CD8Texp"="#418849"
23 | )
24 | 
25 | 
26 | tcr.table <- filter(expanded.tcr.table, sampleID=="P9")
27 | 
28 | # clonotypes from most to fewest rows; used as the x-axis order
29 | tcr.count.table <- arrange(as.data.frame(table(tcr.table$clonetype)), desc(Freq))
30 | x.orders <- as.vector(tcr.count.table$Var1)
31 | # rows per (clonotype, T_new_name) pair are the bar segment heights
32 | by.cell.type.count.table <- summarise(group_by(tcr.table, clonetype, T_new_name), n=n())
33 | by.cell.type.count.table$T_new_name <- factor(
34 |   by.cell.type.count.table$T_new_name,
35 |   levels = c("expanded_terminal_Tex", "CD8Texp", "Expanded CCR8+Treg", "other"))
36 | p <- ggplot(by.cell.type.count.table,
37 |             aes(x=factor(clonetype, level=x.orders), fill=T_new_name, y=n))
38 | p <- p + geom_bar(position="stack", stat="identity")
39 | p <- p + scale_fill_manual(values = color.value)
40 | p <- p + theme_classic() + theme(axis.text.x = element_text(angle = 90))
41 | p <- p + ggtitle("P9")
42 | p
43 | ggsave("figures/P9_clonal_composition.pdf", p, width = 10, height =5)
44 | 


--------------------------------------------------------------------------------
/main_figure/figure5_and_related_supplemental_figure/CCR8IHC.xlsm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zwj-tina/neoadjuvant-treatment-of-NSCLC/63ba67130a8e4c11e0bb7f5fac31b12ffcb2fab4/main_figure/figure5_and_related_supplemental_figure/CCR8IHC.xlsm


--------------------------------------------------------------------------------
/main_figure/figure5_and_related_supplemental_figure/CCR8_IHC_non-MPR_subtypes.Rmd:
--------------------------------------------------------------------------------
 1 | ```{r}
 2 | library(dplyr)
 3 | library(tidyverse)
 4 | library(readxl)
 5 | library(ggpubr)
 6 | library(survival)
 7 | library(ggsurvfit)
 8 | library(survminer)
 9 | ```
10 | 
11 | ```{r}
12 | # Per-sample table joined onto the IHC sheet below by SampleID.
13 | nmf.data <- read_csv("clean_nmf_data.csv")
14 | 
15 | # IHC sheet restricted to non-MPR samples with a known Treg clone number; sample
16 | # P189 is dropped. Samples with numberTregClone >= 9 are "type I non-MPR", the
17 | # rest "type II non-MPR".
18 | ccr8.data <- read_excel("CCR8IHC.xlsm") %>%
19 |   filter(SampleID != "P189") %>%
20 |   left_join(nmf.data, by="SampleID") %>%
21 |   filter((MPR=="non-MPR"), !is.na(numberTregClone)) %>%
22 |   mutate(TCR.classification=ifelse(numberTregClone>=9, "type I non-MPR", "type II non-MPR"))
23 | ccr8.data
24 | 
25 | table(ccr8.data$TCR.classification)
26 | ```
27 | 
28 | ```{r}
29 | # rows per `group` within each TCR class
30 | ccr8.data %>% group_by(TCR.classification) %>% count(group)
31 | ```
32 | 
33 | 
34 | 
35 | ```{r}
36 | compaired <- list(c("type I non-MPR", "type II non-MPR"))
37 | 
38 | # Boxplot of one column of ccr8.data by TCR class, with jittered points and a
39 | # wilcox.test bracket for the pair in `compaired`.
40 | #   measure    : name of the column plotted on the y axis
41 | #   axis.title : y-axis title
42 | # Returns the ggplot object.
43 | tcr.class.boxplot <- function(measure, axis.title) {
44 |   ggboxplot(ccr8.data, x = "TCR.classification", y = measure,
45 |             color = "TCR.classification",add="jitter",add.params=list(size=0.5),
46 |             x.text.angle=0) + labs(x='TCR.classification', y= axis.title) +
47 |     theme(legend.position="none") +
48 |     geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) +
49 |     scale_color_manual(values=c("type I non-MPR"="#8CB4A3","type II non-MPR"="#7998AD")) +
50 |     theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1))
51 | }
52 | 
53 | p <- tcr.class.boxplot("number.CCR8.cells", 'number of CCR8+ Cells per mm2')
54 | p
55 | ggsave("number.CCR8.cells.pdf", p, width = 2.5, height = 3.5)
56 | 
57 | p <- tcr.class.boxplot("number.IC", 'number of immune Cells per mm2')
58 | p
59 | ggsave("number.immune.cells.pdf", p, width = 2.5, height = 3.5)
60 | 
61 | p <- tcr.class.boxplot("percentage.CCR8.IC", 'proportion of CCR8+ cells in immune Cells')
62 | p
63 | ggsave("proportion.CCR8.cells.pdf", p, width = 2.5, height = 3.5)
64 | 
65 | 
66 | # one unit per row, stacked by group: bar height = number of rows per TCR class
67 | ggplot(ccr8.data %>% mutate(value=1), aes(fill=as.factor(group), y=value, x=TCR.classification)) + 
68 |     geom_bar(position="stack", stat="identity") + scale_fill_manual(values=c("#4DBBD5", "#00A087", "#3C5488", "#F39B7F")) + theme_classic()
69 | 
70 | ggsave("CCR8.IHC.patient.number.pdf", width = 3, height = 4)
71 | ```
72 | 
73 | 


--------------------------------------------------------------------------------
/main_figure/figure5_and_related_supplemental_figure/Tex_relevant_clononumber_6group.R:
--------------------------------------------------------------------------------
 1 | # Clonotype number of Tex-relevant cells per sample, plotted by group
 2 | # (group3 split by cancer type) and by pathological response.
 3 | # NOTE(review): ggboxplot()/geom_signif()/scale_color_manual() are used without a
 4 | # library() line in this script; the plotting packages must already be attached.
 5 | Tex_relevant <- read.csv("CD8Tex_relevant_clonotype_number_over2.csv")
 6 | dim(Tex_relevant)
 7 | Tex_relevant <- Tex_relevant[,-1]
 8 | colnames(Tex_relevant) <- c("sampleID","number")
 9 | rownames(Tex_relevant) <- Tex_relevant$sampleID
10 | head(Tex_relevant)
11 | 
12 | 
13 | cluster.info <- read.csv("NMF_all_group_5.csv")
14 | dim(cluster.info)
15 | cluster.info <- cluster.info[,-1]
16 | rownames(cluster.info) <- cluster.info$sampleID
17 | head(cluster.info)
18 | 
19 | #include samples with TCR data and in our clustering data
20 | samples <- intersect(Tex_relevant$sampleID, cluster.info$sampleID)
21 | length(samples)
22 | 
23 | # both tables are indexed by the same sample vector, so their rows line up
24 | cluster.info <- cluster.info[samples,]
25 | Tex_relevant <- Tex_relevant[samples,]
26 | 
27 | Tex_relevant$group <- paste0("group",cluster.info$group)
28 | Tex_relevant$group <- factor(Tex_relevant$group,levels = c("group1","group2","group3","group4","group5"))
29 | head(Tex_relevant)
30 | 
31 | 
32 | 
33 | library(readxl)
34 | sample.info <- as.data.frame(read_excel("neoadjuvant/data/other/sample.xlsx"))
35 | sample.info <- sample.info[,c("sampleID","pathological_response","cancer_type")]
36 | # MPR and pCR are both labelled "MPR"; every other value becomes "non-MPR"
37 | sample.info$pathology <- ifelse(sample.info$pathological_response %in% c("MPR","pCR"),
38 |                                 "MPR", "non-MPR")
39 | rownames(sample.info) <- sample.info$sampleID
40 | sample.info <- sample.info[sample.info$sampleID %in% c(Tex_relevant$sampleID),]
41 | dim(sample.info)
42 | head(sample.info)
43 | 
44 | Tex_relevant <- merge(Tex_relevant,sample.info,by = "sampleID")
45 | head(Tex_relevant)
46 | 
47 | # group3 is split by cancer type (group3_<cancer_type>); other groups keep their name
48 | group.label <- as.character(Tex_relevant$group)
49 | Tex_relevant$new_group <- ifelse(group.label %in% c("group3"),
50 |                                  paste0(group.label,"_",Tex_relevant$cancer_type),
51 |                                  group.label)
52 | head(Tex_relevant)
53 | # any label outside the six levels below becomes NA
54 | Tex_relevant$new_group <- factor(Tex_relevant$new_group,levels = c("group1",
55 |                                                                "group2",
56 |                                                                "group3_LUSC",
57 |                                                                "group3_LUAD",
58 |                                                                "group4",
59 |                                                                "group5"))
60 | # No geom_signif() layer on this plot: the only comparison list in this script is
61 | # defined further down and names levels of `cluster`, which are not on this x axis.
62 | # NOTE(review): add a list of new_group pairs here if brackets are wanted.
63 | ggboxplot(Tex_relevant, x = "new_group", y = "number",
64 |           color = "pathology",add="jitter",add.params=list(size=0.5),
65 |           x.text.angle=90) + labs(x='group', y= 'clonotype number of Tex-relevent cells') +
66 |   scale_color_manual(values=c("MPR" = "#2868A6", "non-MPR" = "#B1161C"))
67 | 
68 | Tex_relevant$cluster <- paste0(Tex_relevant$new_group,"_",Tex_relevant$pathology)
69 | Tex_relevant$cluster <- factor(Tex_relevant$cluster,levels = c("group1_MPR","group1_non-MPR",
70 |                                                        "group2_MPR","group2_non-MPR",
71 |                                                        "group3_LUSC_MPR","group3_LUSC_non-MPR",
72 |                                                        "group3_LUAD_MPR","group3_LUAD_non-MPR",
73 |                                                        "group4_MPR","group4_non-MPR",
74 |                                                        "group5_MPR","group5_non-MPR"))
75 | 
76 | # pairs of `cluster` levels tested with wilcox.test in the plot below
77 | compaired <- list(c("group2_MPR","group2_non-MPR"),
78 |                   c("group3_LUSC_MPR","group3_LUSC_non-MPR"),
79 |                   c("group3_LUAD_MPR","group3_LUAD_non-MPR"),
80 |                   c("group5_MPR","group5_non-MPR"),
81 |                   c("group4_non-MPR","group3_LUSC_non-MPR"),
82 |                   c("group4_non-MPR","group3_LUAD_non-MPR"),
83 |                   c("group3_LUSC_non-MPR","group3_LUAD_non-MPR"),
84 |                   c("group4_non-MPR","group3_LUSC_MPR"),
85 |                   c("group4_non-MPR","group3_LUAD_MPR"))
86 | 
87 | ggboxplot(Tex_relevant, x = "cluster", y = "number",
88 |           color = "cluster",add="jitter",add.params=list(size=0.5),
89 |           x.text.angle=90) + labs(x='group', y= 'clonotype number of Tex-relevent cells') +
90 |   theme(legend.position="none") +
91 |   geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test)
92 | 


--------------------------------------------------------------------------------
/main_figure/figure5_and_related_supplemental_figure/Treg_clonenumber_6group.R:
--------------------------------------------------------------------------------
 1 | # Clonotype number of expanded CD4 Treg cells per sample, plotted by group
 2 | # (group3 split by cancer type) and by pathological response.
 3 | # NOTE(review): ggboxplot()/geom_signif()/scale_color_manual() are used without a
 4 | # library() line in this script; the plotting packages must already be attached.
 5 | Treg_clone <- read.csv("expanded_CD4Treg_clonotype_number_over2.csv")
 6 | dim(Treg_clone)
 7 | Treg_clone <- Treg_clone[,-1]
 8 | colnames(Treg_clone) <- c("sampleID","number")
 9 | rownames(Treg_clone) <- Treg_clone$sampleID
10 | head(Treg_clone)
11 | 
12 | 
13 | cluster.info <- read.csv("NMF_all_group_5.csv")
14 | dim(cluster.info)
15 | cluster.info <- cluster.info[,-1]
16 | rownames(cluster.info) <- cluster.info$sampleID
17 | head(cluster.info)
18 | 
19 | #include samples with TCR data and in our clustering data
20 | samples <- intersect(Treg_clone$sampleID, cluster.info$sampleID)
21 | length(samples)
22 | 
23 | # both tables are indexed by the same sample vector, so their rows line up
24 | cluster.info <- cluster.info[samples,]
25 | Treg_clone <- Treg_clone[samples,]
26 | 
27 | Treg_clone$group <- paste0("group",cluster.info$group)
28 | Treg_clone$group <- factor(Treg_clone$group,levels = c("group1","group2","group3","group4","group5"))
29 | head(Treg_clone)
30 | 
31 | 
32 | 
33 | library(readxl)
34 | sample.info <- as.data.frame(read_excel("sample.xlsx"))
35 | sample.info <- sample.info[,c("sampleID","pathological_response","cancer_type")]
36 | # MPR and pCR are both labelled "MPR"; every other value becomes "non-MPR"
37 | sample.info$pathology <- ifelse(sample.info$pathological_response %in% c("MPR","pCR"),
38 |                                 "MPR", "non-MPR")
39 | rownames(sample.info) <- sample.info$sampleID
40 | sample.info <- sample.info[sample.info$sampleID %in% c(Treg_clone$sampleID),]
41 | dim(sample.info)
42 | head(sample.info)
43 | 
44 | Treg_clone <- merge(Treg_clone,sample.info,by = "sampleID")
45 | head(Treg_clone)
46 | 
47 | # group3 is split by cancer type (group3_<cancer_type>); other groups keep their name
48 | group.label <- as.character(Treg_clone$group)
49 | Treg_clone$new_group <- ifelse(group.label %in% c("group3"),
50 |                                paste0(group.label,"_",Treg_clone$cancer_type),
51 |                                group.label)
52 | head(Treg_clone)
53 | 
54 | # any label outside the six levels below becomes NA
55 | Treg_clone$new_group <- factor(Treg_clone$new_group,levels = c("group1",
56 |                                                                    "group2",
57 |                                                                    "group3_LUSC",
58 |                                                                    "group3_LUAD",
59 |                                                                    "group4",
60 |                                                                    "group5"))
61 | # No geom_signif() layer on this plot: the only comparison list in this script is
62 | # defined further down and names levels of `cluster`, which are not on this x axis.
63 | # NOTE(review): add a list of new_group pairs here if brackets are wanted.
64 | ggboxplot(Treg_clone, x = "new_group", y = "number",
65 |           color = "pathology",add="jitter",add.params=list(size=0.5),
66 |           x.text.angle=90) + labs(x='group', y= 'clonotype number of expanded CD4 Treg cells') +
67 |   scale_color_manual(values=c("MPR" = "#2868A6", "non-MPR" = "#B1161C"))
68 | 
69 | Treg_clone$cluster <- paste0(Treg_clone$new_group,"_",Treg_clone$pathology)
70 | Treg_clone$cluster <- factor(Treg_clone$cluster,levels = c("group1_MPR","group1_non-MPR",
71 |                                                                "group2_MPR","group2_non-MPR",
72 |                                                                "group3_LUSC_MPR","group3_LUSC_non-MPR",
73 |                                                                "group3_LUAD_MPR","group3_LUAD_non-MPR",
74 |                                                                "group4_MPR","group4_non-MPR",
75 |                                                                "group5_MPR","group5_non-MPR"))
76 | 
77 | # pairs of `cluster` levels tested with wilcox.test in the plot below
78 | compaired <- list(c("group2_MPR","group2_non-MPR"),
79 |                   c("group3_LUSC_MPR","group3_LUSC_non-MPR"),
80 |                   c("group3_LUAD_MPR","group3_LUAD_non-MPR"),
81 |                   c("group5_MPR","group5_non-MPR"),
82 |                   c("group3_LUSC_non-MPR","group3_LUAD_non-MPR"),
83 |                   c("group4_non-MPR","group3_LUSC_non-MPR"),
84 |                   c("group4_non-MPR","group3_LUAD_non-MPR"),
85 |                   c("group4_non-MPR","group3_LUSC_MPR"),
86 |                   c("group4_non-MPR","group3_LUAD_MPR"))
87 | 
88 | ggboxplot(Treg_clone, x = "cluster", y = "number",
89 |           color = "cluster",add="jitter",add.params=list(size=0.5),
90 |           x.text.angle=90) + labs(x='group', y= 'clonotype number of expanded CD4 Treg cells') +
91 |   theme(legend.position="none") +
92 |   geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test)
93 | 


--------------------------------------------------------------------------------
/main_figure/figure5_and_related_supplemental_figure/fig5D_Tex_Treg_clone_number_scatter.R:
--------------------------------------------------------------------------------
 1 | # Figure 5D: per-sample scatter plots of Tex-relevant versus Treg clonotype numbers,
 2 | # drawn separately for non-MPR and MPR samples.
 3 | # All input files are read relative to the working directory.
 4 | library(readxl)
 5 | library(ggpubr)  # provides ggscatter(); attaching it also attaches ggplot2
 6 | 
 7 | # Tex-relevant clonotype number per sample (the first CSV column is dropped).
 8 | Tex_relevant <- read.csv("CD8Tex_relevant_clonotype_number_over2.csv")
 9 | Tex_relevant <- Tex_relevant[,-1]
10 | colnames(Tex_relevant) <- c("sampleID","number")
11 | rownames(Tex_relevant) <- Tex_relevant$sampleID
12 | 
13 | # Group assignment per sample. The path used to be an absolute home-directory path;
14 | # NOTE(review): assumes the repository copy of NMF_all_group_5.csv is the same table.
15 | all.group <- read.csv("NMF_all_group_5.csv")
16 | all.group <- all.group[,-1]
17 | rownames(all.group) <- all.group$sampleID
18 | 
19 | # Keep only samples present in both tables, in the same order.
20 | samples <- intersect(Tex_relevant$sampleID, all.group$sampleID)
21 | length(samples)
22 | Tex_relevant <- Tex_relevant[samples,]
23 | all.group <- all.group[samples,]
24 | Tex_relevant$group <- all.group$group
25 | head(Tex_relevant)
26 | 
27 | # Sample annotation: pathological response and cancer type.
28 | sample.info <- as.data.frame(read_excel("sample.xlsx"))
29 | sample.info <- sample.info[,c("sampleID","pathological_response","cancer_type")]
30 | head(sample.info)
31 | # "MPR" and "pCR" both count as MPR; every other value (including NA) is non-MPR.
32 | sample.info$pathology <- ifelse(sample.info$pathological_response %in% c("MPR","pCR"), "MPR", "non-MPR")
33 | rownames(sample.info) <- sample.info$sampleID
34 | sample.info <- sample.info[rownames(Tex_relevant),]
35 | 
36 | Tex_relevant <- merge(Tex_relevant,sample.info,by = "sampleID")
37 | head(Tex_relevant)
38 | Tex_relevant$group <- paste0("group",Tex_relevant$group)
39 | colnames(Tex_relevant) <- c("sampleID","Tex_number","group","pathological_response_detail","cancer_type","pathological_response")
40 | 
41 | # Treg clonotype number per sample (the first CSV column is dropped).
42 | Treg <- read.csv("expanded_CD4Treg_clonotype_number_over2.csv")
43 | Treg <- Treg[,-1]
44 | colnames(Treg) <- c("sampleID","number")
45 | head(Treg)
46 | dim(Treg)
47 | 
48 | # Look the Treg number up by sample ID; samples missing from the Treg table get NA.
49 | Tex_relevant$Treg_number <- Treg$number[match(Tex_relevant$sampleID, Treg$sampleID)]
50 | head(Tex_relevant)
51 | 
52 | # One colour per group, shared by both panels.
53 | group.colors <- c("group1"="#E84C35","group2"="#4FBAD6",
54 |                   "group3"="#00A289","group4"="#3C5487",
55 |                   "group5"="#F29B80")
56 | 
57 | # Scatter of Tex vs Treg clonotype numbers for the samples with the given
58 | # pathological_response value ("MPR" or "non-MPR").
59 | # Dashed lines: horizontal at Treg_number = 9 and the identity line y = x.
60 | plot_clone_scatter <- function(response) {
61 |   ggscatter(Tex_relevant[Tex_relevant$pathological_response %in% response,],
62 |             x = "Tex_number",
63 |             y = "Treg_number", color = "group",shape = "cancer_type") +
64 |     geom_hline(yintercept = 9,linetype = "dashed") +
65 |     scale_color_manual(values = group.colors) +
66 |     geom_abline(intercept = 0, slope = 1,linetype = "dashed") +
67 |     xlim(0,100) + ylim(0,100)
68 | }
69 | 
70 | plot_clone_scatter("non-MPR")
71 | 
72 | plot_clone_scatter("MPR")
73 | 


--------------------------------------------------------------------------------
/main_figure/figure5_and_related_supplemental_figure/figure5E_alluvium.R:
--------------------------------------------------------------------------------
 1 | # Figure 5E: alluvial plot linking each group/response sub-group to a tolerance class
 2 | # (MPR, non-MPR with high Treg clonotype number, non-MPR with low Treg clonotype number).
 3 | # All input files are read relative to the working directory.
 4 | library(readxl)      # read_excel() was called without this package being attached
 5 | library(ggalluvial)
 6 | 
 7 | # Group assignment per sample (the first CSV column is dropped).
 8 | # NOTE(review): the path used to be an absolute home-directory path; assumes the
 9 | # repository copy of NMF_all_group_5.csv is the same table.
10 | cluster.info <- read.csv("NMF_all_group_5.csv")
11 | cluster.info <- cluster.info[,-1]
12 | head(cluster.info)
13 | 
14 | # Sample annotation, restricted to samples that have a group assignment.
15 | # NOTE(review): path changed from an absolute one to match fig5D_Tex_Treg_clone_number_scatter.R.
16 | sample.info <- as.data.frame(read_excel("sample.xlsx"))
17 | head(sample.info)
18 | sample.info <- sample.info[sample.info$sampleID %in% cluster.info$sampleID,]
19 | sample.info <- sample.info[,c("sampleID","pathological_response","cancer_type")]
20 | 
21 | # "MPR" and "pCR" both count as MPR; every other value (including NA) is non-MPR.
22 | sample.info$pathological_response_level <- ifelse(
23 |   sample.info$pathological_response %in% c("MPR","pCR"), "MPR", "non-MPR")
24 | rownames(sample.info) <- sample.info$sampleID
25 | head(sample.info)
26 | 
27 | # Reorder the annotation like cluster.info so the group column can be copied over.
28 | sample.info <- sample.info[cluster.info$sampleID,]
29 | sample.info$group <- paste0("group",cluster.info$group)
30 | sample.info$sub.group <- paste0(sample.info$group,"_",sample.info$pathological_response_level)
31 | sample.info$sub.group <- factor(sample.info$sub.group,
32 |                                 levels = c("group1_MPR","group1_non-MPR",
33 |                                            "group2_MPR","group2_non-MPR",
34 |                                            "group3_MPR","group3_non-MPR",
35 |                                            "group4_MPR","group4_non-MPR",
36 |                                            "group5_MPR","group5_non-MPR"))
37 | head(sample.info)
38 | 
39 | # Treg clonotype number per sample (the first CSV column is dropped).
40 | Treg_clone <- read.csv("expanded_CD4Treg_clonotype_number_over2.csv")
41 | dim(Treg_clone)
42 | Treg_clone <- Treg_clone[,-1]
43 | colnames(Treg_clone) <- c("sampleID","number")
44 | rownames(Treg_clone) <- Treg_clone$sampleID
45 | head(Treg_clone)
46 | 
47 | Treg_clone <- Treg_clone[Treg_clone$sampleID %in% cluster.info$sampleID,]
48 | dim(Treg_clone)
49 | # A sample is "high" when it has at least 9 Treg clonotypes, otherwise "low".
50 | Treg_clone$Treg_level <- ifelse(Treg_clone$number >= 9, "high", "low")
51 | colnames(Treg_clone) <- c("sampleID","Treg_number","Treg_level")
52 | head(Treg_clone)
53 | dim(Treg_clone)
54 | 
55 | head(sample.info)
56 | 
57 | sample.info <- merge(sample.info, Treg_clone,by = "sampleID")
58 | head(sample.info)
59 | 
60 | # MPR samples form one class; non-MPR samples are split by their Treg level.
61 | cluster <- ifelse(sample.info$pathological_response_level %in% c("MPR"),
62 |                   "MPR",
63 |                   paste0(sample.info$pathological_response_level,"_",sample.info$Treg_level))
64 | unique(cluster)
65 | 
66 | sample.info$cluster <- factor(cluster, levels = c("MPR","non-MPR_high","non-MPR_low"))
67 | head(sample.info)
68 | 
69 | # Number of samples for every sub.group x tolerance combination.
70 | mm <- as.data.frame(table(sample.info$sub.group,sample.info$cluster))
71 | colnames(mm) <- c("sub.group","tolerance","number")
72 | mm$tolerance <- factor(mm$tolerance,levels = c("MPR","non-MPR_high",
73 |                                                "non-MPR_low"))
74 | head(mm)
75 | 
76 | ggplot(data = mm,
77 |        aes(axis1 = sub.group,   # First axis: group/response sub-group
78 |            axis2 = tolerance,   # Second axis: tolerance class
79 |            y = number)) +
80 |   geom_alluvium(aes(fill = tolerance,order = tolerance)) +
81 |   geom_stratum() +
82 |   geom_text(stat = "stratum",
83 |             aes(label = after_stat(stratum))) +
84 |   theme_void() +
85 |   scale_fill_manual(values=c("MPR"="#D9BFAE",
86 |                              "non-MPR_high"="#8CB4A3",
87 |                              "non-MPR_low"="#7998AD"))
88 | 


--------------------------------------------------------------------------------
/main_figure/figure6_and_related_supplemental_figure/HNSCC.analysis.Rmd:
--------------------------------------------------------------------------------
  1 | ```{r}
  2 | library(dplyr)
  3 | library(tidyverse)
  4 | library(readxl)
  5 | library(ggpubr)
  6 | library(survival)
  7 | library(ggsurvfit)
  8 | library(survminer)
  9 | ```
 10 | 
 11 | ```{r}
 12 | # Read the patient metadata and the per-cell table, then keep post-treatment cells with metadata attached.
 13 | HNSCC.meta <- read_csv("HNSCC.meta.csv")
 14 | HNSCC.data <- read_csv("HNSCC.T_with_TCR_harmony.csv")
 15 | HNSCC.data
 16 | HNSCC.post <- left_join(dplyr::filter(HNSCC.data, state=="post-Tx"), HNSCC.meta, by="patientID")
 17 | HNSCC.post
 18 | ```
 19 | 
 20 | ```{r}
 21 | # plot pathological response (PRR) in association with RFS; one row per patient that has Texp cells
 22 | HNSCC.post.summary <- HNSCC.post %>% filter(T_new_name %in% c("Texp", "expanded terminal Tex")) %>% group_by(patientID, T_new_name) %>% summarise(n=n()) %>%
 23 |   mutate(freq = n / sum(n)) %>% filter(T_new_name=="Texp") %>% left_join(HNSCC.meta, by="patientID")
 24 | HNSCC.post.summary
 25 | compaired <- list(c("yes", "no"))  # RFS_event groups to test; previously only defined in a later chunk
 26 | ggboxplot(HNSCC.post.summary, x = "RFS_event", y ="Pathological response %" ,
 27 |           color = "RFS_event",add="jitter",add.params=list(size=0.5),
 28 |           x.text.angle=0) + labs(x='RFS_event', y= 'PRR') +
 29 |   theme(legend.position="none") +
 30 |   geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) + scale_color_manual(values=c("yes"="#C6595A","no"="#878586")) + theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1))
 31 | ggsave("HNSCC.RFS.PRR.pdf", width = 2.5, height = 3.5)
 32 | ```
 33 | 
 34 | 
 35 | ```{r}
 36 | # Texp fraction among Texp + expanded terminal Tex cells per patient, compared between RFS_event groups
 37 | HNSCC.post.summary <- HNSCC.post %>%
 38 |   filter(T_new_name %in% c("Texp", "expanded terminal Tex")) %>% count(patientID, T_new_name) %>%
 39 |   group_by(patientID) %>% mutate(freq = n / sum(n)) %>% filter(T_new_name=="Texp") %>% left_join(HNSCC.meta, by="patientID")
 40 | HNSCC.post.summary
 41 | compaired <- list(c("yes", "no"))
 42 | ggboxplot(HNSCC.post.summary, x = "RFS_event", y = "freq", color = "RFS_event",
 43 |           add="jitter", add.params=list(size=0.5), x.text.angle=0) +
 44 |   labs(x='RFS_event', y= 'Texp in tex-relevant cells') +
 45 |   geom_signif(comparisons = compaired, step_increase = 0.1, map_signif_level = F, test = wilcox.test) +
 46 |   scale_color_manual(values=c("yes"="#C6595A","no"="#878586")) +
 47 |   theme(legend.position="none", axis.text.x = element_text(angle=45, vjust=1, hjust=1))
 48 | ggsave("HNSCC.RFS.Texp.pdf", width = 2.5, height = 3.5)
 49 | 
 50 | 
 51 | ```
 52 | 
 53 | 
 54 | 
 55 | ```{r}
 56 | # plot treg in association with RFS
 57 | HNSCC.post.summary <- HNSCC.post %>% filter(majority_voting %in% c("CD4T_Treg_CCR8", "CD4T_Treg_FOXP3", "CD4T_Treg_MKI67")) %>% group_by(patientID, majority_voting) %>% summarise(n=n()) %>%
 58 |   mutate(freq = n / sum(n)) %>% filter(majority_voting=="CD4T_Treg_CCR8") %>% left_join(HNSCC.meta, by="patientID")
 59 | HNSCC.post.summary
 60 | 
 61 | compaired <- list(c("yes", "no"))
 62 | # freq is the CCR8 Treg share of the three Treg subsets per patient, so the y label must say so (it used to read "Texp ...").
 63 | ggboxplot(HNSCC.post.summary, x = "RFS_event", y = "freq",
 64 |           color = "RFS_event",add="jitter",add.params=list(size=0.5),
 65 |           x.text.angle=0) + labs(x='RFS_event', y= 'CCR8 Treg in all Tregs') +
 66 |   theme(legend.position="none") +
 67 |   geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) + scale_color_manual(values=c("yes"="#C6595A","no"="#878586")) + theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1))
 68 | #ggsave("HNSCC.RFS.CCR8Treg.pdf", width = 2.5, height = 3.5)
 69 | ```
 70 | 
 71 | ```{r}
 72 | # plot treg in association with the pathological response category (PRR.cat: Low / Medium / High)
 73 | HNSCC.post.summary <- HNSCC.post %>% filter(majority_voting %in% c("CD4T_Treg_CCR8", "CD4T_Treg_FOXP3", "CD4T_Treg_MKI67")) %>% group_by(patientID, majority_voting) %>% summarise(n=n()) %>%
 74 |   mutate(freq = n / sum(n)) %>% filter(majority_voting=="CD4T_Treg_CCR8") %>% left_join(HNSCC.meta, by="patientID")
 75 | HNSCC.post.summary
 76 | # Comparisons and colours must name PRR.cat levels; the former "yes"/"no" values matched nothing on this axis.
 77 | compaired <- list(c("Low", "Medium"), c("Medium", "High"), c("Low", "High"))
 78 | # NOTE(review): the Medium colour is a placeholder; if the yes/no MPR column was intended, use x = color = "MPR" instead.
 79 | ggboxplot(HNSCC.post.summary, x = "PRR.cat", y = "freq",
 80 |           color = "PRR.cat",add="jitter",add.params=list(size=0.5),
 81 |           x.text.angle=0) + labs(x='PRR.cat', y= 'CCR8 Treg in all Tregs') +
 82 |   theme(legend.position="none") +
 83 |   geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = F,test = wilcox.test) + scale_color_manual(values=c("High"="#C6595A","Medium"="#F29B80","Low"="#878586")) + theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1))
 84 | #ggsave("HNSCC.PRRcat.CCR8Treg.pdf", width = 2.5, height = 3.5)
 85 | ```
 86 | 
 87 | ```{r}
 88 | # Distinct clonotypes (clononumber >= 3) per patient for CCR8 Treg and LAYN terminal Tex, plotted against each other
 89 | HNSCC.post.summary <- HNSCC.post %>%
 90 |   filter(majority_voting %in% c("CD8T_terminal_Tex_LAYN", "CD4T_Treg_CCR8"), clononumber>=3) %>%
 91 |   group_by(patientID, majority_voting) %>% summarise(n=n_distinct(clonotype))
 92 | HNSCC.post.summary
 93 | HNSCC.Treg.summary <- HNSCC.post.summary %>% filter(majority_voting=="CD4T_Treg_CCR8") %>% dplyr::rename(numberTregClone = n)
 94 | HNSCC.Treg.summary
 95 | HNSCC.Tex.summary <- HNSCC.post.summary %>% filter(majority_voting=="CD8T_terminal_Tex_LAYN") %>% dplyr::rename(numberTexClone = n)
 96 | HNSCC.Clone.summary <- HNSCC.Tex.summary %>%
 97 |   left_join(dplyr::select(HNSCC.Treg.summary, patientID, numberTregClone), by="patientID") %>%
 98 |   mutate(numberTregClone=ifelse(is.na(numberTregClone), 0, numberTregClone)) %>%  # no Treg clone found -> 0
 99 |   left_join(HNSCC.meta, by="patientID")
100 | HNSCC.Clone.summary
101 | 
102 | ggplot(HNSCC.Clone.summary, aes(x=numberTexClone, y=numberTregClone, color=MPR)) + geom_point() + coord_equal() + theme_classic()
103 | ```
104 | 
105 | 
106 | 
107 | 
108 | 
109 | 
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 


--------------------------------------------------------------------------------
/main_figure/figure6_and_related_supplemental_figure/HNSCC.meta.csv:
--------------------------------------------------------------------------------
 1 | patientID,Cohort,Cycles delivered,RFS_event,OS,Died ,Clinical to pathologic downstaging,RECIST response excluding non measurable,Volumetric response,Viable Tumor % post,Pathological response %,PRR.cat,MPR,Any response >=10% (90% or less viable tumor)  ,Comment
 2 | P01,Mono,1,no,yes,no,NA,stable/progress,yes,NA,NA,NA,NA,NA,"P01 did not have tumor resection following treatment and thus tumor regression score, pathological response, and downstaging could not be assessed"
 3 | P02,Mono,2,NA,NA,NA,no,stable/progress,no,90,10,Medium,no,yes,"P02 was originally enrolled but later deemed to have been ineligible and therefore excluded from response analyses, although sample collection was performed"
 4 | P04,Mono,2,no,yes,no,yes,not measurable,no,90,10,Medium,no,yes,P03 ID was skipped because consented patient was excluded upon screening and not treated
 5 | P05,Combo,2,no,yes,no,yes,not measurable,yes,5,95,High,yes,yes,
 6 | P06,Combo,2,no,yes,no,yes,not measurable,no,70,30,Medium,no,yes,
 7 | P07,Combo,2,no,yes,no,no,not measurable,no,90,10,Medium,no,yes,
 8 | P08,Mono,2,no,yes,no,no,not measurable,no,95,5,Low,no,no,
 9 | P09,Mono,2,no,yes,no,yes,not measurable,yes,20,80,High,no,yes,
10 | P10,Combo,2,yes,no,yes,no,stable/progress,no,85,15,Medium,no,yes,
11 | P12,Combo,2,no,yes,no,no,not measurable,yes,95,5,Low,no,no,P11 ID was skipped because consented patient was excluded upon screening and not treated
12 | P13,Combo,2,no,yes,no,yes,response,yes,0,100,High,yes,yes,
13 | P14,Mono,2,yes,yes,no,yes,stable/progress,yes,70,30,Medium,no,yes,
14 | P15,Mono,2,no,yes,no,yes,stable/progress,no,95,5,Low,no,no,
15 | P16,Combo,2,no,yes,no,no,not measurable,yes,95,5,Low,no,no,
16 | P17,Combo,2,no,yes,no,yes,response,yes,5,95,High,yes,yes,
17 | P18,Combo,1,no,yes,no,yes,not measurable,no,90,10,Medium,no,yes,
18 | P19,Mono,1,no,yes,no,yes,not measurable,yes,5,95,High,yes,yes,
19 | P20,Mono,2,no,yes,no,no,not measurable,no,95,5,Low,no,no,
20 | P21,Combo,1,yes,no,no,no,stable/progress,no,30,70,High,no,yes,
21 | P22,Mono,2,yes,yes,no,yes,not measurable,yes,80,20,Medium,no,yes,
22 | P23,Mono,2,no,yes,no,yes,stable/progress,no,90,10,Medium,no,yes,
23 | P24,Combo,2,no,yes,no,no,stable/progress,no,95,5,Low,no,no,
24 | P25,Combo,1,no,yes,no,no,stable/progress,no,95,5,Low,no,no,
25 | P26,Combo,2,yes,no,yes,yes,response,yes,90,10,Medium,no,yes,
26 | P27,Mono,2,no,yes,no,yes,stable/progress,no,95,5,Low,no,no,
27 | P28,Mono,2,no,yes,no,no,response,yes,85,15,Medium,no,yes,
28 | P29,Mono,2,no,yes,no,yes,stable/progress,yes,95,5,Low,no,no,
29 | P30,Combo,2,no,yes,no,yes,not measurable,yes,80,20,Medium,no,yes,
30 | P31,Mono,2,yes,no,yes,no,stable/progress,no,95,5,Low,no,no,
31 | P32,Combo,1,no,yes,no,yes,stable/progress,yes,40,60,High,no,yes,
32 | ,,,,,,,,,,,,,,
33 | ,,,,,,,,,,,,,,
34 | 


--------------------------------------------------------------------------------
/main_figure/figure6_and_related_supplemental_figure/README.md:
--------------------------------------------------------------------------------
 1 | # Description of files
 2 | 
 3 | ## celltypist.ipynb
 4 | used to analyze the HNSCC data mentioned in Supplemental Figure S8
 5 | 
 6 | ## survival_analysis.Rmd
 7 | used to generate figures in Figure 6 and Supplemental Figure S8.
 8 | involves data generated by this study
 9 | 
10 | ## HNSCC.analysis.Rmd
11 | used to generate the HNSCC-related figure in Supplemental Figure S8
12 | 
13 | ## analysis_of_bulk_RNAseq_survival.Rmd
14 | used to generate the bulk RNA-seq survival figure in Supplemental Figure S8
15 | 


--------------------------------------------------------------------------------
/main_figure/figure6_and_related_supplemental_figure/analysis_of_bulk_RNAseq_survival.Rmd:
--------------------------------------------------------------------------------
  1 | ```{r}
  2 | library(matrixStats)
  3 | library(tidyverse)
  4 | library(readxl)
  5 | library(ggpubr)
  6 | library(survival)
  7 | #library(ggsurvfit)
  8 | library(survminer)
  9 | ```
 10 | 
 11 | 
 12 | 
 13 | # OAK study
 14 | ```{r}
 15 | OAK.meta <- read.csv("Genetech/EGAF00005797822/go28915_anon_subsetted_BYN_n699.csv")  # sample metadata table
 16 | #dim(OAK.meta)
 17 | #head(OAK.meta)
 18 | # Expression table; each column name is reduced to its second "."-separated field.
 19 | OAK <- read.csv("Genetech/EGAF00005797824/anonymized_OAK-TPMs2.csv")
 20 | dim(OAK)
 21 | colnames(OAK) <- sapply(colnames(OAK), function(x) str_split(x, "[.]")[[1]][2])
 22 | OAK[1:10,1:10]
 23 | # NOTE(review): assumes the metadata rows are in the same order as the expression columns - confirm.
 24 | OAK.meta$sample <- colnames(OAK)
 25 | # Rescale every column so that it sums to 1e6.
 26 | OAK <- t(t(OAK) / rowSums(t(OAK)) * 1000000)
 27 | #OAK
 28 | #colSums(OAK)
 29 | # Keep only the samples with ACTARM "MPDL3280A" and HIST "NON-SQUAMOUS".
 30 | OAK.meta.ICI <- OAK.meta %>% filter(ACTARM=="MPDL3280A" & HIST=="NON-SQUAMOUS")
 31 | ICI.sampleID <- OAK.meta.ICI$sample
 32 | OAK.ICI <- OAK[, ICI.sampleID]
 33 | # Row-wise z-score across the selected samples: subtract the row mean, divide by the row SD.
 34 | OAK.ICI.z.scored <- (OAK.ICI - rowMeans(OAK.ICI) ) / rowSds(OAK.ICI) 
 35 | 
 36 | ```
 37 | ## validating PD1 vs chemo
 38 | ```{r}
 39 | fit.data <- OAK.meta  # all samples, both treatment arms
 40 | fit.cox <- coxph(Surv(fit.data$OS_MONTHS, fit.data$OS_CENSOR) ~ fit.data$ACTARM, data = fit.data)
 41 | summary(fit.cox)
 42 | # Kaplan-Meier curves of overall survival per treatment arm.
 43 | fit <- survfit(Surv(fit.data$OS_MONTHS, fit.data$OS_CENSOR) ~ fit.data$ACTARM, data = fit.data)
 44 | print(fit)
 45 | # x label was "Months from surgery"; changed to "Months" to match the later chunks of this file.
 46 | p <- ggsurvplot(fit,
 47 |           pval = TRUE, conf.int = FALSE,
 48 |           risk.table = TRUE, # Add risk table
 49 |           risk.table.col = "strata", # Change risk table color by groups
 50 |           linetype = "strata", # Change line type by groups
 51 |           #surv.median.line = "hv", # Specify median survival
 52 |           ggtheme = theme_classic(), # Change ggplot2 theme
 53 |           risk.table.pos = "in",
 54 |           xlab = "Months",
 55 |           title = "",
 56 |           palette = c("#00C598", "#C9EFE8","#9FDC98", "#24A07E")
 57 | )
 58 | p
 59 | 
 60 | # The PFS indicator must be paired with the PFS time, not with OS_MONTHS as before.
 61 | ## PFS  NOTE(review): assumes the metadata has a PFS_MONTHS column next to PFS_CENSOR - confirm.
 62 | fit.cox <- coxph(Surv(fit.data$PFS_MONTHS, fit.data$PFS_CENSOR) ~ fit.data$ACTARM, data = fit.data)
 63 | summary(fit.cox)
 64 | # Kaplan-Meier curves of progression-free survival per treatment arm.
 65 | fit <- survfit(Surv(fit.data$PFS_MONTHS, fit.data$PFS_CENSOR) ~ fit.data$ACTARM, data = fit.data)
 66 | print(fit)
 67 | 
 68 | p <- ggsurvplot(fit,
 69 |           pval = TRUE, conf.int = FALSE,
 70 |           risk.table = TRUE, # Add risk table
 71 |           risk.table.col = "strata", # Change risk table color by groups
 72 |           linetype = "strata", # Change line type by groups
 73 |           #surv.median.line = "hv", # Specify median survival
 74 |           ggtheme = theme_classic(), # Change ggplot2 theme
 75 |           risk.table.pos = "in",
 76 |           xlab = "Months",
 77 |           title = "",
 78 |           palette = c("#00C598", "#C9EFE8","#9FDC98", "#24A07E")
 79 | )
 80 | p
 81 | 
 82 | ```
 83 | 
 84 | 
 85 | ## signature approach
 86 | ```{r}
 87 | b.cell.signature <- c("CD79A", "FCRL4")
 88 | treg.signature <- c("FOXP3", "CCR8")
 89 | texp.signature <- c("GZMK", "IL7R")
 90 | 
 91 | # Per-sample score = mean z-score of the signature genes, appended to the metadata of the selected samples.
 92 | fit.data <- OAK.meta.ICI
 93 | fit.data$b.cell.signature <- colMeans2(OAK.ICI.z.scored[b.cell.signature,])
 94 | fit.data$texp.signature <- colMeans2(OAK.ICI.z.scored[texp.signature,])
 95 | # Median split: scores at or above the median become "high", the rest "low".
 96 | fit.data$b.cell.signature <- ifelse(fit.data$b.cell.signature >= median(fit.data$b.cell.signature), "high", "low")
 97 | fit.data$texp.signature <- ifelse(fit.data$texp.signature >= median(fit.data$texp.signature), "high", "low")
 98 | 
 99 | # fit.data
100 | ```
101 | 
102 | ### b.cell
103 | ```{r}
104 | fit.cox <- coxph(Surv(fit.data$OS_MONTHS, fit.data$OS_CENSOR) ~ fit.data$b.cell.signature, data = fit.data)
105 | summary(fit.cox)
106 | # Survival curves for the high and low b.cell.signature groups. NOTE(review): confirm OS_CENSOR codes 1 = event.
107 | fit <- survfit(Surv(fit.data$OS_MONTHS, fit.data$OS_CENSOR) ~ fit.data$b.cell.signature, data = fit.data)
108 | print(fit)
109 | # Plot the curves with a log-rank p-value and an in-plot risk table.
110 | p <- ggsurvplot(fit,
111 |           pval = TRUE, conf.int = FALSE,
112 |           risk.table = TRUE, # Add risk table
113 |           risk.table.col = "strata", # Change risk table color by groups
114 |           linetype = "solid", # Same solid line for every group
115 |           #surv.median.line = "hv", # Specify median survival
116 |           ggtheme = theme_classic(), # Change ggplot2 theme
117 |           risk.table.pos = "in",
118 |           xlab = "Months",
119 |           ylab = "Overall survival",
120 |           title = "",
121 |           palette = c("#077E64","#878586")
122 | )
123 | p
124 | 
125 | #ggsave("bulk_data_plots/OAK_dataset_b.cell.signature.pdf", width = 8, height = 6)
126 | ```
127 | 
128 | 
129 | ### texp
130 | ```{r}
131 | fit.cox <- coxph(Surv(fit.data$OS_MONTHS, fit.data$OS_CENSOR) ~ fit.data$texp.signature, data = fit.data)
132 | summary(fit.cox)
133 | # Survival curves for the high and low texp.signature groups. NOTE(review): confirm OS_CENSOR codes 1 = event.
134 | fit <- survfit(Surv(fit.data$OS_MONTHS, fit.data$OS_CENSOR) ~ fit.data$texp.signature, data = fit.data)
135 | print(fit)
136 | # Plot the curves with a log-rank p-value and an in-plot risk table.
137 | p <- ggsurvplot(fit,
138 |           pval = TRUE, conf.int = FALSE,
139 |           risk.table = TRUE, # Add risk table
140 |           risk.table.col = "strata", # Change risk table color by groups
141 |           linetype = "solid", # Same solid line for every group
142 |           #surv.median.line = "hv", # Specify median survival
143 |           ggtheme = theme_classic(), # Change ggplot2 theme
144 |           risk.table.pos = "in",
145 |           xlab = "Months",
146 |           ylab = "Overall survival",
147 |           title = "",
148 |           palette = c("#077E64","#878586")
149 | )
150 | p
151 | 
152 | #ggsave("bulk_data_plots/OAK_dataset_texp.signature.pdf", width = 8, height = 6)
153 | ```
154 | 
155 | 
156 | 


--------------------------------------------------------------------------------
/main_figure/figure6_and_related_supplemental_figure/celltypist.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 52,
  6 |    "id": "aa184015",
  7 |    "metadata": {
  8 |     "scrolled": true
  9 |    },
 10 |    "outputs": [],
 11 |    "source": [
 12 |     "import scanpy as sc\n",
 13 |     "import celltypist\n",
 14 |     "import time\n",
 15 |     "import numpy as np"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": 67,
 21 |    "id": "e33ca71e",
 22 |    "metadata": {},
 23 |    "outputs": [
 24 |     {
 25 |      "data": {
 26 |       "text/plain": [
 27 |        "AnnData object with n_obs × n_vars = 3756 × 33538\n",
 28 |        "    obs: 'sampleID', 'patientID', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'TCR_type'\n",
 29 |        "    var: 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'"
 30 |       ]
 31 |      },
 32 |      "execution_count": 67,
 33 |      "metadata": {},
 34 |      "output_type": "execute_result"
 35 |     }
 36 |    ],
 37 |    "source": [
 38 |     "adata_Elmentaite = sc.read_h5ad(\"training.h5ad\")\n",
 39 |     "adata_Elmentaite"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": 68,
 45 |    "id": "347deb11",
 46 |    "metadata": {},
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "sc.pp.normalize_total(adata_Elmentaite, target_sum = 1e4)\n",
 50 |     "sc.pp.log1p(adata_Elmentaite)"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": 69,
 56 |    "id": "b4df283b",
 57 |    "metadata": {},
 58 |    "outputs": [],
 59 |    "source": [
 60 |     "# Sample 500 cells from each cell type for `adata_Elmentaite`.\n",
 61 |     "# All cells from a given cell type will be selected if the cell type size is < 500.\n",
 62 |     "sampled_cell_index = celltypist.samples.downsample_adata(adata_Elmentaite, mode = 'each', n_cells = 500, by = 'sub_cell_type', return_index = True)"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 70,
 68 |    "id": "ae58fc98",
 69 |    "metadata": {},
 70 |    "outputs": [
 71 |     {
 72 |      "name": "stderr",
 73 |      "output_type": "stream",
 74 |      "text": [
 75 |       "🍳 Preparing data before training\n",
 76 |       "✂️ 18295 non-expressed genes are filtered out\n",
 77 |       "🔬 Input data has 1000 cells and 15243 genes\n",
 78 |       "⚖️ Scaling input data\n",
 79 |       "🏋️ Training data using SGD logistic regression\n",
 80 |       "✅ Model training done!\n"
 81 |      ]
 82 |     },
 83 |     {
 84 |      "name": "stdout",
 85 |      "output_type": "stream",
 86 |      "text": [
 87 |       "Time elapsed: 0.6984186172485352 seconds\n"
 88 |      ]
 89 |     }
 90 |    ],
 91 |    "source": [
 92 |     "# Use `celltypist.train` to quickly train a rough CellTypist model.\n",
 93 |     "# You can also set `mini_batch = True` to enable mini-batch training.\n",
 94 |     "t_start = time.time()\n",
 95 |     "model_fs = celltypist.train(adata_Elmentaite[sampled_cell_index], 'sub_cell_type', n_jobs = 10, max_iter = 5, use_SGD = True)\n",
 96 |     "t_end = time.time()\n",
 97 |     "print(f\"Time elapsed: {t_end - t_start} seconds\")"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": 71,
103 |    "id": "903d5396",
104 |    "metadata": {},
105 |    "outputs": [],
106 |    "source": [
107 |     "gene_index = np.argpartition(np.abs(model_fs.classifier.coef_), -100, axis = 1)[:, -100:]"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 72,
113 |    "id": "f2b7e210",
114 |    "metadata": {},
115 |    "outputs": [],
116 |    "source": [
117 |     "gene_index = np.unique(gene_index)"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": 73,
123 |    "id": "65b67b67",
124 |    "metadata": {},
125 |    "outputs": [
126 |     {
127 |      "name": "stdout",
128 |      "output_type": "stream",
129 |      "text": [
130 |       "Number of genes selected: 100\n"
131 |      ]
132 |     }
133 |    ],
134 |    "source": [
135 |     "print(f\"Number of genes selected: {len(gene_index)}\")"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": 74,
141 |    "id": "baf60e1f",
142 |    "metadata": {
143 |     "scrolled": true
144 |    },
145 |    "outputs": [
146 |     {
147 |      "name": "stderr",
148 |      "output_type": "stream",
149 |      "text": [
150 |       "🍳 Preparing data before training\n",
151 |       "✂️ 48 non-expressed genes are filtered out\n",
152 |       "🔬 Input data has 1000 cells and 52 genes\n",
153 |       "⚖️ Scaling input data\n",
154 |       "🏋️ Training data using logistic regression\n",
155 |       "✅ Model training done!\n"
156 |      ]
157 |     },
158 |     {
159 |      "name": "stdout",
160 |      "output_type": "stream",
161 |      "text": [
162 |       "Time elapsed: 0.021984867254892983 minutes\n"
163 |      ]
164 |     }
165 |    ],
166 |    "source": [
167 |     "# Add `check_expression = False` to bypass expression check with only a subset of genes.\n",
168 |     "t_start = time.time()\n",
169 |     "model = celltypist.train(adata_Elmentaite[sampled_cell_index, gene_index], 'TCR_type', check_expression = False, n_jobs = 10, max_iter = 100)\n",
170 |     "t_end = time.time()\n",
171 |     "print(f\"Time elapsed: {(t_end - t_start)/60} minutes\")"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": 75,
177 |    "id": "c4ef386e",
178 |    "metadata": {},
179 |    "outputs": [],
180 |    "source": [
181 |     "# Save the model.\n",
182 |     "model.write('model_from_Elmentaite_specific.pkl')"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": null,
188 |    "id": "426952f5",
189 |    "metadata": {},
190 |    "outputs": [],
191 |    "source": []
192 |   },
193 |   {
194 |    "cell_type": "code",
195 |    "execution_count": null,
196 |    "id": "0c33f53f",
197 |    "metadata": {},
198 |    "outputs": [],
199 |    "source": []
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": 76,
204 |    "id": "7b3f1e54",
205 |    "metadata": {
206 |     "scrolled": false
207 |    },
208 |    "outputs": [
209 |     {
210 |      "data": {
211 |       "text/plain": [
212 |        "AnnData object with n_obs × n_vars = 144162 × 31831\n",
213 |        "    obs: 'sampleID', 'cellID', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'sub_cell_type', 'major_cell_type'\n",
214 |        "    var: 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'"
215 |       ]
216 |      },
217 |      "execution_count": 76,
218 |      "metadata": {},
219 |      "output_type": "execute_result"
220 |     }
221 |    ],
222 |    "source": [
223 |     "adata_James = sc.read_h5ad('predict.h5ad')\n",
224 |     "adata_James"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "code",
229 |    "execution_count": 77,
230 |    "id": "faf5b1cc",
231 |    "metadata": {},
232 |    "outputs": [],
233 |    "source": [
234 |     "sc.pp.normalize_total(adata_James, target_sum = 1e4)\n",
235 |     "sc.pp.log1p(adata_James)"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": 78,
241 |    "id": "f9ad7b73",
242 |    "metadata": {
243 |     "scrolled": true
244 |    },
245 |    "outputs": [
246 |     {
247 |      "name": "stderr",
248 |      "output_type": "stream",
249 |      "text": [
250 |       "🔬 Input data has 144162 cells and 31831 genes\n",
251 |       "🔗 Matching reference genes in the model\n",
252 |       "🧬 52 features used for prediction\n",
253 |       "⚖️ Scaling input data\n",
254 |       "🖋️ Predicting labels\n",
255 |       "✅ Prediction done!\n",
256 |       "👀 Can not detect a neighborhood graph, will construct one before the over-clustering\n",
257 |       "⛓️ Over-clustering input data with resolution set to 25\n",
258 |       "🗳️ Majority voting the predictions\n",
259 |       "✅ Majority voting done!\n"
260 |      ]
261 |     },
262 |     {
263 |      "name": "stdout",
264 |      "output_type": "stream",
265 |      "text": [
266 |       "Time elapsed: 335.8447313308716 seconds\n"
267 |      ]
268 |     }
269 |    ],
270 |    "source": [
271 |     "# CellTypist prediction with over-clustering and majority-voting.\n",
272 |     "t_start = time.time()\n",
273 |     "predictions = celltypist.annotate(adata_James, model = 'model_from_Elmentaite_specific.pkl', majority_voting = True)\n",
274 |     "t_end = time.time()\n",
275 |     "print(f\"Time elapsed: {t_end - t_start} seconds\")"
276 |    ]
277 |   },
278 |   {
279 |    "cell_type": "code",
280 |    "execution_count": 79,
281 |    "id": "2b4dbc7a",
282 |    "metadata": {},
283 |    "outputs": [
284 |     {
285 |      "data": {
286 |       "text/html": [
287 |        "<div>\n",
288 |        "<style scoped>\n",
289 |        "    .dataframe tbody tr th:only-of-type {\n",
290 |        "        vertical-align: middle;\n",
291 |        "    }\n",
292 |        "\n",
293 |        "    .dataframe tbody tr th {\n",
294 |        "        vertical-align: top;\n",
295 |        "    }\n",
296 |        "\n",
297 |        "    .dataframe thead th {\n",
298 |        "        text-align: right;\n",
299 |        "    }\n",
300 |        "</style>\n",
301 |        "<table border=\"1\" class=\"dataframe\">\n",
302 |        "  <thead>\n",
303 |        "    <tr style=\"text-align: right;\">\n",
304 |        "      <th></th>\n",
305 |        "      <th>predicted_labels</th>\n",
306 |        "      <th>over_clustering</th>\n",
307 |        "      <th>majority_voting</th>\n",
308 |        "    </tr>\n",
309 |        "  </thead>\n",
310 |        "  <tbody>\n",
311 |        "    <tr>\n",
312 |        "      <th>P304_ACGAGCCGTGTGCCTG_1</th>\n",
313 |        "      <td>MANA specific</td>\n",
314 |        "      <td>117</td>\n",
315 |        "      <td>MANA specific</td>\n",
316 |        "    </tr>\n",
317 |        "    <tr>\n",
318 |        "      <th>P64_TACTTACCAGGTCTCG_1</th>\n",
319 |        "      <td>MANA specific</td>\n",
320 |        "      <td>307</td>\n",
321 |        "      <td>Viral specific</td>\n",
322 |        "    </tr>\n",
323 |        "    <tr>\n",
324 |        "      <th>P481_GTGCATAGTAAATGAC_1</th>\n",
325 |        "      <td>Viral specific</td>\n",
326 |        "      <td>42</td>\n",
327 |        "      <td>MANA specific</td>\n",
328 |        "    </tr>\n",
329 |        "    <tr>\n",
330 |        "      <th>P435_CGATCGGGTTATCGGT_1</th>\n",
331 |        "      <td>MANA specific</td>\n",
332 |        "      <td>15</td>\n",
333 |        "      <td>MANA specific</td>\n",
334 |        "    </tr>\n",
335 |        "    <tr>\n",
336 |        "      <th>P182_GTGCGGTTCCAACCAA_1</th>\n",
337 |        "      <td>Viral specific</td>\n",
338 |        "      <td>6</td>\n",
339 |        "      <td>MANA specific</td>\n",
340 |        "    </tr>\n",
341 |        "    <tr>\n",
342 |        "      <th>...</th>\n",
343 |        "      <td>...</td>\n",
344 |        "      <td>...</td>\n",
345 |        "      <td>...</td>\n",
346 |        "    </tr>\n",
347 |        "    <tr>\n",
348 |        "      <th>P469_GGTGTTATCAGGCAAG_1</th>\n",
349 |        "      <td>MANA specific</td>\n",
350 |        "      <td>309</td>\n",
351 |        "      <td>MANA specific</td>\n",
352 |        "    </tr>\n",
353 |        "    <tr>\n",
354 |        "      <th>P454_GGGTCTGCAGACGTAG_1</th>\n",
355 |        "      <td>Viral specific</td>\n",
356 |        "      <td>41</td>\n",
357 |        "      <td>Viral specific</td>\n",
358 |        "    </tr>\n",
359 |        "    <tr>\n",
360 |        "      <th>P53_ACTGCTCTCCAGATCA_1</th>\n",
361 |        "      <td>Viral specific</td>\n",
362 |        "      <td>155</td>\n",
363 |        "      <td>Viral specific</td>\n",
364 |        "    </tr>\n",
365 |        "    <tr>\n",
366 |        "      <th>P44_CTGATAGGTTCGTCTC_1</th>\n",
367 |        "      <td>MANA specific</td>\n",
368 |        "      <td>1</td>\n",
369 |        "      <td>MANA specific</td>\n",
370 |        "    </tr>\n",
371 |        "    <tr>\n",
372 |        "      <th>P45_GCTCCTACACAGGTTT_1</th>\n",
373 |        "      <td>Viral specific</td>\n",
374 |        "      <td>66</td>\n",
375 |        "      <td>MANA specific</td>\n",
376 |        "    </tr>\n",
377 |        "  </tbody>\n",
378 |        "</table>\n",
379 |        "<p>144162 rows × 3 columns</p>\n",
380 |        "</div>"
381 |       ],
382 |       "text/plain": [
383 |        "                        predicted_labels over_clustering majority_voting\n",
384 |        "P304_ACGAGCCGTGTGCCTG_1    MANA specific             117   MANA specific\n",
385 |        "P64_TACTTACCAGGTCTCG_1     MANA specific             307  Viral specific\n",
386 |        "P481_GTGCATAGTAAATGAC_1   Viral specific              42   MANA specific\n",
387 |        "P435_CGATCGGGTTATCGGT_1    MANA specific              15   MANA specific\n",
388 |        "P182_GTGCGGTTCCAACCAA_1   Viral specific               6   MANA specific\n",
389 |        "...                                  ...             ...             ...\n",
390 |        "P469_GGTGTTATCAGGCAAG_1    MANA specific             309   MANA specific\n",
391 |        "P454_GGGTCTGCAGACGTAG_1   Viral specific              41  Viral specific\n",
392 |        "P53_ACTGCTCTCCAGATCA_1    Viral specific             155  Viral specific\n",
393 |        "P44_CTGATAGGTTCGTCTC_1     MANA specific               1   MANA specific\n",
394 |        "P45_GCTCCTACACAGGTTT_1    Viral specific              66   MANA specific\n",
395 |        "\n",
396 |        "[144162 rows x 3 columns]"
397 |       ]
398 |      },
399 |      "execution_count": 79,
400 |      "metadata": {},
401 |      "output_type": "execute_result"
402 |     }
403 |    ],
404 |    "source": [
405 |     "predictions.predicted_labels"
406 |    ]
407 |   },
408 |   {
409 |    "cell_type": "code",
410 |    "execution_count": null,
411 |    "id": "f7c3b5b1",
412 |    "metadata": {},
413 |    "outputs": [],
414 |    "source": []
415 |   }
416 |  ],
417 |  "metadata": {
418 |   "kernelspec": {
419 |    "display_name": "Python 3 (ipykernel)",
420 |    "language": "python",
421 |    "name": "python3"
422 |   },
423 |   "language_info": {
424 |    "codemirror_mode": {
425 |     "name": "ipython",
426 |     "version": 3
427 |    },
428 |    "file_extension": ".py",
429 |    "mimetype": "text/x-python",
430 |    "name": "python",
431 |    "nbconvert_exporter": "python",
432 |    "pygments_lexer": "ipython3",
433 |    "version": "3.9.11"
434 |   }
435 |  },
436 |  "nbformat": 4,
437 |  "nbformat_minor": 5
438 | }
439 | 


--------------------------------------------------------------------------------
/main_figure/figure6_and_related_supplemental_figure/survival_analysis.Rmd:
--------------------------------------------------------------------------------
  1 | ```{r}
  2 | library(tidyverse)
  3 | library(dplyr)
  4 | library(readxl)
  5 | library(ggpubr)
  6 | library(survival)
  7 | library(ggsurvfit)
  8 | library(survminer)
  9 | library(risksetROC)
 10 | library(forestplot)
 11 | ```
 12 | 
 13 | ```{r}
 14 | survival.metadata.clean <- read_csv("survival.metadata.final.csv") %>% mutate(MPR_pPR_nPR=ifelse(PRR_group=="pCR", "MPR", PRR_group))
 15 | survival.metadata.clean
 16 | ```
 17 | 
 18 | 
 19 | ```{r}
 20 | survival.metadata.clean %>% dplyr::group_by(histology) %>% count(MPR)
 21 | ```
 22 | 
 23 | ```{r}
 24 | survival.metadata.clean %>% group_by(TCR.classification) %>% count(RFS_status)
 25 | survival.metadata.clean %>% group_by(TCR.classification) %>% count(is.na(numberTexClone))
 26 | survival.metadata.clean %>% group_by(TCR.classification) %>% count(numberTexClone==0)
 27 | survival.metadata.clean %>% group_by(TCR.classification) %>% count(is.na(Texp.in.Tex.relevant))
 28 | ```
 29 | 
 30 | 
 31 | ### global analysis of risk factors
 32 | ```{r}
 33 | fit.data <- survival.metadata.clean
 34 | 
 35 | #fit.data <- fit.data %>% mutate(filter=ifelse(new_group==1 & MPR=="MPR", TRUE, FALSE)) %>% filter(filter==FALSE)
 36 | 
 37 | covariates <- c("histology", "PRR", "numberTexClone", "numberTregClone", "Texp.in.Tex.relevant", "Treg_CCR8" )
 38 | univ_formulas <- sapply(covariates,
 39 |                         function(x) as.formula(paste('Surv(RFS_months_new, RFS_status_new)~', x)))
 40 |                         
 41 | univ_models <- lapply( univ_formulas, function(x){coxph(x, data = fit.data)})
 42 | # Extract data 
 43 | univ_results <- lapply(univ_models,
 44 |                        function(x){ 
 45 |                           x <- summary(x)
 46 |                           p.value<-signif(x$wald["pvalue"], digits=2)
 47 |                           #wald.test<-signif(x$wald["test"], digits=2)
 48 |                           #beta<-signif(x$coef[1], digits=2);#coeficient beta
 49 |                           HR <-signif(x$coef[2], digits=2);#exp(beta)
 50 |                           HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
 51 |                           HR.confint.upper <- signif(x$conf.int[,"upper .95"],2)
 52 |                           # HR <- paste0(HR, " (", 
 53 |                           #             HR.confint.lower, "-", HR.confint.upper, ")")
 54 |                           res<-c(HR, HR.confint.lower, HR.confint.upper, p.value)
 55 |                           names(res)<-c("HR", "HR.lower", "HR.upper", 
 56 |                                         "p.value")
 57 |                           return(res)
 58 |                           #return(exp(cbind(coef(x),confint(x))))
 59 |                          })
 60 | univ_results
 61 | result <- as.data.frame(univ_results)
 62 | result <- as.data.frame(t(result))
 63 | result #%>% filter(p.value <0.05)
 64 | 
 65 | tabletext1<-as.character(rownames(result))
 66 | tabletext2<-as.numeric(result[,"p.value"])
 67 | tabletext<-cbind(tabletext1,tabletext2)
 68 | 
 69 | 
 70 | pdf("final_plots/all.patients.HR.pdf",         # File name
 71 |     width = 8, height = 5, # Width and height in inches
 72 |     bg = "white",          # Background color
 73 |     colormodel = "RGB",    # Color model (cmyk is required for most publications)
 74 |     paper = "A4")          # Paper size
 75 | 
 76 | # Closing the graphical device
 77 | 
 78 | forestplot(labeltext=tabletext, #文本信息  
 79 |            mean = result[,'HR'],##HR值
 80 | lower = result[,"HR.lower"],##95%置信区间
 81 | upper = result[,"HR.upper"],#95%置信区间
 82 |            boxsize = 0.1,##大小
 83 |            graph.pos=3,#图在表中的列位置
 84 |            graphwidth = unit(0.4,"npc"),#图在表中的宽度比例
 85 |            #fn.ci_norm="fpDrawDiamondCI",#box类型选择钻石,可以更改fpDrawNormalCI；fpDrawCircleCI等
 86 | col=fpColors(box="steelblue", lines="black", zero = "black"),#颜色设置
 87 |            lwd.ci=2,ci.vertices.height = 0.1,ci.vertices=TRUE,#置信区间用线宽、高、型
 88 |            zero=1,#zero线横坐标
 89 |            lwd.zero=2,#zero线宽
 90 |            grid=T,
 91 |            lwd.xaxis=2,#X轴线宽
 92 |            title="Hazard Ratio",
 93 |            xlab="",#X轴标题
 94 |            clip=c(-Inf,4),#边界
 95 |            colgap = unit(0.5,"cm")   
 96 | )
 97 | dev.off()
 98 | ```
 99 | 
100 | 
101 | ```{r}
# Multivariable Cox model of RFS on PRR, Texp.in.Tex.relevant and Treg_CCR8.
# Variables are referenced by column name and resolved through `data`; mixing
# `fit.data$...` terms with `data =` gives the same fit but produces
# "fit.data$PRR"-style coefficient labels and ties the model to whatever object
# is currently called `fit.data`.
fit.cox <- coxph(Surv(RFS_months_new, RFS_status_new) ~ PRR + Texp.in.Tex.relevant + Treg_CCR8,
                 data = fit.data)
summary(fit.cox)
104 | ```
105 | 
106 | 
107 | ```{r}
# Time-dependent ROC curves (risksetROC, Cox method) comparing
# Texp.in.Tex.relevant with PRR as single-marker predictors of RFS.
# Output: final_plots/all.patients.ROC.pdf
pdf("final_plots/all.patients.ROC.pdf",         # File name
    width = 5, height = 5, # Width and height in inches
    bg = "white",          # Background color
    paper = "A4")          # Paper size

# Both models are evaluated on the patients that have a Texp.in.Tex.relevant
# value. `fit.data` is whatever the preceding chunk left behind.
fit.data <- fit.data %>% filter(!is.na(Texp.in.Tex.relevant))

# Evaluate the ROC at the median follow-up measured on the same time scale the
# models are fitted on. The previous code took the median of `RFS_months`
# while fitting on `RFS_months_new`.
# NOTE(review): confirm that RFS_months_new is the intended follow-up column.
roc.time <- median(fit.data$RFS_months_new)

# Marker 1: linear predictor of the Texp.in.Tex.relevant model.
fit.cox <- coxph(Surv(RFS_months_new, RFS_status_new) ~ Texp.in.Tex.relevant, data = fit.data)
PI <- fit.cox$linear.predictors
tROC.hf.texp <- risksetROC(Stime = fit.data$RFS_months_new, status = fit.data$RFS_status_new, marker = PI, predict.time = roc.time, method = "Cox", col = "#418849", lty = 1, lwd = 1, xlab = "FP:1-Specificity", ylab = "TP:sensitivity", main = "Time Dependent ROC")
tROC.hf.texp$AUC


# Marker 2: linear predictor of the PRR model.
# NOTE(review): assumes PRR has no missing values in this subset; otherwise the
# linear predictor would be shorter than Stime/status.
fit.cox <- coxph(Surv(RFS_months_new, RFS_status_new) ~ PRR, data = fit.data)
PI <- fit.cox$linear.predictors
tROC.hf.prr <- risksetROC(Stime = fit.data$RFS_months_new, status = fit.data$RFS_status_new, marker = PI, predict.time = roc.time, method = "Cox", col = "blue", lty = 1, lwd = 1, xlab = "FP:1-Specificity", ylab = "TP:sensitivity", main = "Time Dependent ROC")
tROC.hf.prr$AUC


# Overlay both curves on the plot drawn by the last risksetROC() call.
# xlim/ylim are not graphical parameters of lines() and were dropped.
lines(tROC.hf.texp$FP, tROC.hf.texp$TP, type = "l", col = "#418849")
lines(tROC.hf.prr$FP, tROC.hf.prr$TP, type = "l", col = "blue")
legend("bottomright", c(paste("AUC of texp: ", round(tROC.hf.texp$AUC, 3)), paste("AUC of PRR : ", round(tROC.hf.prr$AUC, 3))), col = c("#418849", "blue"), lty = 1, lwd = 2, bty = "n")
dev.off()
130 | ```
131 | 
132 | 
133 | ```{r}
# Kaplan-Meier curves of RFS for all patients under five different groupings.
# Every figure uses the same ggsurvplot layout, so that layout is kept in one
# local helper; only the title and the palette vary.
draw.km <- function(km.fit, km.title, km.palette) {
  ggsurvplot(km.fit,
             pval = TRUE, conf.int = FALSE,
             risk.table = TRUE,           # add the number-at-risk table
             risk.table.col = "strata",   # colour the risk table by group
             linetype = "solid",          # solid lines for every group
             #surv.median.line = "hv",    # mark median survival
             ggtheme = theme_classic(),
             risk.table.pos = "in",       # draw the risk table inside the plot
             xlab = "Months from surgery",
             title = km.title,
             palette = km.palette)
}

#=============== number of Treg clones
# Split at the 4th entry of the default quantile() output (75th percentile).
# the quantile of survival analysis cohort and nmf cohort are almost the same quantile.
fit.data <- survival.metadata.clean %>%
  mutate(numberTregClone.bin = ifelse(
    numberTregClone < quantile(numberTregClone, na.rm = TRUE)[[4]],
    "<top 25%", ">= top25%"
  ))

fit <- survfit(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$numberTregClone.bin, data = fit.data)

p <- draw.km(fit, "all.patients.numberTregClone", c("#077E64", "#C6595A"))
p
ggsave("final_plots/all.patients.numberTregClone.pdf", width = 8, height = 6)

# 0.235771 0.291866 0.342278 0.520000
#=============== CCR8+ Treg fraction
# Split at the 2nd entry of the default quantile() output (25th percentile).
# the quantile of survival analysis cohort and nmf cohort are almost the same
fit.data <- survival.metadata.clean %>%
  mutate(Treg_CCR8.bin = ifelse(
    Treg_CCR8 < quantile(Treg_CCR8, na.rm = TRUE)[[2]],
    #Treg_CCR8 < 0.235771,
    "<top 75%", ">= top75%"
  ))

fit <- survfit(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Treg_CCR8.bin, data = fit.data)

p <- draw.km(fit, "all.patients.CCR8Treg.in.allTreg", c("#077E64", "#C6595A"))
p
ggsave("final_plots/all.patients.CCR8Treg.in.allTreg.pdf", width = 8, height = 6)
#============


#========== Texp fraction among Tex-relevant cells
# Split at the 2nd entry of the default quantile() output (25th percentile).
# the quantile of survival analysis cohort and nmf cohort are almost the same
fit.data <- survival.metadata.clean %>%
  mutate(Texp.in.Tex.relevant.bin = ifelse(
    Texp.in.Tex.relevant < quantile(Texp.in.Tex.relevant, na.rm = TRUE)[[2]],
    #Texp.in.Tex.relevant < 0.7592234,
    "<top 75%", ">= top75%"
  ))

fit <- survfit(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Texp.in.Tex.relevant.bin, data = fit.data)

# Palette order is swapped relative to the two plots above.
p <- draw.km(fit, "all.patients.Texp", c( "#C6595A","#077E64"))
p
ggsave("final_plots/all.patients.Texp.pdf", width = 8, height = 6)
#============

#======== pathological response group (MPR_pPR_nPR)
fit.data <- survival.metadata.clean
fit <- survfit(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$MPR_pPR_nPR, data = fit.data)

p <- draw.km(fit, "all_patients_by_PRR", c("#077E64", "#C6595A", "#878586"))
p
ggsave("final_plots/all.patients.PRR.pdf", width = 8, height = 6)
#============

#====== new_group, treated as a factor
fit.data <- survival.metadata.clean
fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~  as.factor(fit.data$new_group), data = fit.data)
summary(fit.cox)

fit <- survfit(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ as.factor(fit.data$new_group), data = fit.data)
print(fit)

p <- draw.km(fit, "all_patients_by_TIME", c("#E64B35", "#4DBBD5", "#00A087", "#3C5488", "#F39B7F"))
p
ggsave("final_plots/all.patients.TIME.pdf", width = 8, height = 6)
249 | ```
250 | 
251 | 
### all patients, stratified by TCR.classification (MPR vs. non-MPR subtypes)
253 | ```{r}
# Kaplan-Meier curves of RFS for the whole cohort, one curve per
# TCR.classification group.
fit.data <- survival.metadata.clean

fit <- survfit(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$TCR.classification, data = fit.data)

p <- ggsurvplot(
  fit,
  title = "all_patients_by_non-MPR_subtype",
  xlab = "Months from surgery",
  palette = c("#D9BFAE", "#8CB4A3", "#7998AD"),
  ggtheme = theme_classic(),
  linetype = "solid",            # solid lines for every group
  pval = TRUE,
  conf.int = FALSE,
  #surv.median.line = "hv",      # mark median survival
  risk.table = TRUE,             # add the number-at-risk table
  risk.table.col = "strata",     # colour the risk table by group
  risk.table.pos = "in"          # draw the risk table inside the plot
)
p

ggsave("final_plots/all.patients.non-MPR.subtype.pdf", width = 8, height = 6)
273 | ```
274 | 
275 | 
276 | ### type I patients
277 | ```{r}
# Univariate Cox regression of RFS on each candidate risk factor, restricted to
# patients with TCR.classification == "type I", plus a forest plot written to
# final_plots/typeI.patients.HR.pdf and two Cox models involving Surgery_N.
fit.data <- survival.metadata.clean %>% filter(TCR.classification == "type I")

#fit.data <- fit.data %>% mutate(filter=ifelse(new_group==1 & MPR=="MPR", TRUE, FALSE)) %>% filter(filter==FALSE)

covariates <- c("histology", "PRR", "numberTexClone", "numberTregClone", "Texp.in.Tex.relevant", "Treg_CCR8")
# One survival formula per covariate; sapply keeps the covariate names.
univ_formulas <- sapply(covariates,
                        function(x) as.formula(paste('Surv(RFS_months_new, RFS_status_new)~', x)))

univ_models <- lapply(univ_formulas, function(x) { coxph(x, data = fit.data) })

# Extract the hazard ratio, its 95% confidence limits and the Wald p-value.
univ_results <- lapply(univ_models,
                       function(x) {
                         x <- summary(x)
                         # One row per covariate is required by the table below; a
                         # multi-level factor would yield several coefficients and
                         # make the old linear index `coef[2]` point at the wrong cell.
                         if (nrow(x$coefficients) != 1) {
                           stop("expected one coefficient per univariate model, got ",
                                nrow(x$coefficients))
                         }
                         # Full component/column names instead of `$` partial matching.
                         p.value <- signif(x$waldtest["pvalue"], digits = 2)
                         HR <- signif(x$coefficients[1, "exp(coef)"], digits = 2)  # exp(beta)
                         HR.confint.lower <- signif(x$conf.int[1, "lower .95"], 2)
                         HR.confint.upper <- signif(x$conf.int[1, "upper .95"], 2)
                         res <- c(HR, HR.confint.lower, HR.confint.upper, p.value)
                         names(res) <- c("HR", "HR.lower", "HR.upper", "p.value")
                         return(res)
                       })
univ_results
# Reshape to one row per covariate with columns HR / HR.lower / HR.upper / p.value.
result <- as.data.frame(univ_results)
result <- as.data.frame(t(result))
result #%>% filter(p.value <0.05)

# Text columns of the forest plot: covariate name and p-value.
tabletext1 <- as.character(rownames(result))
tabletext2 <- as.numeric(result[, "p.value"])
tabletext <- cbind(tabletext1, tabletext2)

pdf("final_plots/typeI.patients.HR.pdf",         # File name
    width = 8, height = 5, # Width and height in inches
    bg = "white",          # Background color
    colormodel = "srgb",   # Documented values are "srgb", "gray"/"grey" and "cmyk"
    paper = "A4")          # Paper size
# The stray empty argument (",,") that followed `upper` has been removed.
forestplot(labeltext = tabletext,               # text columns
           mean = result[, 'HR'],               # hazard ratio
           lower = result[, "HR.lower"],        # lower 95% confidence limit
           upper = result[, "HR.upper"],        # upper 95% confidence limit
           boxsize = 0.1,                       # size of the HR box
           graph.pos = 3,                       # column position of the graph within the table
           graphwidth = unit(0.4, "npc"),       # share of the width taken by the graph
           #fn.ci_norm="fpDrawDiamondCI",       # box shape; alternatives: fpDrawNormalCI, fpDrawCircleCI, ...
           col = fpColors(box = "steelblue", lines = "black", zero = "black"),  # colours
           lwd.ci = 2, ci.vertices.height = 0.1, ci.vertices = TRUE,            # CI line width / end caps
           zero = 1,                            # x position of the reference line (HR = 1)
           lwd.zero = 2,                        # width of the reference line
           grid = TRUE,
           lwd.xaxis = 2,                       # width of the x axis
           title = "Hazard Ratio",
           xlab = "",                           # x-axis title
           clip = c(-Inf, 4),                   # clipping bounds for the CI lines
           colgap = unit(0.5, "cm")
)
dev.off()


# Surgery_N alone, then adjusted for Texp.in.Tex.relevant. Columns are resolved
# through `data` rather than `fit.data$...` terms inside the formula.
fit.cox <- coxph(Surv(RFS_months_new, RFS_status_new) ~ Surgery_N, data = fit.data)
summary(fit.cox)

fit.cox <- coxph(Surv(RFS_months_new, RFS_status_new) ~ Surgery_N + Texp.in.Tex.relevant, data = fit.data)
summary(fit.cox)
346 | ```
347 | 
348 | ```{r}
# Kaplan-Meier curves of RFS for type I patients, split by Texp.in.Tex.relevant.
# The cut-off is the 3rd entry of the default quantile() output, i.e. the median
# of the type I subset. The group labels used to read "<top 25%" / ">= top25%",
# which does not describe a median split, so they now state the cut-off used.
# NOTE(review): if a top-quartile split was intended, use entry [[4]] instead
# and restore the "top 25%" labels.
fit.data <- survival.metadata.clean %>%
  filter(TCR.classification == "type I") %>%
  mutate(Texp.in.Tex.relevant.bin = ifelse(
    #Texp.in.Tex.relevant < 0.7592234,
    Texp.in.Tex.relevant < quantile(Texp.in.Tex.relevant, na.rm = TRUE)[[3]],
    "< median", ">= median"
  ))

fit <- survfit(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Texp.in.Tex.relevant.bin, data = fit.data)

p <- ggsurvplot(fit,
          pval = TRUE, conf.int = FALSE,
          risk.table = TRUE,            # add the number-at-risk table
          risk.table.col = "strata",    # colour the risk table by group
          linetype = "solid",           # solid lines for every group
          #surv.median.line = "hv",     # mark median survival
          ggtheme = theme_classic(),
          risk.table.pos = "in",        # draw the risk table inside the plot
          xlab = "Months from surgery",
          title = "typeI.non-MPR.by.Texp",
          palette = c("#C6595A", "#077E64")
)
p
ggsave("final_plots/typeI.non-MPR.by.Texp.pdf", width = 8, height = 6)
371 | ```
372 | 
373 | 
374 | ### type II non-MPR patients 
375 | ```{r}
# Univariate Cox regression of RFS on each candidate risk factor, restricted to
# patients with TCR.classification == "type II", plus a forest plot written to
# final_plots/typeII.patients.HR.pdf and a Cox model on Surgery_N.
fit.data <- survival.metadata.clean %>% filter(TCR.classification == "type II")

#fit.data <- fit.data %>% mutate(filter=ifelse(new_group==1 & MPR=="MPR", TRUE, FALSE)) %>% filter(filter==FALSE)

covariates <- c("histology", "PRR", "numberTexClone", "numberTregClone", "Texp.in.Tex.relevant", "Treg_CCR8")
# One survival formula per covariate; sapply keeps the covariate names.
univ_formulas <- sapply(covariates,
                        function(x) as.formula(paste('Surv(RFS_months_new, RFS_status_new)~', x)))

univ_models <- lapply(univ_formulas, function(x) { coxph(x, data = fit.data) })

# Extract the hazard ratio, its 95% confidence limits and the Wald p-value.
univ_results <- lapply(univ_models,
                       function(x) {
                         x <- summary(x)
                         # One row per covariate is required by the table below; a
                         # multi-level factor would yield several coefficients and
                         # make the old linear index `coef[2]` point at the wrong cell.
                         if (nrow(x$coefficients) != 1) {
                           stop("expected one coefficient per univariate model, got ",
                                nrow(x$coefficients))
                         }
                         # Full component/column names instead of `$` partial matching.
                         p.value <- signif(x$waldtest["pvalue"], digits = 2)
                         HR <- signif(x$coefficients[1, "exp(coef)"], digits = 2)  # exp(beta)
                         HR.confint.lower <- signif(x$conf.int[1, "lower .95"], 2)
                         HR.confint.upper <- signif(x$conf.int[1, "upper .95"], 2)
                         res <- c(HR, HR.confint.lower, HR.confint.upper, p.value)
                         names(res) <- c("HR", "HR.lower", "HR.upper", "p.value")
                         return(res)
                       })
univ_results
# Reshape to one row per covariate with columns HR / HR.lower / HR.upper / p.value.
result <- as.data.frame(univ_results)
result <- as.data.frame(t(result))
result #%>% filter(p.value <0.05)

# Text columns of the forest plot: covariate name and p-value.
tabletext1 <- as.character(rownames(result))
tabletext2 <- as.numeric(result[, "p.value"])
tabletext <- cbind(tabletext1, tabletext2)

pdf("final_plots/typeII.patients.HR.pdf",         # File name
    width = 8, height = 5, # Width and height in inches
    bg = "white",          # Background color
    colormodel = "srgb",   # Documented values are "srgb", "gray"/"grey" and "cmyk"
    paper = "A4")          # Paper size
# The stray empty argument (",,") that followed `upper` has been removed.
forestplot(labeltext = tabletext,               # text columns
           mean = result[, 'HR'],               # hazard ratio
           lower = result[, "HR.lower"],        # lower 95% confidence limit
           upper = result[, "HR.upper"],        # upper 95% confidence limit
           boxsize = 0.1,                       # size of the HR box
           graph.pos = 3,                       # column position of the graph within the table
           graphwidth = unit(0.4, "npc"),       # share of the width taken by the graph
           #fn.ci_norm="fpDrawDiamondCI",       # box shape; alternatives: fpDrawNormalCI, fpDrawCircleCI, ...
           col = fpColors(box = "steelblue", lines = "black", zero = "black"),  # colours
           lwd.ci = 2, ci.vertices.height = 0.1, ci.vertices = TRUE,            # CI line width / end caps
           zero = 1,                            # x position of the reference line (HR = 1)
           lwd.zero = 2,                        # width of the reference line
           grid = TRUE,
           lwd.xaxis = 2,                       # width of the x axis
           title = "Hazard Ratio",
           xlab = "",                           # x-axis title
           clip = c(-Inf, 4),                   # clipping bounds for the CI lines
           colgap = unit(0.5, "cm")
)
dev.off()

# Surgery_N alone; the column is resolved through `data` rather than a
# `fit.data$...` term inside the formula.
fit.cox <- coxph(Surv(RFS_months_new, RFS_status_new) ~ Surgery_N, data = fit.data)
summary(fit.cox)
440 | 
441 | 
442 | ```
443 | 
444 | ```{r}
# Kaplan-Meier curves of RFS for type II patients, split by Texp.in.Tex.relevant.
# The cut-off is the 3rd entry of the default quantile() output, i.e. the median
# of the type II subset. The group labels used to read "<top 25%" / ">= top25%",
# which does not describe a median split, so they now state the cut-off used.
# NOTE(review): if a top-quartile split was intended, use entry [[4]] instead
# and restore the "top 25%" labels.
fit.data <- survival.metadata.clean %>%
  filter(TCR.classification == "type II") %>%
  mutate(Texp.in.Tex.relevant.bin = ifelse(
    #Texp.in.Tex.relevant < 0.7592234,
    Texp.in.Tex.relevant < quantile(Texp.in.Tex.relevant, na.rm = TRUE)[[3]],
    "< median", ">= median"
  ))

fit <- survfit(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~ fit.data$Texp.in.Tex.relevant.bin, data = fit.data)

p <- ggsurvplot(fit,
          pval = TRUE, conf.int = FALSE,
          risk.table = TRUE,            # add the number-at-risk table
          risk.table.col = "strata",    # colour the risk table by group
          linetype = "solid",           # solid lines for every group
          #surv.median.line = "hv",     # mark median survival
          ggtheme = theme_classic(),
          risk.table.pos = "in",        # draw the risk table inside the plot
          xlab = "Months from surgery",
          # Title now matches the grouping variable and the output file name
          # (it used to say "by.subtypes" although the split is by Texp).
          title = "typeII.non-MPR.by.Texp",
          palette = c("#C6595A", "#077E64")
)
p
ggsave("final_plots/typeII.non-MPR.by.Texp.pdf", width = 8, height = 6)
467 | ```
468 | 
469 | 
470 | ### MPR patients
471 | ```{r}
# Univariate Cox regression of RFS on each candidate risk factor, restricted to
# patients with TCR.classification == "MPR". Results are shown as a text table
# (beta, HR with 95% CI, Wald statistic, p-value); no forest plot is drawn.
fit.data <- survival.metadata.clean %>%
  filter(TCR.classification == "MPR")

#fit.data <- fit.data %>% mutate(filter=ifelse(new_group==1 & MPR=="MPR", TRUE, FALSE)) %>% filter(filter==FALSE)

covariates <- c("numberTexClone", "numberTregClone", "Texp.in.Tex.relevant", "Treg_CCR8", "PRR", "histology")

# One survival formula per covariate, named by covariate.
univ_formulas <- sapply(
  covariates,
  function(x) as.formula(paste('Surv(RFS_months_new, RFS_status_new)~', x))
)

univ_models <- lapply(univ_formulas, function(x) coxph(x, data = fit.data))

# Summarise every model as a named character vector.
univ_results <- lapply(univ_models, function(x) {
  model.summary <- summary(x)
  wald <- model.summary$waldtest
  coef.table <- model.summary$coefficients
  ci.table <- model.summary$conf.int

  p.val <- signif(wald["pvalue"], digits = 2)
  wald.stat <- signif(wald["test"], digits = 2)
  beta.hat <- signif(coef.table[1], digits = 2)   # coefficient beta
  hr <- signif(coef.table[2], digits = 2)         # exp(beta)
  ci.low <- signif(ci.table[, "lower .95"], 2)
  ci.high <- signif(ci.table[, "upper .95"], 2)
  hr.text <- paste0(hr, " (", ci.low, "-", ci.high, ")")

  setNames(
    c(beta.hat, hr.text, wald.stat, p.val),
    c("beta", "HR (95% CI for HR)", "wald.test", "p.value")
  )
  #return(exp(cbind(coef(x),confint(x))))
})
univ_results

# One row per covariate.
result <- as.data.frame(t(as.data.frame(univ_results)))
result #%>% filter(p.value <0.05)


# Cox model of RFS on Surgery_N alone.
fit.cox <- coxph(Surv(fit.data$RFS_months_new, fit.data$RFS_status_new) ~  fit.data$Surgery_N, data = fit.data)
summary(fit.cox)
507 | ```
508 | 
509 | ### all non-MPR patients
510 | ```{r}
511 | # Univariate Cox regression of RFS on each covariate, restricted to TCR "type I"/"type II" patients.
512 | fit.data <- survival.metadata.clean %>% filter(TCR.classification %in% c("type I", "type II"))
513 | 
514 | #fit.data <- fit.data %>% mutate(filter=ifelse(new_group==1 & MPR=="MPR", TRUE, FALSE)) %>% filter(filter==FALSE)
515 | 
516 | covariates <- c("numberTexClone", "numberTregClone", "Texp.in.Tex.relevant", "Treg_CCR8", "PRR", "histology")
517 | univ_formulas <- sapply(covariates,
518 |                         function(x) as.formula(paste('Surv(RFS_months_new, RFS_status_new)~', x)))
519 | univ_models <- lapply(univ_formulas, function(x) coxph(x, data = fit.data))
520 | 
521 | # Collect HR, 95% CI and Wald p-value from each model summary.
522 | # Summary components are addressed by their full names and the first coefficient
523 | # row is selected explicitly, so a covariate that expands to several coefficients
524 | # still yields exactly four values (first coefficient's HR/CI + overall Wald p-value).
525 | univ_results <- lapply(univ_models,
526 |                        function(x){
527 |                           x <- summary(x)
528 |                           p.value <- signif(x$waldtest["pvalue"], digits=2)
529 |                           HR <- signif(x$coefficients[1, "exp(coef)"], digits=2) # exp(beta)
530 |                           HR.confint.lower <- signif(x$conf.int[1, "lower .95"], 2)
531 |                           HR.confint.upper <- signif(x$conf.int[1, "upper .95"], 2)
532 |                           res <- c(HR, HR.confint.lower, HR.confint.upper, p.value)
533 |                           names(res) <- c("HR", "HR.lower", "HR.upper", "p.value")
534 |                           return(res)
535 |                          })
536 | univ_results
537 | # Reshape to one row per covariate with columns HR / HR.lower / HR.upper / p.value.
538 | result <- as.data.frame(univ_results)
539 | result <- as.data.frame(t(result))
540 | result #%>% filter(p.value <0.05)
541 | 
542 | 
543 | # Forest plot of the univariate hazard ratios; the label columns are covariate name and p-value.
544 | tabletext1<-as.character(rownames(result))
545 | tabletext2<-as.numeric(result[,"p.value"])
546 | tabletext<-cbind(tabletext1,tabletext2)
547 | 
548 | forestplot(labeltext=tabletext, # text columns
549 |            mean = result[,'HR'], # hazard ratio
550 |            lower = result[,"HR.lower"], # lower 95% confidence limit
551 |            upper = result[,"HR.upper"], # upper 95% confidence limit (stray empty argument removed)
552 |            boxsize = 0.1, # box size
553 |            graph.pos=3, # column position of the graph within the table
554 |            graphwidth = unit(0.4,"npc"), # width of the graph relative to the table
555 |            #fn.ci_norm="fpDrawDiamondCI", # diamond boxes; alternatives: fpDrawNormalCI, fpDrawCircleCI, etc.
556 |            col=fpColors(box="steelblue", lines="black", zero = "black"), # colours
557 |            lwd.ci=2,ci.vertices.height = 0.1,ci.vertices=TRUE, # CI line width, vertex height, draw vertices
558 |            zero=1, # x position of the reference (zero) line
559 |            lwd.zero=2, # reference line width
560 |            grid=T,
561 |            lwd.xaxis=2, # x-axis line width
562 |            title="Hazard Ratio",
563 |            xlab="", # x-axis title
564 |            clip=c(-Inf,4), # clipping bounds for the intervals
565 |            colgap = unit(0.5,"cm")
566 | )
567 | # Cox model of RFS by TCR class; columns are resolved through `data` instead of `fit.data$...`.
568 | fit.cox <- coxph(Surv(RFS_months_new, RFS_status_new) ~ TCR.classification, data = fit.data)
569 | summary(fit.cox)
570 | 
571 | 
572 | ```
573 | 
574 | ```{r}
575 | survival.metadata.clean %>% filter(MPR=="MPR") %>% ggplot(aes(x=Texp.in.Tex.relevant)) + geom_histogram() # histogram of Texp.in.Tex.relevant, MPR patients only
576 | quantile(survival.metadata.clean[["Texp.in.Tex.relevant"]], na.rm = TRUE) # default quantiles over all patients, NAs dropped
577 | ```
578 | 
579 | 
580 | ```{r}
581 | # Split non-MPR patients at the median Texp.in.Tex.relevant (computed on non-MPR patients only),
582 | # keep MPR patients as their own group, then compare RFS across the three groups.
583 | temp.data <- survival.metadata.clean %>% filter(MPR=="non-MPR")
584 | texp.cutoff <- unname(quantile(temp.data$Texp.in.Tex.relevant, probs = 0.5, na.rm = TRUE))
585 | fit.data <- survival.metadata.clean %>%
586 |   mutate(Texp.in.Tex.relevant.bin = ifelse(MPR=="MPR", "MPR",
587 |                                            ifelse(Texp.in.Tex.relevant < texp.cutoff, "<top 50%", ">= top50%")))
588 | # the quantile of survival analysis cohort and nmf cohort are almost the same
589 | 
590 | # Cox model with MPR as the reference level; columns are resolved through `data`.
591 | fit.cox <- coxph(Surv(RFS_months_new, RFS_status_new) ~ factor(Texp.in.Tex.relevant.bin, levels=c("MPR", ">= top50%", "<top 50%")), data = fit.data)
592 | summary(fit.cox)
593 | 
594 | fit <- survfit(Surv(RFS_months_new, RFS_status_new) ~ Texp.in.Tex.relevant.bin, data = fit.data)
595 | p <- ggsurvplot(fit, data = fit.data, # pass the data explicitly rather than relying on lookup from `fit`
596 |           pval = TRUE, conf.int = FALSE,
597 |           risk.table = TRUE, # Add risk table
598 |           risk.table.col = "strata", # Change risk table color by groups
599 |           linetype = "solid", # same solid line type for every group
600 |           ggtheme = theme_classic(), # Change ggplot2 theme
601 |           risk.table.pos = "in",
602 |           xlab = "Months from surgery",
603 |           title = "all.non-MPR.by.Texp",
604 |           palette =  c("#C6595A", "#077E64","#D9BFAE")
605 | )
606 | p
607 | ggsave("final_plots/all.non-MPR.by.Texp.pdf", width = 8, height = 6) # NOTE(review): saves ggplot2's last plot; confirm it is the ggsurvplot output
608 | 
609 | fit.data %>% group_by(Texp.in.Tex.relevant.bin) %>% count(PRR_group)
610 | ```
611 | 
612 | 
613 | ```{r}
614 | # Stacked bars for the two non-MPR Texp bins: every row contributes value = 1, filled by PRR group.
615 | texp.bins <- c("<top 50%", ">= top50%")
616 | plot.data <- fit.data %>% filter(Texp.in.Tex.relevant.bin %in% texp.bins) %>%
617 |   mutate(value=1, PRR_group=factor(PRR_group, levels = c("pPR", "nPR")))
618 | plot.data
619 | ggplot(plot.data, aes(x=Texp.in.Tex.relevant.bin, y=value, fill=PRR_group)) + geom_bar(stat="identity", position="stack") +
620 |   scale_fill_manual(values=c("#878586","#C6595A")) + theme_classic()
621 | ggsave("final_plots/pPRnPR_Texp.pdf", width = 3, height = 4)
622 | 
623 | # Same layout, filled by histology instead of PRR group.
624 | plot.data <- fit.data %>% filter(Texp.in.Tex.relevant.bin %in% texp.bins) %>% mutate(value=1, histology=factor(histology, levels = c("LUSC", "LUAD")))
625 | plot.data
626 | ggplot(plot.data, aes(x=Texp.in.Tex.relevant.bin, y=value, fill=histology)) + geom_bar(stat="identity", position="stack") + scale_fill_manual(values=c("#E97777","#88AB8E")) + theme_classic()
627 | ggsave("final_plots/LUSCLUAD_Texp.pdf", width = 3, height = 4)
628 | ```
629 | 
630 | 
631 | ```{r}
632 | # Time-dependent ROC (risksetROC) of two Cox linear predictors, Texp.in.Tex.relevant and PRR, in non-MPR patients; written to PDF.
633 | pdf("final_plots/non-MPR.patients.ROC.pdf",         # File name
634 |     width = 5, height = 5, # Width and height in inches
635 |     bg = "white",    # Background colour
636 |     paper = "A4")    # Paper size
637 | fit.data <- fit.data %>% filter(!is.na(Texp.in.Tex.relevant),MPR=="non-MPR") # applied once; both models share these rows
638 | # Evaluate both curves at the median of RFS_months_new, i.e. the same column that is passed as Stime.
639 | roc.time <- median(fit.data$RFS_months_new) # NOTE(review): confirm no separate `RFS_months` column was intended here
640 | 
641 | fit.cox <- coxph(Surv(RFS_months_new, RFS_status_new) ~ Texp.in.Tex.relevant, data = fit.data)
642 | PI<-fit.cox$linear.predictors
643 | tROC.hf.texp <-risksetROC(Stime=fit.data$RFS_months_new,status=fit.data$RFS_status_new,marker=PI, predict.time = roc.time, method="Cox",col="#418849",lty=1,lwd=1, xlab="FP:1-Specificity",ylab="TP:sensitivity",main="Time Dependent ROC")
644 | 
645 | fit.cox <- coxph(Surv(RFS_months_new, RFS_status_new) ~ PRR, data = fit.data)
646 | PI<-fit.cox$linear.predictors
647 | tROC.hf.prr <-risksetROC(Stime=fit.data$RFS_months_new,status=fit.data$RFS_status_new,marker=PI, predict.time = roc.time, method="Cox",col="blue",lty=1,lwd=1, xlab="FP:1-Specificity",ylab="TP:sensitivity",main="Time Dependent ROC")
648 | 
649 | # Overlay both curves on the current plot and report the two AUCs in the legend.
650 | lines(tROC.hf.texp$FP, tROC.hf.texp$TP, type="l",col="#418849",xlim=c(0,1), ylim=c(0,1))
651 | lines(tROC.hf.prr$FP, tROC.hf.prr$TP, type="l",col="blue",xlim=c(0,1), ylim=c(0,1))
652 | legend("bottomright",c(paste("AUC of texp: ",round(tROC.hf.texp$AUC,3)), paste("AUC of PRR : ",round(tROC.hf.prr$AUC,3))),col=c("#418849","blue"),lty=1,lwd=2,bty = "n") 
653 | dev.off()
654 | ```
655 | 
656 | 
657 | ```{r}
658 | # Cox models for Surgery_N, PRR and Texp.in.Tex.relevant, followed by the risksetROC AUC of four linear predictors.
659 | # NOTE(review): risksetROC gets every row of fit.data; a covariate with missing values would make PI shorter than Stime - confirm Surgery_N/PRR are complete.
660 | fit.cox <- coxph(Surv(RFS_months_new, RFS_status_new) ~ Surgery_N, data = fit.data)
661 | summary(fit.cox)
662 | 
663 | fit.cox <- coxph(Surv(RFS_months_new, RFS_status_new) ~ Surgery_N + Texp.in.Tex.relevant, data = fit.data)
664 | summary(fit.cox)
665 | 
666 | fit.cox <- coxph(Surv(RFS_months_new, RFS_status_new) ~ PRR + Texp.in.Tex.relevant, data = fit.data)
667 | summary(fit.cox)
668 | 
669 | # Drop rows without a Texp value once; every AUC below uses the same rows and the same evaluation time.
670 | fit.data <- fit.data %>% filter(!is.na(Texp.in.Tex.relevant))
671 | roc.time <- median(fit.data$RFS_months_new)
672 | 
673 | fit.cox <- coxph(Surv(RFS_months_new, RFS_status_new) ~ Texp.in.Tex.relevant, data = fit.data) # Texp alone
674 | PI<-fit.cox$linear.predictors
675 | tROC.hf <-risksetROC(Stime=fit.data$RFS_months_new,status=fit.data$RFS_status_new,marker=PI, predict.time = roc.time, method="Cox",col="green",lty=1,lwd=1, xlab="FP:1-Specificity",ylab="TP:sensitivity",main="Time Dependent ROC")
676 | tROC.hf$AUC
677 | 
678 | fit.cox <- coxph(Surv(RFS_months_new, RFS_status_new) ~ PRR, data = fit.data) # PRR alone
679 | PI<-fit.cox$linear.predictors
680 | tROC.hf <-risksetROC(Stime=fit.data$RFS_months_new,status=fit.data$RFS_status_new,marker=PI, predict.time = roc.time, method="Cox",col="green",lty=1,lwd=1, xlab="FP:1-Specificity",ylab="TP:sensitivity",main="Time Dependent ROC")
681 | tROC.hf$AUC
682 | 
683 | fit.cox <- coxph(Surv(RFS_months_new, RFS_status_new) ~ Texp.in.Tex.relevant + Surgery_N, data = fit.data) # Texp + Surgery_N
684 | PI<-fit.cox$linear.predictors
685 | tROC.hf <-risksetROC(Stime=fit.data$RFS_months_new,status=fit.data$RFS_status_new,marker=PI, predict.time = roc.time, method="Cox",col="green",lty=1,lwd=1, xlab="FP:1-Specificity",ylab="TP:sensitivity",main="Time Dependent ROC")
686 | tROC.hf$AUC
687 | 
688 | fit.cox <- coxph(Surv(RFS_months_new, RFS_status_new) ~ Surgery_N, data = fit.data) # Surgery_N alone
689 | PI<-fit.cox$linear.predictors
690 | tROC.hf <-risksetROC(Stime=fit.data$RFS_months_new,status=fit.data$RFS_status_new,marker=PI, predict.time = roc.time, method="Cox",col="green",lty=1,lwd=1, xlab="FP:1-Specificity",ylab="TP:sensitivity",main="Time Dependent ROC")
691 | tROC.hf$AUC
692 | ```
693 | 
694 | 
695 | 
696 | 
697 | 
698 | 
699 | 
700 | 
701 | 
702 | 


--------------------------------------------------------------------------------