├── .gitignore
├── LICENSE
├── Matlab
    ├── AddPGandMetaclusterstoSession.m
    ├── ImportTumorStromamasks_histoCAT.m
    ├── Run_tsne_Callback_noGUI.m
    ├── SetTumorStromaCelltype.m
    ├── WriteOutDistancesforR.m
    ├── extract_graph_measures_basel.m
    ├── extract_graph_measures_zurich.m
    ├── get_Image_means.m
    ├── histoCAT_adapted
    │   ├── fuse_images.m
    │   ├── heatmap_images_overlay.m
    │   ├── plot_mask_Callback.m
    │   └── show_selected_area_onTiff.m
    ├── plot_additionalLayer_mask_Callback.m
    └── write_out_SCdata_from_histoCAT_forSpilloverCorr.m
├── R
    ├── BaselTMA_pipeline.Rmd
    ├── ZurichTMA_pipeline.Rmd
    └── spillover_compensation
    │   ├── Spillover_compensation.Rmd
    │   └── Spillover_matrix.Rmd
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | *.Rproj.user
2 | *.Rhistory
3 | *.RData
4 | *.Ruserdata
5 | *.csv
6 | *.txt
7 | *.fcs
8 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 BodenmillerGroup
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Matlab/AddPGandMetaclusterstoSession.m:
--------------------------------------------------------------------------------
 1 | %Import phenograph (or any other clustering) info into session if not 
 2 | %already there and add metaclusters for our case too (only necessary when 
 3 | %a clustering was run or save somewhere else and needs to be added to 
 4 | %current histoCAT session). Columns are added to session.
 5 | 
 6 | %Retrieve session info
 7 | ses = retr('sessionData');
 8 | gates = retr('gates');
 9 | 
10 | %Import PG info (this script assumes that the images and single-cells are
11 | %already in the same order for both the session and the PG fcs file, if
12 | %this is not the case match cells first based on Image and CellID);
13 | phenoBasel = readtable('/home/jana/Desktop/R_dat/PhenoGraphBasel_k20_2.csv','Delimiter',',');
14 | phenoBasel.Properties.VariableNames = {'core','CellId','cluster'};
15 | ses(:,end+1) = table2array(phenoBasel(:,'cluster'));
16 | et_gates = cellfun(@(x) [x, 'PhenographBasel'],gates(:,3),'UniformOutput',false);
17 | gates(:,3) = et_gates;
18 | put('gates',gates);
19 | put('sessionData',ses);
20 | 
21 | %Metaclusters, according to the way we grouped the small clusters in R
22 | %pipeline
23 | %HARDCODED!!!
24 | pheno_col = ses(:,end);
25 | meta_pheno(pheno_col == 25) = 1;
26 | meta_pheno(pheno_col == 19) = 2;
27 | meta_pheno(pheno_col == 2) = 3;
28 | meta_pheno(pheno_col == 6) = 4;
29 | meta_pheno(pheno_col == 38) = 5;
30 | meta_pheno(pheno_col == 70) = 6;
31 | meta_pheno(pheno_col == 10) = 7;
32 | meta_pheno(pheno_col == 3) = 8;
33 | meta_pheno(pheno_col == 4) = 9;
34 | meta_pheno(pheno_col == 1) = 10;
35 | meta_pheno(pheno_col == 15) = 11;
36 | meta_pheno(pheno_col == 71) = 12;
37 | meta_pheno(pheno_col == 36) = 13;
38 | meta_pheno(pheno_col == 11) = 14;
39 | meta_pheno(pheno_col == 66 | pheno_col == 43 | pheno_col == 32 | pheno_col == 49 | pheno_col == 56) = 15;
40 | meta_pheno(pheno_col == 35 | pheno_col == 68 | pheno_col == 8 | pheno_col == 60) = 16;
41 | meta_pheno(pheno_col == 23 | pheno_col == 33 | pheno_col == 26) = 17;
42 | meta_pheno(pheno_col == 47 | pheno_col == 57 | pheno_col == 17 | pheno_col == 50) = 18;
43 | meta_pheno(pheno_col == 16 | pheno_col == 69 | pheno_col == 67) = 27;
44 | meta_pheno(pheno_col == 52 | pheno_col == 28 | pheno_col == 64 | pheno_col == 45) = 19;
45 | meta_pheno(pheno_col == 55 | pheno_col == 13 | pheno_col == 40 | pheno_col == 51 | pheno_col == 42) = 20;
46 | meta_pheno(pheno_col == 21 | pheno_col == 65 | pheno_col == 7) = 21;
47 | meta_pheno(pheno_col == 5 | pheno_col == 41 | pheno_col == 30 | pheno_col == 27 | pheno_col == 58) = 22;
48 | meta_pheno(pheno_col == 59 | pheno_col == 61 | pheno_col == 14 | pheno_col == 48 | pheno_col == 62 | pheno_col == 20 | pheno_col == 24) = 23;
49 | meta_pheno(pheno_col == 18 | pheno_col == 46 | pheno_col == 53 | pheno_col == 37 | pheno_col == 31) = 24;
50 | meta_pheno(pheno_col == 39 | pheno_col == 9 | pheno_col == 12 | pheno_col == 29 | pheno_col == 34 | pheno_col == 22) = 25;
51 | meta_pheno(pheno_col == 44 | pheno_col == 54 | pheno_col == 63) = 26;
52 | 
53 | %Store in session
54 | ses(:,end+1) = meta_pheno';
55 | et_gates = cellfun(@(x) [x, 'PhenographMetaclusters'],gates(:,3),'UniformOutput',false);
56 | gates(:,3) = et_gates;
57 | put('gates',gates);
58 | put('sessionData',ses);
59 | 
60 | %In order to import the clusters as gates, run the
61 | %parse_Phenographclusters.m function
62 | 
63 | 


--------------------------------------------------------------------------------
/Matlab/ImportTumorStromamasks_histoCAT.m:
--------------------------------------------------------------------------------
  1 | %Imports the Tumor-Stroma or other masks (in addition to single-cell masks)
  2 | %into the histoCAT session and calculates distance of every single cell to 
  3 | %the tumor-stroma boundary, to display distances or masks in images or
  4 | %extract them for plotting in R (Mask logic vector and single cell 
  5 | %distances vector will be added as additional channels in the session)
  6 | 
  7 | %Retrieve necessary variables
  8 | global Mask_all;
  9 | masks = Mask_all;
 10 | handles = gethand;
 11 | Tiff_all = retr('Tiff_all');
 12 | Tiff_name = retr('Tiff_name');
 13 | Tiff_all = Tiff_all;
 14 | Tiff_name = Tiff_name;
 15 | gates = retr('gates');
 16 | sessionData = retr('sessionData');
 17 | 
 18 | %Select folder containing the tumor masks and get all files corresponding
 19 | %to the masks in question
 20 | CNNmaskfolder = uipickfiles('Prompt','Select folder containging CNN masks'); 
 21 | all_files = dir(char(CNNmaskfolder));
 22 | all_file_names = {all_files(:).name};
 23 | all_file_names = all_file_names(~cellfun(@(x) contains(x,'._'),all_file_names));
 24 | all_file_names = all_file_names(3:end);
 25 | all_mask_files = all_file_names(~cellfun('isempty',regexpi(all_file_names,'_mask_AllTumorFilled.tiff')));
 26 | all_mask_files = all_mask_files (contains(all_mask_files ,'Basel'));
 27 | 
 28 | %Store masks in temporary variables
 29 | temp_Mask_all = struct('Image',[]);
 30 | image_names = cellfun(@(x) extractBefore(x,'a0') ,all_mask_files,'UniformOutput',false);
 31 | unique_image_name = unique(image_names);
 32 | store_preds = [];
 33 | 
 34 | %Loop through all images and find the corresponding masks based on the
 35 | %image name
 36 | for i=1:length(unique_image_name)
 37 |     
 38 |     cur_image = unique_image_name(i);
 39 |     all_masks_fo_curr_image_names = all_mask_files(contains(all_mask_files,cur_image));
 40 |     
 41 |     %Read in masks
 42 |     MultiMasks = cellfun(@(x) imread(fullfile(char(CNNmaskfolder),x)), all_masks_fo_curr_image_names,'UniformOutput',false);
 43 |     names_keep = all_masks_fo_curr_image_names;
 44 |     
 45 |     %Don't read in single-cell maak again
 46 |     kick_out_normal_seg = cellfun(@(x) contains(x,'full_mask.tif'),names_keep);
 47 |     kick_out_test = cellfun(@(x) contains(x,'test'),names_keep);
 48 |     MultiMasks = MultiMasks(~(kick_out_normal_seg | kick_out_test));
 49 |     
 50 |     %Set 65535 to 1 incase necessary
 51 |     for j=1:length(MultiMasks)
 52 |         [a,b] = ismember(MultiMasks{j},65535);
 53 |         MultiMasks{j}(a) = 1;
 54 |     end
 55 |     
 56 |     %Find image in session corresponding to mask and add new masks to
 57 |     %single-cell mask
 58 |     corresp_mask = find(contains(gates(:,1),cur_image));
 59 |     MultiMasks_ext = [{masks(corresp_mask).Image},MultiMasks];
 60 |     temp_Mask_all(i,1,:).Image = MultiMasks_ext;
 61 |     
 62 | end
 63 | 
 64 | %Find all affected images in session (for which additional masks exist)
 65 | found_name_in_session = sum(cell2mat(cellfun(@(x) contains(gates(:,1),x) ,unique_image_name,'UniformOutput',false)),2);
 66 | 
 67 | %Loop through all new masks and extract single cell distances to rim
 68 | row_store = [];
 69 | full_session = [];
 70 | not_working = [];
 71 | name_not_working  = {};
 72 | for k=1:length(temp_Mask_all)
 73 |     
 74 |     %Get other mask layers (additional to single-cell mask)
 75 |     other_Masks = temp_Mask_all(k).Image(2:end);
 76 |     singlecellmask = temp_Mask_all(k).Image{1};
 77 |     
 78 |     %Find single-cell rows in session corresponding to current mask image
 79 |     cur_image_name = unique_image_name(k);
 80 |     rows = [gates{contains(gates(:,1),cur_image_name),2}];
 81 |     row_store = [row_store,rows];
 82 |     cur_session = sessionData(rows,:);
 83 |     
 84 | 
 85 |      try   
 86 |         layer_names = {};
 87 |         sc_names = {};
 88 |         for layer = 1:length(other_Masks)
 89 | 
 90 |             store_dist = {};
 91 | 
 92 |             %Get single cells within each object of other layer
 93 |             get_single_cells_within = cellfun(@unique, struct2cell(regionprops(other_Masks{layer}, singlecellmask, 'PixelValues')),'UniformOutput',false);
 94 |             new_matrix = zeros(size(cur_session,1),1);
 95 | 
 96 |             for obj = 1:length(get_single_cells_within)
 97 |                 curr_obj = get_single_cells_within(obj);
 98 |                 rowfound_in = ismember(double(cur_session(:,2)), curr_obj{1});
 99 |                 new_matrix(rowfound_in,1) = obj;
100 |             end
101 | 
102 |             %Get all pixel distances first and then extract for regions of
103 |             %interest
104 |             all_pos = singlecellmask;
105 |             all_pos(:) = 1;
106 | 
107 |             %Get distances of each pixel to edge of mask of current layer object from
108 |             %inside
109 |             pixel_dists = bwdistgeodesic(logical(all_pos),logical(other_Masks{layer}));
110 | 
111 |             %Get smallest pixeldist for each cell
112 |             cell_dist = cellfun(@min, struct2cell(regionprops(singlecellmask, pixel_dists, 'PixelValues')),'UniformOutput',false);
113 |             cell_dist = cell2mat(cell_dist)';
114 |             cell_dist(isnan(cell_dist)) = 0;
115 |             store_dist = cell_dist;
116 | 
117 |             %Get distances of each pixel to edge of closest mask object of current layer from outside mask
118 |             pixel_dists_outside = bwdistgeodesic(logical(all_pos),logical(~other_Masks{layer}));
119 |             cell_dist_out = cellfun(@min, struct2cell(regionprops(singlecellmask, pixel_dists_outside, 'PixelValues')),'UniformOutput',false);
120 |             cell_dist_out = cell2mat(cell_dist_out)';
121 |             cell_dist_out(isnan(cell_dist_out)) = 0;
122 |             store_dist_out = cell_dist_out;
123 | 
124 |             %Overlay the distances for in and outside of mask into one column
125 |             distances =sum(horzcat(store_dist,store_dist_out),2);                   
126 |             if isempty(distances)
127 |                 distances = zeros(length(new_matrix),1);
128 |             end
129 | 
130 |             %Add to fcs matrix
131 |             cur_session = [cur_session, new_matrix,distances];
132 | 
133 |             %Add variable names
134 |             current_name_dist = {'Mask_AllTumorFilled',strcat('Mask_AllTumorFilled','_distance_to_edge')};
135 |             layer_names = [layer_names,current_name_dist];
136 |         end
137 | 
138 |      catch
139 |          %Record failed images/masks
140 |          not_working = [not_working,k];
141 |          name_not_working = [name_not_working,cur_image_name];
142 |          new_matrix = zeros(size(cur_session,1),1);
143 |          distances = zeros(length(new_matrix),1);
144 |          cur_session = [cur_session, new_matrix,distances];
145 | 
146 |      end
147 |     %Add current mask layer distances to session
148 |     full_session = [full_session; cur_session];
149 | 
150 | end
151 | 
152 | %Add Full_session to sessionData in rows that correspond
153 | sessionData(row_store,1:size(full_session,2)) = full_session;
154 | 
155 | %Same for gate names
156 | et_gates = cellfun(@(x) [x, layer_names],gates(:,3),'UniformOutput',false);
157 | gates(:,3) = et_gates;
158 | 
159 | %Update variables
160 | Mask_all(logical(found_name_in_session)) = temp_Mask_all;
161 | put('Tiff_all',Tiff_all);
162 | put('Tiff_name',Tiff_name);
163 | put ('sessionData',sessionData);
164 | put('gates',gates);
165 | 
166 | list_samples_Callback;
167 | 
168 | 
169 | 
170 | 


--------------------------------------------------------------------------------
/Matlab/Run_tsne_Callback_noGUI.m:
--------------------------------------------------------------------------------
 1 | %Script to run tsne without GUI and adapted to subsample cells
 2 | 
 3 | %fast tsne is only implemented for 2 dims.
 4 | ndims = 2;
 5 | 
 6 | %retrieve gui data
 7 | handles = gethand;
 8 | selected_gates = get(handles.list_samples,'Value');
 9 | selected_channels = get(handles.list_channels,'Value');
10 | gates = retr('gates');
11 | sessionData = retr('sessionData');
12 | gate_context = retr('gateContext');
13 | custom_gatesfolder = retr('custom_gatesfolder');
14 | 
15 | %HARDCODED which channels to use, adapt if anything changes
16 | rows = [gates{selected_gates,2}];
17 | sel_channels = [9:20,23:25,28,31:33,42:44,46,49:51,53:54,60:62,65,72];
18 | selectedset = ses(rows,sel_channels);
19 | 
20 | %Subsample 20% of cells from each image
21 | comb = [sessionData(:,1:2), selectedset];
22 | un_im = unique(comb(:,1),'stable');
23 | store_sampled_id =[];
24 | data = [];
25 | for i = 1:length(un_im)
26 |     cur_im = un_im(i);
27 |     cur_dat = comb(comb(:,1) == cur_im,:);
28 |     [sampled,idx] = datasample(cur_dat, ceil((size(cur_dat,1) * 0.2)),'Replace',false);
29 |     store_sampled_id = [store_sampled_id;cur_dat(idx,1:2)];
30 |     data = [data; sampled];
31 |     
32 | end
33 | 
34 | %Run tsne on subsampled data
35 | map = fast_tsne(data, 110);
36 | 
37 | %Write out results
38 | id_map = [array2table(store_sampled_id), map];
39 | id_map = table2array(id_map);
40 | names = gates(:,1);
41 | store_names = {};
42 | for i = 1:length(un_im)
43 |     cur_rows = id_map(:,1) == un_im(i);
44 |     store_names(cur_rows,1) = names(i);
45 |     
46 | end
47 | t = [store_names,array2table(id_map(:,2:end))];
48 | t.Properties.VariableNames = {'core','CellId','tsne1','tsne2'};
49 | 
50 | writetable(t,'tsne_combined_20perc.csv');
51 | 


--------------------------------------------------------------------------------
/Matlab/SetTumorStromaCelltype.m:
--------------------------------------------------------------------------------
 1 | %Set all tumor or all stromal cells to same cell type for neighbrohood
 2 | %analysis with specific question (for the analysis to get the TMEs
 3 | %irrespective of the tumor celltypes, set all Tumor cell types to a
 4 | %separate cluster number, that is not already used, here 100)
 5 | 
 6 | %Retrieve session data
 7 | ses = retr('sessionData');
 8 | gates = retr('gates');
 9 | 
10 | %Hard coded!! Enter column of PhenoGraph clusters of interest/ celltype
11 | %numbers to be adapted
12 | tumor_pheno = ses(:,74);%Basel
13 | tumor_pheno = ses(:,73);%Zuri
14 | 
15 | %Set all tumor cell types to label 100 
16 | tumor_pheno(ismember(tumor_pheno,14:27)) = 100; %Basel
17 | tumor_pheno(ismember(tumor_pheno,[4,6,8:15,17,19:20,22:27,29:38,40])) = 100;%Zuri
18 | 
19 | %Add to session as new Clustering column
20 | ses(:,end+1) = tumor_pheno;
21 | et_gates = cellfun(@(x) [x(1:(end-1)), 'PhenographTumor100'],gates(:,3),'UniformOutput',false);
22 | gates(:,3) = et_gates;
23 | put('gates',gates);
24 | put('sessionData',ses);


--------------------------------------------------------------------------------
/Matlab/WriteOutDistancesforR.m:
--------------------------------------------------------------------------------
 1 | %Write  out single cell distance information from histoCAT to use in R for 
 2 | %plotting
 3 | 
 4 | %Get session info
 5 | ses = retr('sessionData');
 6 | gates = retr('gates');
 7 | 
 8 | %Convert image hash ID to original file name and put into first column of
 9 | %output table
10 | names = gates(:,1);
11 | names = names(:);
12 | rows = [gates{:,2}];
13 | un = unique(ses(rows,1),'stable');
14 | for i=1:length(un)
15 |     cur_un = un(i);
16 |     cur_rows = ses(rows,1) == cur_un;
17 |     out_table(cur_rows,1) = names(i);
18 | end
19 | 
20 | %Add the cell IDs from the session to the second column of the output table
21 | out_table(:,2) = num2cell(ses(rows,2));
22 | 
23 | %Hardcoded!! Adapt to column numbers of distances and tumor region mask and
24 | %add those to third and fourth column of output table
25 | out_table(:,3) = num2cell((ses(rows,74)));
26 | out_table(:,4) = num2cell((ses(rows,73)));
27 | 
28 | %Write out as csv to read in R for plotting
29 | t = array2table(out_table,'VariableNames',{'core','CellId','Distances','Mask'});
30 | writetable(t,'Tumor_mask.csv');


--------------------------------------------------------------------------------
/Matlab/extract_graph_measures_basel.m:
--------------------------------------------------------------------------------
  1 | %Script to convert tissues to topological neighborhood graph representation
  2 | %and then run Louvain community detection on either all cells to detect
  3 | %microenvironment communities or only tumor cells for tumor communities.
  4 | 
  5 | %Retrieve session Data from histoCA
  6 | gates = retr('gates');
  7 | sessionData = retr('sessionData');
  8 | 
  9 | %Fragmentation score & communities on tumor cells only
 10 | %Loop through images
 11 | mean_size = [];
 12 | failed = [];
 13 | label_out = [];
 14 | for image_num = 1:381
 15 | 
 16 |     %Get single-cell data rows of current image in session Data
 17 |     rows = sessionData(gates{image_num,2},2);
 18 |     selectedall_gates = image_num;
 19 |     
 20 |     %Get neighbor column names
 21 |     pixelexpansion = 4;
 22 |     expansion_name = ['neighbour_',num2str(pixelexpansion),'_CellId'];
 23 |     
 24 |     %Get index of neighbor columns
 25 |     neigb_index = cellfun(@(x) find(~cellfun('isempty',regexp(x,expansion_name))),...
 26 |     gates(selectedall_gates,3),'UniformOutput',false);
 27 |     
 28 |     %Get neighbor matrix containing info of neighbors of each single-cell
 29 |     Neighbor_Matrix = sessionData(gates{selectedall_gates,2},[neigb_index{1}]);
 30 |     
 31 |     %Get column containing the PG of interest (relevant to distinguish
 32 |     %tumor from stromal cells)
 33 |     PG_idx = 74; 
 34 |     Phenograph_Vector_orig = sessionData(gates{selectedall_gates,2},PG_idx);
 35 |     Phenograph_Vector = Phenograph_Vector_orig;
 36 |     
 37 |     %Save for plotting
 38 |     rows_orig = rows;
 39 |     curIDs = sessionData(gates{selectedall_gates,2},2);
 40 |     
 41 |     %Use only tumor cell types
 42 |     idx_tumor_pheno = ismember(Phenograph_Vector_orig, 14:27);
 43 |     curIDs_tumor = curIDs(idx_tumor_pheno);
 44 |     Phenograph_Vector = Phenograph_Vector_orig(idx_tumor_pheno);
 45 |     Neighbor_Matrix = Neighbor_Matrix(idx_tumor_pheno,:);
 46 |     Neighbor_Matrix(~ismember(Neighbor_Matrix,curIDs_tumor)) = 0;
 47 |     rows = rows(idx_tumor_pheno);
 48 | 
 49 |     %Convert to topological neighborhood graph
 50 |     S = sparse(repmat(rows,1,size(Neighbor_Matrix,2)),(Neighbor_Matrix+1),1);
 51 |     fs = full(S);
 52 |     cutS = fs(:,2:size(fs,2));
 53 |     size1 = size(cutS,1);
 54 |     size2 = size(cutS,2);
 55 |     if size1 > size2
 56 |         cutS = [cutS, zeros(size1,size1-size2)];
 57 |     elseif size2 > size1
 58 |         cutS = [cutS; zeros(size1-size2, size2)];
 59 |     end
 60 |     
 61 |     %%%%%%%%%Plot graph, uncomment this part when looping over all images
 62 |     handles = gethand;
 63 |     idx_gates = selectedall_gates;
 64 |     global Mask_all
 65 |     mask = Mask_all(idx_gates).Image;
 66 |     mask(~ismember(mask,curIDs_tumor)) = 0;
 67 |     
 68 |     if ~isempty(mask)
 69 | 
 70 |         %Get single-cell centroids to plot graph nodes
 71 |         centroids_cell = struct2cell(regionprops(mask,'Centroid'));
 72 |         centroids = cell2mat(centroids_cell');
 73 |         centroids(isnan(centroids)) = 0;
 74 |         
 75 |         %Plot
 76 |         G = graph(cutS);
 77 |         figure;
 78 |         p = plot(G,'XData',centroids(:,1),'YData',centroids(:,2),'MarkerSize',3,'LineWidth',2);
 79 |         set(gca,'Ydir','reverse');
 80 |     end
 81 |     %%%%%%%%%
 82 | try
 83 |     %Run louvain community detection
 84 |     custom_gates = retr('custom_gatesfolder');	
 85 |     Graph2Binary(sparse(cutS),fullfile(custom_gates,'S'));	
 86 |     niter = 20;
 87 |     if ispc == 1	
 88 |         [c,Q,labels,communities] = LouvainfromBin_Windows(fullfile(custom_gates,'S.bin'),niter);	
 89 |     else	
 90 |         [c,Q,labels,communities] = LouvainfromBin_ubuntu(fullfile(custom_gates,'S.bin'),niter,'Yes');	
 91 |     end	
 92 |     llim = max([ceil(length(labels)./1e4) 1]);	
 93 | 
 94 |     %Get all detected community labels
 95 |     unLabel = unique(labels);	
 96 |     
 97 |     %Get sizes of the communities and find all communities that include
 98 |     %more than 1 cells and calculate average community size (for
 99 |     %fragmentation/cohesiveness score)
100 |     sizes = arrayfun(@(x) length(find(labels == x)) ,unLabel);
101 |     idxgr1 = sizes > 1;
102 |     curr_mean = mean(arrayfun(@(x) length(find(labels == x)) ,unLabel(idxgr1)));
103 | 
104 |     fragm_out = array2table(gates(image_num,1),'VariableNames',{'core'});
105 |     fragm_out(:,'frag') = {curr_mean};
106 |     mean_size = [mean_size; fragm_out];
107 |     
108 |     %Return all communities above certain size for analysis in R pipeline
109 |     larger = arrayfun(@(x) length(find(labels == x)) > 10 ,unLabel);
110 |     labels_use = unLabel(larger);
111 |     
112 |     %Save Communities of each image and involved single-cells to dataset
113 |     currPheno = {};	
114 |     ClusteringCoeff_perCommunity = {};	
115 |     comm = {};
116 |     core = {};
117 |     id = {};
118 |     for c=1:length(labels_use)
119 |         currlab = labels == labels_use(c);	
120 |         tempS = cutS;	
121 |         tempS(~currlab,~currlab) = 0;		
122 |         [acc, ~ ] = avgClusteringCoefficient(tempS);	
123 |         ClusteringCoeff_perCommunity{c} = repmat(acc,sum(currlab),1);	
124 |         currPheno{c} = Phenograph_Vector_orig(currlab);	
125 |         comm{c} = repmat(labels_use(c),sum(currlab),1);
126 |         core{c} = repmat(gates(image_num,1),sum(currlab),1);
127 |         id{c} = rows_orig(currlab);
128 | 
129 |     end
130 |     out_put = array2table([cell2mat(comm'),cell2mat(currPheno'),cell2mat(ClusteringCoeff_perCommunity'),cell2mat(id')],'VariableNames',{'Community','Pheno','ClusteringCoef','CellId'});
131 |     out_put(:,'core') = vertcat(core{:});
132 |     label_out = [label_out;out_put];
133 |     
134 | catch
135 |     failed = [failed, image_num];
136 |     continue
137 | end
138 | 
139 | end
140 | 
141 | %Write out mean size of communities as indicator of tumor fragmentation
142 | writetable(mean_size,'fragmentation.csv');
143 | %Write out community data for analysis in R
144 | writetable(label_out,'nodules_stromal_basel.csv');
145 | 
146 | 
147 | %Highlight tumor communities in different colors on graph
148 | custom_gates = retr('custom_gatesfolder');	
149 | Graph2Binary(sparse(cutS),fullfile(custom_gates,'S'));	
150 | niter = 20;
151 | if ispc == 1	
152 |     [c,Q,labels,communities] = LouvainfromBin_Windows(fullfile(custom_gates,'S.bin'),niter);	
153 | else	
154 |     [c,Q,labels,communities] = LouvainfromBin_ubuntu(fullfile(custom_gates,'S.bin'),niter,'Yes');	
155 | end	
156 | llim = max([ceil(length(labels)./1e4) 1]);	
157 |     	
158 | %Visualize communities of at least a certain amount of nodes
159 | unLabel = unique(labels);	
160 | larger = arrayfun(@(x) length(find(labels == x)) > 10,unLabel);
161 | labels_use = unLabel(larger);
162 | 
163 | %For each community highlight it on graph
164 | currPheno = {};	
165 | ClusteringCoeff_perCommunity = {};	
166 | comm = {};
167 | for c=1:length(labels_use)
168 |     currlab = labels == labels_use(c);	
169 |     tempS = cutS;	
170 |     tempS(~currlab,~currlab) = 0;	
171 |     tG = graph(tempS);
172 |     tp = plot(tG,'XData',centroids(:,1),'YData',centroids(:,2),'MarkerSize',3,'LineWidth',3);
173 |         set(gca,'Ydir','reverse');           
174 |     hold on;
175 | 
176 | end
177 | 
178 | 
179 | 
180 | %Visualize example images picked in R pipeline
181 | highlight = readtable('/home/jana/Desktop/R_dat/community_examples.csv');
182 | cellid = table2array(highlight(:,'CellId'));
183 | show = ismember(curIDs,cellid);
184 | cutcutS = cutS;
185 | cutcutS(~show,~show) = 0;
186 | 
187 | %Visualize individual communities of a given type from R pipeline
188 | labels_use = unique(table2array(highlight(:,'Community')));
189 | labels = table2array(highlight(:,'Community'));
190 | currPheno = {};	
191 | ClusteringCoeff_perCommunity = {};	
192 | comm = {};
193 | core = {};
194 | id = {};
195 | for c=1:length(labels_use)
196 |     currlab = ismember(curIDs,cellid(labels == labels_use(c)));	
197 |     tempS = cutcutS;	
198 |     tempS(~currlab,~currlab) = 0;	
199 | 
200 |     tG = graph(tempS);
201 |     tp = plot(tG,'XData',centroids(:,1),'YData',centroids(:,2),'MarkerSize',3,'LineWidth',3);
202 |     set(gca,'Ydir','reverse');
203 | 
204 |     hold on;
205 | 
206 |     [acc, ~ ] = avgClusteringCoefficient(tempS);	
207 |     ClusteringCoeff_perCommunity{c} = repmat(acc,sum(currlab),1);	
208 |     currPheno{c} = Phenograph_Vector_orig(currlab);	
209 |     comm{c} = repmat(labels_use(c),sum(currlab),1);
210 |     core{c} = repmat(gates(image_num,1),sum(currlab),1);
211 |     id{c} = rows_orig(currlab);
212 | 
213 | end
214 | 
215 | %Export visualized community graph to overlay onto label image
216 | export_fig('/home/jana/Desktop/R_dat/filename.pdf','-transparent','-dpdf')
217 | 
218 | 
219 | 
220 | %Microenvironment communities on all cell types
221 | %Loop through all images
222 | mean_size = [];
223 | failed = [];
224 | label_out = [];
225 | for image_num = 1:381
226 | 
227 |     %Get single-cell data rows of current image in session Data
228 |     rows = sessionData(gates{image_num,2},2);
229 |     selectedall_gates = image_num;
230 |     
231 |     %Get neighbor column names
232 |     pixelexpansion = 4;
233 |     expansion_name = ['neighbour_',num2str(pixelexpansion),'_CellId'];
234 |     
235 |     %Get index of neighbor columns
236 |     neigb_index = cellfun(@(x) find(~cellfun('isempty',regexp(x,expansion_name))),...
237 |     gates(selectedall_gates,3),'UniformOutput',false);
238 |     
239 |     %Get neighbor matrix containing info of neighbors of each single-cell
240 |     Neighbor_Matrix = sessionData(gates{selectedall_gates,2},[neigb_index{1}]);
241 |     
242 |     %Get the cell type label column that has the same label for all tumor
243 |     %cells and separate labels for stroma and immune cells    
244 |     PG_idx = 75; 
245 |     Phenograph_Vector_orig = sessionData(gates{selectedall_gates,2},PG_idx);
246 |     Phenograph_Vector = Phenograph_Vector_orig; 
247 |     rows_orig = rows;
248 |     curIDs = sessionData(gates{selectedall_gates,2},2);
249 |     
250 |     %Convert to topological neighborhood graph
251 |     S = sparse(repmat(rows,1,size(Neighbor_Matrix,2)),(Neighbor_Matrix+1),1);
252 |     fs = full(S);
253 |     cutS = fs(:,2:size(fs,2));
254 |     size1 = size(cutS,1);
255 |     size2 = size(cutS,2);
256 |     if size1 > size2
257 |         cutS = [cutS, zeros(size1,size1-size2)];
258 |     elseif size2 > size1
259 |         cutS = [cutS; zeros(size1-size2, size2)];
260 |     end
261 |     
262 |     %%%%%%%%%Plot graph, uncomment this part when looping over all images
263 |     handles = gethand;
264 |     idx_gates = selectedall_gates;
265 |     mask = Mask_all(idx_gates).Image;
266 | 
267 |     if ~isempty(mask)
268 |         
269 |         centroids_cell = struct2cell(regionprops(mask,'Centroid'));
270 |         centroids = cell2mat(centroids_cell');
271 |         centroids(isnan(centroids)) = 0;
272 | 
273 |         G = graph(cutS);
274 |         figure;
275 |         pg_ranks = centrality(G,'pagerank');
276 |         closeness = centrality(G,'closeness');
277 |         betweenness = centrality(G,'betweenness');
278 |         eigenvector = centrality(G,'eigenvector');
279 |         degree = centrality(G,'degree');
280 | 
281 |         p = plot(G,'XData',centroids(:,1),'YData',centroids(:,2),'MarkerSize',3,'LineWidth',2);
282 |         set(gca,'Ydir','reverse');
283 |     end
284 |     %%%%%%%%%%%
285 | try
286 |     %Run louvain community detection
287 |     custom_gates = retr('custom_gatesfolder');	
288 |     Graph2Binary(sparse(cutS),fullfile(custom_gates,'S'));	
289 |     niter = 20;
290 |     if ispc == 1	
291 |         [c,Q,labels,communities] = LouvainfromBin_Windows(fullfile(custom_gates,'S.bin'),niter);	
292 |     else	
293 |         [c,Q,labels,communities] = LouvainfromBin_ubuntu(fullfile(custom_gates,'S.bin'),niter,'Yes');	
294 |     end	
295 |     llim = max([ceil(length(labels)./1e4) 1]);	
296 | 
297 |     %Return communities including at least a minimum number of nodes
298 |     unLabel = unique(labels);	
299 |     larger = arrayfun(@(x) length(find(labels == x)) > 10 ,unLabel);
300 |     labels_use = unLabel(larger);
301 |     
302 |     %Save Communities of each image and involved single-cells to dataset
303 |     currPheno = {};	
304 |     ClusteringCoeff_perCommunity = {};	
305 |     comm = {};
306 |     core = {};
307 |     id = {};
308 |     for c=1:length(labels_use)
309 |         currlab = labels == labels_use(c);	
310 |         tempS = cutS;	
311 |         tempS(~currlab,~currlab) = 0;	
312 |         [acc, ~ ] = avgClusteringCoefficient(tempS);	
313 |         ClusteringCoeff_perCommunity{c} = repmat(acc,sum(currlab),1);	
314 |         currPheno{c} = Phenograph_Vector_orig(currlab);	
315 |         comm{c} = repmat(labels_use(c),sum(currlab),1);
316 |         core{c} = repmat(gates(image_num,1),sum(currlab),1);
317 |         id{c} = rows_orig(currlab);
318 | 
319 |     end
320 |     out_put = array2table([cell2mat(comm'),cell2mat(currPheno'),cell2mat(ClusteringCoeff_perCommunity'),cell2mat(id')],'VariableNames',{'Community','Pheno','ClusteringCoef','CellId'});
321 |     out_put(:,'core') = vertcat(core{:});
322 |     label_out = [label_out;out_put];
323 |     
324 | catch
325 |     failed = [failed, image_num];
326 |     continue
327 | end
328 | 
329 | end
330 | 
331 | %Write out community data for further analysis in R
332 | writetable(label_out,'nodules_stromal_basel.csv');
333 | 
334 | 
335 | %Highlight tumor communities in different colors on graph
336 | custom_gates = retr('custom_gatesfolder');	
337 | Graph2Binary(sparse(cutS),fullfile(custom_gates,'S'));	
338 | niter = 20;
339 | if ispc == 1	
340 |     [c,Q,labels,communities] = LouvainfromBin_Windows(fullfile(custom_gates,'S.bin'),niter);	
341 | else	
342 |     [c,Q,labels,communities] = LouvainfromBin_ubuntu(fullfile(custom_gates,'S.bin'),niter,'Yes');	
343 | end	
344 | llim = max([ceil(length(labels)./1e4) 1]);	
345 |     	
346 | unLabel = unique(labels);	
347 | larger = arrayfun(@(x) length(find(labels == x)) > 10,unLabel);
348 | labels_use = unLabel(larger);
349 | 
350 | %Highlight each community on graph	
351 | currPheno = {};	
352 | ClusteringCoeff_perCommunity = {};	
353 | comm = {};
354 | for c=1:length(labels_use)
355 |     currlab = labels == labels_use(c);	
356 |     tempS = cutS;	
357 |     tempS(~currlab,~currlab) = 0;	
358 | 
359 |     tG = graph(tempS);
360 |     tp = plot(tG,'XData',centroids(:,1),'YData',centroids(:,2),'MarkerSize',3,'LineWidth',3);
361 |     set(gca,'Ydir','reverse');
362 |             
363 |     hold on;
364 | 
365 | end
366 | 
367 |     
368 | 


--------------------------------------------------------------------------------
/Matlab/extract_graph_measures_zurich.m:
--------------------------------------------------------------------------------
  1 | %Script to convert tissues to topological neighborhood graph representation
  2 | %and then run Louvain community detection on either all cells to detect
  3 | %microenvironment communities or only tumor cells for tumor communities.
  4 | 
  5 | %Retrieve session Data from histoCAT
  6 | gates = retr('gates');
  7 | sessionData = retr('sessionData');
  8 | 
  9 | %Fragmentation score & communities on tumor cells only
 10 | %Loop through images
 11 | mean_size = [];
 12 | failed = [];
 13 | label_out = [];
 14 | for image_num = 1:353
 15 |   
 16 |     %Get single-cell data rows of current image in session Data
 17 |     rows = sessionData(gates{image_num,2},2);
 18 |     selectedall_gates = image_num;
 19 |     
 20 |     %Get neighbor column names
 21 |     pixelexpansion = 4;
 22 |     expansion_name = ['neighbour_',num2str(pixelexpansion),'_CellId'];
 23 |     
 24 |     %Get index of neighbor columns
 25 |     neigb_index = cellfun(@(x) find(~cellfun('isempty',regexp(x,expansion_name))),...
 26 |     gates(selectedall_gates,3),'UniformOutput',false);
 27 |     
 28 |     %Get neighbor matrix containing info of neighbors of each single-cell
 29 |     Neighbor_Matrix = sessionData(gates{selectedall_gates,2},[neigb_index{1}]);
 30 |     
 31 |     %Get column containing the PG of interest (relevant to distinguish
 32 |     %tumor from stromal cells)
 33 |     PG_idx = 73;
 34 |     Phenograph_Vector_orig = sessionData(gates{selectedall_gates,2},PG_idx);
 35 |     Phenograph_Vector = Phenograph_Vector_orig; 
 36 |         
 37 |     %Save for plotting
 38 |     rows_orig = rows;
 39 |     
 40 |     %Use only tumor cell types
 41 |     idx_tumor_pheno = ismember(Phenograph_Vector_orig, [4,6,8:15,17,19:20,22:27,29:38,40]);
 42 |     curIDs = sessionData(gates{selectedall_gates,2},2);
 43 |     curIDs_tumor = curIDs(idx_tumor_pheno);
 44 |    
 45 |     Phenograph_Vector = Phenograph_Vector_orig(idx_tumor_pheno);
 46 |     Neighbor_Matrix = Neighbor_Matrix(idx_tumor_pheno,:);
 47 |     Neighbor_Matrix(~ismember(Neighbor_Matrix,curIDs_tumor)) = 0;
 48 |     rows = rows(idx_tumor_pheno);
 49 | 
 50 |     %Convert to topological neighborhood graph
 51 |     S = sparse(repmat(rows,1,size(Neighbor_Matrix,2)),(Neighbor_Matrix+1),1);
 52 |     fs = full(S);
 53 |     cutS = fs(:,2:size(fs,2));
 54 |     size1 = size(cutS,1);
 55 |     size2 = size(cutS,2);
 56 |     if size1 > size2
 57 |         cutS = [cutS, zeros(size1,size1-size2)];
 58 |     elseif size2 > size1
 59 |         cutS = [cutS; zeros(size1-size2, size2)];
 60 |     end
 61 |     
 62 |     %%%%%%%%%Plot graph, uncomment this part when looping over all images
 63 |     handles = gethand;
 64 |     idx_gates = selectedall_gates;
 65 |     global Mask_all
 66 |     mask = Mask_all(idx_gates).Image;
 67 |     mask(~ismember(mask,curIDs_tumor)) = 0;
 68 |     
 69 |     if ~isempty(mask)
 70 | 
 71 |         %Get single-cell centroids to plot graph nodes
 72 |         centroids_cell = struct2cell(regionprops(mask,'Centroid'));
 73 |         centroids = cell2mat(centroids_cell');
 74 |         centroids(isnan(centroids)) = 0;
 75 |         
 76 |         %Plot
 77 |         G = graph(cutS);
 78 |         figure;
 79 |         p = plot(G,'XData',centroids(:,1),'YData',centroids(:,2),'MarkerSize',3,'LineWidth',2);
 80 |         set(gca,'Ydir','reverse');
 81 | 
 82 |     end
 83 |     %%%%%%%%%
 84 |         
 85 | try
 86 |     %Run louvain community detection
 87 |     custom_gates = retr('custom_gatesfolder');	
 88 |     Graph2Binary(sparse(cutS),fullfile(custom_gates,'S'));	
 89 |     niter = 20;
 90 |     if ispc == 1	
 91 |         [c,Q,labels,communities] = LouvainfromBin_Windows(fullfile(custom_gates,'S.bin'),niter,'Yes');	
 92 |     else	
 93 |         [c,Q,labels,communities] = LouvainfromBin_ubuntu(fullfile(custom_gates,'S.bin'),niter,'Yes');	
 94 |     end	
 95 |     llim = max([ceil(length(labels)./1e4) 1]);	
 96 | 
 97 |     %Get all detected community labels
 98 |     unLabel = unique(labels);	
 99 |     
100 |     %Get sizes of the communities and find all communities that include
101 |     %more than 1 cells and calculate average community size (for
102 |     %fragmentation/cohesiveness score)
103 |     sizes = arrayfun(@(x) length(find(labels == x)) ,unLabel);
104 |     idxgr1 = sizes > 1;
105 |     curr_mean = mean(arrayfun(@(x) length(find(labels == x)) ,unLabel(idxgr1)));
106 | 
107 |     fragm_out = array2table(gates(image_num,1),'VariableNames',{'core'});
108 |     fragm_out(:,'frag') = {curr_mean};
109 |     mean_size = [mean_size; fragm_out];
110 |     
111 |     %Return all communities above certain size for analysis in R pipeline
112 |     larger = arrayfun(@(x) length(find(labels == x)) > 10,unLabel);
113 |     labels_use = unLabel(larger);
114 |     
115 |     %Save Communities of each image and involved single-cells to dataset
116 |     currPheno = {};	
117 |     ClusteringCoeff_perCommunity = {};	
118 |     comm = {};
119 |     core = {};
120 |     id = {};
121 |     for c=1:length(labels_use)
122 |         currlab = labels == labels_use(c);	
123 |         tempS = cutS;	
124 |         tempS(~currlab,~currlab) = 0;	
125 |         [acc, ~ ] = avgClusteringCoefficient(tempS);	
126 |         ClusteringCoeff_perCommunity{c} = repmat(acc,sum(currlab),1);	
127 |         currPheno{c} = Phenograph_Vector_orig(currlab);	
128 |         comm{c} = repmat(labels_use(c),sum(currlab),1);
129 |         core{c} = repmat(gates(image_num,1),sum(currlab),1);
130 |         id{c} = rows_orig(currlab);
131 | 
132 |     end
133 |     out_put = array2table([cell2mat(comm'),cell2mat(currPheno'),cell2mat(ClusteringCoeff_perCommunity'),cell2mat(id')],'VariableNames',{'Community','Pheno','ClusteringCoef','CellId'});
134 |     out_put(:,'core') = vertcat(core{:});
135 |     label_out = [label_out;out_put];
136 |     
137 | catch
138 |     failed = [failed, image_num];
139 |     continue
140 | end
141 | 
142 | end
143 | 
144 | %Write out mean size of communities as indicator of tumor fragmentation
145 | writetable(mean_size,'fragmentation_zurich.csv');
146 | %Write out community data for analysis in R
147 | writetable(label_out,'nodules_stroma_zurich.csv');
148 | 
149 | 
150 | %Highlight tumor communities in different colors on graph
151 | custom_gates = retr('custom_gatesfolder');	
152 | Graph2Binary(sparse(cutS),fullfile(custom_gates,'S'));	
153 | niter = 20;
154 | if ispc == 1	
155 |     [c,Q,labels,communities] = LouvainfromBin_Windows(fullfile(custom_gates,'S.bin'),niter);	
156 | else	
157 |     [c,Q,labels,communities] = LouvainfromBin_ubuntu(fullfile(custom_gates,'S.bin'),niter,'Yes');	
158 | end	
159 | llim = max([ceil(length(labels)./1e4) 1]);	
160 |     	
161 | %Visualize communities of at least a certain amount of nodes
162 | unLabel = unique(labels);	
163 | larger = arrayfun(@(x) length(find(labels == x)) > 10,unLabel);
164 | labels_use = unLabel(larger);
165 | 
166 | %For each community highlight it on graph
167 | currPheno = {};	
168 | ClusteringCoeff_perCommunity = {};	
169 | comm = {};
170 | for c=1:length(labels_use)
171 |     currlab = labels == labels_use(c);	
172 |     tempS = cutS;	
173 |     tempS(~currlab,~currlab) = 0;	
174 | 
175 |     tG = graph(tempS);
176 |     tp = plot(tG,'XData',centroids(:,1),'YData',centroids(:,2),'MarkerSize',3,'LineWidth',3);
177 |     set(gca,'Ydir','reverse');
178 | 
179 |     hold on;
180 | 
181 | end
182 | 
183 | 
184 |     
185 | %Microenvironment communities on all cell types
186 | %Loop through all images
187 | mean_size = [];
188 | failed = [];
189 | label_out = [];
190 | for image_num = 1:353
191 |    
192 |     %Get single-cell data of current image
193 |     rows = sessionData(gates{image_num,2},2);
194 |     selectedall_gates = image_num;
195 |     
196 |     %Get neighbor info of single-cells
197 |     pixelexpansion = 4;
198 |     expansion_name = ['neighbour_',num2str(pixelexpansion),'_CellId'];   
199 |     neigb_index = cellfun(@(x) find(~cellfun('isempty',regexp(x,expansion_name))),...
200 |     gates(selectedall_gates,3),'UniformOutput',false);  
201 |     Neighbor_Matrix = sessionData(gates{selectedall_gates,2},[neigb_index{1}]);
202 |     
203 |     %Get the cell type label column that has the same label for all tumor
204 |     %cells and separate labels for stroma and immune cells
205 |     PG_idx = 74;
206 |     Phenograph_Vector_orig = sessionData(gates{selectedall_gates,2},PG_idx);
207 |     Phenograph_Vector = Phenograph_Vector_orig;      
208 |     rows_orig = rows;
209 | 
210 |     %Convert to topological neighborhood graph
211 |     S = sparse(repmat(rows,1,size(Neighbor_Matrix,2)),(Neighbor_Matrix+1),1);
212 |     fs = full(S);
213 |     cutS = fs(:,2:size(fs,2));
214 |     size1 = size(cutS,1);
215 |     size2 = size(cutS,2);
216 |     if size1 > size2
217 |         cutS = [cutS, zeros(size1,size1-size2)];
218 |     elseif size2 > size1
219 |         cutS = [cutS; zeros(size1-size2, size2)];
220 |     end
221 |     
222 |     %%%%%%%%%Plot graph, uncomment this part when looping over all images
223 |     handles = gethand;
224 |     idx_gates = selectedall_gates;
225 |     mask = Mask_all(idx_gates).Image;
226 | 
227 |     if ~isempty(mask)
228 |         centroids_cell = struct2cell(regionprops(mask,'Centroid'));
229 |         centroids = cell2mat(centroids_cell');
230 |         centroids(isnan(centroids)) = 0;
231 |         
232 |         G = graph(cutS);
233 |         figure;
234 |         pg_ranks = centrality(G,'pagerank');
235 |         closeness = centrality(G,'closeness');
236 |         betweenness = centrality(G,'betweenness');
237 |         eigenvector = centrality(G,'eigenvector');
238 |         degree = centrality(G,'degree');
239 | 
240 |         p = plot(G,'XData',centroids(:,1),'YData',centroids(:,2),'MarkerSize',3,'LineWidth',2);
241 |         set(gca,'Ydir','reverse');
242 | 
243 |     end
244 |     %%%%%%%%%%%
245 | try
246 |     %Run louvain community detection
247 |     custom_gates = retr('custom_gatesfolder');	
248 |     Graph2Binary(sparse(cutS),fullfile(custom_gates,'S'));	
249 |     niter = 20;
250 |     if ispc == 1	
251 |         [c,Q,labels,communities] = LouvainfromBin_Windows(fullfile(custom_gates,'S.bin'),niter);	
252 |     else	
253 |         [c,Q,labels,communities] = LouvainfromBin_ubuntu(fullfile(custom_gates,'S.bin'),niter,'Yes');	
254 |     end	
255 |     llim = max([ceil(length(labels)./1e4) 1]);	
256 | 
257 |     %Return communities including at least a minimum number of nodes
258 |     unLabel = unique(labels);	
259 |     larger = arrayfun(@(x) length(find(labels == x)) > 10,unLabel);
260 |     labels_use = unLabel(larger);
261 |     
262 |     %Save Communities of each image and involved single-cells to dataset
263 |     currPheno = {};	
264 |     ClusteringCoeff_perCommunity = {};	
265 |     comm = {};
266 |     core = {};
267 |     id = {};
268 |     for c=1:length(labels_use)
269 |         currlab = labels == labels_use(c);	
270 |         tempS = cutS;	
271 |         tempS(~currlab,~currlab) = 0;	
272 |         [acc, ~ ] = avgClusteringCoefficient(tempS);	
273 |         ClusteringCoeff_perCommunity{c} = repmat(acc,sum(currlab),1);	
274 |         currPheno{c} = Phenograph_Vector_orig(currlab);	
275 |         comm{c} = repmat(labels_use(c),sum(currlab),1);
276 |         core{c} = repmat(gates(image_num,1),sum(currlab),1);
277 |         id{c} = rows_orig(currlab);
278 | 
279 |     end
280 |     out_put = array2table([cell2mat(comm'),cell2mat(currPheno'),cell2mat(ClusteringCoeff_perCommunity'),cell2mat(id')],'VariableNames',{'Community','Pheno','ClusteringCoef','CellId'});
281 |     out_put(:,'core') = vertcat(core{:});
282 |     label_out = [label_out;out_put];
283 |     
284 | catch
285 |     failed = [failed, image_num];
286 |     continue
287 | end
288 | 
289 | end
290 | 
291 | %Write out community data for further analysis in R
292 | writetable(label_out,'nodules_stroma_zurich.csv');
293 | 
294 | 
295 | %Highlight tumor communities in different colors on graph
296 | custom_gates = retr('custom_gatesfolder');	
297 | Graph2Binary(sparse(cutS),fullfile(custom_gates,'S'));	
298 | niter = 20;
299 | if ispc == 1	
300 |     [c,Q,labels,communities] = LouvainfromBin_Windows(fullfile(custom_gates,'S.bin'),niter);	
301 | else	
302 |     [c,Q,labels,communities] = LouvainfromBin_ubuntu(fullfile(custom_gates,'S.bin'),niter,'Yes');	
303 | end	
304 | llim = max([ceil(length(labels)./1e4) 1]);	
305 |     	
306 | unLabel = unique(labels);	
307 | larger = arrayfun(@(x) length(find(labels == x)) > 10,unLabel);
308 | labels_use = unLabel(larger);
309 | 
310 | %Highlight each community on graph	
311 | currPheno = {};	
312 | ClusteringCoeff_perCommunity = {};	
313 | comm = {};
314 | for c=1:length(labels_use)
315 |     currlab = labels == labels_use(c);	
316 |     tempS = cutS;	
317 |     tempS(~currlab,~currlab) = 0;	
318 | 
319 |     tG = graph(tempS);
320 |     tp = plot(tG,'XData',centroids(:,1),'YData',centroids(:,2),'MarkerSize',3,'LineWidth',3);
321 |     set(gca,'Ydir','reverse');
322 |             
323 |     hold on;
324 | 
325 |     [acc, ~ ] = avgClusteringCoefficient(tempS);	
326 |     ClusteringCoeff_perCommunity{c} = repmat(acc,sum(currlab),1);	
327 |     currPheno{c} = Phenograph_Vector(currlab);	
328 |     comm{c} = repmat(labels_use(c),sum(currlab),1);
329 | 
330 | end
331 | 
332 | 
333 | 
334 | 


--------------------------------------------------------------------------------
/Matlab/get_Image_means.m:
--------------------------------------------------------------------------------
  1 | %Retrieve sessionData
  2 | ses = retr('sessionData');
  3 | gates = retr('gates');
  4 | handles = gethand;
  5 | 
  6 | Tiff_all = retr('Tiff_all');
  7 | Tiff_name = retr('Tiff_name');
  8 | 
  9 | %Get full Image marker means
 10 | all_mat = [];
 11 | for i = 1:size(Tiff_all,1)
 12 | 
 13 |     cur_tiff = Tiff_all(i,:);
 14 |     all_mean_intensities = cellfun(@(x) sum(sum(x))/(size(x,1)*size(x,2)),cur_tiff,'UniformOutput',false);
 15 |     all_mat = [all_mat; cell2mat(all_mean_intensities)];
 16 |     
 17 | end
 18 | 
 19 | names = gates{1,3}(3:44);
 20 | intensity_table = array2table(all_mat,'VariableNames',names);
 21 | intensity_table = [intensity_table,gates(1:381,1)];
 22 | writetable(intensity_table,'ImageMeanMarkerIntensity.csv');
 23 | 
 24 | 
 25 | 
 26 | 
 27 | %Get tumor region marker means
 28 | 
 29 | 
 30 | %Retrieve necessary variables
 31 | global Mask_all;
 32 | masks = Mask_all;
 33 | handles = gethand;
 34 | Tiff_all = retr('Tiff_all');
 35 | Tiff_name = retr('Tiff_name');
 36 | Tiff_all = Tiff_all;
 37 | Tiff_name = Tiff_name;
 38 | gates = retr('gates');
 39 | sessionData = retr('sessionData');
 40 | 
 41 | %Select folder containing the tumor masks and get all files corresponding
 42 | %to the masks in question
 43 | CNNmaskfolder = uipickfiles('Prompt','Select folder containging CNN masks'); 
 44 | all_files = dir(char(CNNmaskfolder));
 45 | all_file_names = {all_files(:).name};
 46 | all_file_names = all_file_names(~cellfun(@(x) contains(x,'._'),all_file_names));
 47 | all_file_names = all_file_names(3:end);
 48 | all_mask_files = all_file_names(~cellfun('isempty',regexpi(all_file_names,'_mask_AllTumorFilled.tiff')));
 49 | all_mask_files = all_mask_files (contains(all_mask_files ,'BaselTMA'));
 50 | 
 51 | %Store masks in temporary variables
 52 | temp_Mask_all = struct('Image',[]);
 53 | image_names = cellfun(@(x) extractBefore(x,'a0') ,all_mask_files,'UniformOutput',false);
 54 | unique_image_name = unique(image_names);
 55 | store_preds = [];
 56 | 
 57 | %Loop through all images and find the corresponding masks based on the
 58 | %image name
 59 | for i=1:length(unique_image_name)
 60 |     
 61 |     cur_image = unique_image_name(i);
 62 |     all_masks_fo_curr_image_names = all_mask_files(contains(all_mask_files,cur_image));
 63 |     
 64 |     %Read in masks
 65 |     MultiMasks = cellfun(@(x) imread(fullfile(char(CNNmaskfolder),x)), all_masks_fo_curr_image_names,'UniformOutput',false);
 66 |     names_keep = all_masks_fo_curr_image_names;
 67 |     
 68 |     %Don't read in single-cell maak again
 69 |     kick_out_normal_seg = cellfun(@(x) contains(x,'full_mask.tif'),names_keep);
 70 |     kick_out_test = cellfun(@(x) contains(x,'test'),names_keep);
 71 |     MultiMasks = MultiMasks(~(kick_out_normal_seg | kick_out_test));
 72 |     
 73 |     %Set 65535 to 1 incase necessary
 74 |     for j=1:length(MultiMasks)
 75 |         [a,b] = ismember(MultiMasks{j},65535);
 76 |         MultiMasks{j}(a) = 1;
 77 |     end
 78 |     
 79 |     %Find image in session corresponding to mask and add new masks to
 80 |     %single-cell mask
 81 |     corresp_mask = find(contains(gates(:,1),cur_image));
 82 |     MultiMasks_ext = [{masks(corresp_mask).Image},MultiMasks];
 83 |     temp_Mask_all(i,1,:).Image = MultiMasks_ext;
 84 |     
 85 | end
 86 | 
 87 | %Find all affected images in session (for which additional masks exist)
 88 | found_name_in_session = sum(cell2mat(cellfun(@(x) contains(gates(:,1),x) ,unique_image_name,'UniformOutput',false)),2);
 89 | 
 90 | %Loop through all new masks and extract tumor region mean marker expression
 91 | image_names = {};
 92 | mean_mat = [];
 93 | not_working = {};
 94 | for k=1:length(temp_Mask_all)
 95 |     
 96 |     %Get other mask layers (additional to single-cell mask)
 97 |     other_Masks = temp_Mask_all(k).Image(2:end);
 98 |     
 99 |     %Find index in tiffs
100 |     cur_image_name = unique_image_name(k);
101 |     tiff_idx = contains(gates(:,1),cur_image_name);
102 |     
103 |     %Get Tiffs corresponding to mask
104 |     cur_marker_names = Tiff_name(tiff_idx,:);
105 |     cur_tiffs = Tiff_all(tiff_idx,:);
106 | 
107 |     try
108 |         %Extract marker mean of tumor regions for every channel
109 |         marker_means = cellfun(@(x) cell2mat(struct2cell(regionprops(other_Masks{1},x, 'MeanIntensity'))),cur_tiffs,'UniformOutput',false);
110 |         em = cellfun(@isempty, marker_means);
111 |         marker_means(em) = {0};
112 | 
113 |         image_names = [image_names, cur_image_name];
114 |         mean_mat = [mean_mat; cell2mat(marker_means)];
115 |     catch
116 |         
117 |         not_working = [not_working,cur_image_name];
118 |     end
119 | 
120 | end
121 | 
122 | names = gates{1,3}(3:54);%44 in Basel,59 uncleaned Zurich
123 | intensity_table = array2table(mean_mat,'VariableNames',names);
124 | intensity_table = [image_names',intensity_table];
125 | writetable(intensity_table,'ZuriTumorHollowRegionMeanMarkerIntensity.csv');
126 | 
127 | 
128 | %Loop through all new masks and extract stromal region area
129 | image_names = {};
130 | areas = [];
131 | not_working = {};
132 | for k=1:length(temp_Mask_all)
133 |     
134 |     %Get other mask layers (additional to single-cell mask)
135 |     other_Masks = temp_Mask_all(k).Image(2:end);
136 |     
137 |     %Find index in tiffs
138 |     cur_image_name = unique_image_name(k);
139 | 
140 | 
141 | %     try
142 |         %Extract marker mean of tumor regions for every channel
143 |         stroma_area = sum(cell2mat(struct2cell(regionprops(~other_Masks{1},'Area'))));
144 | 
145 |         image_names = [image_names, cur_image_name];
146 |         areas = [areas; stroma_area];
147 | %     catch
148 | %         
149 | %         not_working = [not_working,cur_image_name];
150 | %     end
151 | 
152 | end
153 | 
154 | area_table = array2table(image_names','VariableNames',{'core'});
155 | area_table(:,'area') = array2table(areas);
156 | writetable(area_table,'Basel_stroma_area.csv');
157 | 
158 | 


--------------------------------------------------------------------------------
/Matlab/histoCAT_adapted/fuse_images.m:
--------------------------------------------------------------------------------
 1 | function [ tiff_matrix ] = fuse_images(tabchild,imh)
 2 | % FUSE_IMAGES: Slightly adapted to our case to not display legend on top of
 3 | % image because we batch saved all images and didn't want parts to be
 4 | % hidden.
 5 | 
 6 | %Get GUI variables
 7 | handles = gethand;
 8 | tab_axes = retr('tab_axes1');
 9 | 
10 | %Delete javawrapper classes
11 | delete(tabchild.Children.findobj('Units','pixels'));
12 | 
13 | %Retrieve global variables
14 | global Mask_all
15 | 
16 | %Function call to get the index and the tiff name of the selected channels
17 | [sel_channels,tiff_matrix] = Comparetiffnames_tolistchannels(Mask_all);
18 | 
19 | %Store the colormap based on the number of selected channels
20 | %in order: 'r','g','b','c','m','y'
21 | colorstouse = [[1 0 0];[0 1 0];[0 0 1];[0 1 1];[1 0 1];[1 1 0]];
22 | 
23 | %If no axes found, create one
24 | if isempty(tab_axes) == 1
25 |     handles.panel_tiff_images;
26 |     tab_axes = subplot(1,1,1,'Parent',tabchild);
27 |     put('tab_axes1',tab_axes);
28 | end
29 | 
30 | %Loop through the selected tiffs (channels)
31 | for k=1:length(tiff_matrix{1,imh})
32 |        
33 |     %Scale image
34 |     tiffimage_read = mat2gray(tiff_matrix{1,imh}{k});
35 |     
36 |     %Focus on axes
37 |     handles.panel_tiff_images;
38 |     axes(tab_axes);
39 |     hold on;
40 |     
41 |     %If it is the first image, set background as the BWimage
42 |     if k == 1
43 |         blackim = imshow(tiffimage_read);
44 |         set(blackim,'Tag','firstgrayimage');
45 |         hold on;     
46 |     end
47 |     
48 |     %Function call to convert image to RGB
49 |     [rgb_Image] = make_rgb( tiffimage_read,colorstouse,k);
50 |     hold on;
51 |     
52 |     %Display RGB image
53 |     imagesh = imshow(rgb_Image);freezeColors;
54 |     hold on;
55 |     
56 |     %Tag image
57 |     set(imagesh,'Tag',strcat('rgbimage',int2str(k)));
58 |     hold off;
59 |     
60 |     %Freeze colors
61 |     freezeColors;
62 |     
63 |     %Adjust the intensity of the cell colors if multiple channels are
64 |     %selected
65 |     if length(tiff_matrix{1,imh}) ~= 1
66 |         disp('Applying contrast to image to display all markers')
67 |         intensemask =  imadjust(tiffimage_read);
68 |     else
69 |         intensemask =  tiffimage_read;
70 |     end
71 |  
72 |     %Set the alphadata of the RGB image to the adjusted grayimage
73 |     set(imagesh,'AlphaData',intensemask);
74 |     freezeColors;
75 |     
76 |     hold off;
77 |     
78 | end
79 | 
80 | %If multiple channels are selected
81 | if numel(sel_channels) > 1
82 |     
83 |     %Set up legend of which color correspond to which channel
84 | %     string_channels = retr('list_channels');
85 | %     La = line(ones(numel(sel_channels)),ones(numel(sel_channels)),'LineWidth',2,'Parent',tabchild.Children.findobj('Type','axes'));
86 | %     set(La,{'color'},mat2cell(colorstouse(1:numel(sel_channels),:),ones(1,numel(sel_channels)),3)); freezeColors;
87 | %     hla=legend(La,cellfun(@(n)(num2str(n)), string_channels(sel_channels), 'UniformOutput', false));
88 | %     
89 | %     %Define the location of the legend
90 | %     set(hla, 'Location','South');
91 | %     set(hla,'FontSize',8,'Interpreter','none');
92 |     
93 | end
94 | 
95 | end
96 | 
97 | 


--------------------------------------------------------------------------------
/Matlab/histoCAT_adapted/heatmap_images_overlay.m:
--------------------------------------------------------------------------------
 1 | function heatmap_images_overlay( labelimg, labelvec, valuevec, axis ,handles )
 2 | % HEATMAP_IMAGES_OVERLAY: Slightly adapted to our case to not display color
 3 | %bar on top of image (inconvenient when batch savin).
 4 | 
 5 | %Initialize amount of colors
 6 | ncols =100;
 7 | 
 8 | %Retrieve current percentile cut-off slider value
 9 | global currentsliderValue;
10 | perc = currentsliderValue;
11 | 
12 | %If a percentile cut-off has been set
13 | if ~(isempty(perc) == 1)
14 |     
15 |     %Function call to cut off the values above a given percentile
16 |     %(outliers)
17 |     valuevec = percentile_cutoff(labelvec, valuevec, handles, perc);
18 |     
19 | end
20 | 
21 | %Sort the label vector and the value vector according to the same order
22 | [labelvec, ord] = sort(labelvec);
23 | valuevec = valuevec(ord);
24 | 
25 | %Normalize the value vector
26 | maxval = max(valuevec);
27 | minval = min(valuevec);
28 | res_valuevec = (valuevec-minval)/(maxval-minval);
29 | 
30 | %If there are no values found, return
31 | if isnan(res_valuevec) == 1
32 |     disp('No Data found to visualize');
33 |     return;
34 | end
35 | 
36 | %Make a 'full vector' in case some cell labels were missing
37 | full_valuevec = zeros(max(labelimg(:)),1);
38 | full_valuevec(labelvec) = res_valuevec;
39 | full_valuevec = (full_valuevec-min(full_valuevec(:))) ./ (max(full_valuevec(:)-min(full_valuevec(:))));
40 | 
41 | %Define the color map
42 | colmap = jet(ncols+1);
43 | 
44 | %Assign the colors to the values
45 | full_valuevec = round(full_valuevec*ncols)+1;
46 | colmap_lab = colmap(full_valuevec,:);
47 | 
48 | %Remove labels that are not in the labelvector from the image mask
49 | labelimg(~ismember(labelimg, labelvec)) = 0;
50 | 
51 | %Apply the colormap
52 | rgb_img = label2rgb(labelimg, colmap_lab, [0,0,0]);
53 | 
54 | %Set focus on axis and hold on to it
55 | axes(axis);
56 | hold on;
57 | 
58 | %Display image
59 | intenseim = imshow(rgb_img);
60 | hold on;
61 | 
62 | %Remove colorbar because when batch saving we didn't want it to hide parts
63 | %of the image.
64 | 
65 | % %Set colorbar
66 | % colormap(axis,colmap);
67 | % cbr = colorbar(axis);
68 | % cbr.Location = 'SouthOutside';
69 | % hold on;
70 | 
71 | %Set labels, lims and ticks
72 | drawnow;
73 | % lims = get(cbr,'Limits');
74 | % yval = linspace(lims(1), lims(2),11);
75 | % set(cbr,'ytick',yval);
76 | ylab=linspace(minval,maxval,11);
77 | ylab =round(ylab, 2, 'significant');
78 | %set(cbr,'YTickLabel',ylab);
79 | freezeColors;
80 | 
81 | %Set position of colorbar
82 | %cbr.Position = [0.1542 0.022 0.7274 0.0200];
83 | 
84 | %Tag the image
85 | set(intenseim,'Tag','rgbimage1');
86 | 
87 | end
88 | 
89 | 


--------------------------------------------------------------------------------
/Matlab/histoCAT_adapted/plot_mask_Callback.m:
--------------------------------------------------------------------------------
  1 | function plot_mask_Callback(hObject, eventdata, handles)
  2 | % PLOT_MASK_CALLBACK: slightly adapted from original histoCAT version, can
  3 | % just be plugged in to replace original function. Adapted to allow for
  4 | % plotting of multiple masks.
  5 | 
  6 | %Retrieve GUI and global variables
  7 | tabmaster_histonetiff = retr('tabmaster_histonetiff');
  8 | global Mask_all
  9 | global Sample_Set_arranged
 10 | 
 11 | %Retrieve previously stored mask outline image (image of outlined single
 12 | %cells)
 13 | maskoutline = retr('maskoutline');
 14 | 
 15 | %Get the current tab number
 16 | tabnum = find(tabmaster_histonetiff.Children == tabmaster_histonetiff.SelectedTab);
 17 | 
 18 | %If the checkbox is checked, show mask
 19 | if handles.mask_onoff.Value == 1
 20 |     
 21 |     %If a mask outline for this image has already been generated and stored previously,
 22 |     %there is no need to regenerate it
 23 |     if isempty(maskoutline) ~= 1
 24 |         
 25 |         %Handle exceptions
 26 |         try
 27 |             
 28 |             %Check if the mask outline for this specific tab exists, if so
 29 |             %display it on the current image
 30 |             if isempty(maskoutline{tabnum}) ~= 1
 31 |                 cmap = retr('cmap');
 32 |                 maskoutline{tabnum}.Visible = 'on';
 33 |                 
 34 |             %If mask outline for current tab does not already exist, generate it
 35 |             else
 36 |                 
 37 |                 %Split the filepaths and extract the sample name of all samples
 38 |                 splitSamplename = cellfun(@(x) strsplit(x,fullfile('/')),Sample_Set_arranged,'UniformOutput',false);
 39 |                 allcutnames = cellfun(@(x) x(end),splitSamplename);
 40 |                 
 41 |                 %Find the index of the sample that corresponds to the currently
 42 |                 %visualized image
 43 |                 idxfound_name = find(~cellfun('isempty',regexpi(allcutnames,tabmaster_histonetiff.SelectedTab.Title)));
 44 | 
 45 |                 %Store the corresponding single-cell mask (each pixel of a
 46 |                 %cell is marked with the corresponding cell number)
 47 |                 lblImg_filled = Mask_all(1,idxfound_name).Image;
 48 |                 
 49 |                 %If there is no mask and hence no single-cell data, return
 50 |                 if isempty(lblImg_filled) == 1
 51 |                     return;
 52 |                 end
 53 |                 
 54 |                 %Get only the outlines of the individual cells (not all the pixels of a
 55 |                 %cell, but only the edges)
 56 |                 lblImg=conv2(single(lblImg_filled),[0 -1 0; -1 4 -1;0 -1 0],'same')>0;
 57 |                 
 58 |                 %Set focus on current axes and hold on to it
 59 |                 axes(tabmaster_histonetiff.SelectedTab.Children.findobj('Type','axes'));
 60 |                 hold on;
 61 |                 
 62 |                 %Display the mask outline image on top of the current image
 63 |                 %axes, and set the transparancy of the mask to a level such
 64 |                 %that both the cell outlines and the background image are visible
 65 |                 cmap = colormap;
 66 |                 lblImg = gray2ind(lblImg,200);
 67 |                 maskoutline{tabnum} = imshow(lblImg);
 68 |                 freezeColors;
 69 |                 set(maskoutline{tabnum},'AlphaData',0.4);
 70 |                 put('maskoutline',maskoutline);
 71 |             end
 72 |             
 73 |         %Catch exceptions and generate mask outlines for current image
 74 |         catch
 75 |             
 76 |             %Split the filepaths and extract the sample name of all samples
 77 |             splitSamplename = cellfun(@(x) strsplit(x,fullfile('/')),Sample_Set_arranged,'UniformOutput',false);
 78 |             allcutnames = cellfun(@(x) x(end),splitSamplename);
 79 |             
 80 |             %Find the index of the sample that corresponds to the currently
 81 |             %visualized image
 82 |             idxfound_name = find(~cellfun('isempty',regexpi(allcutnames,tabmaster_histonetiff.SelectedTab.Title)));
 83 |             
 84 |             %Store the corresponding single-cell mask (each pixel of a
 85 |             %cell is marked with the corresponding cell number)
 86 |             try
 87 |                 lblImg_filled = Mask_all(idxfound_name).Image{1};
 88 |             catch
 89 |                 lblImg_filled = Mask_all(1,idxfound_name).Image;
 90 |             end
 91 |             
 92 |             %If there is no mask and hence no single-cell data, return
 93 |             if isempty(lblImg_filled) == 1
 94 |                 return;
 95 |             end
 96 |             
 97 |             %Get only the outlines of the individual cells (not all the pixels of a
 98 |             %cell, but only the edges)
 99 |             lblImg=conv2(single(lblImg_filled),[0 -1 0; -1 4 -1;0 -1 0],'same')>0;
100 |             
101 |             %Set focus on current axes and hold on to it
102 |             axes(tabmaster_histonetiff.SelectedTab.Children.findobj('Type','axes'));
103 |             hold on;
104 |             
105 |             %Display the mask outline image on top of the current image
106 |             %axes, and set the transparancy of the mask to a level such
107 |             %that both the cell outlines and the background image are visible
108 |             cmap = colormap;
109 |             lblImg = gray2ind(lblImg,200);
110 |             maskoutline{tabnum} = imshow(lblImg);
111 |             freezeColors;
112 |             set(maskoutline{tabnum},'AlphaData',0.4);
113 |             put('maskoutline',maskoutline);
114 |             
115 |         end
116 |         
117 |     %If there is no previously stored mask outline image, generate it for
118 |     %the currently displayed image
119 |     else
120 |         
121 |         %Split the filepaths and extract the sample name of all samples
122 |         splitSamplename = cellfun(@(x) strsplit(x,fullfile('/')),Sample_Set_arranged,'UniformOutput',false);
123 |         allcutnames = cellfun(@(x) x(end),splitSamplename);
124 |         
125 |         %Find the index of the sample that corresponds to the currently
126 |         %visualized image
127 |         idxfound_name = find(~cellfun('isempty',regexpi(allcutnames,tabmaster_histonetiff.SelectedTab.Title)));
128 |         
129 |         %Store the corresponding single-cell mask (each pixel of a
130 |         %cell is marked with the corresponding cell number)
131 |         try
132 |             lblImg_filled = Mask_all(idxfound_name).Image{1};
133 |         catch
134 |             lblImg_filled = Mask_all(1,idxfound_name).Image;
135 |         end
136 |         
137 |         %If there is no mask and hence no single-cell data, return
138 |         if isempty(lblImg_filled) == 1
139 |             return;
140 |         end
141 |         
142 |         %Get only the outlines of the individual cells (not all the pixels of a
143 |         %cell, but only the edges)
144 |         lblImg=conv2(single(lblImg_filled),[0 -1 0; -1 4 -1;0 -1 0],'same')>0;
145 |         
146 |         %Using white to display the mask outlines
147 |         white = [1 1 1];
148 |         mycolormap = repmat(white,length(unique(lblImg)),1);
149 |                 
150 |         %Set focus on current axes and hold on to it
151 |         axes(tabmaster_histonetiff.SelectedTab.Children.findobj('Type','axes'));
152 |         hold on;
153 |         
154 |         %Display the mask outline image on top of the current image
155 |         %axes, and set the transparancy of the mask to a level such
156 |         %that both the cell outlines and the background image are visible
157 |         cmap = mycolormap;
158 |         lblImg = gray2ind(lblImg,200);
159 |         maskoutline{tabnum} = imshow(lblImg);
160 |         freezeColors;
161 |         set(maskoutline{tabnum},'AlphaData',0.4); %replace 0.4 with lblImg in order to not darken the image
162 |         put('maskoutline',maskoutline);
163 |         
164 |     end
165 |     
166 | %If the mask on/off checkbox is not checked, do not display the mask
167 | else
168 |     
169 |     %If a mask outline already exists
170 |     if isempty(maskoutline) ~= 1
171 |         
172 |         %If a mask outline for the current tab exists, switch the visibility
173 |         %off
174 |         try
175 |             if isempty(maskoutline{tabnum}) ~= 1
176 |                 cmap = retr('cmap');
177 |                 maskoutline{tabnum}.Visible = 'off';
178 |             end
179 |         catch
180 |             return;
181 |         end
182 |         
183 |     %If no mask outline has been generated before, it was not displayed in the first
184 |     %place
185 |     else
186 |         return;
187 |     end
188 |     
189 | end
190 | 
191 | %Store the colormap if it has been generated
192 | if isempty(cmap) ~= 1
193 |     put('cmap',cmap);
194 |     tabmaster_histonetiff.SelectedTab.Children.findobj('type','colorbar');
195 | else
196 |     colorbar(tabmaster_histonetiff.SelectedTab.Children.findobj('Type','axes'),'off');
197 | end
198 | 
199 | %Set axes position
200 | tabmaster_histonetiff.SelectedTab.Children.findobj('Type','axes').Position = [0 0 1.1 1];
201 | 
202 | 
203 | end
204 | 
205 | 


--------------------------------------------------------------------------------
/Matlab/histoCAT_adapted/show_selected_area_onTiff.m:
--------------------------------------------------------------------------------
  1 | function show_selected_area_onTiff( Sample_Set_arranged,HashID,Fcs_Interest_all,Mask_all )
  2 | % SHOW_SELECTED_AREA_ONTIFF: Slightly adapted to our case with our
  3 | % customized cell type colors and to not display legend on top of image.
  4 | 
  5 | 
  6 | %Get GUI handles
  7 | handles = gethand;
  8 | 
  9 | %Retrieve GUI variables
 10 | tabmaster_histonetiff = retr('tabmaster_histonetiff');
 11 | sessionData = retr('sessionData');
 12 | sessiondata_index = retr('sessiondata_index');
 13 | selected_gates = get(handles.list_samples,'Value');
 14 | allids = retr('allids');
 15 | gates = retr('gates');
 16 | 
 17 | %Function call to get the imageIDs of the selected gates and the index of the
 18 | %selected gates in the session/ samples list box
 19 | [  imageids, ~, SGsof_imageids_open,sample_orderIDX ] = getimageids_of_selectedgates(Sample_Set_arranged,HashID,selected_gates, allids);
 20 | 
 21 | cluster_names = gates(SGsof_imageids_open,1);
 22 | split_name = cellfun(@(x) strsplit(x,'_'),cluster_names,'UniformOutput',false);
 23 | split_name = cellfun(@(x) strcat(x{2}), split_name,'UniformOutput',false);
 24 | cluster_num = str2double(split_name);
 25 | 
 26 | %If a sample in the list_visual listbox other than 'None' (the first option) 
 27 | %is selected, work with (highlight on) this image, otherwise
 28 | %use the selected sample(s) from the samples listbox (list_samples) from above
 29 | if unique(get(handles.list_visual,'Value') > 1) == 1
 30 |     selectedsample_tiff = get(handles.list_visual,'Value') - 1;
 31 |     imageids = imageids(selectedsample_tiff);
 32 |     sample_orderIDX = sample_orderIDX(selectedsample_tiff);
 33 | end
 34 | 
 35 | %Find colors that are most distinguishable from already used colors and
 36 | %store the colormap based on the number of selected gates
 37 | colorsalreadyused = [[64/255 224/255 208/255];[72/255 209/255 204/255];[0 0 0];[1 20/255 147/255];[199/255 21/255 133/255];[1 105/255 180/255];[1 0 0];[0 1 0];[0 0 1];[0 1 1];[1 0 1];[1 1 0];[0.8 0.5 0];[0 0 0.8];[0.5 0.5 0.5];[0 0.5 0]];
 38 | colorstouse = distinguishable_colors(numel(SGsof_imageids_open)+1,colorsalreadyused); %SGsof_imageids_open
 39 | 
 40 | %Loop through each of the selected ImageIDs
 41 | for ik = 1:length(imageids)
 42 |     
 43 |     %If single-cell information was found for the current imageID
 44 |     if isempty(Fcs_Interest_all{sample_orderIDX(ik),1}) ~= 1
 45 |         
 46 |         %Initialize variables
 47 |         count = 1;
 48 |         Cells_selected = [];
 49 |         gatenum = [];
 50 |         countnum = [];
 51 |         
 52 |         %Store the single-cell mask for the current image
 53 |         try
 54 |             lblImg = Mask_all(1,sample_orderIDX(ik)).Image{1};
 55 |         catch
 56 |             lblImg = Mask_all(1,sample_orderIDX(ik)).Image;
 57 |         end
 58 |         
 59 |         %If there is no mask (and hence no songle-cell information), return
 60 |         if isempty(lblImg) == 1
 61 |             return;
 62 |         end
 63 |         
 64 |         %Loop through the selected gate indices
 65 |         for sesn = SGsof_imageids_open
 66 |             
 67 |             %Find the indices of the cells of the current gate in the entire sessionData
 68 |             Cells_selected{count}= Fcs_Interest_all{sample_orderIDX(ik),1}.CellId(find(ismember([Fcs_Interest_all{sample_orderIDX(ik),1}.ImageId,Fcs_Interest_all{sample_orderIDX(ik),1}.CellId],sessionData(sessiondata_index{sesn}{1}(1):sessiondata_index{sesn}{1}(2),1:2),'rows')));
 69 |             
 70 |             %If there were cells found
 71 |             if isempty(Cells_selected{count}) ~= 1
 72 |                 
 73 |                 %Store the current gate index off the listbox
 74 |                 gatenum = [gatenum sesn];
 75 |                 
 76 |                 %Store the current count number
 77 |                 countnum = [countnum count];
 78 |                 
 79 |             %If no cells were found, continue with next gate index of
 80 |             %the loop
 81 |             else
 82 |                 count = count + 1;
 83 |                 continue;
 84 |             end
 85 |             
 86 |             %Temporarily store the single-cell mask in another variable
 87 |             tempImg = lblImg;
 88 |             
 89 |             %Set the cells which were not found to 0 to omit them (show
 90 |             %only cells contained in selected gate)
 91 |             tempImg(find(~ismember(tempImg,Cells_selected{count}))) = 0;
 92 |             
 93 |             %Convert to logical
 94 |             temp2Img=conv2(single(tempImg),[0 -1 0; -1 4 -1;0 -1 0],'same')>0;
 95 |             
 96 |             %Convert to uint8
 97 |             temp2Img = gray2ind(temp2Img,200);
 98 |             
 99 |             %Import colors previously exported from R pipeline to have
100 |             %consistent color code
101 |             basel_meta_cols = readtable('/home/jana/Desktop/R_dat/RGB_basel_meta.csv');
102 |             basel_meta_cols = table2array(basel_meta_cols);
103 |             basel_meta_cols = [basel_meta_cols;[0,0,0]];
104 | 
105 |             [ rgbimg_rest ] = make_rgb( tempImg,basel_meta_cols,cluster_num(count));
106 |             
107 |             %Set the focus on the axes of the current image tab
108 |             handles.panel_tiff_images;
109 |             axes(tabmaster_histonetiff.Children(ik).Children.findobj('Type','axes'));
110 |             
111 |             %Hold on to the axes
112 |             hold on;
113 |             
114 |             %Overlay the image of the highlighted cells on top of the image of the selected tab
115 |             highlightmaskoutline = imshow(rgbimg_rest);
116 |             
117 |             %Hold on to the image to layer other images of highlighted
118 |             %cells from other selected gates for the current imageID
119 |             hold on;
120 |             
121 |             %Freeze colormap
122 |             freezeColors;
123 |             
124 |             %Adjust the intensity of the single-cell mask of the cells to
125 |             %be highlighted
126 |             intensemask = imadjust(double(tempImg));
127 |             set(highlightmaskoutline,'AlphaData',intensemask);
128 |             outline = imshow(temp2Img);
129 |             set(outline,'AlphaData',0);
130 |             hold off;
131 |             
132 |             %Increment count
133 |             count = count + 1;
134 |             
135 |         end
136 |         
137 |         %Store count numbers
138 |         %countnum = [countnum cluster_num(count-1)];
139 |         
140 |         %Prepare to deal with exception
141 |         try
142 |             
143 |             %If multiple gates of cells are to be highlighted, show common
144 |             %cells in a separate color
145 |             if count > 1
146 |                 
147 |                 %Concatenate all cells to be highlighted
148 |                 allcells = vertcat(Cells_selected{:});
149 |                 
150 |                 %Find the indices of the unique cells
151 |                 [~,idxofuniquevalues,~] = unique(allcells,'stable');
152 |                 
153 |                 %The other indices apart from the unique ones are
154 |                 %duplicates and hence are common for two of the selected gates
155 |                 idxoflightup = [1:size(allcells,1)]';
156 |                 idxofduplicates = find(~ismember(idxoflightup,idxofuniquevalues));
157 |                 commoncells = allcells(idxofduplicates);
158 | 
159 |             %If only one gate was selected there are no common cells
160 |             else
161 |                 commoncells = [];
162 |             end
163 |             
164 |         %In case of exception no common cells will be displayed
165 |         catch
166 |             commoncells = [];
167 |         end
168 |         
169 |         %If common cells have been found
170 |         if isempty(commoncells) ~= 1
171 |             
172 |             %Temporarily store the single-cell mask in a new variable
173 |             tempImg = lblImg;
174 |             
175 |             %Set the cells which were not found to be common cells to 0 in
176 |             %order to overlay only the common cells with an additional color
177 |             tempImg(find(~ismember(tempImg,commoncells))) = 0;
178 |             
179 |             %Function call to convert grayImg to color by highlighting the
180 |             %cells of interest in color. Color is assigned based on the current count number and the
181 |             %distinguishable colors-to-use.
182 |             [ rgbimg ] = make_rgb( tempImg,basel_meta_cols,cluster_num(count));
183 |             
184 |             %Set the focus on the axes of the current image tab
185 |             handles.panel_tiff_images;
186 |             axes(tabmaster_histonetiff.Children(ik).Children.findobj('Type','axes'));
187 |             
188 |             %Hold on to the axes
189 |             hold on;
190 |             
191 |             %Overlay the image of the highlighted cells on top of the image of the selected tab
192 |             highlightmaskoutline = imshow(rgbimg);
193 |             
194 |             %Hold on to the image
195 |             hold on;
196 |             
197 |             %Freeze colormap
198 |             freezeColors;
199 |             
200 |             %Adjust the intensity of the single-cell mask
201 |             intensemask = imadjust(double(tempImg));
202 |             set(highlightmaskoutline,'AlphaData',intensemask);
203 |             hold off;
204 |             
205 |             %Remove legends because when batch saving images we didn't want
206 |             %the legend to hide parts of the image.
207 |             
208 |             %Make the legend for the user to know which cells (colors)
209 |             %correspond to which gate/ the common cells
210 | %             L = line(ones(numel(countnum)),ones(numel(countnum)), 'LineWidth',2,'Parent',tabmaster_histonetiff.Children(ik).Children.findobj('Type','axes'));%;
211 | %             set(L,{'color'},mat2cell(basel_meta_cols(countnum,:),ones(1,numel(countnum)),3))
212 | %             hl=legend(L,cellfun(@(n)(num2str(n)), [gates(gatenum,1);{'CommonCells'}], 'UniformOutput', false)); hold on;
213 | %             
214 | %             %Set the location of the legend
215 | %             set(hl, 'Location','south');
216 | %             set(hl,'FontSize',8);freezeColors;
217 | %            
218 |         %If no common cells were found
219 |         else
220 |             
221 |             %Make the legend for the user to know which cells (colors) correspond to which selected gate
222 | %             L = line(ones(numel(countnum)),ones(numel(countnum)),'LineWidth',2,'Parent',tabmaster_histonetiff.Children(ik).Children.findobj('Type','axes'));  %tabmaster_histonetiff.Children(ik).Children.findobj('Type','axes'));hold on;%tabmaster_histonetiff.Children(ik).Children
223 | %             set(L,{'color'},mat2cell(basel_meta_cols(cluster_num(countnum),:),ones(1,numel(countnum)),3))
224 | %             hl=legend(L,cellfun(@(n)(num2str(n)), gates(gatenum,1), 'UniformOutput', false));
225 | %             
226 | %             %Set the location of the legend
227 | %             set(hl, 'Location','south');
228 | %             set(hl,'FontSize',8,'Interpreter', 'none');freezeColors;
229 |             
230 |         end
231 |         
232 |     %If single-cell information was not found, continue to next imageid   
233 |     else
234 |         
235 |         continue;
236 | 
237 |     end
238 |     
239 | %End of loop through imageids
240 | end
241 | 
242 | 
243 | end
244 | 
245 | 


--------------------------------------------------------------------------------
/Matlab/plot_additionalLayer_mask_Callback.m:
--------------------------------------------------------------------------------
 1 | function plot_additionalLayer_mask_Callback(hObject, eventdata, handles)
 2 | % PLOT_ADDITONALLAYER_MASK_CALLBACK: This function is executed upon checking/unchecking of 
 3 | % the 'Additional layer mask on/off' checkbox. It displayes the segmentation mask outlines
 4 | % (outlines of the additional masks such as tumor region) on top of the currently selected image tab.
 5 | % This function or the checkbox are not in  the publicly available version
 6 | % of histoCAT but can be added to it.
 7 | 
 8 | %Retrieve GUI and global variables
 9 | tabmaster_histonetiff = retr('tabmaster_histonetiff');
10 | global Mask_all
11 | global Sample_Set_arranged
12 | 
13 | %Get the current tab number
14 | tabnum = find(tabmaster_histonetiff.Children == tabmaster_histonetiff.SelectedTab);
15 | 
16 | maskoutline_additional = retr('maskoutline_additional');
17 |     
18 | %If the checkbox is checked, show mask
19 | if handles.additional_layer_mask_onoff.Value == 1
20 |     
21 |     if size(Mask_all(1).Image,2) > 2
22 |         numMasks = 1:size(Mask_all(1).Image,2)-2;
23 | 
24 |         choices = cellfun(@(x) strcat('Layer_',num2str(x)), num2cell(numMasks),'UniformOutput',false);
25 |         Selection = listdlg('PromptString','Select a Layer:','SelectionMode','single','ListString',choices);
26 | 
27 | 
28 |     elseif size(Mask_all(1).Image,2) == 2
29 |         Selection = 1;
30 | 
31 |     else
32 |         errordlg('There is no additional mask in dataset');
33 |         return;
34 |     end
35 | 
36 |     %Split the filepaths and extract the sample name of all samples
37 |     splitSamplename = cellfun(@(x) strsplit(x,fullfile('/')),Sample_Set_arranged,'UniformOutput',false);
38 |     allcutnames = cellfun(@(x) x(end),splitSamplename);
39 | 
40 |     %Find the index of the sample that corresponds to the currently
41 |     %visualized image
42 |     idxfound_name = find(~cellfun('isempty',regexpi(allcutnames,tabmaster_histonetiff.SelectedTab.Title)));
43 | 
44 |     %Store the corresponding single-cell mask (each pixel of a
45 |     %cell is marked with the corresponding cell number)
46 |     lblImg_filled = Mask_all(idxfound_name).Image{1+Selection};
47 | 
48 |     %If there is no mask and hence no single-cell data, return
49 |     if isempty(lblImg_filled) == 1
50 |         return;
51 |     end
52 | 
53 |     %Get only the outlines of the individual cells (not all the pixels of a
54 |     %cell, but only the edges)
55 |     lblImg=conv2(single(lblImg_filled),[0 -1 0; -1 4 -1;0 -1 0],'same')>0;
56 | 
57 |     %Set focus on current axes and hold on to it
58 |     axes(tabmaster_histonetiff.SelectedTab.Children.findobj('Type','axes'));
59 |     hold on;
60 | 
61 |     %Display the mask outline image on top of the current image
62 |     %axes, and set the transparancy of the mask to a level such
63 |     %that both the cell outlines and the background image are visible
64 |     cmap = colormap;
65 |     lblImg = gray2ind(lblImg,200);
66 |     maskoutline_additional{tabnum} = imshow(lblImg);
67 |     freezeColors;
68 |     set(maskoutline_additional{tabnum},'AlphaData',0.4);
69 |     put('maskoutline_additional',maskoutline_additional);
70 | 
71 | else
72 |     maskoutline_additional{tabnum}.Visible = 'off';
73 | end
74 | 
75 | try
76 |     %Store the colormap if it has been generated
77 |     if isempty(cmap) ~= 1
78 |         put('cmap',cmap);
79 |         tabmaster_histonetiff.SelectedTab.Children.findobj('type','colorbar');
80 |     else
81 |         colorbar(tabmaster_histonetiff.SelectedTab.Children.findobj('Type','axes'),'off');
82 |     end
83 | catch
84 |     return;
85 | end
86 | 
87 | %Set axes position
88 | tabmaster_histonetiff.SelectedTab.Children.findobj('Type','axes').Position = [0 0 1.1 1];
89 | 
90 | 
91 | end


--------------------------------------------------------------------------------
/Matlab/write_out_SCdata_from_histoCAT_forSpilloverCorr.m:
--------------------------------------------------------------------------------
 1 | %Script to write out uncompensated single-cell data from histoCAT session 
 2 | %for spillover compensation with R scripts and then import the resulting 
 3 | %compensated single cell data again to use in histoCAT this only works for 
 4 | %our case where there were no other channels/gates in the histoCAT session
 5 | %but the ones used for compensation, otherwise adapt script.
 6 | 
 7 | %Export
 8 | ses = retr('sessionData');
 9 | gates = retr('gates');
10 | %Amount of images hardcoded in case there are already custom channels
11 | %bellow that shouldn't be used, otherwise just take full session
12 | lens = cellfun(@(x) sum(contains(x,'Cell_')),gates(:,3),'UniformOutput',false);
13 | %Should be same for all
14 | max = unique(cell2mat(lens));
15 | writetable(array2table(ses(:,1:(max+2)),'VariableNames',gates{1,3}(1:(max+2))),'curr_single_cell.csv');
16 | 
17 | %Import
18 | 
19 | %Read compensated data back in and replace uncompensated session data with
20 | %compensated data (in order to have the compensated single-cell data for
21 | %histoCAT analyses) 
22 | comp = readtable('compensated_correct_basel.csv');
23 | ses = retr('sessionData');
24 | gates = retr('gates');
25 | ses(:,1:(max+2)) = table2array(comp);
26 | %Replace sessionData with compensated data
27 | put('sessionData',ses);
28 | %Replace table Fcs_Interest_all with compensated data
29 | global Fcs_Interest_all;
30 | temp = Fcs_Interest_all;
31 | un = unique(table2array(comp(:,1)),'stable');
32 | for i=1:length(temp)
33 |     cur_un = un(i);
34 |     temp{i}(:,1:(max+2)) = comp(table2array(comp(:,1)) == cur_un,:);  
35 | end
36 | Fcs_Interest_all = temp;


--------------------------------------------------------------------------------
/R/ZurichTMA_pipeline.Rmd:
--------------------------------------------------------------------------------
   1 | ---
   2 | title: "R downstream analysis Zurich TMA"
   3 | author: "Jana Fischer"
   4 | html_document: default
   5 | ---
   6 | 
   7 | #Downstream R analysis for Zurich TMA on final single cell data (after cleaning of SC and metadata, final cleaned data provided). Produces all Figure content related to the Zurich TMA and aligns with Basel TMA. This pipeline uses output files from the previously run BaselTMA_pipeline. These files are provided in the output folder.
   8 |   
   9 | ```{r}
  10 | library(data.table)
  11 | library(RColorBrewer)
  12 | library(dplyr)
  13 | library(gplots)
  14 | library(ggplot2)
  15 | library(stringr)
  16 | library(ComplexHeatmap)
  17 | library(circlize)
  18 | ```
  19 | 
  20 | ```{r Settings}
  21 | 
  22 | fn_cells = '/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/SC_dat.csv'
  23 | fn_meta = '/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/Zuri_PatientMetadata.csv'
  24 | fn_ZuriPheno = '/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/PG_zurich.csv'
  25 | 
  26 | # Define channels to be excluded
  27 | channel_exclude = c("ImageId" ,"CellId" ,"In115 115InIn115Di","Xe134 134XeXe134Di","Hg202 202HgHg202Di","Pb204 204PbPb204Di","Pb206 206PbPb206Di","ArAr80 80ArArArAr80Di","phospho Erk12", "10311239Ru96Di Rutheni","10311240Ru98Di Rutheni","10311241Ru99Di Rutheni", "10311242Ru100Di Rutheni","10311243Ru101Di Rutheni", "10311244Ru102Di Rutheni","10311245Ru104Di Rutheni","Xe126 126XeXe126Di","I127 127II127Di","Xe131 131XeXe131Di","Pb207 207PbPb207Di","Pb208 208PbPb208Di","EulerNumber","MajorAxisLength","MinorAxisLength", "Orientation","10331253Ir191Di Iridium","2971330Dy161Di EpCAM","Perimeter","1971527Ho165Di bCaten","483739Yb171Di Sox9","Solidity")
  28 | 
  29 | #Save cluster order of Figures
  30 | cluster_order = c('37','4','10','11','27','6','19','22','29','31','24','14','30','32','9','23','33','34','40','13','12','17','20','38','15','26','35','25','8','41','3','39','7','2','21','28','18','5','1','16');
  31 | 
  32 | ```
  33 | 
  34 | #Load the data
  35 | ```{r}
  36 | #Single-cell data
  37 | dat <- fread(fn_cells,header = T)
  38 | #Patient metadata
  39 | Sample_metadata <- fread(fn_meta,header = T)
  40 | #PhenoGraph result from publication
  41 | custom_PG <- fread(fn_ZuriPheno, header = T)
  42 | 
  43 | ```
  44 | 
  45 | #Display the channels used for analysis
  46 | ```{r}
  47 | good_channels = unique(dat$channel)[!unique(dat$channel) %in% channel_exclude]
  48 | print(good_channels)
  49 | ```
  50 | 
  51 | #Define a colormap used for general plots
  52 | ```{r Generate a color pallette for plotting}
  53 | qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
  54 | col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
  55 | col_vector = unique(col_vector)
  56 | ```
  57 | 
  58 | #Define custom colormap with colors according to cell types as used in pulication
  59 | ```{r Generate a color pallette for plotting}
  60 | 
  61 | #Metacluster colors from Basel TMA
  62 | mycols_basel_meta <- colors()[c(258,257,86,85,259,81, #green
  63 |                                 652, #yellow
  64 |                                 636,520,430,109,68,43,#light blue
  65 |                                 132,#other dark blue
  66 |                                 26,#dark blue
  67 |                                 8,12, #turquouis
  68 |                                 33,#red
  69 |                                 551,#dark purple
  70 |                                 95,#light purple
  71 |                                 419,#light pink
  72 |                                 117,#pink
  73 |                                 52,#burnt orange
  74 |                                 500,#orange
  75 |                                 568,#pink orange
  76 |                                 624, #brown
  77 |                                 133)] #dark red
  78 | 
  79 | 
  80 | #SCP patient group colors
  81 | mycols_patient = colors()[c(52,500,540,568,571,117,419,314,371,450,95,551,33,8,12,26,132,624)]
  82 | 
  83 | #Colormap for clinical subgroups
  84 | mycols_clinical = colors()[c(545,622,525,74)]
  85 |                 
  86 | ```
  87 | 
  88 | #Extract relevant PhenoGraph columns from saved out PG run
  89 | ```{r}
  90 | # Get Phenograph column
  91 | custom_PG[, id := paste(.BY,collapse =  "_"), by=.(core,CellId)] 
  92 | pheno_cn <- c("id",colnames(custom_PG)[grep('PhenoGraph', colnames(custom_PG))])
  93 | cluster_pheno <- custom_PG[,pheno_cn, with=FALSE]
  94 | colnames(cluster_pheno) <- c("id","cluster")
  95 | ```
  96 | 
  97 | #Plot heatmaps of the phenograph clusters for zurich TMA only
  98 | ```{r}
  99 | setkey(dat,id)
 100 | 
 101 | #Exclude controls
 102 | dat = dat[!str_detect(dat$core,'control'),]
 103 | dat = dat[!str_detect(dat$core,'non-breast'),]
 104 | cluster_pheno = cluster_pheno[!str_detect(cluster_pheno$id,'non-breast'),]
 105 | cluster_pheno = cluster_pheno[!str_detect(cluster_pheno$id,'control'),]
 106 | 
 107 | #Exclude bad stains (these images are excluded from all further analysis because they turned out to be stained very weakly which resulted in almost all contained cells clustering together)
 108 | dat = dat[!str_detect(dat$core,regex('Ay.x1')),]
 109 | dat = dat[!str_detect(dat$core,regex('Ay..x1')),]
 110 | cluster_pheno = cluster_pheno[!str_detect(cluster_pheno$id,'Ay.x1'),]
 111 | cluster_pheno = cluster_pheno[!str_detect(cluster_pheno$id,'Ay..x1'),]
 112 | 
 113 | cluster_pheno$cluster <- factor(cluster_pheno$cluster, levels = cluster_order)
 114 | dat[,c_counts := bbRtools::censor_dat(mc_counts,0.99),by = channel]
 115 | #dat[, sc_counts := scale(mc_counts,censor_val), by=channel]
 116 | 
 117 | summary_dat = dat[cluster_pheno][ channel %in% good_channels ,list(
 118 |   median_val = median(c_counts),
 119 |   mean_val= mean(c_counts),
 120 |   cell_cluster=.N),
 121 |   by=.(channel,cluster)]
 122 | 
 123 | hm_dat = dcast.data.table(data =summary_dat, formula = 'cluster ~ channel',
 124 |                           value.var = 'mean_val') #'median_val' can be exchanged for 'mean_val'
 125 |                           
 126 | trownames = hm_dat$cluster
 127 | hm_dat = as.matrix(hm_dat[,-1,with=F])
 128 | row.names(hm_dat) = trownames
 129 | 
 130 | ```
 131 | 
 132 | Plot the heatmap with z-scoring per marker
 133 | ```{r}
 134 | # Set color map
 135 | cols = rev(brewer.pal(11,'Spectral'))
 136 | cmap = colorRampPalette(cols)
 137 | 
 138 | # Hierarchical clustering on rows with Ward's linkage
 139 | tdist = as.dist(1-cor(t(hm_dat), method="spearman"))
 140 | hr <- hclust(tdist, method="ward.D2")
 141 | co_r <- order.optimal(tdist, hr$merge)
 142 | hr$merge = co_r$merge
 143 | hr$order = co_r$order
 144 | 
 145 | #Order rows in heatmap according to clustering
 146 | order_heatmap_zscored = row.names(hm_dat)[hr$order]
 147 | 
 148 | # Hierarchical clustering on columns with Ward's linkage
 149 | tdist = as.dist(1-cor((hm_dat), method="spearman"))
 150 | hc <- hclust(tdist, method="ward.D2")
 151 | co_c <- order.optimal(tdist, hc$merge)
 152 | hc$merge = co_c$merge
 153 | hc$order = co_c$order
 154 | 
 155 | # Z-score data
 156 | p_dat = scale(hm_dat)
 157 | 
 158 | # Censor z-score at 2
 159 | p_dat[p_dat > 2] =2
 160 | p_dat[p_dat < -2] =-2
 161 | 
 162 | 
 163 | pdf(file="heatmap_zuri.pdf", width=10, height=10)
 164 | 
 165 | heatmap.2(p_dat,
 166 |           scale ='none',
 167 |           trace = "none",
 168 |           col=cmap(75),
 169 |           Rowv=as.dendrogram(hr),
 170 |           Colv=as.dendrogram(hc),
 171 |           density.info ='none',
 172 |           cexRow=0.6,
 173 |           cexCol=0.6,
 174 |           margins=c(4,8),
 175 |           xlab = 'Markers',
 176 |           ylab ='Cluster',
 177 |           main = 'PG_norm_slide')
 178 | 
 179 | dev.off()
 180 | 
 181 | ```
 182 | 
 183 | #Match PG clusters from Zurich TMA to Basel TMA metaclusters
 184 | ```{r}
 185 | # Z-score zurich data
 186 | hm_dat_zuri = hm_dat
 187 | hm_dat_zuri = scale(hm_dat_zuri)
 188 | 
 189 | #Read in metacluster means from Basel TMA
 190 | hm_dat_basel = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/output_BaselTMA/hm_dat_basel.csv',header = T)
 191 | rownames = hm_dat_basel$V1
 192 | hm_dat_basel = as.matrix(hm_dat_basel[,-1])
 193 | rownames(hm_dat_basel)= rownames
 194 | 
 195 | # Z-score basel data
 196 | hm_dat_basel = scale(hm_dat_basel)
 197 | 
 198 | #Make sure all the same markers are used
 199 | colnames(hm_dat_zuri)[!colnames(hm_dat_zuri) %in% colnames(hm_dat_basel)] 
 200 | colnames(hm_dat_basel)[!colnames(hm_dat_basel) %in% colnames(hm_dat_zuri)]
 201 | 
 202 | #Calculate distances based on correlations between zurich and basel clusters
 203 | crosscor = 1-cor(t(hm_dat_basel),t(hm_dat_zuri),method = 'pearson')
 204 | #Find most similar cluster
 205 | idx_min = apply(crosscor,2,function(x){which(x == min(x))})
 206 | dist_min = apply(crosscor,2,min)
 207 | cluster_match = data.table(rownames(crosscor)[unlist(idx_min)])
 208 | colnames(cluster_match) = 'Basel'
 209 | cluster_match$zuri = colnames(crosscor)
 210 | cluster_match$dist = dist_min
 211 | cluster_match$zuri = as.numeric(cluster_match$zuri)
 212 | cluster_match = cluster_match[order(zuri),]
 213 | 
 214 | #Plot heatmap of Zurich PG clusters ordered and colored according to matching Basel metacluster
 215 | p_dat = hm_dat_zuri
 216 | p_dat[p_dat > 2] =2
 217 | p_dat[p_dat < -2] =-2
 218 | p_dat = p_dat[order(as.numeric(rownames(p_dat))),]
 219 | 
 220 | #Read in order from metacluster heatmap of Basel TMA (saved out in output from BaselTMA pipeline) in order to sort this heatmap accordingly
 221 | ordered_labels = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/output_BaselTMA/ordered_labels_basel.csv',header = F)
 222 | ordered_labels = ordered_labels$V1
 223 | ordered_channels = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/output_BaselTMA/ordered_channels_basel.csv',header = F,fill = T)
 224 | ordered_channels = ordered_channels[-1,]
 225 | 
 226 | cluster_match = cluster_match[order(as.numeric(Basel)),]
 227 | p_dat = p_dat[as.character(rev(cluster_match[order(match(cluster_match$Basel[cluster_match$Basel %in% ordered_labels],ordered_labels)),zuri])),]
 228 | p_dat = p_dat[,match(ordered_channels$V1,colnames(p_dat))]
 229 | 
 230 | cols_corresp_basel = mycols_basel_meta[rev(as.numeric(cluster_match[order(match(cluster_match$Basel[cluster_match$Basel %in% ordered_labels],ordered_labels)),Basel]))]
 231 | 
 232 | #Visualizes fully automatic assignment but for the publication a few clusters were reassigned based prior knowledge about markers (for example, cluster 15 was assigned to an immune cell based on marker correlations but contains some tumor markers, hence it was manually reassigned to the most similar tumor cell type)
 233 | pdf('zuri_heatmap_color_and_ord_matched_Basel.pdf')
 234 | heatmap.2(p_dat,
 235 |           scale ='none',
 236 |           trace = "none",
 237 |           col=cmap(75), dendrogram = "none", Rowv = FALSE, Colv = FALSE,
 238 |           density.info ='none',
 239 |           cexRow=0.6,
 240 |           cexCol=0.6,
 241 |           margins=c(4,8),
 242 |           xlab = 'Markers',
 243 |           ylab ='Cluster',
 244 |           main = 'PG_matched',
 245 |           colRow = cols_corresp_basel)
 246 | dev.off()
 247 | 
 248 | 
 249 | #Reassign some clusters as done for publication
 250 | cluster_match$Basel[cluster_match$zuri == 29] = 23 #different tumor cluster matched better based on relevant markers
 251 | cluster_match$Basel[cluster_match$zuri == 40] = 19 #different tumor cluster matched better based on relevant markers
 252 | cluster_match$Basel[cluster_match$zuri == 6] = 24 #different tumor cluster matched better based on relevant markers
 253 | cluster_match$Basel[cluster_match$zuri == 11] = 22 #different tumor cluster matched better based on relevant markers
 254 | cluster_match$Basel[cluster_match$zuri == 8] = 21 #This cluster forms tumor bulks (not stromal!!)
 255 | cluster_match$Basel[cluster_match$zuri == 9] = 19 #different tumor cluster matched better based on relevant markers
 256 | cluster_match$Basel[cluster_match$zuri == 26] = 15 #different tumor cluster matched better based on relevant markers
 257 | cluster_match$Basel[cluster_match$zuri == 15] = 16 #this has tumor markers
 258 | cluster_match$Basel[cluster_match$zuri == 41] = 10
 259 | cluster_match$Basel[cluster_match$zuri == 39] = 8 
 260 | cluster_match$Basel[cluster_match$zuri == 3] = 8
 261 | cluster_match$Basel[cluster_match$zuri == 1] = 3
 262 | 
 263 | #Plot publication version
 264 | cluster_match = cluster_match[order(as.numeric(Basel)),]
 265 | p_dat = p_dat[as.character(rev(cluster_match[order(match(cluster_match$Basel[cluster_match$Basel %in% ordered_labels],ordered_labels)),zuri])),]
 266 | p_dat = p_dat[,match(ordered_channels$V1,colnames(p_dat))]
 267 | 
 268 | cols_corresp_basel = mycols_basel_meta[rev(as.numeric(cluster_match[order(match(cluster_match$Basel[cluster_match$Basel %in% ordered_labels],ordered_labels)),Basel]))]
 269 | 
 270 | pdf('zuri_heatmap_color_and_ord_matched_Basel.pdf')
 271 | heatmap.2(p_dat,
 272 |           scale ='none',
 273 |           trace = "none",
 274 |           col=cmap(75), dendrogram = "none", Rowv = FALSE, Colv = FALSE,
 275 |           density.info ='none',
 276 |           cexRow=0.6,
 277 |           cexCol=0.6,
 278 |           margins=c(4,8),
 279 |           xlab = 'Markers',
 280 |           ylab ='Cluster',
 281 |           main = 'PG_matched',
 282 |           colRow = cols_corresp_basel)
 283 | dev.off()
 284 | 
 285 | ```
 286 | 
 287 | #Plot marker distributions for each cell type cluster
 288 | ```{r}
 289 | #per channel
 290 | for (i in unique(dat[channel %in% good_channels,]$channel)){
 291 |   cdat <- dat[cluster_pheno][channel %in% good_channels,][channel == i,][,markermean:= mean(c_counts),by = 'cluster']
 292 |   pdf(paste0('marker_distributions_channel_',as.character(i),'.pdf'),width = 30,height = 150)
 293 |   print(ggplot(cdat, aes(x=c_counts, colour=cluster)) +
 294 |       geom_density(aes(y=..scaled..)) +
 295 |       geom_vline(data=cdat, aes(xintercept=markermean,  colour=cluster),
 296 |                  linetype="dashed", size=1)+
 297 |       facet_wrap( ~ cluster, ncol=1)+
 298 |     scale_color_manual(values = rev(cols_corresp_basel)))
 299 |   dev.off()}
 300 | ```
 301 | 
 302 | Reproduce clustergram from neighborhood analysis in R with some adaptations
 303 | ```{r}
 304 | 
 305 | #Read in data from histoCAT neighborhood analysis where stromal/immune/Endothelial Metaclusters are separate but all tumor cells have the same label (100)
 306 | clustergram_dat = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/neighborhood_output/Clustergram_Zurich.csv',header = T)
 307 | 
 308 | ##Adapt names to be consistent with naming here, only necessary first time when importing from neighborhood run in histoCAT (provided neighborhood output is already cleaned)
 309 | # rnames = rownames(clustergram_dat)
 310 | # clustergram_dat = data.table(clustergram_dat)
 311 | # clustergram_dat$core = rnames
 312 | # test = unique(clustergram_dat$core)
 313 | # split_core = strsplit(clustergram_dat$core,'_', fixed = TRUE)
 314 | # clustergram_dat$core = unlist(lapply(split_core, function(x){paste(x[c(1,2,8,9,10)],collapse =  "_")}))
 315 | # #Replace the ones that don't need acquisition number again
 316 | # short = unlist(lapply(strsplit(test,'_', fixed = TRUE),function(x){paste(x[c(1,2,8,9)],collapse =  "_")}))
 317 | # duplicate_idx = duplicated(short) | duplicated(short, fromLast = TRUE)
 318 | # in_cores_index = unlist(lapply(unique(clustergram_dat$core)[duplicate_idx],function(x){which(clustergram_dat$core %in% x)}))
 319 | # clustergram_dat$core[setdiff(1:length(clustergram_dat$core),in_cores_index)] = unlist(lapply(strsplit(clustergram_dat$core[setdiff(1:length(clustergram_dat$core),in_cores_index)],'_', fixed = TRUE), function(x){paste(x[1:length(x)-1],collapse =  "_")}))
 320 | # #Delete zeros
 321 | # clustergram_dat$core = unlist(lapply(clustergram_dat$core,function(x){gsub("000","",x)}))
 322 | 
 323 | #Exclude weak stains
 324 | clustergram_dat = clustergram_dat[!str_detect(clustergram_dat$core,regex('Ay.x1')),]
 325 | clustergram_dat = clustergram_dat[!str_detect(clustergram_dat$core,regex('Ay..x1')),]
 326 | 
 327 | #Remove controls
 328 | clustergram_dat = clustergram_dat[!str_detect(clustergram_dat$core,'control'),]
 329 | clustergram_dat = clustergram_dat[!str_detect(clustergram_dat$core,'non-breast'),]
 330 | 
 331 | #Remove normal samples
 332 | clustergram_dat = merge(clustergram_dat,unique(Sample_metadata[,c('core','location')]),by = 'core')
 333 | clustergram_dat = clustergram_dat[!location %in% c('[]','METASTASIS'),]
 334 | clustergram_dat = clustergram_dat[,-'location']
 335 | 
 336 | clustergram_dat_meta = merge(clustergram_dat,Sample_metadata,by = 'core')
 337 | 
 338 | 
 339 | #Prepare for clustered heatmap
 340 | rnames = clustergram_dat$core
 341 | mat = as.matrix(clustergram_dat[,-'core'])
 342 | rownames(mat) = rnames
 343 | 
 344 | #Split into clusters, not used for publication
 345 | hr = hclust(dist(mat), method = "ward.D2")
 346 | clusters = dendextend::cutree(hr, k = 10)
 347 | 
 348 | #Write out clusters
 349 | cnames = names(clusters)
 350 | neighb_clusters = data.table(cnames)
 351 | names(neighb_clusters) = 'core'
 352 | neighb_clusters$cluster = unlist(clusters)
 353 | 
 354 | #Plot heatmap
 355 | h = Heatmap(mat, name = "Clustergram", km = 1, col = colorRamp2(c(-1, 0, 1), c("blue", "white", "red")),
 356 |      show_row_names = T, show_column_names =  T, clustering_method_rows = "ward.D2",clustering_method_columns = "ward.D2",split = clusters)+   #  row_order = rev(order_stacked), cluster_rows = FALSE
 357 | #location
 358 |     Heatmap(factor(clustergram_dat_meta$location), name = "Location", show_row_names = FALSE, width = unit(10, "mm"), col = structure(c("white","red","blue","black"), names = c('[]','CENTER','PERIPHERY','METASTASIS')))+ 
 359 |  #grade 
 360 | Heatmap(factor(clustergram_dat_meta$grade), name = "Grade", show_row_names = FALSE, width = unit(10, "mm"), col = structure(c("green","blue","red",'black'), names = c('1','2','3','METASTASIS')))+ 
 361 | #Mets
 362 | Heatmap(factor(clustergram_dat_meta$PTNM_M), name = "Met", show_row_names = FALSE, width = unit(10, "mm"), col = structure(c("black","gray",'white'), names = c('M1','M0_IPLUS','M0')))
 363 |   
 364 |   
 365 | pdf('clustergram_ZuriImages_far.pdf',width = 20,height = 20)
 366 | h
 367 | dev.off()
 368 | 
 369 | 
 370 | ```
 371 | 
 372 | #Location composition of clusters from heatmap above
 373 | ```{r}
 374 | loc_arc = merge(neighb_clusters,unique(Sample_metadata[,c('core','location')]),by = 'core')
 375 | loc_arc[,count := .N ,by = c('cluster','location')]
 376 | loc_arc[,frac_cluster := count/.N, by = 'cluster']
 377 | loc_arc[,frac_location := count/.N, by = 'location']
 378 | loc_arc = unique(loc_arc[,c('cluster','location','frac_cluster')])
 379 | 
 380 | p <- ggplot(loc_arc, aes(x=as.factor(cluster), y=frac_cluster, fill=as.factor(location))) + 
 381 |   geom_bar(stat='identity',show.legend = TRUE)+
 382 |   scale_fill_manual("Clusters",values =  c('red','blue'))+ 
 383 |   labs(fill = "Location")+
 384 |   coord_flip()+
 385 |   xlab("mArc")+
 386 |   ylab("Frac location")+
 387 |   theme(panel.background = element_blank())+
 388 |   ggtitle('Location composition')
 389 | 
 390 | 
 391 | pdf('mArc_location.pdf')
 392 | p
 393 | dev.off()
 394 | 
 395 | ```
 396 | 
 397 | #Prepare data for patient and image cell type counts
 398 | ```{r}
 399 | #Count number of cells per core and cluster
 400 | dat_cluster_caseID = dat[cluster_pheno][, .(ncells=.N),by=.(core, channel, cluster)]
 401 | dat_cluster_caseID[, id:=paste(cluster, core)]
 402 | setkey(dat_cluster_caseID, 'id')
 403 | 
 404 | #Remove duplicates
 405 | cluster_dat_incl_stroma = subset(dat_cluster_caseID, !duplicated(id))
 406 | 
 407 | #Version including stroma
 408 | #Get fractions/percentages of cells by core/cluster
 409 | cluster_dat_incl_stroma[, frac_cluster := ncells/sum(ncells), by=core]
 410 | cluster_dat_incl_stroma[, frac_cells := ncells/sum(ncells), by=cluster]
 411 | cluster_dat_incl_stroma[, perc_cluster := ncells*100/sum(ncells), by=core]
 412 | cluster_dat_incl_stroma[, perc_cells := ncells*100/sum(ncells), by=cluster]
 413 | 
 414 | #Merge with metadata
 415 | cluster_dat_incl_stroma = merge(cluster_dat_incl_stroma, Sample_metadata[!is.na(Sample_metadata$area),], by = c('core'), all.x = TRUE, allow.cartesian=TRUE, rm.NA = TRUE)
 416 | 
 417 | #Calculate cell type densities per image
 418 | cluster_dat_incl_stroma[,cell_density := ncells/area, by = 'core']
 419 | 
 420 | 
 421 | #Version without stroma
 422 | #Exclude stromal cells
 423 | cluster_dat = subset(dat_cluster_caseID, !duplicated(id))
 424 | cluster_dat = cluster_dat[!cluster %in% c(1:3,5,7,16,18,21,28,39,41),]
 425 | 
 426 | #Get fractions/percentages of cells by core/cluster
 427 | cluster_dat[, frac_cluster := ncells/sum(ncells), by=core]
 428 | cluster_dat[, frac_cells := ncells/sum(ncells), by=cluster]
 429 | cluster_dat[, perc_cluster := ncells*100/sum(ncells), by=core]
 430 | cluster_dat[, perc_cells := ncells*100/sum(ncells), by=cluster]
 431 | 
 432 | #Merge with metadata
 433 | cluster_dat = merge(cluster_dat, Sample_metadata[!is.na(Sample_metadata$area),], by = c('core'), all.x = TRUE, allow.cartesian=TRUE, rm.NA = TRUE)
 434 | 
 435 | #Calculate cell type densities per image
 436 | cluster_dat[,cell_density := ncells/area, by = 'core']
 437 | 
 438 | ```
 439 | 
 440 | #Kick out normal, met and control in case not wanted for subsequent analyses, use metacluster labels from Basel
 441 | ```{r}
 442 | #Including stroma
 443 | cluster_dat_tumors_stroma <- cluster_dat_incl_stroma[!is.na(cluster_dat_incl_stroma$PID),]
 444 | cluster_dat_tumors_stroma <- cluster_dat_tumors_stroma[!cluster_dat_tumors_stroma$location == '[]',]
 445 | 
 446 | #Map metacluster labels (machted from Basel cohort) because from now on only using metacluster cell types 
 447 | cluster_dat_tumors_stroma$cluster <- mapvalues(cluster_dat_tumors_stroma$cluster, from=cluster_match$zuri, to=cluster_match$Basel)
 448 | 
 449 | 
 450 | #Without stromal cells
 451 | cluster_dat_tumors <- cluster_dat[!is.na(cluster_dat$PID),]
 452 | cluster_dat_tumors <- cluster_dat_tumors[!cluster_dat_tumors$location == '[]',]
 453 | 
 454 | #Map metacluster labels (machted from Basel cohort) because from now on only using metacluster cell types 
 455 | cluster_dat_tumors_orig = cluster_dat_tumors
 456 | cluster_dat_tumors$cluster <- mapvalues(cluster_dat_tumors$cluster, from=cluster_match$zuri, to=cluster_match$Basel)
 457 | 
 458 | ```
 459 | 
 460 | #In case normal, met and control are kept, name them accordingly
 461 | ```{r}
 462 | #Non existent patientcodes are control samples
 463 | cluster_dat$location[is.na(cluster_dat$PID)] <- 'CONTROL'
 464 | cluster_dat$PID[is.na(cluster_dat$PID)] <- 'CONTROL'
 465 | cluster_dat$grade[is.na(cluster_dat$grade)] <- 'CONTROL'
 466 | 
 467 | #Empty locations are normal samples, except for when they are met -> overwrite the mets
 468 | cluster_dat$grade[cluster_dat$location == '[]'] <- 'NORMAL'
 469 | cluster_dat$location[cluster_dat$location == '[]'] <- 'NORMAL'
 470 | 
 471 | cluster_dat[, cluster_by_area := ncells/area, by=c('core','cluster')]
 472 | cluster_dat = cluster_dat[!is.na(cluster_dat$cluster_by_area),]
 473 | 
 474 | #Exclude sample with no tumor core
 475 | cluster_dat = cluster_dat[cluster_dat$PID %in% cluster_dat_tumors$PID,]
 476 | ```
 477 | 
 478 | #Distribution of cell types across center and periphery cores
 479 | ```{r}
 480 | cluster_loc = cluster_dat[,c('cluster','core','ncells','location')]
 481 | cluster_loc = cluster_loc[!location %in% c('NORMAL'),]
 482 | cluster_loc[,count := .N, by = c('cluster','location')]
 483 | cluster_loc[,frac_cluster := count/.N, by = 'cluster']
 484 | cluster_loc[,frac_location := count/.N, by = 'location']
 485 | cluster_loc = unique(cluster_loc[,c('cluster','location','frac_cluster')])
 486 | 
 487 | p <- ggplot(cluster_loc, aes(x=cluster, y=frac_cluster, fill=factor(location,levels = c('CENTER','PERIPHERY','NORMAL','METASTASIS')))) + 
 488 |   geom_bar(stat='identity',show.legend = TRUE)+
 489 |   scale_fill_manual("Clusters",values = c('red','blue','grey','black'))+ 
 490 |   labs(fill = "Location")+
 491 |   coord_flip()+
 492 |   xlab("loc")+
 493 |   ylab("Frac cluster")+
 494 |   theme(panel.background = element_blank(),
 495 |         axis.text.y = element_text(colour=rev(cols_corresp_basel)))+
 496 |   ggtitle('Location composition')
 497 | 
 498 | pdf('location.pdf')
 499 | p
 500 | dev.off()
 501 | ```
 502 | 
 503 | #Patient clustering use tumor cells only
 504 | ```{r}
 505 | # Prepare for patient clustering based on fraction of contained cell type metacluster cells
 506 | cluster_dat_tumors <- subset(cluster_dat_tumors, by="PID")
 507 | cluster_dat_tumors[, tot_patient := sum(ncells), by=PID]
 508 | cluster_dat_tumors[, tot_location := sum(ncells), by=location]
 509 | cluster_dat_tumors[, tot_cell_area_patient := sum(unique(sum_area_cells)), by=PID]
 510 | cluster_dat_tumors[, tot_image_area_patient := sum(unique(area)), by=PID]
 511 | cluster_dat_tumors[, frac_cells_image_Area_patient := ncells/tot_image_area_patient, by=.(cluster)]
 512 | cluster_dat_tumors[, frac_cells_cell_Area_patient := ncells/tot_cell_area_patient, by=.(cluster)]
 513 | cluster_dat_tumors[, frac_cells_amount_cells_patient := ncells/tot_patient, by=.(cluster)]
 514 | cluster_dat_tumors[, tot_cell_area_image := sum_area_cells]
 515 | cluster_dat_tumors[, frac_cells_cell_Area_image := ncells/tot_cell_area_image, by=.(cluster)]
 516 | cluster_dat_tumors[, tot_cell_area_location := sum(unique(sum_area_cells)), by=location]
 517 | cluster_dat_tumors[, tot_image_area_location := sum(unique(area)), by=location]
 518 | cluster_dat_tumors[, cluster_by_area := ncells/area, by=c('core','cluster')]
 519 | 
 520 | patient_dat_vars <- cluster_dat_tumors[,c("PID","cluster","frac_cluster")] #replace frac with other norms
 521 | un_clusters <- unique(patient_dat_vars[,"cluster"])
 522 | un_clusters <- transform(un_clusters, cluster = as.character(cluster))
 523 | un_clusters <- unlist(un_clusters)
 524 | colnames(patient_dat_vars)<-c("PID","channel","value")
 525 | patient_dat_vars<-transform(patient_dat_vars, channel = as.character(channel))
 526 | 
 527 | # Hierarchical clustering on patients
 528 | patient_dat_vars = patient_dat_vars[order(PID)]
 529 | patient_wide = dcast.data.table(data =patient_dat_vars, formula = 'PID ~ channel',
 530 |                           value.var = 'value',fun.aggregate = mean,fill = 0)
 531 | patient_wide$PID = as.character(patient_wide$PID)
 532 | 
 533 | dd <- dist(scale(patient_wide[,2:ncol(patient_wide)]), method = "euclidean")
 534 | hc <- hclust(dd, method = "ward.D2")
 535 | 
 536 | #With original clusters for comparison
 537 | patient_dat_vars <- cluster_dat_tumors_orig[,c("PID","cluster","frac_cluster")]
 538 | un_clusters <- unique(patient_dat_vars[,"cluster"])
 539 | un_clusters <- transform(un_clusters, cluster = as.character(cluster))
 540 | un_clusters <- unlist(un_clusters)
 541 | colnames(patient_dat_vars)<-c("PID","channel","value")
 542 | patient_wide_orig = dcast.data.table(data =patient_dat_vars, formula = 'PID ~ channel',
 543 |                           value.var = 'value',fun.aggregate = mean, fill = 0)
 544 | 
 545 | ```
 546 | 
 547 | #Asign whole patient based on all images from Zurich TMA to most similar SCP patient group from Basel TMA based on tumor cell type proportions
 548 | ```{r}
 549 | #Read in patient cell type composition from Basel TMA
 550 | basel_patients = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/output_BaselTMA/patient_wide_basel.csv',header = T)
 551 | meta_patient_clustering = fread(file='/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/output_BaselTMA/SCP_patientgroups.csv', header = TRUE)
 552 | 
 553 | #Aggregate into mean cell type composition per SCP patient group
 554 | basel = merge(basel_patients,meta_patient_clustering, by = 'PID',all.x = T)
 555 | basel$patient_pheno[is.na(basel$patient_pheno)] = 18
 556 | group_means = lapply(unique(basel$patient_pheno),function(x){colMeans(basel[patient_pheno == x,-c('PID','patient_pheno')])})
 557 | group_means_t = data.table(matrix(unlist(group_means), ncol = length(group_means[[1]]), byrow = TRUE))
 558 | group_means_t$patient_pheno = unique(basel$patient_pheno)
 559 | names(group_means_t) = c(names(group_means[[1]]),'patient_pheno')
 560 | 
 561 | rnames = group_means_t$patient_pheno
 562 | basel_mat = as.matrix(group_means_t[,-'patient_pheno'])
 563 | rownames(basel_mat) = rnames
 564 | 
 565 | #Sum over original clusters to get metacluster measurement per image
 566 | rnames = patient_wide_orig$PID
 567 | colnames(patient_wide_orig) <- mapvalues(colnames(patient_wide_orig), from=cluster_match$zuri, to=cluster_match$Basel)
 568 | zuri_patients_sum = data.table(t(rowsum(t(patient_wide_orig[,-'PID']), group = colnames(patient_wide_orig[,-'PID']), na.rm = T)))
 569 | zuri_patients_sum$core = rnames
 570 | zuri_mat = as.matrix(zuri_patients_sum[,-'PID'])
 571 | rownames(zuri_mat) = rnames
 572 | zuri_mat = zuri_mat[,colnames(basel_mat)]
 573 | 
 574 | basel_mat = basel_mat[order(as.numeric(rownames(basel_mat))), ] 
 575 | 
 576 | crosscor = cor(t(basel_mat),t(zuri_mat))
 577 | idx_min = apply(crosscor,2,function(x){which(x == max(x))})
 578 | dist_min = apply(crosscor,2,max)
 579 | SCP_match_patient = data.table(names(unlist(idx_min)))
 580 | colnames(SCP_match_patient) = 'zuri'
 581 | SCP_match_patient$basel = idx_min
 582 | names(SCP_match_patient) = c('PID','patient_pheno_patient')
 583 | 
 584 | ```
 585 | 
 586 | #Image clustering use tumor cells only
 587 | ```{r}
 588 | # Run hierclust on image's celltype fraction
 589 | image_dat_vars <- unique(cluster_dat_tumors) #cluster_dat for all images
 590 | grade <- unique(image_dat_vars[,c('core','grade')])
 591 | image_dat_vars <- image_dat_vars[,c("core","cluster","frac_cluster")]
 592 | un_clusters_im <- unique(image_dat_vars$cluster)
 593 | colnames(image_dat_vars)<-c("core","channel","value")
 594 | image_wide = dcast.data.table(data =image_dat_vars, formula = 'core ~ channel',
 595 |                           value.var = 'value',fun.aggregate = mean, fill = 0)
 596 | 
 597 | dd_image <- dist(scale(image_wide[,2:ncol(image_wide)]), method = "euclidean")
 598 | hc_image <- hclust(dd_image, method = "ward.D2")
 599 | 
 600 | #With original clusters for comparison
 601 | image_dat_vars <- unique(cluster_dat_tumors_orig) #cluster_dat for all images
 602 | grade <- unique(image_dat_vars[,c('core','grade')])
 603 | image_dat_vars <- image_dat_vars[,c("core","cluster","frac_cluster")]
 604 | un_clusters_im <- unique(image_dat_vars$cluster)
 605 | colnames(image_dat_vars)<-c("core","channel","value")
 606 | image_wide_orig = dcast.data.table(data =image_dat_vars, formula = 'core ~ channel',
 607 |                           value.var = 'value',fun.aggregate = mean, fill = 0)
 608 | 
 609 | ```
 610 | 
 611 | #Asign individual images from Zurich TMA to most similar SCP patient group from Basel TMA based on tumor cell type proportions
 612 | ```{r}
 613 | #Read in patient cell type composition from Basel TMA
 614 | basel_patients = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/output_BaselTMA/patient_wide_basel.csv',header = T)
 615 | meta_patient_clustering = fread(file='/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/output_BaselTMA/SCP_patientgroups.csv', header = TRUE)
 616 | 
 617 | #Aggregate into mean cell type composition per SCP patient group
 618 | basel = merge(basel_patients,meta_patient_clustering, by = 'PID',all.x = T)
 619 | basel$patient_pheno[is.na(basel$patient_pheno)] = 18
 620 | group_means = lapply(unique(basel$patient_pheno),function(x){colMeans(basel[patient_pheno == x,-c('PID','patient_pheno')])})
 621 | group_means_t = data.table(matrix(unlist(group_means), ncol = length(group_means[[1]]), byrow = TRUE))
 622 | group_means_t$patient_pheno = unique(basel$patient_pheno)
 623 | names(group_means_t) = c(names(group_means[[1]]),'patient_pheno')
 624 | 
 625 | rnames = group_means_t$patient_pheno
 626 | basel_mat = as.matrix(group_means_t[,-'patient_pheno'])
 627 | rownames(basel_mat) = rnames
 628 | 
 629 | #Sum over original clusters to get metacluster measurement per image
 630 | rnames = image_wide_orig$core
 631 | colnames(image_wide_orig) <- mapvalues(colnames(image_wide_orig), from=cluster_match$zuri, to=cluster_match$Basel)
 632 | zuri_images_sum = data.table(t(rowsum(t(image_wide_orig[,-'core']), group = colnames(image_wide_orig[,-'core']), na.rm = T)))
 633 | zuri_images_sum$core = rnames
 634 | zuri_mat = as.matrix(zuri_images_sum[,-'core'])
 635 | rownames(zuri_mat) = rnames
 636 | zuri_mat = zuri_mat[,colnames(basel_mat)]
 637 | 
 638 | basel_mat = basel_mat[order(as.numeric(rownames(basel_mat))), ] 
 639 | 
 640 | crosscor = cor(t(basel_mat),t(zuri_mat))
 641 | idx_min = apply(crosscor,2,function(x){which(x == max(x))})
 642 | dist_min = apply(crosscor,2,max)
 643 | SCP_match = data.table(names(unlist(idx_min)))
 644 | colnames(SCP_match) = 'zuri'
 645 | SCP_match$basel = idx_min
 646 | names(SCP_match) = c('core','patient_pheno')
 647 | 
 648 | ```
 649 | 
 650 | #Compute KL divergence between tumor cell type distributions of the individual core and the patient average 
 651 | ```{r}
 652 | #KL divergence per patient
 653 | 
 654 | #Tumors mean
 655 | dat_cut_tumors <- cluster_dat_tumors[,c("core","cluster","perc_cluster","PID")]
 656 | 
 657 | #Set absent cells types to 0
 658 | dat_cut_tumors_wide = dcast.data.table(dat_cut_tumors,formula = 'PID + core ~ cluster',value.var = 'perc_cluster',fill = 0)
 659 | dat_cut_tumors_long = melt.data.table(dat_cut_tumors_wide, id.vars = c('PID','core') ,variable.name = 'cluster', value.name = 'perc_cluster')
 660 | 
 661 | #Split into patients
 662 | ind_patients <- split( dat_cut_tumors_long , f = dat_cut_tumors_long$PID )
 663 | all_patient = lapply(ind_patients, function(x){x[order(x$core),]})
 664 | all_patient_tumors = lapply(all_patient,function(x){x[, patient_mean := mean(perc_cluster), by=cluster]})
 665 | ind_cores_tumors <- lapply(all_patient_tumors, function(x){split( x , f = x$core )})
 666 | 
 667 | #Calculate KL divergence
 668 | kldiv <- lapply(ind_cores_tumors, function(x){lapply(x,function(y){entropy::KL.plugin(y$perc_cluster, na.omit(y$patient_mean))})})
 669 | sum_kldiv <- lapply(kldiv,function(x){Reduce("+",x)})
 670 | length_kldiv <- lapply(kldiv,function(x){length(x)})
 671 | mean_kldiv_mat_tumors <- unlist(t(sum_kldiv))/unlist(t(length_kldiv))
 672 | sum_kldiv_mat_tumors <- unlist(t(sum_kldiv))
 673 | 
 674 | ```
 675 | 
 676 | #Compute shannon entropy of each core
 677 | ```{r}
 678 | #Shannon entropy per core
 679 | 
 680 | #Split into cores
 681 | dat_cut_tumors <- unique(cluster_dat_tumors[,c("core","cluster","ncells")])
 682 | ind_cores <- split( dat_cut_tumors , f = dat_cut_tumors$core )
 683 | 
 684 | shannon_cores = lapply(ind_cores, function(x){entropy::entropy.ChaoShen(x$ncells)})
 685 | shannon_core = data.table(unlist(names(shannon_cores)))
 686 | names(shannon_core) = "core"
 687 | shannon_core$shannon = unlist(shannon_cores)
 688 | 
 689 | ```
 690 | 
 691 | #Boxplot of average KL divergence to patient mean for cores assigned to each SCP patientgroup
 692 | ```{r}
 693 | 
 694 | #SCP Patientgroup
 695 | kldiv_ordered = data.table(names(unlist(kldiv)))
 696 | kldiv_ordered$kldiv = as.vector(unlist(kldiv))
 697 | names(kldiv_ordered)[1] = 'core'
 698 | kldiv_ordered$core = unlist(lapply(strsplit(kldiv_ordered$core,'[.]'), function(x){paste(x[2],collapse = '.')}))
 699 | kldiv_ordered = merge(kldiv_ordered,SCP_match, by = 'core')
 700 | 
 701 | p <- ggplot(kldiv_ordered, aes(x=factor(patient_pheno), y=kldiv)) + 
 702 |   geom_boxplot()+
 703 |   geom_point(size=2, alpha=1)+
 704 |   #stat_summary( fun.y = "mean",geom="point",colour = "black", size = 5)+
 705 |   #stat_summary(aes(group=grade), fun.y=mean, geom="line", colour="green")+
 706 |   ylab("KL div to patietn average of core")+
 707 |   xlab("Patientgroup")+
 708 |   theme(panel.background = element_blank())+
 709 |   ggtitle('Cores assigned to patient group')
 710 | 
 711 | pdf('boxplots_KLdiv_SCP.pdf')
 712 | p
 713 | dev.off()
 714 | 
 715 | ```
 716 | 
 717 | #Patients clustering with stacked barplot of individual images, tumor images only, grouped by patient (use data including stroma for visualization)
 718 | ```{r}
 719 | #Prepare
 720 | cluster_dat_ordered_by_patient = cluster_dat_tumors_stroma[!location %in% c("NORMAL","METASTASIS"),]
 721 | image_order = cluster_dat_ordered_by_patient[,c('PID','core')]
 722 | image_order = unique(image_order)
 723 | un_patients = unique(image_order$PID)
 724 | un_patients = un_patients[order(un_patients)]
 725 | o <- hc$order
 726 | #If using a new automatic clustering order
 727 | all_order = image_order[order(match(image_order$PID,un_patients[o]))]
 728 | 
 729 | #Read in original ordering of stacked bar plot to reproduce exact order as in Figure. The automatic ordering is not identical because some trees flipped due to different internal order of the data due to changed patient naming.
 730 | all_order = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/all_order.csv')
 731 | colnames(all_order) = c('PID','core')
 732 | 
 733 | #Plot stacked barplot
 734 | cluster_dat_ordered_by_patient$cluster = factor(cluster_dat_ordered_by_patient$cluster, levels = rev(levels(cluster_dat_ordered_by_patient$cluster)))
 735 | p <- ggplot(cluster_dat_ordered_by_patient, aes(x=core, y=cell_density,  fill=cluster)) + 
 736 |   geom_bar(stat='identity')+
 737 |   scale_fill_manual("Clusters",values = mycols_basel_meta[as.numeric(levels(cluster_dat_ordered_by_patient$cluster))])+
 738 |   scale_x_discrete(limits = all_order$core)+
 739 |   #facet_wrap(core~)+
 740 |   labs(fill = "Clusters")+
 741 |   coord_flip()+
 742 |   xlab("Patient images")+
 743 |   ylab("Percentage of cluster cells in image")+
 744 |   theme(panel.background = element_blank(),
 745 |     axis.text.y = element_text(colour=c(col_vector,"#7FC97F",'black')[as.factor(all_order$PID)]))+
 746 |   ggtitle('Patient composition')
 747 | 
 748 | #If automatically clustsered plot dendrogram
 749 | # p2 <- ggdendro::ggdendrogram(hc, rotate = TRUE,labels = FALSE, theme_dendro = TRUE, leaf_labels = FALSE)+
 750 | #   theme(axis.title.x=element_blank(),
 751 | #           axis.text.x=element_blank(),
 752 | #           axis.ticks.x=element_blank(),
 753 | #           axis.title.y=element_blank(),
 754 | #           axis.text.y=element_blank(),
 755 | #           axis.ticks.y=element_blank())
 756 | 
 757 | #Clinical type bar
 758 | clinical_order = cluster_dat_ordered_by_patient[,c('clinical_type','PID','core')]
 759 | clinical_order = unique(clinical_order)
 760 | un_clin = unique(clinical_order$clinical_type)
 761 | all_clin_order = clinical_order[order(match(clinical_order$PID,unique(all_order$PID)))]
 762 | all_clin_order$clinical_type = factor(all_clin_order$clinical_type, levels = c('HR-HER2+','HR+HER2-','HR+HER2+','TripleNeg'))#'NORMAL'
 763 | p3 <- ggplot(all_clin_order, aes(x=1,y=c(1:length(all_clin_order$clinical_type))))+
 764 |       geom_tile(aes( fill=clinical_type))+
 765 |   scale_fill_manual(values = c(mycols_clinical,'black'))+ #'lightblue',
 766 |     theme(axis.title.x=element_blank(),
 767 |           axis.text.x=element_blank(),
 768 |           axis.ticks.x=element_blank(),
 769 |           axis.title.y=element_blank(),
 770 |           axis.text.y=element_blank(),
 771 |           axis.ticks.y=element_blank(),
 772 |           line = element_blank(),
 773 |           legend.position="none")
 774 | 
 775 | 
 776 | #Location bar
 777 | location_order = cluster_dat_ordered_by_patient[,c('location','PID','core')]
 778 | location_order = unique(location_order)
 779 | un_loc = unique(location_order$location)
 780 | all_loc_order = location_order[order(match(location_order$PID,unique(all_order$PID)))]
 781 | all_loc_order$location = factor(all_loc_order$location, levels = c('CENTER','PERIPHERY','METASTASIS','NORMAL'))#'NORMAL'
 782 | p3 <- ggplot(all_loc_order, aes(x=1,y=c(1:length(all_loc_order$location))))+
 783 |       geom_tile(aes( fill=location))+
 784 |   scale_fill_manual(values = c('red','blue','black','lightblue'))+ #'lightblue',
 785 |     theme(axis.title.x=element_blank(),
 786 |           axis.text.x=element_blank(),
 787 |           axis.ticks.x=element_blank(),
 788 |           axis.title.y=element_blank(),
 789 |           axis.text.y=element_blank(),
 790 |           axis.ticks.y=element_blank(),
 791 |           line = element_blank(),
 792 |           legend.position="none")
 793 | 
 794 | 
 795 | #SCP patient groups matched
 796 | all_pheno_order = SCP_match[order(match(SCP_match$core,all_order$core))]
 797 | all_pheno_order$patient_pheno = factor(all_pheno_order$patient_pheno)
 798 | p5 <- ggplot(all_pheno_order, aes(x=1,y=c(1:length(all_pheno_order$patient_pheno))))+
 799 |       geom_tile(aes( fill=patient_pheno))+
 800 |   scale_fill_manual(values = mycols_patient[as.numeric(levels(all_pheno_order$patient_pheno))])+ #'lightblue',
 801 |     theme(axis.title.x=element_blank(),
 802 |           axis.text.x=element_blank(),
 803 |           axis.ticks.x=element_blank(),
 804 |           axis.title.y=element_blank(),
 805 |           axis.text.y=element_blank(),
 806 |           axis.ticks.y=element_blank(),
 807 |           line = element_blank(),
 808 |           legend.position="none")
 809 | 
 810 | #KL divergence of each image to patient average
 811 | kldiv_ordered = data.table(names(unlist(kldiv)))
 812 | kldiv_ordered$kldiv = as.vector(unlist(kldiv))
 813 | names(kldiv_ordered)[1] = 'core'
 814 | kldiv_ordered$core = unlist(lapply(strsplit(kldiv_ordered$core,'[.]'), function(x){paste(x[2],collapse = '.')}))
 815 | add_normals = all_order$core[!all_order$core %in% kldiv_ordered$core]
 816 | kldiv_ordered = rbind(kldiv_ordered,data.frame(core = add_normals, kldiv = rep(NA,length(add_normals))))
 817 | kl_order = merge(cluster_dat_ordered_by_patient[,c('PID','core')],kldiv_ordered,by = 'core')
 818 | kl_order = unique(kl_order)
 819 | all_kl_order = kl_order[order(match(kl_order$PID,unique(all_order$PID)))]
 820 | #Colorbar for kl div from tumor means
 821 | p4 <- ggplot(all_kl_order, aes(x=1,y=c(1:length(all_kl_order$kldiv))))+
 822 |       geom_tile(aes( fill=all_kl_order$kldiv))+
 823 |   scale_fill_gradient2(low = "blue", mid = "white", high = "red")+
 824 |     theme(axis.title.x=element_blank(),
 825 |           axis.text.x=element_blank(),
 826 |           axis.ticks.x=element_blank(),
 827 |           axis.title.y=element_blank(),
 828 |           axis.text.y=element_blank(),
 829 |           axis.ticks.y=element_blank(),
 830 |           line = element_blank(),
 831 |           legend.position="none")
 832 | 
 833 | #Shannon entropy of each core
 834 | shannon_core = merge(cluster_dat_ordered_by_patient[,c('PID','core')],shannon_core,by = 'core')
 835 | shannon_core = unique(shannon_core)
 836 | all_shannon_order = shannon_core[order(match(shannon_core$PID,unique(all_order$PID)))]
 837 | 
 838 | p7 <- ggplot(all_shannon_order, aes(x=1,y=c(1:length(all_shannon_order$shannon))))+
 839 |       geom_tile(aes( fill=all_shannon_order$shannon))+
 840 |   scale_fill_gradient2(low = "blue", mid = "white", high = "purple")+
 841 |     theme(axis.title.x=element_blank(),
 842 |           axis.text.x=element_blank(),
 843 |           axis.ticks.x=element_blank(),
 844 |           axis.title.y=element_blank(),
 845 |           axis.text.y=element_blank(),
 846 |           axis.ticks.y=element_blank(),
 847 |           line = element_blank(),
 848 |           legend.position="none")
 849 | 
 850 | #Plot everything next to each other
 851 | p6 <- ggdraw() +
 852 |      #draw_plot(p2 + scale_y_reverse(), 0, 0.005, 0.36, 0.984) +
 853 |      draw_plot(p ,0.47, 0.035, 0.5, 0.926)+
 854 |    draw_plot(p3,0.45,0,0.05,1)+
 855 |   draw_plot(p4,0.42,0,0.05,1)+
 856 |   draw_plot(p7,0.39,0,0.05,1)+
 857 |   draw_plot(p5,0.36,0,0.05,1)
 858 | 
 859 | pdf(file="sbp.pdf", width=15, height=50)
 860 | p6
 861 | dev.off()
 862 | 
 863 | 
 864 | ```
 865 | 
 866 | #Read in fragmentation (cohesiveness) scores output by Matlab scipt (based on community detection results on topologic neighborhood graph)
 867 | ```{r}
 868 | frag = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/Communities/fragmentation_scores.csv',header = T)
 869 | 
 870 | ##Adjust naming to be consistent with names here in R pipeline (only necessary when loading from histoCAT/Matlab scripts for the first time, provided data already contains cleaned names)
 871 | # test = unique(frag$core)
 872 | # split_core = strsplit(frag$core,'_', fixed = TRUE)
 873 | # frag$core = unlist(lapply(split_core, function(x){paste(x[c(1,2,8,9,10)],collapse =  "_")}))
 874 | # #Replace the ones that don't need acquisition number again
 875 | # short = unlist(lapply(strsplit(test,'_', fixed = TRUE),function(x){paste(x[c(1,2,8,9)],collapse =  "_")}))
 876 | # duplicate_idx = duplicated(short) | duplicated(short, fromLast = TRUE)
 877 | # in_cores_index = unlist(lapply(unique(frag$core)[duplicate_idx],function(x){which(frag$core %in% x)}))
 878 | # frag$core[setdiff(1:length(frag$core),in_cores_index)] = unlist(lapply(strsplit(frag$core[setdiff(1:length(frag$core),in_cores_index)],'_', fixed = TRUE), function(x){paste(x[1:length(x)-1],collapse =  "_")}))
 879 | # #Delete zeros from core names
 880 | # frag$core = unlist(lapply(frag$core,function(x){gsub("000","",x)}))
 881 | 
 882 | #Calculate fragmentation score
 883 | frag[,frag_log := log(frag)]
 884 | frag[,frag_score := ((frag_log - min(frag_log))/(max(frag_log) - min(frag_log)))]
 885 | 
 886 | #Set not present images NA and order according to stacked barplot above
 887 | missing = all_order$core[!all_order$core %in% frag$core]
 888 | frag = rbind(frag,data.frame(core = missing,frag_score = rep(NA,length(missing)),frag = rep(NA,length(missing)),frag_log = rep(NA,length(missing))))
 889 | frag = frag[(core %in% all_order$core),]
 890 | frag = unique(frag[,c('core','frag_score')])
 891 | frag =  frag[order(match(frag$core,all_order$core))]
 892 | 
 893 | #Fragmentation colorbar
 894 | p7 <- ggplot(frag, aes(x=1,y=c(1:length(frag$frag_score))))+
 895 |       geom_tile(aes( fill=frag$frag_score))+
 896 |   scale_fill_gradient2(low = "blue", mid = "white", high = "purple")+
 897 |     theme(axis.title.x=element_blank(),
 898 |           axis.text.x=element_blank(),
 899 |           axis.ticks.x=element_blank(),
 900 |           axis.title.y=element_blank(),
 901 |           axis.text.y=element_blank(),
 902 |           axis.ticks.y=element_blank(),
 903 |           line = element_blank(),
 904 |           legend.position="none")
 905 | 
 906 | p6 <- ggdraw() +
 907 |      #draw_plot(p2 + scale_y_reverse(), 0, 0.005, 0.36, 0.984) +
 908 |      draw_plot(p ,0.47, 0.035, 0.5, 0.926)+
 909 |    draw_plot(p3,0.45,0,0.05,1)+
 910 |   draw_plot(p7,0.39,0,0.05,1)
 911 | 
 912 | pdf(file="fragmentationscore_zurich.pdf", width=30, height=50)
 913 | p6
 914 | dev.off()
 915 | 
 916 | ```
 917 | 
 918 | #Regional heterogeneity across cores of the same patient (Figure 5)
 919 | ```{r}
 920 | #Dot plot with patients on y and patient KL div on x, every dot represents one core of the patient and is colored according to its individually assigned SCP
 921 | kldiv_ordered = data.table(names(unlist(kldiv)))
 922 | kldiv_ordered$kldiv = as.vector(unlist(kldiv))
 923 | names(kldiv_ordered)[1] = 'core'
 924 | kldiv_ordered$core = unlist(lapply(strsplit(kldiv_ordered$core,'[.]'), function(x){paste(x[2],collapse = '.')}))
 925 | kldiv_ordered = merge(kldiv_ordered,SCP_match, by = 'core')
 926 | kldiv_ordered = merge(kldiv_ordered,unique(Sample_metadata[,c('core','PID')]), by = 'core')
 927 | kldiv_ordered[,patient_avg := mean(kldiv), by = 'PID']
 928 | kldiv_ordered$PID = factor(kldiv_ordered$PID, levels = as.vector(as.matrix(unique(kldiv_ordered[order(patient_avg),'PID']))))
 929 | 
 930 | 
 931 | p = ggplot(kldiv_ordered,aes(x = PID,y = kldiv,color = factor(patient_pheno)))+
 932 |   geom_point(aes(size = 13))+
 933 |   scale_color_manual(values = mycols_patient[sort(unique(kldiv_ordered$patient_pheno))])+
 934 |   theme(axis.text.x = element_text(angle = 90))+
 935 |    scale_size_continuous(range = c(0, 13))
 936 | 
 937 | pdf('dot.pdf',height = 10,width = 15)
 938 | p
 939 | dev.off()
 940 | 
 941 | #Bubble plot showing how often individual core SCP assignment agrees with whole patient SCP assignment
 942 | SCP_match = merge(SCP_match,unique(Sample_metadata[,c('core','PID')]),by = 'core');
 943 | SCP_match$PID = as.character(SCP_match$PID)
 944 | co = merge(SCP_match_patient,SCP_match,by = 'PID')
 945 | co$patient_pheno_patient = factor(co$patient_pheno_patient,levels = as.character(1:18))
 946 | co$patient_pheno = factor(co$patient_pheno,levels = as.character(1:18))
 947 | co[,nr := .N , by = c('patient_pheno_patient','patient_pheno')]
 948 | co[,perc := nr/.N , by = c('patient_pheno_patient')]
 949 | co[,perc_core := nr/.N, by = 'patient_pheno']
 950 | 
 951 | pdf('/home/jana/Desktop/R_dat/sad_new_plot.pdf',width = 13)
 952 | ggplot(co, aes(x = patient_pheno, y = patient_pheno_patient)) + 
 953 |   geom_point(aes(size =perc,
 954 |                  color = perc_core)) + 
 955 |   scale_x_discrete(drop=FALSE)+
 956 |   scale_y_discrete(drop=FALSE)+
 957 |   #scale_color_manual(values = mycols_patient[unique(co$patient_pheno_patient)])+
 958 |   scale_color_gradient(low="white", high="red",name = "Perc of cores assigned to x in patients assigned to y") +
 959 |   scale_size(range = c(1, 15),name = "Of patient assigned to y, perc of images assigned to x") +
 960 |   theme(axis.text.y=element_text(color = c(mycols_patient[as.numeric(levels(co$patient_pheno_patient))])))
 961 | dev.off()
 962 | 
 963 | 
 964 | #How many cores of the patients agree on same SCP
 965 | co[,agree := patient_pheno == patient_pheno_patient]
 966 | co[,nr_agree := sum(agree),by = 'PID']
 967 | co[,perc_agree := nr_agree/.N ,by = 'PID']
 968 | co[,perc_disagree := 1 - perc_agree]
 969 | fr = unique(co[,c('PID','perc_disagree')])
 970 | fr$perc_disagree = factor(fr$perc_disagree)
 971 | fr[,nr := .N ,by = 'perc_disagree']
 972 | fr[,perc := nr/.N]
 973 | fr = unique(fr[,c('perc_disagree','perc')])
 974 | 
 975 | bp<- ggplot(fr, aes(x="", y=perc, fill=factor(perc_disagree,levels = rev(levels(perc_disagree)))))+
 976 | geom_bar(width = 1, stat = "identity")+
 977 |   scale_fill_grey(end = 0,start = 0.8)+
 978 |   #scale_fill_manual("Clusters",values =  c(mycols_basel_meta[1:13],'black'))+
 979 |     theme(strip.background = element_blank(),
 980 |         panel.background=element_rect(fill='white', colour = 'black'),
 981 |         panel.grid.major=element_blank(),
 982 |         panel.grid.minor=element_blank(),
 983 |         plot.background=element_blank(),
 984 |         legend.key = element_blank())
 985 | 
 986 | pdf('bar.pdf')
 987 | bp
 988 | dev.off()
 989 | ```
 990 | 
 991 | 
 992 | 
 993 | #Read in tumor community data from Matlab (topological single-cell communities were extracted using the Matlab wrapper of the C++ Louvain implementation) including only tumor cells
 994 | ```{r}
 995 | 
 996 | nodules = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/Communities/Community_data_tumor.csv',header = T)
 997 | 
 998 | ##This part is only necessary when loading data output by Matlab scripts with histoCAT naming structure for the first time (provided data is already cleaned)
 999 | # #Exclude weak stains
1000 | # nodules = nodules[!str_detect(nodules$core,regex('Ay.x1')),]
1001 | # nodules = nodules[!str_detect(nodules$core,regex('Ay..x1')),]
1002 | # nodules = nodules[!str_detect(nodules$core,'control'),]
1003 | # nodules = nodules[!str_detect(nodules$core,'non-breast'),]
1004 | # 
1005 | # #Clean naming
1006 | # test = unique(nodules$core)
1007 | # split_core = strsplit(nodules$core,'_', fixed = TRUE)
1008 | # nodules$core = unlist(lapply(split_core, function(x){paste(x[c(1,2,8,9,10)],collapse =  "_")}))
1009 | # #Replace the ones that don't need acquisition number again
1010 | # short = unlist(lapply(strsplit(test,'_', fixed = TRUE),function(x){paste(x[c(1,2,8,9)],collapse =  "_")}))
1011 | # duplicate_idx = duplicated(short) | duplicated(short, fromLast = TRUE)
1012 | # in_cores_index = unlist(lapply(unique(nodules$core)[duplicate_idx],function(x){which(nodules$core %in% x)}))
1013 | # nodules$core[setdiff(1:length(nodules$core),in_cores_index)] = unlist(lapply(strsplit(nodules$core[setdiff(1:length(nodules$core),in_cores_index)],'_', fixed = TRUE), function(x){paste(x[1:length(x)-1],collapse =  "_")}))
1014 | # #Delete zeros from core names
1015 | # nodules$core = unlist(lapply(nodules$core,function(x){gsub("000","",x)}))
1016 | 
1017 | 
1018 | #Replace PG cluster ids with metacluster id's matched from the BaselTMA
1019 | nodules$Pheno = mapvalues(nodules$Pheno, from=cluster_match$zuri, to=cluster_match$Basel)
1020 | 
1021 | #Threshold for only communities of at least a certain size
1022 | nodules[,size_comm := .N, by = c('Community','core')]
1023 | nodules = nodules[size_comm > 9,]
1024 | 
1025 | #Calculate number of cells from each metacluster per community (community numbers are only unique per core)
1026 | colnames(nodules)[colnames(nodules) == 'Pheno'] = 'metacluster'
1027 | nodules[,ncells := .N , by = c('Community','metacluster','core')]
1028 | nodules[,perc_cluster := ncells/size_comm, by = c('Community','core')]
1029 | 
1030 | #Keep as separate variables for later
1031 | save_size = unique(nodules[,c('core','Community','size_comm')])
1032 | nodules_orig = nodules
1033 | 
1034 | #Set missing cells types to 0
1035 | nodules <- unique(nodules[,c("core","metacluster","ncells","Community")]) 
1036 | nodules$Pheno = factor(nodules$Pheno, levels = sort(unique(nodules$Pheno)))
1037 | nodules_wide = dcast.data.table(nodules,formula = 'Community + core ~ metacluster',value.var = 'ncells',fill = 0) #user perc_cluster for mixed nodules
1038 | nodules_wide_not_norm = nodules_wide
1039 | 
1040 | ```
1041 | 
1042 | #PG cluster tumor communtities
1043 | ```{r}
1044 | 
1045 | #01-normalize absolute metacluster cell numbers of each community per metacluster
1046 | nodules_wide = cbind( nodules_wide[,c('Community','core')],apply(nodules_wide[,-c('Community','core')],2, function(x){(x-min(x))/(max(x)-min(x))}))
1047 | 
1048 | #Run PhenoGraph
1049 | rand_seed = 3
1050 | rpheno_out = cytofkit::Rphenograph(nodules_wide[,-c('Community','core')], k = 80, seed = rand_seed,approx = T)
1051 | nodules_wide$cluster = igraph::membership(rpheno_out)
1052 | 
1053 | ##Save out PG results
1054 | #fwrite(nodules_wide[,c('Community','cluster','core')],'PG_epi_zuri.csv',col.names = T)
1055 | 
1056 | #Read in previous PG result from publication
1057 | cl_dat = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/Communities/PG_tumor_communities.csv',header = T) 
1058 | nodules_wide = merge(nodules_wide,cl_dat,by = c('Community','core'))
1059 | 
1060 | ```
1061 | 
1062 | #Heatmap of metacluster content of each tumor community type
1063 | ```{r}
1064 | nodules_long = melt.data.table(nodules_wide, id.vars = c('Community','cluster','core') ,variable.name = 'channel', value.name = 'perc_cluster')
1065 | summary_dat = nodules_long[ ,list(
1066 |   mean_val= mean(perc_cluster),
1067 |   #std_val = std(perc_cluster),
1068 |   cell_cluster=.N),
1069 |   by=.(channel,cluster)]
1070 | hm_dat = dcast.data.table(data =summary_dat, formula = 'cluster ~ channel',
1071 |                           value.var = 'mean_val') #can be exchanged for 'median_val'
1072 | 
1073 | #Convert to matrix
1074 | trownames = hm_dat$cluster
1075 | hm_dat = as.matrix(hm_dat[,-1,with=F])
1076 | row.names(hm_dat) = trownames
1077 | 
1078 | # Set color map
1079 | cols = rev(brewer.pal(11,'Spectral'))
1080 | cmap = colorRampPalette(cols)
1081 | 
1082 | # Hierarchical clustering on rows with Ward's linkage
1083 | tdist = as.dist(1-cor(t(hm_dat), method="spearman"))
1084 | hr <- hclust(tdist, method="ward.D2")
1085 | co_r <- order.optimal(tdist, hr$merge)
1086 | hr$merge = co_r$merge
1087 | hr$order = co_r$order
1088 | 
1089 | # Order rows in heatmap according to clustering
1090 | order_heatmap_zscored = row.names(hm_dat)[hr$order]
1091 | 
1092 | # Hierarchical clustering on columns with Ward's linkage
1093 | tdist = as.dist(1-cor((hm_dat), method="spearman"))
1094 | hc <- hclust(tdist, method="ward.D2")
1095 | co_c <- order.optimal(tdist, hc$merge)
1096 | hc$merge = co_c$merge
1097 | hc$order = co_c$order
1098 | 
1099 | # Z-score data
1100 | p_dat = scale(hm_dat)
1101 | 
1102 | # Censor z-score at 2
1103 | p_dat[p_dat > 2] =2
1104 | p_dat[p_dat < -2] =-2
1105 | 
1106 | 
1107 | pdf(file="tumor_community_heatmap_k80.pdf", width=10, height=10)
1108 | 
1109 | heatmap.2(p_dat,
1110 |           scale ='none',
1111 |           trace = "none",
1112 |           col=cmap(75),
1113 |           Rowv=as.dendrogram(hr),
1114 |           Colv=as.dendrogram(hc),
1115 |           density.info ='none',
1116 |           cexRow=0.6,
1117 |           cexCol=0.6,
1118 |           margins=c(4,8),
1119 |           xlab = 'Markers',
1120 |           ylab ='Cluster',
1121 |           main = 'PG_norm_slide',
1122 |           colCol = c(mycols_basel_meta[14:27],'black'),
1123 |           colRow = col_vector) #mycols_basel for small clusters
1124 | 
1125 | dev.off()
1126 | 
1127 | 
1128 | ```
1129 | 
1130 | 
1131 | #tSNE of communities based on metacluster cell content
1132 | ```{r}
1133 | #Remove duplicates
1134 | dat_tsne = nodules_wide[!duplicated(nodules_wide[,-c('Community','core','cluster')])]
1135 | 
1136 | #Run tSNE
1137 | require(doParallel)
1138 | cores = 10
1139 | options('mc.cores' = cores)
1140 | registerDoParallel(cores)
1141 | tsne_comm <- Rtsne.multicore::Rtsne.multicore(dat_tsne[,-c('Community','core','cluster')],
1142 |             verbose = T, dims = 2, num_threads = 10)
1143 | 
1144 | #fwrite(cbind(dat_tsne,tsne_comm$Y),'tsne_epi_zuri.csv',col.names = F)
1145 | 
1146 | #Read in saved tsne run from publication
1147 | tsne_comm = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/Communities/tsne_tumor_communities.csv') #tsne_comm.csv
1148 | 
1149 | #SCP patient groups on community tsne
1150 | dat_tsne = merge(dat_tsne,tsne_comm, by = c('Community','core'))
1151 | dat_tsne = merge(unique(Sample_metadata[,c('core','PID')]),dat_tsne,by = 'core')
1152 | dat_tsne = merge(dat_tsne,SCP_match,by = 'core')
1153 | dat_tsne$patient_pheno = as.factor(dat_tsne$patient_pheno)
1154 | 
1155 | #Plot
1156 | p = dat_tsne%>%
1157 |   ggplot(aes(x=V1, y=V2))+
1158 |   geom_point(size=1, alpha=0.8, aes(color=patient_pheno))+
1159 |   labs(colour="Patients")+
1160 |   scale_color_manual(values = mycols_patient[sort(as.numeric(levels(dat_tsne$patient_pheno)))])+
1161 |   ggtitle('Phenograph')+
1162 |   guides(color=guide_legend(override.aes=list(size=5)))+
1163 |   theme(strip.background = element_blank(),
1164 |         panel.background=element_rect(fill='white', colour = 'black'),
1165 |         panel.grid.major=element_blank(),
1166 |         panel.grid.minor=element_blank(),
1167 |         plot.background=element_blank(),
1168 |         legend.key = element_blank())
1169 | 
1170 | pdf('tsne_communities_colorPatientgroup.pdf',width = 10,height = 10)
1171 | p
1172 | dev.off()
1173 | 
1174 | 
1175 | #Community type clusters on tsne
1176 | dat_tsne$cluster = factor(dat_tsne$cluster,levels = hr$labels[hr$order])
1177 | p = dat_tsne%>%
1178 |   ggplot(aes(x=V1, y=V2))+
1179 |   geom_point(size=1, alpha=0.8, aes(color=cluster))+
1180 |   labs(colour="Patients")+
1181 |   scale_color_manual(values = col_vector[as.numeric(hr$labels[hr$order])])+
1182 |   ggtitle('Phenograph')+
1183 |   guides(color=guide_legend(override.aes=list(size=5)))+
1184 |   theme(strip.background = element_blank(),
1185 |         panel.background=element_rect(fill='white', colour = 'black'),
1186 |         panel.grid.major=element_blank(),
1187 |         panel.grid.minor=element_blank(),
1188 |         plot.background=element_blank(),
1189 |         legend.key = element_blank())
1190 | 
1191 | pdf('tsne_epithelial_communities_colorPG.pdf',width = 10,height = 10)
1192 | p
1193 | dev.off()
1194 | ```
1195 | 
1196 | #Stacked bars showing absolute numbers of cells of each metacluster per community type
1197 | ```{r}
1198 | #Use not normalized data to see absolute cell numbers
1199 | cl_dat = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/Communities/PG_tumor_communities.csv',header = T)
1200 | nodules_wide_not_norm = merge(nodules_wide_not_norm,cl_dat,by = c('Community','core'))
1201 | nodules_long = melt.data.table(nodules_wide_not_norm, id.vars = c('Community','cluster','core') ,variable.name = 'channel', value.name = 'perc_cluster')
1202 | 
1203 | summary_dat = nodules_long[ ,list(
1204 |   mean_val= mean(perc_cluster),
1205 |   cell_cluster=.N),
1206 |   by=.(channel,cluster)]
1207 | 
1208 | #Bars ordered according to heatmap (above)
1209 | d = unique(summary_dat[,c('channel','cluster','mean_val')])
1210 | p <- ggplot(d, aes(x=factor(cluster,levels = hr$labels[hr$order]), y=mean_val, fill=factor(channel))) + #hr$labels[hr$order]
1211 |   geom_bar(stat='identity',show.legend = TRUE)+
1212 |   scale_fill_manual("Clusters",values =  c(mycols_basel_meta[14:27],'black'))+ 
1213 |   labs(fill = "Clusters")+
1214 |   coord_flip()+
1215 |   xlab("Patient")+
1216 |   ylab("Percentage of cluster cells in patient")+
1217 |   theme(panel.background = element_blank(),
1218 |         axis.text.y = element_text(colour=col_vector[as.numeric(hr$labels[hr$order])]))+ 
1219 |   ggtitle('Patient composition')
1220 | 
1221 | pdf('comm_cluster_compositions.pdf',width = 10,height = 10)
1222 | p
1223 | dev.off()
1224 | ```
1225 | 
1226 | #Clustered heatmaps of community type make up of each patient
1227 | ```{r}
1228 | 
1229 | #Automatically clustered on rows and columns based on community type content of cores
1230 | nodules_wide = merge(unique(Sample_metadata[,c('core','PID')]),nodules_wide,by = 'core')
1231 | nodules_wide = merge(nodules_wide,SCP_match,by = 'core')
1232 | nodules_wide$patient_pheno = as.factor(nodules_wide$patient_pheno)
1233 | enrichment = unique(nodules_wide[,c('cluster','Community','core')])
1234 | enrichment[,both := .N, by = c('cluster','core')]
1235 | enrichment[,frac_patient := both/.N, by = c('core')]
1236 | enrichment = unique(enrichment[,c('core','cluster','frac_patient')])
1237 | d = dcast.data.table(enrichment,formula = 'core  ~ cluster',value.var = 'frac_patient',fill = 0)
1238 | d_mat = as.matrix(d[,-'core'])
1239 | rownames(d_mat) = d$patientcode
1240 | d = merge(d, SCP_match,by = 'core')
1241 | 
1242 | h = Heatmap(d_mat, name = "Clustergram", km = 1, col = colorRamp2(c(0, 1), c("white", "red")),
1243 |      show_row_names = T, show_column_names =  T, clustering_method_rows = "ward.D2",clustering_method_columns = "ward.D2")+
1244 | 
1245 | Heatmap(factor(d$patient_pheno), name = "Patientgroups", show_row_names = FALSE, width = unit(10, "mm"), col = structure(mycols_patient, names = c(as.character(1:18))))
1246 | 
1247 | pdf('levels_epithelial_communities_patientgroup_colors.pdf',width = 10, height = 20)
1248 | h
1249 | dev.off()
1250 | 
1251 | 
1252 | #Group by patient and ordered according to stacked barplot (above)
1253 | missing = all_order$core[!all_order$core %in% d$core]
1254 | d = d[,-'patient_pheno']
1255 | add =  matrix(data=0,nrow=length(missing),ncol=ncol(d)-1)
1256 | add = as.data.table(add)
1257 | add$core = missing
1258 | colnames(add) = c('1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','core')
1259 | d = rbind(d,add)
1260 | d = merge(d,Sample_metadata[,c('core','PID')],by = 'core')
1261 | d = merge(d, SCP_match,by = 'core')
1262 | d = d[order(match(d$core,rev(all_order$core))),]
1263 | 
1264 | d_mat = as.matrix(d[,-c('core','patient_pheno','PID')])
1265 | rownames(d_mat) = d$core
1266 | h = Heatmap(d_mat, name = "Clustergram", km = 1, col = colorRamp2(c(0, 1), c("white", "red")),
1267 |      show_row_names = T, show_column_names =  T,cluster_rows = FALSE,clustering_method_columns = "ward.D2")+
1268 |   Heatmap(factor(d$patient_pheno), name = "SCP", show_row_names = FALSE, width = unit(10, "mm"), col = structure(mycols_patient, names = c(as.character(1:18))))+
1269 |   Heatmap(factor(d$PID), name = "Patient", show_row_names = FALSE, width = unit(10, "mm"), col = structure(c(col_vector,'black'), names = c(unique(d$PID))))
1270 | 
1271 | pdf('levels_ordered_patient.pdf',width = 10, height = 20)
1272 | h
1273 | dev.off()
1274 | 
1275 | #Save column order to potentially order other plots accordingly (e.g. stacked bareplot displaying absolute cell type numbers in each community type)
1276 | o_epi = column_order(h)
1277 | 
1278 | ```
1279 | 
1280 | #Center or periphery core location of community types
1281 | ```{r}
1282 | nodules_wide = merge(unique(Sample_metadata[,c('core','PID','location')]),nodules_wide,by = c('core','PID'))
1283 | cluster_loc = nodules_wide[,c('cluster','core','Community','location')]
1284 | cluster_loc = cluster_loc[!location %in% c('NORMAL','METASTASIS','[]'),]
1285 | cluster_loc[,count := .N, by = c('cluster','location')]
1286 | cluster_loc[,frac_cluster := count/.N, by = 'cluster']
1287 | cluster_loc[,frac_location := count/.N, by = 'location']
1288 | cluster_loc = unique(cluster_loc[,c('cluster','location','frac_cluster')])
1289 | 
1290 | p <- ggplot(cluster_loc, aes(x=factor(cluster,levels = hr$labels[hr$order]), y=frac_cluster, fill=factor(location,levels = c('CENTER','PERIPHERY','NORMAL','METASTASIS')))) + 
1291 |   geom_bar(stat='identity',show.legend = TRUE)+
1292 |   scale_fill_manual("Clusters",values = c('red','blue','grey','black'))+ 
1293 |   labs(fill = "Location")+
1294 |   coord_flip()+
1295 |   xlab("loc")+
1296 |   ylab("Frac cluster")+
1297 |   theme(panel.background = element_blank(),
1298 |         axis.text.y = element_text(colour=col_vector[as.numeric(hr$labels[hr$order])]))+
1299 |   ggtitle('Location composition')
1300 | 
1301 | pdf('zurich_community_location.pdf',width = 10, height = 20)
1302 | p
1303 | dev.off()
1304 | 
1305 | ```
1306 | 
1307 | #Read in microenvironment community data from Matlab (topological single-cell communities were extracted using the Matlab wrapper of the C++ Louvain implementation) including all cells (but tumor metacluster agnostic -> all tumor cells are assigned to the same cell type (Pheno = 100))
1308 | ```{r}
1309 | nodules = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/Communities/Community_data_microenvironment.csv',header = T)
1310 | 
1311 | # #Adjust naming only first time when reading from Matlab (provided data is already cleaned)
1312 | # test = unique(nodules$core)
1313 | # split_core = strsplit(nodules$core,'_', fixed = TRUE)
1314 | # nodules$core = unlist(lapply(split_core, function(x){paste(x[c(1,2,8,9,10)],collapse =  "_")}))
1315 | # #Replace the ones that don't need acquisition number again
1316 | # short = unlist(lapply(strsplit(test,'_', fixed = TRUE),function(x){paste(x[c(1,2,8,9)],collapse =  "_")}))
1317 | # duplicate_idx = duplicated(short) | duplicated(short, fromLast = TRUE)
1318 | # in_cores_index = unlist(lapply(unique(nodules$core)[duplicate_idx],function(x){which(nodules$core %in% x)}))
1319 | # nodules$core[setdiff(1:length(nodules$core),in_cores_index)] = unlist(lapply(strsplit(nodules$core[setdiff(1:length(nodules$core),in_cores_index)],'_', fixed = TRUE), function(x){paste(x[1:length(x)-1],collapse =  "_")}))
1320 | # #Delete zeros from core names
1321 | # nodules$core = unlist(lapply(nodules$core,function(x){gsub("000","",x)}))
1322 | 
1323 | #Calculate number of cells from each cell type per community (community numbers are only unique per core)
1324 | nodules[,size_comm := .N, by = c('Community','core')]
1325 | nodules = nodules[size_comm > 9,]
1326 | nodules[,ncells := .N , by = c('Community','Pheno','core')]
1327 | nodules[,perc_cluster := ncells/size_comm, by = c('core','Community')]
1328 | 
1329 | #Save variables for bellow in pipeline
1330 | save_size = unique(nodules[,c('core','Community','size_comm')])
1331 | nodules_orig = nodules
1332 | 
1333 | #Set all tumor cell types to metacluster 100
1334 | nodules <- unique(nodules[,c("core","Pheno","ncells","Community")])
1335 | nodules$Pheno = factor(nodules$Pheno, levels = c(rev(cluster_order[cluster_order %in% c("41","3","39","7","2" ,"21","28","18","5","1","16")]),"100"))
1336 | 
1337 | #Set missing cells types to 0
1338 | nodules_wide = dcast.data.table(nodules,formula = 'Community + core ~ Pheno',value.var = 'ncells',fill = 0)
1339 | nodules_wide_not_norm = nodules_wide
1340 | ```
1341 | 
1342 | #Run PG on microenvironment communities
1343 | ```{r}
1344 | #01-normalize absolute cell type numbers of each community per cel type
1345 | nodules_wide = cbind( nodules_wide[,c('Community','core')],apply(nodules_wide[,-c('Community','core')],2, function(x){(x-min(x))/(max(x)-min(x))}))
1346 | 
1347 | #Run PG
1348 | rand_seed = 3
1349 | rpheno_out = cytofkit::Rphenograph(nodules_wide[,-c('Community','core')], k = 20, seed = rand_seed,approx = T)
1350 | nodules_wide$cluster = rpheno_out$membership
1351 | 
1352 | #Write out PG result
1353 | #fwrite(nodules_wide[,c('Community','cluster','core')],'PG_stroma_zurich.csv',col.names = T)
1354 | 
1355 | #Read in PG result from publication
1356 | cl_dat = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/Communities/PG_microenvironmnet_communities.csv',header = T)
1357 | nodules_wide = merge(nodules_wide, cl_dat, by = c('core','Community'))
1358 | 
1359 | ```
1360 | 
1361 | #Heatmap of cell type content of each microenvironment community type
1362 | ```{r}
1363 | nodules_long = melt.data.table(nodules_wide, id.vars = c('Community','core','cluster') ,variable.name = 'channel', value.name = 'perc_cluster')
1364 | summary_dat = nodules_long[ ,list(
1365 |   mean_val= mean(perc_cluster),
1366 |   cell_cluster=.N),
1367 |   by=.(channel,cluster)]
1368 | hm_dat = dcast.data.table(data =summary_dat, formula = 'cluster ~ channel',
1369 |                           value.var = 'mean_val') #can be exchanged for 'median_val' 
1370 | 
1371 | #Convert to a matrix
1372 | trownames = hm_dat$cluster
1373 | hm_dat = as.matrix(hm_dat[,-1,with=F])
1374 | row.names(hm_dat) = trownames
1375 | 
1376 | # Set color map
1377 | cols = rev(brewer.pal(11,'Spectral'))
1378 | cmap = colorRampPalette(cols)
1379 | 
1380 | # Hierarchical clustering on rows with Ward's linkage
1381 | tdist = as.dist(1-cor(t(hm_dat), method="spearman"))
1382 | hr <- hclust(tdist, method="ward.D2")
1383 | co_r <- order.optimal(tdist, hr$merge)
1384 | hr$merge = co_r$merge
1385 | hr$order = co_r$order
1386 | 
1387 | # Order rows in heatmap according to clustering
1388 | order_heatmap_zscored = row.names(hm_dat)[hr$order]
1389 | 
1390 | # Hierarchical clustering on columns with Ward's linkage
1391 | tdist = as.dist(1-cor((hm_dat), method="spearman"))
1392 | hc <- hclust(tdist, method="ward.D2")
1393 | co_c <- order.optimal(tdist, hc$merge)
1394 | hc$merge = co_c$merge
1395 | hc$order = co_c$order
1396 | 
1397 | # Z-score data
1398 | p_dat = scale(hm_dat)
1399 | 
1400 | # Censor z-score at 2
1401 | p_dat[p_dat > 2] =2
1402 | p_dat[p_dat < -2] =-2
1403 | 
1404 | #Save order
1405 | order_stroma = hr$labels[hr$order]
1406 | 
1407 | 
1408 | pdf(file="stroma_node_heatmap_k20_zurich.pdf", width=10, height=10)
1409 | 
1410 | heatmap.2(p_dat,
1411 |           scale ='none',
1412 |           trace = "none",
1413 |           col=cmap(75),
1414 |           Rowv=as.dendrogram(hr),
1415 |           Colv=as.dendrogram(hc),
1416 |           density.info ='none',
1417 |           cexRow=0.6,
1418 |           cexCol=0.6,
1419 |           margins=c(4,8),
1420 |           xlab = 'Markers',
1421 |           ylab ='Cluster',
1422 |           main = 'PG_norm_slide',
1423 |           colCol = c(rev(rev(cols_corresp_basel)[cluster_order %in% c("41","3" , "39","7","2" ,"21","28","18","5","1","16")]),'black'), 
1424 |           colRow = col_vector) 
1425 | 
1426 | dev.off()
1427 | ```
1428 | 
1429 | #Run tsne on microenvironment communities based on cell type content
1430 | ```{r}
1431 | dat_tsne = nodules_wide[!duplicated(nodules_wide[,-c('Community','core','cluster')])] 
1432 | 
1433 | #Run a new tsne
1434 | require(doParallel)
1435 | cores = 10
1436 | options('mc.cores' = cores)
1437 | registerDoParallel(cores)
1438 | tsne_comm <- Rtsne.multicore::Rtsne.multicore(dat_tsne[,-c('Community','core','cluster')], 
1439 |             verbose = T, dims = 2, num_threads = 10)
1440 | 
1441 | #Save out tsne results
1442 | #fwrite(cbind(dat_tsne,tsne_comm$Y),'tsne_stroma_zurich_final.csv',col.names = F)
1443 | 
1444 | #Read in previous tsne result from publication
1445 | tsne_comm = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/Communities/tsne_microenvironment_communities.csv')
1446 | 
1447 | dat_tsne = merge(dat_tsne,tsne_comm,by = c('Community','core'))
1448 | dat_tsne$cluster = factor(dat_tsne$cluster,order_stroma)
1449 | 
1450 | #plot tsne of microenvironment communities
1451 | p = dat_tsne%>%
1452 |   ggplot(aes(x=V1, y=V2))+
1453 |   geom_point(size=1, alpha=0.8, aes(color=cluster))+
1454 |   labs(colour="Patients")+
1455 |   scale_color_manual(values = col_vector[as.numeric(order_stroma)])+
1456 |   ggtitle('Phenograph')+
1457 |   guides(color=guide_legend(override.aes=list(size=5)))+
1458 |   theme(strip.background = element_blank(),
1459 |         panel.background=element_rect(fill='white', colour = 'black'),
1460 |         panel.grid.major=element_blank(),
1461 |         panel.grid.minor=element_blank(),
1462 |         plot.background=element_blank(),
1463 |         legend.key = element_blank())
1464 | 
1465 | pdf('microenv_communities_tsne.pdf',width = 10,height = 10)
1466 | p
1467 | dev.off()
1468 | ```
1469 | 
1470 | #Stacked bars showing absolute numbers of cells of each cell type per community type
1471 | ```{r}
1472 | cl_dat = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/Communities/PG_microenvironmnet_communities.csv',header = T)
1473 | nodules_wide_not_norm = merge(nodules_wide_not_norm,cl_dat,by = c('Community','core'))
1474 | nodules_long = melt.data.table(nodules_wide_not_norm, id.vars = c('Community','core','cluster') ,variable.name = 'channel', value.name = 'perc_cluster')
1475 | nodules_long$perc_cluster = as.double(nodules_long$perc_cluster)
1476 | 
1477 | set.seed(2)
1478 | summary_dat = nodules_long[ ,list(
1479 |   median_val = median(perc_cluster),
1480 |   mean_val= mean(perc_cluster),
1481 |   #std_val = std(perc_cluster),
1482 |   cell_cluster=.N),
1483 |   by=.(channel,cluster)]
1484 | 
1485 | 
1486 | hm_dat = dcast.data.table(data =summary_dat, formula = 'cluster ~ channel',
1487 |                           value.var = 'mean_val') #can be exchanged for 'median_val' 
1488 | 
1489 | #Bars ordered acciording to heatmap
1490 | d = unique(summary_dat[,c('channel','cluster','mean_val')])
1491 | p <- ggplot(d, aes(x=factor(cluster,levels = hr$labels[hr$order]), y=mean_val, fill=factor(channel))) + 
1492 |   geom_bar(stat='identity',show.legend = TRUE)+
1493 |   scale_fill_manual("Clusters",values =  c(rev(rev(cols_corresp_basel)[cluster_order %in% c("41","3" , "39","7","2" ,"21","28","18","5","1","16")]),'black'))+ 
1494 |   labs(fill = "Clusters")+
1495 |   coord_flip()+
1496 |   xlab("Patient")+
1497 |   ylab("Percentage of cluster cells in patient")+
1498 |   theme(panel.background = element_blank(),
1499 |         axis.text.y = element_text(colour=col_vector[as.numeric(hr$labels[hr$order])]))+
1500 |   ggtitle('Patient composition')
1501 | 
1502 | pdf('comm_cluster_compositions.pdf',width = 10,height = 10)
1503 | p
1504 | dev.off()
1505 | 
1506 | ```
1507 | 
1508 | #Clustered heatmaps showing how tumors are made up by the different microenvironment community types (and grouping into 11 stromal environments SE)
1509 | ```{r}
1510 | 
1511 | #Prepare data
1512 | nodules_wide = merge(nodules_wide,Sample_metadata[,c('core','PID')],by = 'core')
1513 | enrichment = unique(nodules_wide[,c('cluster','Community','core')])
1514 | enrichment[,both := .N, by = c('cluster','core')]
1515 | enrichment[,frac_patient := both/.N, by = c('core')]
1516 | enrichment = unique(enrichment[,c('core','cluster','frac_patient')])
1517 | d = dcast.data.table(enrichment,formula = 'core  ~ cluster',value.var = 'frac_patient',fill = 0)
1518 | 
1519 | #Merge with matched SCP patient group info
1520 | d = merge(d,SCP_match,by = 'core')
1521 | d = merge(d,unique(Sample_metadata[,c('core','clinical_type')]),by = 'core')
1522 | 
1523 | #Read in original ordering of this table according to original patient IDs. IDs had to be changed for publication but order needs to remain for result from publication to be exactly reproducible.
1524 | order_orig = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/ZurichTMA/Communities/order_orig.csv',header = T)
1525 | d = d[order(match(core,as.character(order_orig$core))),]
1526 | d_mat = as.matrix(d[,-c('core','patient_pheno','clinical_type')])
1527 | rownames(d_mat) = d$core
1528 | 
1529 | #Cluster into 11 groups
1530 | hr = hclust(dist(d_mat), method = "ward.D2")
1531 | clusters = dendextend::cutree(hr, k = 11)
1532 | cnames = names(clusters)
1533 | n_clusters = data.table(cnames)
1534 | names(n_clusters) = 'core'
1535 | n_clusters$cluster = unlist(clusters)
1536 | n_clusters_orig = n_clusters
1537 | 
1538 | h = Heatmap(d_mat, name = "Clustergram", km = 1, col = colorRamp2(c(0, 1), c("white", "red")),
1539 |      show_row_names = T, show_column_names =  T, clustering_method_rows = "ward.D2",clustering_method_columns = "ward.D2",split = clusters)+
1540 | Heatmap(factor(d$patient_pheno), name = "SCP", show_row_names = FALSE, width = unit(10, "mm"), col = structure(mycols_patient[sort(unique(d$patient_pheno))], names = as.character(sort(unique(d$patient_pheno)))))+
1541 |   Heatmap(factor(d$clinical_type), name = "clinical_type", show_row_names = FALSE, width = unit(10, "mm"), col = structure(mycols_clinical, names = c('HR-HER2+','HR+HER2-','HR+HER2+','TripleNeg')))
1542 | 
1543 | #Save orders
1544 | o_stroma = column_order(h)
1545 | o_patients = names(clusters[unlist(row_order(h))])
1546 | 
1547 | pdf('microenv_communities.pdf',width = 10, height = 30)
1548 | h
1549 | dev.off()
1550 | 
1551 | #Icons describing cell type contents of each Stromal Environment (SE)
1552 | icon = merge(nodules_orig,n_clusters_orig,by = 'core')
1553 | icon = unique(icon[,c('Pheno','ncells','core','Community','cluster')])
1554 | icon[,tot := sum(as.double(ncells)), by = c('cluster','Pheno')]
1555 | icon[,avg := tot/sum(tot), by = c('cluster')]
1556 | 
1557 | bp<- ggplot(icon, aes(x="", y=avg, fill=factor(Pheno,levels = c(cluster_order[cluster_order %in% c("41","3" , "39","7","2","21","28","18","5","1","16")],'100'))))+
1558 | geom_bar(width = 1, stat = "identity")+
1559 |   facet_wrap(~cluster, ncol = 5)+
1560 |   scale_fill_manual("Clusters",values =  c(rev(cols_corresp_basel)[cluster_order %in% c("41","3","39","7","2" ,"21","28","18","5","1","16")],'black'))+
1561 |     theme(strip.background = element_blank(),
1562 |         panel.background=element_rect(fill='white', colour = 'black'),
1563 |         panel.grid.major=element_blank(),
1564 |         panel.grid.minor=element_blank(),
1565 |         plot.background=element_blank(),
1566 |         legend.key = element_blank())
1567 | 
1568 | pdf('icons.pdf',width = 10, height = 20)
1569 | bp
1570 | dev.off()
1571 | 
1572 | ```
1573 | 
1574 | #SCP and SE patientgroup enrichments
1575 | ```{r}
1576 | #Enrichment bubble plot between SCP and SE patient groups
1577 | cors = merge(n_clusters_orig,SCP_match,by = 'core')
1578 | cors[,nr_both := .N, by = c('cluster','patient_pheno')]
1579 | cors[,perc_ME := nr_both/.N, by = c('cluster')]
1580 | cors[,perc_CT := nr_both/.N, by = c('patient_pheno')]
1581 | cors$cluster = factor(cors$cluster, levels = as.character(1:11))
1582 | cors$patient_pheno = factor(cors$patient_pheno)
1583 | colnames(cors)[1:3] = c('PID','StromalComm_groups','SCP_groups')
1584 | 
1585 | #plot
1586 | p4 <- ggplot(cors,aes(y=StromalComm_groups,x=SCP_groups))+
1587 |   geom_point(aes(colour = perc_ME, 
1588 |                  size =perc_CT))  +   
1589 | scale_color_gradient2(low = "blue",  
1590 |                      mid = "white",
1591 |                      high = "red",
1592 |                      name = "Fraction of Comm_cluster in SCP group")+       
1593 | scale_size(range = c(1, 15),name = "Fraction of SCP in Comm_cluster group") +
1594 |       theme(axis.text.x=element_text(color = c(mycols_patient[sort(as.numeric(levels(cors$SCP_groups)))])))
1595 | 
1596 | pdf('Zurich_enrichment_SE_SCP_groups.pdf',width = 20, height = 10)
1597 | p4
1598 | dev.off()
1599 | 
1600 | 
1601 | #Loop through all SE patient groups and test for all SCP groups each one contains whether there is a significant enrichment
1602 | overview = list()
1603 | counter = 1
1604 | for (i in unique(cors$StromalComm_groups)){
1605 |   logic_vector = cors$StromalComm_groups == i
1606 |   MEs = unique(cors$SCP_groups[logic_vector])
1607 |   ME_vectors = lapply(MEs, function(x){cors$SCP_groups == x})
1608 |   res = lapply(ME_vectors, function(x){fisher.test(logic_vector,x,alternative = 'greater')})
1609 |   p = unlist(lapply(res, function(x){x$p.value}))
1610 |   CT_name = rep(i,length(MEs))
1611 |   ME_name = as.character(MEs)
1612 |   #Correct for multiple testing
1613 |   adjusted_p = p.adjust(p, method = 'bonferroni', n = length(p))
1614 |   overview[[counter]] = cbind(CT_name,ME_name,adjusted_p)
1615 |   counter = counter + 1
1616 | }
1617 | 
1618 | d = data.table(do.call(rbind,overview))
1619 | colnames(d) = c('StromalCommRegion','SCP_groups','adjusted_p')
1620 | fwrite(d,file = 'enrichment_p_vals_SCP_SE.csv',col.names = T)
1621 | 
1622 | 
1623 | #Enrichment bubble plot between clinical subtype and SE patient groups
1624 | cors = merge(n_clusters_orig,unique(Sample_metadata[,c('core','clinical_type')]),by = 'core')
1625 | cors = cors[clinical_type != '',]
1626 | cors[,nr_both := .N, by = c('cluster','clinical_type')]
1627 | cors[,perc_ME := nr_both/.N, by = c('cluster')]
1628 | cors[,perc_CT := nr_both/.N, by = c('clinical_type')]
1629 | cors$cluster = factor(cors$cluster, levels = as.character(1:11))
1630 | cors$clinical_type = factor(cors$clinical_type)
1631 | colnames(cors)[1:3] = c('patientcode','StromalComm_groups','clinical_type')
1632 | 
1633 | #plot
1634 | p4 <- ggplot(cors,aes(y=StromalComm_groups,x=clinical_type))+
1635 |   geom_point(aes(colour = perc_ME, 
1636 |                  size =perc_CT))  +   
1637 | scale_color_gradient2(low = "blue",  
1638 |                      mid = "white",
1639 |                      high = "red",
1640 |                      name = "Fraction of Comm_cluster in clinical_type group")+       
1641 | scale_size(range = c(1, 15),name = "Fraction of clinical_type in Comm_cluster group") +
1642 |       theme(axis.text.x=element_text(color = c(mycols_clinical)))
1643 | 
1644 | pdf('Zurich_enrichment_SE_clinical_groups.pdf',width = 20, height = 10)
1645 | p4
1646 | dev.off()
1647 | 
1648 | 
1649 | #Loop through all SE patient groups and test for all clinical subtypes each one contains whether there is a significant enrichment
1650 | overview = list()
1651 | counter = 1
1652 | for (i in unique(cors$StromalComm_groups)){
1653 |   logic_vector = cors$StromalComm_groups == i
1654 |   MEs = unique(cors$clinical_type[logic_vector])
1655 |   ME_vectors = lapply(MEs, function(x){cors$clinical_type == x})
1656 |   res = lapply(ME_vectors, function(x){fisher.test(logic_vector,x,alternative = 'greater')})
1657 |   p = unlist(lapply(res, function(x){x$p.value}))
1658 |   CT_name = rep(i,length(MEs))
1659 |   ME_name = as.character(MEs)
1660 |   #Correct for multiple testing
1661 |   adjusted_p = p.adjust(p, method = 'bonferroni', n = length(p))
1662 |   overview[[counter]] = cbind(CT_name,ME_name,adjusted_p)
1663 |  
1664 |   counter = counter + 1
1665 | }
1666 | 
1667 | d = data.table(do.call(rbind,overview))
1668 | colnames(d) = c('StromalCommRegion','clinical_type','adjusted_p')
1669 | fwrite(d,file = 'enrichment_p_vals_clinical_SE.csv',col.names = T)
1670 | ```
1671 | 
1672 | #Center or periphery core origin of microenvironment community types
1673 | ```{r}
1674 | nodules_wide = merge(unique(Sample_metadata[,c('core','PID','location')]),nodules_wide,by = 'core')
1675 | cluster_loc = nodules_wide[,c('cluster','core','Community','location')]
1676 | cluster_loc$cluster = factor(cluster_loc$cluster)
1677 | cluster_loc = cluster_loc[!location %in% c('NORMAL','METASTASIS','[]'),]
1678 | cluster_loc[,count := .N, by = c('cluster','location')]
1679 | cluster_loc[,frac_cluster := count/.N, by = 'cluster']
1680 | cluster_loc[,frac_location := count/.N, by = 'location']
1681 | cluster_loc = unique(cluster_loc[,c('cluster','location','frac_cluster')])
1682 | 
1683 | #Stacked bar plot
1684 | p <- ggplot(cluster_loc, aes(x=cluster, y=frac_cluster, fill=factor(location,levels = c('CENTER','PERIPHERY')))) + 
1685 |   geom_bar(stat='identity',show.legend = TRUE)+
1686 |   scale_fill_manual("Clusters",values = c('red','blue'))+
1687 |   labs(fill = "Location")+
1688 |   coord_flip()+
1689 |   xlab("loc")+
1690 |   ylab("Frac cluster")+
1691 |   theme(panel.background = element_blank(),
1692 |         axis.text.y = element_text(colour=col_vector))+
1693 |   ggtitle('Location composition')
1694 | 
1695 | 
1696 | pdf('zurich_community_location_microenv.pdf',width = 10, height = 20)
1697 | p
1698 | dev.off()
1699 | 
1700 | ```
1701 | 
1702 | #Order neighborhood analysis output per image (imported above) according to SEs
1703 | ```{r}
1704 | clustergram_dat_meta = clustergram_dat_meta[core %in% o_patients,]
1705 | clustergram_dat_meta = clustergram_dat_meta[order(match(core,o_patients))]
1706 | mat = mat[clustergram_dat_meta$core,]
1707 | splitting = clusters
1708 | splitting = splitting[order(match(names(splitting),clustergram_dat_meta$core))]
1709 | splitting = splitting[names(splitting) %in% clustergram_dat_meta$core]
1710 | 
1711 | #Add colorbars for location, met status, grade and patient ID
1712 | h = Heatmap(mat, name = "Clustergram", km = 1, col = colorRamp2(c(-1, 0, 1), c("blue", "white", "red")),
1713 |      show_row_names = TRUE, show_column_names =  TRUE, cluster_rows = F,clustering_method_columns = "ward.D2",split = splitting)+   
1714 |  Heatmap(factor(clustergram_dat_meta$location), name = "Location", show_row_names = FALSE, width = unit(10, "mm"), col = structure(c("white","red","blue","black"), names = c('[]','CENTER','PERIPHERY','METASTASIS')))+ 
1715 | Heatmap(factor(clustergram_dat_meta$grade), name = "Grade", show_row_names = FALSE, width = unit(10, "mm"), col = structure(c("green","blue","red",'black'), names = c('1','2','3','METASTASIS')))+ 
1716 | Heatmap(factor(clustergram_dat_meta$PTNM_M), name = "Met", show_row_names = FALSE, width = unit(10, "mm"), col = structure(c("black","gray",'white'), names = c('M1','M0_IPLUS','M0')))+ 
1717 |   Heatmap(factor(clustergram_dat_meta$PID), name = "Patient", show_row_names = T, width = unit(10, "mm"), col = structure(c(col_vector,'black','gray'), names = levels(factor(clustergram_dat_meta$PID))))
1718 | 
1719 | 
1720 | pdf('neighborhood_accordingtoSE.pdf',width = 90, height = 60)
1721 | h
1722 | dev.off()
1723 | 
1724 | 
1725 | #Enrichment analysis for pairwise neighborhood interactions in SE groups
1726 | n_clusters_orig = merge(n_clusters_orig,clustergram_dat,by = 'core')
1727 | n_clusters_orig$cluster = as.factor(n_clusters_orig$cluster)
1728 | 
1729 | #Loop through all pairwise neighborhood interactions and test for each significantly enriched/depleted interaction whether there is a enrichment for one SE group. Run once for positive interactions (enrichments) and once for negative (avoidances).
1730 | overview = list()
1731 | counter = 1
1732 | for (i in 4:ncol(n_clusters_orig)){
1733 |   cur = as.matrix(n_clusters_orig[,eval(i),with =F])
1734 |   cur[cur == -1] = 0
1735 |   if (sum(cur)> 0){ #adapt sign: run once for positive interactions and once for negative interactions (also adapt -1 to 1 bellow)
1736 |     cur = as.factor(cur)
1737 |     #only for positive association
1738 |     res = lapply(unique(n_clusters_orig$cluster), function(x){fisher.test(cur == 1,n_clusters_orig$cluster == x,alternative = 'greater',simulate.p.value=TRUE)}) 
1739 |     p = unlist(lapply(res, function(x){x$p.value}))
1740 |     interaction_name = rep(names(n_clusters_orig)[i],length(unique(n_clusters_orig$cluster)))
1741 |     group_name = unique(n_clusters_orig$cluster)
1742 |     #Correct for multiple testing of different SEs
1743 |     adjusted_p = p.adjust(p, method = 'bonferroni', n = length(p))
1744 |     overview[[counter]] = cbind(interaction_name,group_name,adjusted_p)
1745 |     counter = counter + 1
1746 |     }
1747 | }
1748 | 
1749 | #Write out overview of results
1750 | d = data.table(do.call(rbind,overview))
1751 | d = d[adjusted_p < 0.05,]
1752 | fwrite(d,file = 'Zurich_enrichment_SE_significantInteractions_positive.csv',col.names = T)
1753 | 
1754 | ```
1755 | 
1756 | #Boxplots of KL divergencies of individual cores to patient average per SEs and SCPs
1757 | ```{r}
1758 | #SCPs
1759 | 
1760 | #Take KL divergences calculated above
1761 | kldiv_ordered = data.table(names(unlist(kldiv)))
1762 | kldiv_ordered$kldiv = as.vector(unlist(kldiv))
1763 | names(kldiv_ordered)[1] = 'core'
1764 | kldiv_ordered$core = unlist(lapply(strsplit(kldiv_ordered$core,'[.]'), function(x){paste(x[2],collapse = '.')}))
1765 | kldiv_ordered = merge(kldiv_ordered,SCP_match, by = 'core')
1766 | 
1767 | #Also for shannon entrpoies
1768 | sh = merge(shannon_core,SCP_match)
1769 | 
1770 | #Boxplot
1771 | p <- ggplot(kldiv_ordered, aes(x=factor(patient_pheno), y=kldiv)) + #Exchange to sh for shannon entropy
1772 |   geom_boxplot()+
1773 |   geom_point(size=2, alpha=1)+
1774 |   ylab("KL div to patient average of core")+
1775 |   xlab("Patientgroup")+
1776 |   theme(panel.background = element_blank())+
1777 |   ggtitle('Cores assigned to patient group')
1778 | 
1779 | pdf('CoreKLdiv_to_patientMean_perAssignedPatientgroup.pdf')
1780 | p
1781 | dev.off()
1782 | 
1783 | 
1784 | 
1785 | #Stromal environments (if this should only include stromal cells, then adapt KL divergence calculation above to exclude tumor cells or assign them all the same label)
1786 | 
1787 | #Take KL divergences calculated above
1788 | kldiv_ordered = data.table(names(unlist(kldiv)))
1789 | kldiv_ordered$kldiv = as.vector(unlist(kldiv))
1790 | names(kldiv_ordered)[1] = 'core'
1791 | kldiv_ordered$core = unlist(lapply(strsplit(kldiv_ordered$core,'[.]'), function(x){paste(x[2],collapse = '.')}))
1792 | kldiv_ordered = merge(kldiv_ordered,n_clusters_orig, by = 'core')
1793 | kldiv_ordered$cluster = factor(kldiv_ordered$cluster,levels = c(2,1,6,4,10,5,8,11,3,7,9))
1794 | 
1795 | #Also for shannon entrpoies
1796 | sh = merge(shannon_core,n_clusters_orig)
1797 | sh$cluster = factor(sh$cluster,levels = c(2,1,6,4,10,5,8,11,3,7,9))
1798 | 
1799 | p <- ggplot(kldiv_ordered, aes(x=factor(cluster), y=kldiv)) + #Exchange to sh for shannon entropy
1800 |   geom_boxplot()+
1801 |   geom_point(size=2, alpha=1)+
1802 |   ylab("KL div to patietn average of core")+
1803 |   xlab("Patientgroup")+
1804 |   ylim(c(0,2))+
1805 |   theme(panel.background = element_blank())+
1806 |   ggtitle('Cores assigned to patient group')
1807 | 
1808 | pdf('/home/jana/Desktop/R_dat/KL_stromalGroups.pdf')
1809 | p
1810 | dev.off()
1811 | 
1812 | ```
1813 | 
1814 | 
1815 | #Regional heterogeneity of extended data figure 10d
1816 | ```{r}
1817 | #Bubble plot: Of the patients containing at least 1 image individually assigned to a SCP group, which other SCP groups appear in different images of these same patients.
1818 | 
1819 | #How many images of each SCP does every patient contain?
1820 | co = merge(SCP_match,unique(Sample_metadata[,c('core','PID')]),by = 'core');
1821 | co = dcast.data.table(co,'PID ~ patient_pheno')
1822 | 
1823 | #Calculate number of patients containing at least one image of a SCP group
1824 | nr_patients = as.matrix(co[,-'PID'])
1825 | nr_patients[nr_patients > 1] = 1
1826 | rownames(nr_patients) = co$PID
1827 | nr_patients = colSums(nr_patients)
1828 | table_nr = as.data.table(nr_patients)
1829 | table_nr$Var1 = names(nr_patients)
1830 | 
1831 | #Loop through SCP groups and
1832 | amount = list()
1833 | image_fraction = list()
1834 | for (i in c(1:4,6:(ncol(co)))){ #SCP 5 was not found in this cohort
1835 | 
1836 |   #Get patients containing at least one of current SCP
1837 |   cur_nr = as.vector(as.matrix(co[,as.character(eval(i)),with = F]))
1838 |   patients = co[cur_nr > 0,]
1839 |   patients_mat = as.matrix(patients[,-'PID'])
1840 |   rownames(patients_mat) = patients$PID
1841 |   patients_mat[patients_mat > 1] = 1
1842 |   
1843 |   #Calculate in how many patients each of the other SCPs appear together with the one in question
1844 |   cur_amount = colSums(patients_mat)
1845 |   if (nrow(patients_mat) > 1){
1846 |     cur_amount[as.character(eval(i))] = length(which(rowSums(patients_mat[,colnames(patients_mat)[colnames(patients_mat) != as.character(eval(i))]]) == 0))}
1847 |   else{cur_amount[as.character(eval(i))] = length(which(sum(patients_mat[,colnames(patients_mat)[colnames(patients_mat) != as.character(eval(i))]]) == 0))}
1848 |   amount = rbind(amount,cur_amount)
1849 |   
1850 |   #Calculate fractions of all images (of the patients in question) that are of a SCP group
1851 |   image_fraction = rbind(image_fraction,(colSums(patients[,-'PID'])/(sum(colSums(patients[,-'PID'])))))
1852 | 
1853 | }
1854 | 
1855 | 
1856 | #Convert absolute counts to matrix and melt
1857 | amount = matrix(unlist(amount), ncol = ncol(amount), byrow = F)
1858 | rownames(amount) = as.character(c(1:4,6:(ncol(co))))
1859 | colnames(amount) = as.character(c(1:4,6:(ncol(co))))
1860 | m_amount = melt(amount)
1861 | #Divide by nr of patients containing at least one of the SCP (to get fraction of all patients containing at least 1)
1862 | m_amount = merge(m_amount,table_nr,by = 'Var1')
1863 | m_amount = data.table(m_amount)
1864 | m_amount[,fraction_patients := value/nr_patients]
1865 | 
1866 | #Convert image fractions to matrix and melt
1867 | image_fraction = matrix(unlist(image_fraction), ncol = ncol(image_fraction), byrow = F)
1868 | rownames(image_fraction) = as.character(c(1:4,6:(ncol(co))))
1869 | colnames(image_fraction) = as.character(c(1:4,6:(ncol(co))))
1870 | m_act = melt(image_fraction)
1871 | m_amount = merge(m_amount,m_act,by = c('Var1','Var2'))
1872 | 
1873 | #Bubble plot
1874 | pdf('SCP_mixing.pdf',width = 13)
1875 | ggplot(m_amount, aes(x = factor(Var2), y = factor(Var1))) + 
1876 |   geom_point(aes(size =value.y,#Fraction out of all images of patients containing at least one of SCP on y-axis that are of SCP on x-axis
1877 |                  alpha = fraction_patients, #Fraction out of all patients containing at least one of SCP on y, containing at least one other image of SCP on x
1878 |                  color = factor(Var2))) + 
1879 |   scale_color_manual(values = mycols_patient[unique(m_amount$Var1)])+
1880 |   #scale_color_gradient(low="white", high="red",name = "...") +
1881 |   scale_size(range = c(1, 15),name = "Out of patients containing 1, fraction of images in each group") +
1882 |   theme(axis.text.y=element_text(color = c(mycols_patient[unique(m_amount$Var1)])))
1883 | dev.off()
1884 | 
1885 | 
1886 | 
1887 | 
1888 | 
1889 | #Same as above but for stromal environments (SEs)
1890 | 
1891 | #How many images of each SE does every patient contain?
1892 | co2 = merge(n_clusters_orig,unique(Sample_metadata[,c('core','PID')]),by = 'core');
1893 | co2 = dcast.data.table(co2,'PID ~ cluster')
1894 | 
1895 | #Calculate number of patients containing at least one image of a SE group
1896 | nr_patients = as.matrix(co2[,-'PID'])
1897 | nr_patients[nr_patients > 1] = 1
1898 | rownames(nr_patients) = co2$PID
1899 | nr_patients = colSums(nr_patients)
1900 | table_nr = as.data.table(nr_patients)
1901 | table_nr$Var1 = names(nr_patients)
1902 | 
1903 | #Loop through SCP groups and
1904 | amount = list()
1905 | image_fraction = list()
1906 | for (i in c(1:11)){
1907 |   
1908 |   #Get patients containing at least one of current SE
1909 |   cur_nr = as.vector(as.matrix(co2[,as.character(eval(i)),with = F]))
1910 |   patients = co2[cur_nr > 0,]
1911 |   patients_mat = as.matrix(patients[,-'PID'])
1912 |   rownames(patients_mat) = patients$PID
1913 |   patients_mat[patients_mat > 1] = 1
1914 |   
1915 |   #Calculate in how many patients each of the other SEs appear together with the one in question
1916 |   cur_amount = colSums(patients_mat)
1917 |   if (nrow(patients_mat) > 1){
1918 |     cur_amount[as.character(eval(i))] = length(which(rowSums(patients_mat[,colnames(patients_mat)[colnames(patients_mat) != as.character(eval(i))]]) == 0))}
1919 |   else{cur_amount[as.character(eval(i))] = length(which(sum(patients_mat[,colnames(patients_mat)[colnames(patients_mat) != as.character(eval(i))]]) == 0))}
1920 |   amount = rbind(amount,cur_amount)
1921 |   
1922 |   #Calculate fractions of all images (of the patients in question) that are of a SCP group
1923 |   image_fraction = rbind(image_fraction,(colSums(patients[,-'PID'])/(sum(colSums(patients[,-'PID'])))))
1924 | }
1925 | 
1926 | 
1927 | #Convert absolute counts to matrix and melt
1928 | amount = matrix(unlist(amount), ncol = ncol(amount), byrow = F)
1929 | rownames(amount) = as.character(c(1:11))
1930 | colnames(amount) = as.character(c(1:11))
1931 | m_amount = melt(amount)
1932 | #Divide by nr of patients containing at least one of the SE (to get fraction of all patients containing at least 1)
1933 | m_amount = merge(m_amount,table_nr,by = 'Var1')
1934 | m_amount = data.table(m_amount)
1935 | m_amount[,fraction_patients := value/nr_patients]
1936 | 
1937 | #Convert image fractions to matrix and melt
1938 | image_fraction = matrix(unlist(image_fraction), ncol = ncol(image_fraction), byrow = F)
1939 | rownames(image_fraction) = as.character(c(1:11))
1940 | colnames(image_fraction) = as.character(c(1:11))
1941 | m_act = melt(image_fraction)
1942 | m_amount = merge(m_amount,m_act,by = c('Var1','Var2'))
1943 | 
1944 | #Bubble plot
1945 | pdf('SE_mixing.pdf',width = 20)
1946 | ggplot(m_amount, aes(x = factor(Var2), y = factor(Var1))) + 
1947 |   geom_point(shape = 21,color = 'black',aes(size =value.y,#Fraction out of all images of patients containing at least one of SCP on y-axis that are of SCP on x-axis
1948 |                  alpha = fraction_patients,
1949 |                  fill = fraction_patients)) + #Fraction out of all patients containing at least one of SCP on y, containing at least one other image of SCP on x
1950 |   #scale_color_manual(values = mycols_patient[unique(m_amount$Var1)])+
1951 |   scale_fill_gradient(low="white", high="red",name = "...") +
1952 |   scale_size(range = c(1, 15),name = "...") 
1953 | dev.off()
1954 | 
1955 | 
1956 | 
1957 | #Combined SCP and SE as in Extended Data 10
1958 | names(co)[2:length(names(co))] = paste0('SCP_',names(co)[2:length(names(co))])
1959 | names(co2)[2:length(names(co2))] = paste0('StromalRegion_',names(co2)[2:length(names(co2))])
1960 | co_comb = merge(co,co2,by = 'PID')
1961 | nr_patients = as.matrix(co_comb[,-'PID'])
1962 | nr_patients[nr_patients > 1] = 1
1963 | rownames(nr_patients) = co_comb$PID
1964 | nr_patients = colSums(nr_patients)
1965 | table_nr = as.data.table(nr_patients)
1966 | table_nr$Var1 = names(nr_patients)
1967 | 
1968 | amount = list()
1969 | actual_amount =
1970 | tot_amount_SCP = list()
1971 | tot_amount_stroma = list()
1972 | for (i in colnames(co_comb)[colnames(co_comb) != 'PID']){
1973 |   cur_nr = as.vector(as.matrix(co_comb[,eval(i),with = F]))
1974 |   patients = co_comb[cur_nr > 0,]
1975 |   patients_mat = as.matrix(patients[,-'PID'])
1976 |   rownames(patients_mat) = patients$PID
1977 |   patients_mat[patients_mat > 1] = 1
1978 |   cur_amount = colSums(patients_mat)
1979 |   cur_code = str_split(i,'_')[[1]][1]
1980 |   
1981 |   if (nrow(patients_mat) > 1){
1982 |     cur_amount[as.character(eval(i))] = length(which(rowSums(patients_mat[,colnames(patients_mat)[(colnames(patients_mat) != as.character(eval(i))) & str_detect(colnames(patients_mat),cur_code)]]) == 0))}
1983 |   else{cur_amount[as.character(eval(i))] = length(which(sum(patients_mat[,colnames(patients_mat)[(colnames(patients_mat) != as.character(eval(i))) & str_detect(colnames(patients_mat),cur_code)]]) == 0))}
1984 |   amount = rbind(amount,cur_amount)
1985 |   
1986 |   scp = colSums(patients[,str_detect(colnames(patients),'SCP'),with = F])/(sum(colSums(patients[,str_detect(colnames(patients),'SCP'),with = F])))
1987 |   stroma = colSums(patients[,str_detect(colnames(patients),'StromalRegion'),with = F])/(sum(colSums(patients[,str_detect(colnames(patients),'StromalRegion'),with = F])))
1988 |   actual_amount = rbind(actual_amount,c(scp,stroma))
1989 |   tot_amount_SCP = rbind(tot_amount_SCP,c(i,sum(colSums(patients[,c(colnames(patients)[str_detect(colnames(patients),'SCP')]),with = F])))) #Are both identical
1990 | }
1991 | 
1992 | cnames = colnames(amount)
1993 | amount = matrix(unlist(amount), ncol = ncol(amount), byrow = F)
1994 | rownames(amount) = cnames
1995 | colnames(amount) = cnames
1996 | m_amount = melt(amount)
1997 | m_amount = merge(m_amount,table_nr,by = 'Var1')
1998 | m_amount = data.table(m_amount)
1999 | m_amount[,fraction_images := value/nr_patients]
2000 | actual_amount = matrix(unlist(actual_amount), ncol = ncol(actual_amount), byrow = F)
2001 | rownames(actual_amount) = cnames
2002 | colnames(actual_amount) = cnames
2003 | m_act = melt(actual_amount)
2004 | m_amount = merge(m_amount,m_act,by = c('Var1','Var2'))
2005 | 
2006 | #Order microenvironment regions differently
2007 | order_hierarchy = c('SCP_1','SCP_2','SCP_3','SCP_4','SCP_5','SCP_6','SCP_7','SCP_8','SCP_9','SCP_10','SCP_11','SCP_12','SCP_13','SCP_14','SCP_15','SCP_16','SCP_17','SCP_18','StromalRegion_2','StromalRegion_1','StromalRegion_6','StromalRegion_4','StromalRegion_10','StromalRegion_5','StromalRegion_8','StromalRegion_11','StromalRegion_3','StromalRegion_7','StromalRegion_9')
2008 | m_amount$Var1 = factor(m_amount$Var1,levels = rev(order_hierarchy))
2009 | m_amount$Var2 = factor(m_amount$Var2,levels = order_hierarchy)
2010 | 
2011 | pdf('/home/jana/Desktop/R_dat/probability_mixing_stroma_regions.pdf',width = 20,height = 15)
2012 | ggplot(m_amount, aes(x = factor(Var2), y = factor(Var1))) + 
2013 |   geom_point(shape = 21,color = 'black',aes(size =value.y,
2014 |                  #alpha = fraction_images,
2015 |                  fill = fraction_images)) + 
2016 |   #scale_color_manual(values = mycols_patient[unique(m_amount$Var1)])+
2017 |   scale_fill_gradient(low="white", high="red",name = "Out of patients containing 1, fraction of images in each group") +
2018 |   scale_size(range = c(1, 15),name = "Out of patients containing 1, fraction of images in each group") +
2019 |   theme(axis.text.x = element_text(angle = 90))
2020 | dev.off()
2021 | 
2022 | ```
2023 | 
2024 | 
2025 | #IHC-IMC comprison from Extended Data 2: first part whole image quant, second part single-cell quant
2026 | 
2027 | #Load IHC QuPATH scoring (image quant)
2028 | ```{r}
2029 | #QuPATH output csv files all contain IHC location
2030 | 
2031 | #Set directory containing all data for IHC IMC comparison
2032 | filepath <- c("/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/IHC_IMC/")
2033 | 
2034 | #Load IHC data
2035 | EcadherinTissue <- fread(paste(filepath,"QuPATH/ZT208_37_Ecadherin_Annotations.txt",sep=""),header = T)
2036 | HER2Tissue <- fread(paste(filepath,"QuPATH/ZT208_39_HER2_Annotations.txt",sep=""),header = T)
2037 | ERTissue <- fread(paste(filepath,"QuPATH/ZT208_7_ER_Annotations.txt",sep=""),header = T)
2038 | PRTissue <- fread(paste(filepath,"QuPATH/ZT208_8_PR_Annotations.txt",sep=""),header = T)
2039 | KI67Tissue <- fread(paste(filepath,"QuPATH/ZT208_6_KI67_Annotations.txt",sep=""),header = T)
2040 | 
2041 | #Format IHC quant data
2042 | names(EcadherinTissue)[1] <-paste("IHC_location")
2043 | names(EcadherinTissue)[6] <-paste("meanDAB_Ecadherin")
2044 | names(EcadherinTissue)[11] <-paste("Area")
2045 | EcadherinTissue[,'IHC_location']<- gsub(' - PathAnnotationObject', '',EcadherinTissue$IHC_location)
2046 | IHC <- subset(EcadherinTissue, , c('IHC_location','meanDAB_Ecadherin'))
2047 | 
2048 | names(HER2Tissue)[1] <-paste("IHC_location")
2049 | names(HER2Tissue)[6] <-paste("meanDAB_HER2")
2050 | names(HER2Tissue)[11] <-paste("Area")
2051 | HER2Tissue[,'IHC_location']<- gsub(' - PathAnnotationObject', '',HER2Tissue$IHC_location)
2052 | HER2Tissue <- subset(HER2Tissue, , c('IHC_location','meanDAB_HER2'))
2053 | IHC = merge(IHC, HER2Tissue, by='IHC_location', all.x = TRUE)
2054 | 
2055 | names(ERTissue)[1] <-paste("IHC_location")
2056 | names(ERTissue)[6] <-paste("meanDAB_ER")
2057 | names(ERTissue)[11] <-paste("Area")
2058 | ERTissue[,'IHC_location']<- gsub(' - PathAnnotationObject', '',ERTissue$IHC_location)
2059 | ERTissue <- subset(ERTissue, , c('IHC_location','meanDAB_ER'))
2060 | IHC = merge(IHC, ERTissue, by='IHC_location', all.x = TRUE)
2061 | 
2062 | names(PRTissue)[1] <-paste("IHC_location")
2063 | names(PRTissue)[6] <-paste("meanDAB_PR")
2064 | names(PRTissue)[11] <-paste("Area")
2065 | PRTissue[,'IHC_location']<- gsub(' - PathAnnotationObject', '',PRTissue$IHC_location)
2066 | PRTissue <- subset(PRTissue, , c('IHC_location','meanDAB_PR'))
2067 | IHC = merge(IHC, PRTissue, by='IHC_location', all.x = TRUE)
2068 | 
2069 | names(KI67Tissue)[1] <-paste("IHC_location")
2070 | names(KI67Tissue)[6] <-paste("meanDAB_KI67")
2071 | names(KI67Tissue)[11] <-paste("Area")
2072 | KI67Tissue[,'IHC_location']<- gsub(' - PathAnnotationObject', '',KI67Tissue$IHC_location)
2073 | KI67Tissue <- subset(KI67Tissue, , c('IHC_location','meanDAB_KI67'))
2074 | IHC = merge(IHC, KI67Tissue, by='IHC_location', all.x = TRUE)
2075 | 
2076 | ```
2077 | 
2078 | #Load IMC image quant data
2079 | ```{r}
2080 | #Channels to compare between IHC and IMC
2081 | IHCvsIMC_channels = c('Intensity_MeanIntensity_ER', 'Intensity_MeanIntensity_PR', 'Intensity_MeanIntensity_HER2', 'Intensity_MeanIntensity_Ecadherin', 'Intensity_MeanIntensity_Ki67')
2082 | 
2083 | #Load IMC single channel quantification
2084 | IMCtotalimage <- fread(paste(filepath,"IMCQuant_Image.csv",sep=""),header = T)
2085 | 
2086 | #Extract core locations
2087 | IMCtotalimage[, IMC_location := bbRtools::getInfoFromFileList(as.character(.BY),sep = '_',strPos =7),by=PathName_HER2]
2088 | IMC <- subset(IMCtotalimage, , c(IHCvsIMC_channels, 'IMC_location'), by=IMC_location)
2089 | 
2090 | ```
2091 | 
2092 | #Match IHC to IMC cores
2093 | ```{r}
2094 | #Load punch metadata
2095 | punch_meta <- fread(paste0(filepath,"punch_meta.csv"),header = T)
2096 | 
2097 | #Without metadata
2098 | IHCvsIMC_map <- subset(punch_meta, , c('IHC_location', 'IMC_location'))
2099 | IHCvsIMC_map = distinct(IHCvsIMC_map)
2100 | IHC_mapped = merge(IHCvsIMC_map, IHC, by='IHC_location')
2101 | IMC_mapped = merge(IHCvsIMC_map, IMC, by='IMC_location')
2102 | IHCvsIMC_mapped = merge(IHC_mapped, IMC, by='IMC_location')
2103 | 
2104 | #Add metadata
2105 | plot_dat <- merge(IHCvsIMC_mapped, punch_meta, by='IMC_location')
2106 | plot_dat <- plot_dat [!duplicated(IMC_location)]
2107 | 
2108 | ```
2109 | 
2110 | #Plot mean Image intensity of IHC vs IMC for individual channels
2111 | ```{r}
2112 | cur_channelname_x = 'meanDAB_PR'
2113 | cur_channelname_y = 'Intensity_MeanIntensity_PR'
2114 | my.formula <- y ~ x
2115 | 
2116 | ggplot(plot_dat, aes_string(x=cur_channelname_x, y=cur_channelname_y ))+
2117 |   geom_point(aes(alpha=1, color=as.factor(GRADE),size=1))+
2118 |   geom_smooth(method = "lm")+
2119 |   stat_poly_eq(formula = my.formula, 
2120 |                 aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
2121 |                 parse = TRUE) +theme_bw()
2122 | ```
2123 | 
2124 | #All channels
2125 | ```{r}
2126 | #Reformat to have multiple plots based on antibody
2127 | IHC_names <- data.frame("Method" = "IHC", "Stain" = colnames(IHC))
2128 | IMC_names <- data.frame("Method" = "IMC", "Stain" = colnames(IMC))
2129 | IMC_names <- slice(IMC_names,c(6,4,3,1,2,5))
2130 | Antigen  <- c("Antigen", "Ecadherin", "HER2", "ER", "PR", "KI67")
2131 | 
2132 | IHC_names <- cbind(IHC_names, Antigen)
2133 | IHC_names <- IHC_names[-c(1),]
2134 | IMC_names <- cbind(IMC_names, Antigen)
2135 | IMC_names <- IMC_names[-c(1),]
2136 | 
2137 | #IHC_names_long <- melt(IHC_names, id = c("Antigen","Method","Stain"))
2138 | #IMC_names_long <- melt(IHC_names, id = c("Antigen","Method", "Stain"))
2139 | IHCvsIMC_Antigens <- rbind(IHC_names, IMC_names)
2140 | 
2141 | IHCvsIMC_long <- melt(IHCvsIMC_mapped, id = c("IMC_location", "IHC_location"))
2142 | names(IHCvsIMC_long)[names(IHCvsIMC_long) == 'variable'] = "Stain"
2143 | IHCvsIMC_Antigens_long <- merge(IHCvsIMC_long, IHCvsIMC_Antigens, by = "Stain")
2144 | 
2145 | IHCvsIMC_Antigens_wide <- dcast.data.table(IHCvsIMC_Antigens_long, IMC_location+IHC_location+Antigen ~ Method, value.var = "value", fill = NA, fun.aggregate = mean)
2146 | 
2147 | #Add meta data
2148 | plot_dat <- merge(IHCvsIMC_Antigens_wide, punch_meta, by='IMC_location', suffixes = "")
2149 | plot_dat <- plot_dat[GRADE != "",]
2150 | 
2151 | #Plot
2152 | my.formula <- y ~ x
2153 | ggplot(plot_dat, aes_string(x="IHC", y="IMC" ))+
2154 |   facet_wrap(facets = "Antigen", scales = "free")+
2155 |   geom_point(aes(alpha=1, color=as.factor(GRADE),size=1))+
2156 |   geom_smooth(method = "lm")+
2157 |   stat_poly_eq(formula = my.formula, 
2158 |                 aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
2159 |                 parse = TRUE) +theme_bw()
2160 | 
2161 | 
2162 | #On top of each other
2163 | my.formula <- y ~ x
2164 | ggplot(plot_dat, aes_string(x="IHC", y="IMC" ,color = "Antigen"))+
2165 |   geom_point(aes(alpha=1,size=0.3))+
2166 |   geom_smooth(method = "lm")+
2167 |   stat_poly_eq(formula = my.formula, 
2168 |                 aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
2169 |                 parse = TRUE) +theme_bw()
2170 | ```
2171 | 
2172 | #Summary across all correlations for Fig1
2173 | ```{r}
2174 | #Get R squares separately
2175 | Rsq = data.frame()
2176 | for (i in unique(plot_dat$Antigen)){
2177 |   cur = plot_dat[Antigen == i,]
2178 |   res = lm(IMC ~ IHC, data=cur)
2179 |   Rsq = rbind(Rsq,cbind(i,summary(res)$r.squared))
2180 | }
2181 | 
2182 | Rsq$V2 = as.numeric(as.character(Rsq$V2))
2183 | Rsq$i = factor(Rsq$i, levels = c('ER','PR','HER2','Ecadherin','KI67'))
2184 | 
2185 | 
2186 | pdf('/home/jana/Desktop/R_dat/Rsq.pdf')
2187 | ggplot(Rsq,aes(y = V2,x = i,fill = i))+
2188 |   geom_bar(stat = "identity")+
2189 |   scale_fill_manual(values = mycols_basel_meta[c(23,24,20,22,16)])+
2190 |   ylim(0,1)+
2191 |   xlab('Marker')+
2192 |   ylab('R squared')
2193 | dev.off()
2194 | ```
2195 | 
2196 | #Read in Single-cell IHC data for comparison with IMC quant
2197 | ```{r}
2198 | #Read in positive count metadata from clinical assessment
2199 | Sample_metadat_zuri = fread('/home/ubuntu/tmp/server_homes/janaf/Data/2019/Data_publication/IHC_IMC/Meta_IHCcounts.csv',header = T)
2200 | 
2201 | #Read in SC quant IHC data from QuPath
2202 | Ki67pos <- fread(paste(filepath,"/QuPATH/TMACoreSingleCell_Ki-67_Pos.txt",sep=""),header = T)
2203 | Ki67pos[,marker := 'Ki67pos']
2204 | Ki67pos[,all := 'Ki67']
2205 | Ki67pos = Ki67pos[,c('Name','marker','all')]
2206 | 
2207 | Ki67neg <- fread(paste(filepath,"/QuPATH/TMACoreSingleCell_Ki-67_Neg.txt",sep=""),header = T)
2208 | Ki67neg[,marker := 'Ki67neg']
2209 | Ki67neg[,all := 'Ki67']
2210 | Ki67neg = Ki67neg[,c('Name','marker','all')]
2211 | 
2212 | PRpos <- fread(paste(filepath,"/QuPATH/TMACoreSingleCell_PR_Pos.txt",sep=""),header = T)
2213 | PRpos[,marker := 'PRpos']
2214 | PRpos[,all := 'PR']
2215 | PRpos = PRpos[,c('Name','marker','all')]
2216 | 
2217 | PRneg <- fread(paste(filepath,"/QuPATH/TMACoreSingleCell_PR_Neg.txt",sep=""),header = T)
2218 | PRneg[,marker := 'PRneg']
2219 | PRneg[,all := 'PR']
2220 | PRneg = PRneg[,c('Name','marker','all')]
2221 | 
2222 | ERpos <- fread(paste(filepath,"/QuPATH/TMACoreSingleCell_ER_Pos.txt",sep=""),header = T)
2223 | ERpos[,marker := 'ERpos']
2224 | ERpos[,all := 'ER']
2225 | ERpos = ERpos[,c('Name','marker','all')]
2226 | 
2227 | ERneg <- fread(paste(filepath,"/QuPATH/TMACoreSingleCell_ER_Neg.txt",sep=""),header = T)
2228 | ERneg[,marker := 'ERneg']
2229 | ERneg[,all := 'ER']
2230 | ERneg = ERneg[,c('Name','marker','all')]
2231 | 
2232 | #Format into one table and calculate measures
2233 | IHCSC = rbind(Ki67pos,Ki67neg,PRpos,PRneg,ERpos,ERneg)
2234 | IHCSC[,'Name']<- gsub(' - PathCellObject', '',IHCSC$Name)
2235 | IHCSC[,IHC_count := .N ,by = c('Name','marker')]
2236 | IHCSC[,tot_cells := .N, by = c('Name','all')]
2237 | IHCSC[,frac := IHC_count/tot_cells]
2238 | IHCSC = unique(IHCSC)
2239 | 
2240 | #Kick out controls
2241 | IHCSC= IHCSC[!Name %in% unique(IHCSC$Name[!IHCSC$Name %in% IHCvsIMC_map$IHC_location]),]
2242 | IHCvsIMC_map= IHCvsIMC_map[!IHC_location %in% unique(IHCvsIMC_map$IHC_location[!IHCvsIMC_map$IHC_location %in% IHCSC$Name]),]
2243 | 
2244 | #Map names of IMC and IHC cores
2245 | IHCSC$IMCname = mapvalues(IHCSC$Name, unique(IHCSC$Name), IHCvsIMC_map[match(unique(IHCSC$Name),IHCvsIMC_map$IHC_location),IMC_location], warn_missing = TRUE)
2246 | ```
2247 | 
2248 | #IHC and IMC positive single-cell quantitfication comparison
2249 | ```{r}
2250 | 
2251 | #Take single-cell IMC data and dcast
2252 | wide = dcast.data.table(dat,formula = 'core + id ~ channel', value.var = 'c_counts')
2253 | 
2254 | #Look at distributions of markers of interest to decide on positive cell threshold
2255 | ggplot(wide, aes(x=`1441101Er168Di Ki67`, y=`234832Lu175Di panCyto` ))+
2256 |   geom_point(aes(alpha=1/10, size=0.01))+
2257 |   geom_density2d(colour = "red")
2258 | 
2259 | ggplot(wide, aes(x=`1441101Er168Di Ki67`))+
2260 |   geom_density()+
2261 |   xlim(c(0,2))
2262 | 
2263 | #Count positive cells per core for Ki67 acoording to set threshold
2264 | wide[,IMC_pos_count := length(which(`1441101Er168Di Ki67` > 0.5 )),by = 'core'] #/.N   for fraction
2265 | sub = unique(wide[,c('core','IMC_pos_count')])
2266 | sub$IMCname = unlist(lapply(strsplit(sub$core,'_'),function(x){x[length(x)]}))
2267 | 
2268 | #Merge with IHC data
2269 | comb = merge(sub,unique(IHCSC[marker == 'Ki67pos',]), by = 'IMCname')
2270 | comb = merge(comb, Sample_metadata, by = 'core')
2271 | save = comb[,c('core','IMC_pos_count','IHC_count','marker')]
2272 | 
2273 | #Plot correlation of amount of positively counted single cells for IMC and IHC
2274 | my.formula <- y ~ x
2275 | ggplot(comb, aes(x=IMC_pos_count, y=IHC_count ))+
2276 |   geom_point(aes(alpha=1, size=1))+ #color=as.factor(GRADE),
2277 |   geom_smooth(method = "lm")+
2278 |   stat_poly_eq(formula = my.formula, 
2279 |                 aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
2280 |                 parse = TRUE) + theme_bw()
2281 | 
2282 | 
2283 | #ER
2284 | ggplot(wide, aes(x=`112475Gd156Di Estroge`, y=`234832Lu175Di panCyto` ))+
2285 |   geom_point(aes(alpha=1/10, size=0.01))+
2286 |   geom_density2d(colour = "red")
2287 | ggplot(wide, aes(x=`112475Gd156Di Estroge`))+
2288 |   geom_density()+
2289 |   xlim(c(0,2))
2290 | 
2291 | #Count positive cells per core for ER acoording to set threshold
2292 | wide[,IMC_pos_count := length(which(`112475Gd156Di Estroge` > 1 )),by = 'core'] #/.N   for fraction
2293 | sub = unique(wide[,c('core','IMC_pos_count')])
2294 | sub$IMCname = unlist(lapply(strsplit(sub$core,'_'),function(x){x[length(x)]}))
2295 | 
2296 | #Merge with IHC data
2297 | comb = merge(sub,unique(IHCSC[marker == 'ERpos',]), by = 'IMCname')
2298 | comb = merge(comb,Sample_metadata[,c('core','PID')], by = 'core')
2299 | comb = merge(comb,na.omit(Sample_metadat_zuri[,c('PID','Erperc')]), by = 'PID',all.x = T)
2300 | save = rbind(save,comb[,c('core','IMC_pos_count','IHC_count','marker')])
2301 | 
2302 | my.formula <- y ~ x
2303 | ggplot(comb, aes(x=IMC_pos_count, y=IHC_count ))+
2304 |   geom_point(aes(alpha=1,color=Erperc, size=1))+ #color=as.factor(GRADE),
2305 |   geom_smooth(method = "lm")+
2306 |   scale_color_viridis(option="viridis")+
2307 |   stat_poly_eq(formula = my.formula, 
2308 |                 aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
2309 |                 parse = TRUE) +
2310 |   theme_bw()
2311 | 
2312 | 
2313 | 
2314 | #PR
2315 | ggplot(wide, aes(x=`312878Gd158Di Progest`, y=`234832Lu175Di panCyto` ))+
2316 |   geom_point(aes(alpha=1/10, size=0.01))+
2317 |   geom_density2d(colour = "red")
2318 | 
2319 | wide[,IMC_pos_count := length(which(`312878Gd158Di Progest` > 1 )),by = 'core'] 
2320 | sub = unique(wide[,c('core','IMC_pos_count')])
2321 | sub$IMCname = unlist(lapply(strsplit(sub$core,'_'),function(x){x[length(x)]}))
2322 | 
2323 | comb = merge(sub,unique(IHCSC[marker == 'PRpos',]), by = 'IMCname')
2324 | comb = merge(comb,Sample_metadata[,c('core','PID')], by = 'core')
2325 | comb = merge(comb,na.omit(Sample_metadat_zuri[,c('PID','Prperc')]), by = 'PID',all.x = T)
2326 | save = rbind(save,comb[,c('core','IMC_pos_count','IHC_count','marker')])
2327 | 
2328 | my.formula <- y ~ x
2329 | 
2330 | ggplot(comb, aes(x=IMC_pos_count, y=IHC_count ))+
2331 |   geom_point(aes(alpha=1, color=Prperc, size=1))+ #color=as.factor(GRADE),
2332 |   geom_smooth(method = "lm")+
2333 |   scale_color_viridis(option="viridis")+
2334 |   stat_poly_eq(formula = my.formula, 
2335 |                 aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
2336 |                 parse = TRUE) +
2337 |   theme_bw()
2338 | 
2339 | 
2340 | #E-Cadherin
2341 | Ecadh <- fread(paste(filepath,"/QuPATH/TMACoreSingleCell_Ecadherin.txt",sep=""),header = T)
2342 | Ecadh[,marker := 'Ecadh']
2343 | Ecadh = Ecadh[,c('Name','marker','Cell: DAB OD mean')]
2344 | 
2345 | ggplot(Ecadh, aes(x=`Cell: DAB OD mean`)) + 
2346 |     geom_density(alpha=.2, fill="#FF6666")
2347 | 
2348 | #Set threshold for positive IHC cells
2349 | Ecadh[,IHC_count := length(which(`Cell: DAB OD mean` > 0.25 )),by = 'Name']
2350 | Ecadh[,'Name']<- gsub(' - PathCellObject', '',Ecadh$Name)
2351 | Ecadh = unique(Ecadh[,c('Name','IHC_count')])
2352 | 
2353 | #Map core names
2354 | Ecadh= Ecadh[!Name %in% unique(Ecadh$Name[!Ecadh$Name %in% IHCvsIMC_map$IHC_location]),]
2355 | IHCvsIMC_map= IHCvsIMC_map[!IHC_location %in% unique(IHCvsIMC_map$IHC_location[!IHCvsIMC_map$IHC_location %in% Ecadh$Name]),]
2356 | Ecadh$IMCname = mapvalues(Ecadh$Name, unique(Ecadh$Name), IHCvsIMC_map[match(unique(Ecadh$Name),IHCvsIMC_map$IHC_location),IMC_location], warn_missing = TRUE)
2357 | 
2358 | #Set threshold for positive IMC cells
2359 | ggplot(wide, aes(x=`1031747Er167Di ECadhe`, y=`234832Lu175Di panCyto` ))+
2360 |   geom_point(aes(alpha=1/10, size=0.01))+
2361 |   geom_density2d(colour = "red")
2362 | ggplot(wide, aes(x=`1031747Er167Di ECadhe`))+
2363 |   geom_density()
2364 | 
2365 | wide[,IMC_pos_count := length(which(`1031747Er167Di ECadhe` > 3 )),by = 'core']
2366 | sub = unique(wide[,c('core','IMC_pos_count')])
2367 | sub$IMCname = unlist(lapply(strsplit(sub$core,'_'),function(x){x[length(x)]}))
2368 | 
2369 | comb = merge(sub,unique(Ecadh), by = 'IMCname')
2370 | comb = merge(comb,Sample_metadata[,c('core','PID')], by = 'core')
2371 | comb = merge(comb,na.omit(Sample_metadat_zuri[,c('PID','Ecadh')]), by = 'PID',all.x = T)
2372 | comb$marker = 'Ecadh'
2373 | save = rbind(save,comb[,c('core','IMC_pos_count','IHC_count','marker')])
2374 | 
2375 | my.formula <- y ~ x
2376 | 
2377 | ggplot(comb, aes(x=IMC_pos_count, y=IHC_count ))+
2378 |   geom_point(aes(alpha=1,color=Ecadh, size=1))+ #color=as.factor(GRADE),
2379 |   geom_smooth(method = "lm")+
2380 |   scale_color_viridis(option="viridis")+
2381 |   stat_poly_eq(formula = my.formula, 
2382 |                 aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
2383 |                 parse = TRUE) +theme_bw()
2384 | 
2385 | 
2386 | #HER2
2387 | HER2 <- fread(paste(filepath,"/QuPATH/ZT208_39 Detections.txt",sep=""),header = T)
2388 | HER2[,marker := 'HER2']
2389 | HER2 = HER2[,c('Name','marker','Cell: DAB OD mean')]
2390 | 
2391 | ggplot(HER2, aes(x=`Cell: DAB OD mean`)) + 
2392 |     geom_density(alpha=.2, fill="#FF6666")
2393 | 
2394 | #Set threshold for positive IHC cells
2395 | HER2[,IHC_count := length(which(`Cell: DAB OD mean` > 0.5 )),by = 'Name'] #/.N   for fraction
2396 | HER2[,'Name']<- gsub(' - PathCellObject', '',HER2$Name)
2397 | HER2 = unique(HER2[,c('Name','IHC_count')])
2398 | 
2399 | #Map core names
2400 | HER2= HER2[!Name %in% unique(HER2$Name[!HER2$Name %in% IHCvsIMC_map$IHC_location]),]
2401 | IHCvsIMC_map= IHCvsIMC_map[!IHC_location %in% unique(IHCvsIMC_map$IHC_location[!IHCvsIMC_map$IHC_location %in% HER2$Name]),]
2402 | HER2$IMCname = mapvalues(HER2$Name, unique(HER2$Name), IHCvsIMC_map[match(unique(HER2$Name),IHCvsIMC_map$IHC_location),IMC_location], warn_missing = TRUE)
2403 | 
2404 | #Set threshold for positive IMC cells
2405 | ggplot(wide, aes(x=`201487Eu151Di cerbB`, y=`234832Lu175Di panCyto` ))+
2406 |   geom_point(aes(alpha=1/10, size=0.01))+
2407 |   geom_density2d(colour = "red")
2408 | ggplot(wide, aes(x=`201487Eu151Di cerbB`))+
2409 |   geom_density()
2410 | 
2411 | wide[,IMC_pos_count := length(which(`201487Eu151Di cerbB` > 3 )),by = 'core'] 
2412 | sub = unique(wide[,c('core','IMC_pos_count')])
2413 | sub$IMCname = unlist(lapply(strsplit(sub$core,'_'),function(x){x[length(x)]}))
2414 | 
2415 | comb = merge(sub,unique(HER2), by = 'IMCname')
2416 | comb = merge(comb,Sample_metadata[,c('core','PID')], by = 'core')
2417 | comb = merge(comb,na.omit(Sample_metadat_zuri[,c('PID','HER2')]), by = 'PID',all.x = T)
2418 | comb$marker = 'HER2'
2419 | save = rbind(save,comb[,c('core','IMC_pos_count','IHC_count','marker')])
2420 | 
2421 | my.formula <- y ~ x
2422 | 
2423 | ggplot(comb, aes(x=IMC_pos_count, y=IHC_count ))+
2424 |   geom_point(aes(alpha=1,color=HER2, size=1))+ #color=as.factor(GRADE),
2425 |   geom_smooth(method = "lm")+
2426 |   scale_color_viridis(option="viridis")+
2427 |   stat_poly_eq(formula = my.formula, 
2428 |                 aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
2429 |                 parse = TRUE) +
2430 |   theme_bw()
2431 | 
2432 | ```
2433 | 
2434 | #Difference in pos counts
2435 | ```{r}
2436 | 
2437 | #On top of each other
2438 | my.formula <- y ~ x
2439 | 
2440 | save$marker = factor(save$marker, levels = c('ERpos','PRpos','HER2','Ecadh','Ki67pos'))
2441 | pdf('/home/jana/Desktop/R_dat/counts.pdf')
2442 | ggplot(save, aes_string(x="IHC_count", y="IMC_pos_count" ,color = "marker"))+
2443 |   geom_point(alpha=0.3,size=2)+
2444 |   scale_color_manual(values = mycols_basel_meta[c(23,24,20,22,16)])+
2445 |   geom_smooth(method = "lm")+
2446 |   stat_poly_eq(formula = my.formula, 
2447 |                 aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
2448 |                 parse = TRUE) +theme_bw()
2449 | dev.off()
2450 | 
2451 | #Differences in pos cell counts
2452 | save[,diff:= abs(IMC_pos_count - IHC_count)]
2453 | 
2454 | pdf('/home/jana/Desktop/R_dat/diff_counts.pdf')
2455 | ggplot(save,aes(y = diff,x = marker))+
2456 |   geom_violin()
2457 | dev.off()
2458 | 
2459 | 
2460 | ```
2461 | 
2462 | #B and A plots for positive cells only across cores of same patient
2463 | ```{r}
2464 | 
2465 | #ER
2466 | datER = dat[channel == '112475Gd156Di Estroge',]
2467 | datER = unique(datER)
2468 | datER[,ncells := .N,by = 'core']
2469 | datER = merge(datER,unique(Sample_metadata[,c('core','PID')]),by = 'core')
2470 | 
2471 | ggplot(datER,aes(x = c_counts_norm))+
2472 |   geom_density()
2473 | 
2474 | #only positive cells
2475 | datER = datER[c_counts_norm > 0.25,]
2476 | datER[,ncells_pos := .N,by = 'core']
2477 | 
2478 | 
2479 | datER[,mean_core := mean(c_counts_norm),by = 'core']
2480 | datER = unique(datER[,c('PID','core','mean_core','ncells','ncells_pos')])
2481 | 
2482 | #only use cores with at least 200 cells
2483 | datER = datER[ncells > 200,]
2484 | 
2485 | 
2486 | datER[,mean_patient := mean(mean_core), by = c('PID')]
2487 | datER[,diff_patient := mean_patient - mean_core]
2488 | datER[,overall_mean := mean(diff_patient)]
2489 | datER[,overall_std := pracma::std(diff_patient)]
2490 | 
2491 | 
2492 | prcnt = (length(which(datER$diff_patient > datER$overall_mean + 1.96 * datER$overall_std)) + length(which(datER$diff_patient < datER$overall_mean - 1.96 * datER$overall_std)))/nrow(datER)
2493 | 
2494 | 
2495 | g = ggplot(datER,aes(x = mean_patient,y = diff_patient))+
2496 |   geom_point()+
2497 |   geom_hline(yintercept= datER$overall_mean, linetype="dashed", color = "red")+
2498 |   geom_hline(yintercept= datER$overall_mean + 1.96 * datER$overall_std, linetype="dashed", color = "blue")+
2499 |   geom_hline(yintercept= datER$overall_mean - 1.96 * datER$overall_std, linetype="dashed", color = "blue")+
2500 |   ylim(-0.4,0.4)+
2501 |   theme(axis.text.x = element_text(angle = 90, hjust = 1))+
2502 |     ggtitle(paste0('ER_%',as.character(prcnt)))
2503 | 
2504 | 
2505 | 
2506 | 
2507 | pdf('/home/jana/Desktop/R_dat/ERpos_200.pdf', width = 10, height = 10)
2508 | g
2509 | dev.off()
2510 | 
2511 | 
2512 | #PR
2513 | datER = dat[channel == '312878Gd158Di Progest',]
2514 | datER = unique(datER)
2515 | datER[,ncells := .N,by = 'core']
2516 | datER = merge(datER,unique(Sample_metadata[,c('core','PID')]),by = 'core')
2517 | 
2518 | ggplot(datER,aes(x = c_counts_norm))+
2519 |   geom_density()
2520 | 
2521 | #only pos
2522 | datER = datER[c_counts_norm > 0.29,]
2523 | datER[,ncells_pos := .N,by = 'core']
2524 | 
2525 | 
2526 | datER[,mean_core := mean(c_counts_norm),by = 'core']
2527 | datER = unique(datER[,c('PID','core','mean_core','ncells','ncells_pos')])
2528 | 
2529 | #only use cores with at least 200 cells
2530 | datER = datER[ncells > 200,]
2531 | 
2532 | datER[,mean_patient := mean(mean_core), by = c('patientcode')]
2533 | datER[,diff_patient := mean_patient - mean_core]
2534 | datER[,overall_mean := mean(diff_patient)]
2535 | datER[,overall_std := pracma::std(diff_patient)]
2536 | 
2537 | 
2538 | prcnt = (length(which(datER$diff_patient > datER$overall_mean + 1.96 * datER$overall_std)) + length(which(datER$diff_patient < datER$overall_mean - 1.96 * datER$overall_std)))/nrow(datER)
2539 | 
2540 | g = ggplot(datER,aes(x = mean_patient,y = diff_patient))+
2541 |   geom_point()+
2542 |   geom_hline(yintercept= datER$overall_mean, linetype="dashed", color = "red")+
2543 |   geom_hline(yintercept= datER$overall_mean + 1.96 * datER$overall_std, linetype="dashed", color = "blue")+
2544 |   geom_hline(yintercept= datER$overall_mean - 1.96 * datER$overall_std, linetype="dashed", color = "blue")+
2545 |   ylim(-0.4,0.4)+
2546 |   theme(axis.text.x = element_text(angle = 90, hjust = 1))+
2547 |     ggtitle(paste0('PR_%',as.character(prcnt)))
2548 | 
2549 | 
2550 | 
2551 | 
2552 | pdf('/home/jana/Desktop/R_dat/PRpos_200.pdf', width = 10, height = 10)
2553 | g
2554 | dev.off()
2555 | 
2556 | 
2557 | #HER2
2558 | datER = dat[channel == '201487Eu151Di cerbB',]
2559 | datER = unique(datER)
2560 | datER[,ncells := .N,by = 'core']
2561 | datER = merge(datER,unique(Sample_metadata[,c('core','PID')]),by = 'core')
2562 | 
2563 | ggplot(datER,aes(x = c_counts_norm))+
2564 |   geom_density()
2565 | 
2566 | #only pos
2567 | datER = datER[c_counts_norm > 0.25,]
2568 | datER[,ncells_pos := .N,by = 'core']
2569 | 
2570 | 
2571 | datER[,mean_core := mean(c_counts_norm),by = 'core']
2572 | datER = unique(datER[,c('PID','core','mean_core','ncells','ncells_pos')])
2573 | 
2574 | #only use cores with at least 200 cells
2575 | datER = datER[ncells > 200,]
2576 | 
2577 | datER[,mean_patient := mean(mean_core), by = c('PID')]
2578 | datER[,diff_patient := mean_patient - mean_core]
2579 | datER[,overall_mean := mean(diff_patient)]
2580 | datER[,overall_std := pracma::std(diff_patient)]
2581 | 
2582 | prcnt = (length(which(datER$diff_patient > datER$overall_mean + 1.96 * datER$overall_std)) + length(which(datER$diff_patient < datER$overall_mean - 1.96 * datER$overall_std)))/nrow(datER)
2583 | 
2584 | 
2585 | g = ggplot(datER,aes(x = mean_patient,y = diff_patient))+
2586 |   geom_point()+
2587 |   geom_hline(yintercept= datER$overall_mean, linetype="dashed", color = "red")+
2588 |   geom_hline(yintercept= datER$overall_mean + 1.96 * datER$overall_std, linetype="dashed", color = "blue")+
2589 |   geom_hline(yintercept= datER$overall_mean - 1.96 * datER$overall_std, linetype="dashed", color = "blue")+
2590 |   ylim(-0.4,0.4)+
2591 |   theme(axis.text.x = element_text(angle = 90, hjust = 1))+
2592 |     ggtitle(paste0('HER2_%',as.character(prcnt)))
2593 | 
2594 | 
2595 | 
2596 | pdf('/home/jana/Desktop/R_dat/HER2pos_200.pdf', width = 10, height = 10)
2597 | g
2598 | dev.off()
2599 | 
2600 | 
2601 | 
2602 | #Ki67
2603 | datER = dat[channel == '1441101Er168Di Ki67',]
2604 | datER = unique(datER)
2605 | datER[,ncells := .N,by = 'core']
2606 | datER = merge(datER,unique(Sample_metadata[,c('core','PID')]),by = 'core')
2607 | 
2608 | ggplot(datER,aes(x = c_counts_norm))+
2609 |   geom_density()
2610 | 
2611 | #only pos
2612 | datER = datER[c_counts_norm > 0.09,]
2613 | datER[,ncells_pos := .N,by = 'core']
2614 | 
2615 | 
2616 | datER[,mean_core := mean(c_counts_norm),by = 'core']
2617 | datER = unique(datER[,c('PID','core','mean_core','ncells','ncells_pos')])
2618 | 
2619 | #only use cores with at least 200 cells
2620 | datER = datER[ncells > 200,]
2621 | 
2622 | datER[,mean_patient := mean(mean_core), by = c('PID')]
2623 | datER[,diff_patient := mean_patient - mean_core]
2624 | datER[,overall_mean := mean(diff_patient)]
2625 | datER[,overall_std := pracma::std(diff_patient)]
2626 | 
2627 | prcnt = (length(which(datER$diff_patient > datER$overall_mean + 1.96 * datER$overall_std)) + length(which(datER$diff_patient < datER$overall_mean - 1.96 * datER$overall_std)))/nrow(datER)
2628 | 
2629 | g = ggplot(datER,aes(x = mean_patient,y = diff_patient))+
2630 |   geom_point()+
2631 |   geom_hline(yintercept= datER$overall_mean, linetype="dashed", color = "red")+
2632 |   geom_hline(yintercept= datER$overall_mean + 1.96 * datER$overall_std, linetype="dashed", color = "blue")+
2633 |   geom_hline(yintercept= datER$overall_mean - 1.96 * datER$overall_std, linetype="dashed", color = "blue")+
2634 |   ylim(-0.4,0.4)+
2635 |   theme(axis.text.x = element_text(angle = 90, hjust = 1))+
2636 |     ggtitle(paste0('Ki67_%',as.character(prcnt)))
2637 | 
2638 | 
2639 | 
2640 | pdf('/home/jana/Desktop/R_dat/Ki67pos_200.pdf', width = 10, height = 10)
2641 | g
2642 | dev.off()
2643 | 
2644 | 
2645 | 
2646 | #Ecadh
2647 | datER = dat[channel == '1031747Er167Di ECadhe',]
2648 | datER = unique(datER)
2649 | datER[,ncells := .N,by = 'core']
2650 | datER = merge(datER,unique(Sample_metadata[,c('core','PID')]),by = 'core')
2651 | 
2652 | ggplot(datER,aes(x = c_counts_norm))+
2653 |   geom_density()
2654 | 
2655 | #only pos
2656 | datER = datER[c_counts_norm > 0.75,]
2657 | datER[,ncells_pos := .N,by = 'core']
2658 | 
2659 | 
2660 | datER[,mean_core := mean(c_counts_norm),by = 'core']
2661 | datER = unique(datER[,c('PID','core','mean_core','ncells','ncells_pos')])
2662 | 
2663 | #only use cores with at least 200 cells
2664 | datER = datER[ncells > 200,]
2665 | 
2666 | datER[,mean_patient := mean(mean_core), by = c('PID')]
2667 | datER[,diff_patient := mean_patient - mean_core]
2668 | datER[,overall_mean := mean(diff_patient)]
2669 | datER[,overall_std := pracma::std(diff_patient)]
2670 | 
2671 | prcnt = (length(which(datER$diff_patient > datER$overall_mean + 1.96 * datER$overall_std)) + length(which(datER$diff_patient < datER$overall_mean - 1.96 * datER$overall_std)))/nrow(datER)
2672 | 
2673 | g = ggplot(datER,aes(x = mean_patient,y = diff_patient))+
2674 |   geom_point()+
2675 |   geom_hline(yintercept= datER$overall_mean, linetype="dashed", color = "red")+
2676 |   geom_hline(yintercept= datER$overall_mean + 1.96 * datER$overall_std, linetype="dashed", color = "blue")+
2677 |   geom_hline(yintercept= datER$overall_mean - 1.96 * datER$overall_std, linetype="dashed", color = "blue")+
2678 |   ylim(-0.4,0.4)+
2679 |   theme(axis.text.x = element_text(angle = 90, hjust = 1))+
2680 |     ggtitle(paste0('Ecadh_%',as.character(prcnt)))
2681 | 
2682 | 
2683 | pdf('/home/jana/Desktop/R_dat/Ecadhpos_200.pdf', width = 10, height = 10)
2684 | g
2685 | dev.off()
2686 | 
2687 | 
2688 | ```
2689 | 
2690 | 


--------------------------------------------------------------------------------
/R/spillover_compensation/Spillover_compensation.Rmd:
--------------------------------------------------------------------------------
  1 | ```{r Libraries}
  2 | 
  3 | library('data.table')
  4 | library('RColorBrewer')
  5 | library(dplyr)
  6 | library(dtplyr)
  7 | library(ggplot2)
  8 | library(tiff)
  9 | library(EBImage)
 10 | library(fields)
 11 | library(raster)
 12 | library(viridis)
 13 | 
 14 | library(CATALYST)
 15 | library(raster)
 16 | library(scales)
 17 | ```
 18 | 
 19 | 
 20 | ```{r}
 21 | # input files: paths to your input and output folder
 22 | fn_cells = '/home/jana/Desktop/R_dat/curr_single_cell.csv'
 23 | fn_pannel = '/home/jana/Desktop/bb_volume_jana/Data/2018/Clinical_paper/SpilloverCorrected/Data_for_submission/20171218_pannel_BaselZuri.csv'
 24 | fn_sm = '/home/jana/Desktop/bb_volume_jana/Data/2018/Clinical_paper/SpilloverCorrected/Data_for_submission/spillover_matrix_15082018.csv'
 25 | 
 26 | 
 27 | # the random seed
 28 | rand_seed = 1234
 29 | # Should the script use only the cells subsampled at the tsne step?
 30 | subsampled_cells_only = F
 31 | 
 32 | ```
 33 | 
 34 | 
 35 | ```{r}
 36 | getInfoFromString<- function(name,sep='_',strPos=2,censorStr='.fcs'){
 37 |   tp <- gsub(censorStr,'',strsplit(name,sep)[[1]][strPos])
 38 |   tp <- paste(tp,collapse = sep)
 39 |   return(tp)
 40 | }
 41 | 
 42 | ```
 43 | 
 44 | ```{r}
 45 | #' Parses a list of strings and gets the corresponding information
 46 | #'
 47 | #' See getInfoFromString for more information
 48 | #'
 49 | #' @export
 50 | getInfoFromFileList <- function(fileList,sep='_',strPos=2,censorStr='.fcs'){
 51 |   condDict = sapply(fileList,function(file){getInfoFromString(file,sep,strPos,censorStr)})
 52 |   names(condDict) <- fileList
 53 |   return(condDict)
 54 | }
 55 | ```
 56 | 
 57 | ```{r}
 58 | ### load cell data (cells rescaled expanded)
 59 | cells <- fread(fn_cells,header = T)
 60 | 
 61 | measurevar = colnames(cells)[grep('Cell_', colnames(cells), ignore.case=TRUE)]
 62 | cells_rest = data.frame(cells)
 63 | rest_columns = cells_rest[,which(!(colnames(cells)%in% measurevar))]
 64 | 
 65 | split_var = strsplit(measurevar,'_')
 66 | names_dat = unlist(lapply(split_var,function(x){x[length(x)]}))
 67 | 
 68 | dat_panel <- fread(fn_pannel,header = T)
 69 | colnames(dat_panel)[2] = 'MetalTag'
 70 | 
 71 | setkey(dat_panel,MetalTag)
 72 | 
 73 | 
 74 | dat = data.frame(cells)
 75 | dat = dat[, which(colnames(dat) %in% measurevar)]
 76 | colnames(dat) = names_dat
 77 | names = paste(colnames(dat),'Di')
 78 | names = gsub(" ", "", names, fixed = TRUE)
 79 | colnames(dat)<-names
 80 | 
 81 | ```
 82 | 
 83 | ```{r}
 84 | 
 85 | compensate_dat <- function(indat,
 86 |                            sm,
 87 |                            channel_dict=NULL,
 88 |                            channel_col='channel',
 89 |                            meta_cols = c('ImageId', 'CellId'),
 90 |                            custom_isotope_list=custom_isotope_list){
 91 |   
 92 |   
 93 |   tdat <- indat
 94 |   compdat = data.matrix(tdat)
 95 |   compdat = tdat %>%
 96 |     dplyr::select(-one_of(meta_cols)) %>%
 97 |     data.matrix()
 98 |   names_orig = colnames(compdat)
 99 |   
100 |   compdat_after =   flowCore::flowFrame(compdat) %>%
101 |     compCytof(sm , method='nnls', isotope_list=custom_isotope_list) %>%
102 |     exprs()
103 | 
104 |   
105 |   return(compdat_after)
106 | }
107 | ```
108 | 
109 | ```{r}
110 | 
111 | #Adjust spillover matrix
112 | sm = as.matrix(read.csv(fn_sm, row.names = 1))
113 | 
114 | #Run spillover correction
115 | channel_col = 'Target'
116 | metal_col = 'MetalTag'
117 | dat_panel = data.frame(dat_panel)
118 | channel_dict = dat_panel[,metal_col]
119 | channel_dict = paste(channel_dict,'Di')
120 | channel_dict = gsub(" ","",channel_dict)
121 | names(channel_dict) = dat_panel[,channel_col]
122 | 
123 | 
124 | custom_isotope_list <- c(CATALYST::isotope_list, list(BCKG=190, ArAr=80))
125 | dat_comp = compensate_dat(dat, sm, channel_dict = channel_dict, custom_isotope_list=custom_isotope_list)
126 | 
127 | #Adjust output data back to original histoCAT structure
128 | final_dat = data.frame(dat_comp)
129 | final_dat = cbind(rest_columns[,1:2],final_dat)
130 | 
131 | colnames(final_dat) <-c("ImageId","CellId",measurevar)
132 | 
133 | fwrite(final_dat,'/home/jana/Desktop/compensated_correct_basel.csv')
134 | ```
135 | 
136 | 
137 | 


--------------------------------------------------------------------------------
/R/spillover_compensation/Spillover_matrix.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Make spillover matrix for compensation in IMC image"
  3 | output: html_notebook
  4 | ---
  5 | 
  6 | ```{r Libraries}
  7 | library(CATALYST)
  8 | library("EBImage")
  9 | library(data.table)
 10 | library(fields)
 11 | library(ggplot2)
 12 | library(flowCore)
 13 | library(plyr)
 14 | library(dplyr)
 15 | library(dtplyr)
 16 | library(MASS)
 17 | library(viridis)
 18 | library(RColorBrewer)
 19 | library(raster)
 20 | library(stringr)
 21 | 
 22 | ```
 23 | 
 24 | 
 25 | Point to the singlestain folders
 26 | ```{r Setup}
 27 | fol_ss1 = c('/home/jana/Desktop/compensation_matrix/2017-09-22/')
 28 | fol_ss2 = c('/home/jana/Desktop/compensation_matrix/2017-09-12/')
 29 | fol_ss3 = c('/home/jana/Desktop/compensation_matrix/2017-11-15/')
 30 | fol_out = '/home/jana/'
 31 | prefix = '2018_05_03'
 32 | 
 33 | ```
 34 | 
 35 | 
 36 | Load single stains
 37 | ```{r Load from Folders or Zips}
 38 | 
 39 | load_ss_zip <- function(fol_ss3){
 40 |   fns_inzip <- unzip(fol_ss3, list=T) %>%
 41 |             do(as.data.frame(.$Name[ endsWith(.$Name, '.txt')]))
 42 | 
 43 |   fns_inzip = fns_inzip[,1]
 44 |   imgs.ss = lapply(fns_inzip, function(x){
 45 |               fread(paste0('unzip -qp ', fol_ss3, ' ', gsub(" ", '\\\\ ', x)))
 46 |                 })
 47 |   
 48 |   names(imgs.ss) <- fns_inzip
 49 | return(imgs.ss)
 50 | }
 51 | 
 52 | load_ss_fol <- function(fol_ss3){
 53 |   fns_txt <- list.files(fol_ss3,pattern = '*.[0-9]+.txt$') 
 54 |   imgs.ss <- lapply(fns_txt, function(x){
 55 |               fread(file.path(fol_ss3, x))})
 56 |   names(imgs.ss) <- fns_txt
 57 |   return(imgs.ss)
 58 | }
 59 | list_img_ss <-lapply(fol_ss3, load_ss_fol)
 60 | names(list_img_ss) <- fol_ss3
 61 | #imgs.ss = do.call(list, unlist(imgs.ss, recursive=FALSE))
 62 | ```
 63 | 
 64 | Fix the names within the text files to by CATALYST compatible
 65 | ```{r}
 66 | fixnames <- function(imgdat){
 67 |     imgdat = copy(imgdat)
 68 |     dat =imgdat
 69 |     
 70 |     colnames(dat) = sapply(colnames(dat), function(x) gsub('.*\\(', '',x))
 71 |     colnames(dat) = sapply(colnames(dat), function(x) gsub('\\)', '',x))  
 72 |     return(dat)
 73 | }
 74 | 
 75 | imglist2dat <- function(datlist){
 76 |   imgdat <- rbindlist(lapply(datlist, fixnames), fill=T, idcol = 'file')
 77 |   #imgdat <- imgdat
 78 |   return(imgdat)
 79 | }
 80 | 
 81 | dats_raw = lapply(list_img_ss, imglist2dat)
 82 | ```
 83 | 
 84 | 
 85 | Extract BC masses from the filenames
 86 | ```{r Get bc masses}
 87 | 
 88 | for (dat in dats_raw){
 89 |   dat[, metal:= strsplit(.BY[[1]], '_')[[1]][3],by=file]
 90 |   dat[, mass:= as.numeric(str_extract_all(.BY[[1]], "[0-9]+")[[1]]),by=metal]
 91 | }
 92 | ```
 93 | 
 94 | Look at the data
 95 | ```{r}
 96 | summarize_dat <- function(dat){
 97 |   tdat = dat %>%  
 98 |     dplyr::select(-c(Start_push, End_push, Pushes_duration,   X , Y  ,  Z)) %>%
 99 |   melt.data.table(id.vars = c('metal', 'mass','file')) %>%
100 |   do(data.table(.)[, list(med=median(value)), by=.(variable, metal, mass, file)]) 
101 |   return(tdat)
102 |   
103 | }
104 | 
105 | dats_raw_sum = rbindlist(lapply(dats_raw, summarize_dat),idcol = T)
106 | ```
107 | 
108 | Look at the data
109 | ```{r fig.width=17, fig.height=10}
110 | dats_raw_sum %>%
111 |   ggplot(aes(x=mass, y=med, color=.id))+
112 |   facet_wrap(~file+metal, scales = 'free_y')+
113 |   geom_label(aes(label=variable))
114 |   #geom_jitter(height = 0)
115 |   
116 | ```
117 | 
118 | Depending on the intensity it could be worth to bin some (consecuteive) pixels to get a better accuracy for the estimation
119 | ```{r}
120 | 
121 | npixelbin = 3
122 | 
123 | get_consecutive_bin <- function(nel, nbin){
124 |   idx = rep(1:ceiling(nel/nbin), each=nbin)
125 |   return(idx[1:nel])
126 | }
127 | 
128 | aggregate_pixels <- function(dat, n){
129 |   tdat = dat[, rowsum(.SD, get_consecutive_bin(.N, n)) ,by=.(file, mass, metal)]
130 |   return(tdat)
131 | }
132 | 
133 | dats_agg <- lapply(dats_raw, function(x) aggregate_pixels(x, n=npixelbin))
134 | dats_agg_sum = rbindlist(lapply(dats_agg, summarize_dat), idcol = T)
135 | ```
136 | 
137 | After aggregation
138 | ```{r fig.width=17, fig.height=10}
139 | dats_agg_sum %>%
140 |   ggplot(aes(x=mass, y=med, color=.id))+
141 |   facet_wrap(~file+metal, scales = 'free_y')+
142 |   geom_label(aes(label=variable))
143 |   #geom_jitter(height = 0)
144 |   
145 | ```
146 | 
147 | Define helper funcitons
148 | ```{r Define helper functions}
149 | filter_rare_bc <- function(re, minevents){
150 |   stats = table(re@bc_ids)
151 |   nonfreq = names(stats)[stats <minevents]
152 |   re@bc_ids[re@bc_ids %in% nonfreq] = '0'
153 |   return(re)
154 |   
155 | }
156 | 
157 | ensure_correct_bc <- function(re, mass){
158 |   re@bc_ids[re@bc_ids != as.character(mass)] = '0'
159 |   return(re)
160 | }
161 | 
162 | 
163 | re_from_dat <- function(dat, bc_ms, minevents=10){
164 |   ff = dat %>%
165 |       dplyr::select(-c(file, mass, metal)) %>%
166 |       as.matrix.data.frame() %>%
167 |       flowFrame()
168 |   
169 |   
170 |   re <- CATALYST::assignPrelim(x=ff,y= bc_ms)
171 |   re <- estCutoffs(re)
172 |   re <- applyCutoffs(re)
173 | 
174 |   # filter for conditions with less then minevents
175 |   #re = ensure_correct_bc(re, dat[, mass])
176 |   re = filter_rare_bc(re, minevents)
177 |   return(re)
178 | }
179 | 
180 | 
181 | 
182 | sm_from_re <- function(re, ...){
183 |   sm = computeSpillmat(re, ...)
184 |   return(sm)
185 | }
186 | 
187 | sm_from_dat <- function(dat, bc_ms, minevents=10, ...){
188 |   re <- re_from_dat(dat, bc_ms, minevents)
189 |   sm <- sm_from_re(re, ...)
190 |   return(sm)
191 | }
192 | ```
193 | 
194 | Write out spillover matrix
195 | ```{r Binned}
196 | 
197 | res = lapply(dats_agg, function(x) re_from_dat(x, x[!is.na(mass), unique(mass)]))
198 | sms = lapply(res, function(x) sm_from_re(x))
199 | sm = data.frame(sms[[1]])
200 | fwrite(sm, file='/home/jana/sm_test.csv', col.names = TRUE, row.names = TRUE)
201 | ```
202 | 
203 | Plot spillover matrix
204 | ```{r}
205 | i = 1
206 | bc_ms =  dats_agg[[i]][!is.na(mass), unique(mass)]
207 | custom_isotope_list <- c(CATALYST::isotope_list, list(BCKG=190, ArAr=80))
208 | CATALYST::plotSpillmat(bc_ms,sms[[1]], isotope_list=custom_isotope_list)
209 | ```
210 | 
211 | ```{r quick check if the debarcoding went well, so all the events were correclty classified}
212 | i =1
213 | re = res[[i]]
214 | dat = copy(dats_agg[[i]])
215 | dat[ , bc := bc_ids(re)]
216 | 
217 | dat[bc!= '0', sum(as.character(mass) != bc), by=metal]
218 | ```


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3518284.svg)](https://doi.org/10.5281/zenodo.3518284)
 2 | # The Single-Cell Pathology Landscape of Breast Cancer
 3 | 
 4 | This repository contains all code used to produce the results and figures of the publication "The Single-Cell Pathology Landscape of Breast Cancer". All data, including tiff images, masks, single-cell and patient data are available on Zenodo (10.5281/zenodo.3518284).
 5 | 
 6 | ## Matlab scripts:
 7 | Image and other early analysis steps were performed using Matlab. Since the single-cell data was extracted using histoCAT, the Matlab scripts assume a data structure as in a loaded histoCAT session. Saved histoCAT sessions can be downloaded from Zenodo (10.5281/zenodo.3518284).
 8 | 
 9 | ## R scripts:
10 | Downstream analysis was performed using R pipelines. The R analysis is divided into one notebook for the analysis of the first TMA of 281 patients from University Hospital Basel and a second one for comparison and analysis of the second multi-core cohort from Univerity Hospital Zurich. All input data required to reproduce the figures of this publication are available on Zenodo(10.5281/zenodo.3518284). The BaselTMA and ZurichTMA folders contain the input data for the respective R pipelines.
11 | 
12 | ## Data organization on Zenodo:
13 | OMEandSingleCellMasks.zip contains the ome-tiff stacks and the single-cell masks.
14 | TumorStroma_masks.zip contains masks for tumor and stromal regions.
15 | SingleCell_and_Metadata.zip contains the single-cell and patient data as well as all other input data for the R pipelines provided here.
16 | 
17 | | Where to find:                            | Subpath                                                     |
18 | | ----------------------------------------- | ----------------------------------------------------------- |
19 | | Patient and core metadata BaselTMA        | SingleCell_and_Metadata/BaselTMA/Basel_PatientMetadata.csv  |
20 | | Patient and core metadata ZurichTMA       | SingleCell_and_Metadata/ZurichTMA/Zuri_PatientMetadata.csv  |
21 | | Single-cell data BaselTMA                 | SingleCell_and_Metadata/BaselTMA/SC_dat.csv                 |
22 | | Single-cell data ZurichTMA                | SingleCell_and_Metadata/ZurichTMA/SC_dat.csv                |
23 | | Single-cell segmentation masks both TMAs  | OMEandSingleCellMasks/Basel_Zuri_masks/                     |
24 | | Image tiffs both TMAs                     | OMEandSingleCellMasks/ome/                                  |
25 | | Antibody panel                            | SingleCell_and_Metadata/Basel_Zuri_StainingPanel.csv        |
26 | 
27 | ### Important notes when working with the data provided on Zenodo: 
28 | - The single-cell data provided for downstream R analysis is already spillover corrected.
29 | -  The single-cell masks that were generated using CellProfiler do not always contain strictly sequential single-cell labels. Every now and then an ID is skipped due to excluded edge cells. This can cause issues in histoCAT and therefore the single cells are automatically relabelled sequentially during loading into histoCAT. We exported the single-cell data from histoCAT for downstream R analysis and therefore the single-cell labels are the newly assigned sequential ones and match the labels in the histoCAT sessions. However, the original mask files that are also provided here still contain the original labels from CellProfiler. For matching the single-cell data provided here directly to the masks (e.g. for visualization of single-cell features on the image outside of histoCAT), the single-cell labels in the mask need to be relabelled as well or matched based on the rank.
30 | 


--------------------------------------------------------------------------------