├── README.md ├── classify_geojson_objects_in_wsi_centeroid_based_withlevel_mask.py ├── qupath_export.groovy ├── qupath_export_compressed.groovy ├── qupath_import.groovy └── qupath_import_compressed.groovy /README.md: -------------------------------------------------------------------------------- 1 | Code for the associated blog post: 2 | 3 | http://www.andrewjanowczyk.com/exporting-and-re-importing-annotations-from-qupath-for-usage-in-machine-learning/ 4 | 5 | Which discusses exporting/importing qupath annotations in geojson format, as well as manipulating them in python using a shapely object 6 | -------------------------------------------------------------------------------- /classify_geojson_objects_in_wsi_centeroid_based_withlevel_mask.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:light 5 | # text_representation: 6 | # extension: .py 7 | # format_name: light 8 | # format_version: '1.5' 9 | # jupytext_version: 1.4.1 10 | # kernelspec: 11 | # display_name: Python 3 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | # + 17 | 18 | openslidelevel=0 #level from openslide to read 19 | tilesize=10000 #size of the tile to load from openslide 20 | patchsize=32 #patch size needed by our DL model 21 | 22 | minhits=100 #the minimum number of objects needed to be present within a tile for the tile to be computed on 23 | batchsize=1024 #how many patches we want to send to the GPU at a single time 24 | nclasses=2 #number of output classes our model is providing 25 | classnames=["Other","Lymphocyte"] #the names of those classes which will appear in QuPath later on 26 | colors=[-377282,-9408287] #their associated color, see selection of different color values at the bottom of the file 27 | 28 | mask_patches=False #if we woud like to blackout the space around the object of interest, this is determined by how the model was trained 29 | 30 | 
json_fname = r'1L1_nuclei_reg.json'                 # input geojson file (plain or .gz)
json_annotated_fname = r'1L1_nuclei_reg_anno.json'  # target output geojson file (plain or .gz)
model_fname = "lymph_model.pth"                     # DL model to use
wsi_fname = "1L1_-_2019-09-10_16.44.58.ndpi"        # whole slide image to load cells from which coincide with the json file

# -

import os
os.environ['PATH'] = 'C:\\research\\openslide\\bin' + ';' + os.environ['PATH']  # can either specify openslide bin path in PATH, or add it dynamically
import openslide
from tqdm.autonotebook import tqdm
from math import ceil
import matplotlib.pyplot as plt

import geojson
from shapely.geometry import shape
from shapely.strtree import STRtree
from shapely.geometry import Point
from shapely.geometry import Polygon

# +
import torch
from torch import nn
from torchsummary import summary
import numpy as np
import cv2
import gzip

device = torch.device('cuda')


def divide_batch(l, n):
    """Yield successive slices of at most n rows from array l along its first axis."""
    for i in range(0, l.shape[0], n):
        yield l[i:i + n, ::]


# --- Load your model here
#model = LoadYourModelHere().to(device)
#checkpoint = torch.load(model_fname, map_location=lambda storage, loc: storage)  # load checkpoint to CPU and then put to device https://discuss.pytorch.org/t/saving-and-loading-torch-models-on-2-machines-with-different-number-of-gpu-devices/6666
#model.load_state_dict(checkpoint["model_dict"])
#model.eval()
#summary(model, (3, 32, 32))

# +

# Load the geojson objects exported from QuPath.
# FIX: json.loads() no longer accepts an `encoding` keyword (removed in Python 3.9) and
# GzipFile.read() returns bytes, so open the gzip member in text mode instead — this also
# mirrors the gzip.open(..., 'wt', encoding="ascii") used for the output file below.
if json_fname.endswith(".gz"):
    with gzip.open(json_fname, 'rt', encoding='ascii') as f:
        allobjects = geojson.load(f)
else:
    with open(json_fname) as f:
        allobjects = geojson.load(f)

print("done loading")
# -

# QuPath cell objects carry their nucleus polygon under "nucleusGeometry"; other objects only have "geometry"
allshapes = [shape(obj["nucleusGeometry"] if "nucleusGeometry" in obj.keys() else obj["geometry"]) for obj in allobjects]
allcenters = [s.centroid for s in allshapes]
print("done converting")

# Tag each centroid with the index of its source object so spatial-query hits map back to allobjects.
# NOTE(review): attaching .id to geometries and getting geometries back from STRtree.query()
# requires shapely 1.x; shapely 2.x returns integer indices from query() — confirm pinned version.
for i in range(len(allshapes)):
    allcenters[i].id = i

searchtree = STRtree(allcenters)
print("done building tree")

# +
osh = openslide.OpenSlide(wsi_fname)
# level_dimensions is (width, height): width -> number of tile columns, height -> number of tile rows
# FIX: the original unpacked (width, height) into (nrow, ncol), i.e. swapped
ncol, nrow = osh.level_dimensions[0]
nrow = ceil(nrow / tilesize)
ncol = ceil(ncol / tilesize)

scalefactor = int(osh.level_downsamples[openslidelevel])
paddingsize = patchsize // 2 * scalefactor  # level-0 pixels of margin so patches at tile edges stay in bounds

int_coords = lambda x: np.array(x).round().astype(np.int32)

# +
for y in tqdm(range(0, osh.level_dimensions[0][1], round(tilesize * scalefactor)), desc="outer", leave=False):
    for x in tqdm(range(0, osh.level_dimensions[0][0], round(tilesize * scalefactor)), desc=f"inner {y}", leave=False):

        # tile footprint in level-0 coordinates, used to find objects whose centroid falls inside it
        tilepoly = Polygon([[x, y], [x + tilesize * scalefactor, y],
                            [x + tilesize * scalefactor, y + tilesize * scalefactor],
                            [x, y + tilesize * scalefactor]])
        hits = searchtree.query(tilepoly)

        if len(hits) < minhits:  # skip tiles with too few objects to be worth a GPU round-trip
            continue

        tile = np.asarray(osh.read_region((x - paddingsize, y - paddingsize), openslidelevel,
                                          (tilesize + 2 * paddingsize, tilesize + 2 * paddingsize)))[:, :, 0:3]  # trim alpha

        if mask_patches:
            # rasterize every hit polygon into a binary mask so non-object pixels can be blacked out
            mask = np.zeros((tile.shape[0:2]), dtype=tile.dtype)
            exteriors = [int_coords(allshapes[hit.id].boundary.coords) for hit in hits]
            exteriors_shifted = [(ext - np.asarray([(x - paddingsize), (y - paddingsize)])) // scalefactor for ext in exteriors]
            cv2.fillPoly(mask, exteriors_shifted, 1)

        arr_out = np.zeros((len(hits), patchsize, patchsize, 3))
        id_out = np.zeros((len(hits), 1))

        # ---- get patches from hits within this tile and stick them (and their ids) into matrices
        for hit, arr, objid in zip(hits, arr_out, id_out):  # renamed id -> objid: don't shadow the builtin
            px, py = hit.coords[:][0]  # this way is faster than using hit.x and hit.y, likely because of call stack overhead
            c = int((px - x + paddingsize) // scalefactor)
            r = int((py - y + paddingsize) // scalefactor)
            patch = tile[r - patchsize // 2:r + patchsize // 2, c - patchsize // 2:c + patchsize // 2, :]

            if mask_patches:
                maskpatch = mask[r - patchsize // 2:r + patchsize // 2, c - patchsize // 2:c + patchsize // 2]
                patch = np.multiply(patch, maskpatch[:, :, None])

            arr[:] = patch
            objid[:] = hit.id

        # ---- process batch
        classids = []
        for batch_arr in tqdm(divide_batch(arr_out, batchsize), leave=False):
            batch_arr_gpu = torch.from_numpy(batch_arr.transpose(0, 3, 1, 2)).type('torch.FloatTensor').to(device) / 255

            # ---- get results
            #classids.append(torch.argmax(model.img2class(batch_arr_gpu), dim=1).detach().cpu().numpy())
            # FIX: placeholder referenced undefined name `arr_out_gpu` (NameError);
            # the per-batch size comes from the batch tensor itself
            classids.append(np.random.choice([0, 1], batch_arr_gpu.shape[0]))
        classids = np.hstack(classids)

        # write the predicted class (name + QuPath colorRGB) back onto the source objects
        for objid, classid in zip(id_out, classids):
            allobjects[int(objid)]["properties"]['classification'] = {'name': classnames[classid], 'colorRGB': colors[classid]}


# +
# # for debugging
# for i, (c, a) in enumerate(zip(classids, arr_out)):
#     plt.imshow(a / 255)
#     plt.show()
#     print(c)
#     if i > 10:
#         break

# +
# # for timing
# # %load_ext line_profiler
# # %lprun -f makeoutput makeoutput()
# makeoutput()
# -

if json_annotated_fname.endswith(".gz"):
    with gzip.open(json_annotated_fname, 'wt', encoding="ascii") as zipfile:
        geojson.dump(allobjects, zipfile)
else:
    with open(json_annotated_fname, 'w') as outfile:
        geojson.dump(allobjects, outfile)


# +

# "name": "Positive",  # add incolors
# "colorRGB": -377282

# "name": "Other",
# "colorRGB": -14336

# "name": "Stroma",
# "colorRGB": -6895466


# "name": "Necrosis",
# "colorRGB": -13487566

# "name": "Tumor",
# "colorRGB": -3670016

# "name": "Immune cells",
206 | # "colorRGB": -6268256 207 | 208 | 209 | # "name": "Negative", 210 | # "colorRGB":-9408287 211 | 212 | 213 | # + 214 | #This code to perform entire polygon with complex objects 215 | #exteriors = [int_coords(geo.coords) for hit in hits for geo in hit.boundary.geoms ] #need this modificatrion for complex structures 216 | 217 | #This code to perform by center with complex objects 218 | #exteriors = [int_coords(geo.coords) for hit in hits for geo in allshapes[hit.id].boundary.geoms ] 219 | 220 | -------------------------------------------------------------------------------- /qupath_export.groovy: -------------------------------------------------------------------------------- 1 | // --- remove measurements, not needed but makes file smaller 2 | Set annotationMeasurements = [] 3 | 4 | getDetectionObjects().each{it.getMeasurementList().getMeasurementNames().each{annotationMeasurements << it}} 5 | //println(annotationMeasurements) 6 | 7 | annotationMeasurements.each{ removeMeasurements(qupath.lib.objects.PathCellObject, it);} 8 | 9 | 10 | // write to file 11 | boolean prettyPrint = false // false results in smaller file sizes and thus faster loading times, at the cost of nice formating 12 | def gson = GsonTools.getInstance(prettyPrint) 13 | def annotations = getDetectionObjects() 14 | //println gson.toJson(annotations) // you can check here but this will be HUGE and take a long time to parse 15 | 16 | 17 | // automatic output filename, otherwise set explicitly 18 | String imageLocation = getCurrentImageData().getServer().getPath() 19 | outfname = imageLocation.split("file:/")[1]+".json" 20 | 21 | 22 | 23 | File file = new File(outfname) 24 | file.withWriter('UTF-8') { 25 | gson.toJson(annotations,it) 26 | } 27 | -------------------------------------------------------------------------------- /qupath_export_compressed.groovy: -------------------------------------------------------------------------------- 1 | // --- remove measurements, not needed but makes file 
smaller 2 | Set annotationMeasurements = [] 3 | 4 | getDetectionObjects().each{it.getMeasurementList().getMeasurementNames().each{annotationMeasurements << it}} 5 | //println(annotationMeasurements) 6 | 7 | annotationMeasurements.each{ removeMeasurements(qupath.lib.objects.PathCellObject, it);} 8 | 9 | 10 | // write to file 11 | boolean prettyPrint = false // false results in smaller file sizes and thus faster loading times, at the cost of nice formating 12 | def gson = GsonTools.getInstance(prettyPrint) 13 | def annotations = getDetectionObjects() 14 | //println gson.toJson(annotations) // you can check here but this will be HUGE and take a long time to parse 15 | 16 | 17 | // automatic output filename, otherwise set explicitly 18 | String imageLocation = getCurrentImageData().getServer().getPath() 19 | outfname = imageLocation.split("file:/")[1]+".json.gz" 20 | 21 | 22 | import java.util.zip.GZIPOutputStream 23 | FileOutputStream output = new FileOutputStream(outfname); 24 | Writer writer = new OutputStreamWriter(new GZIPOutputStream(output), "ascii") 25 | gson.toJson(annotations,writer) 26 | writer.close() 27 | 28 | -------------------------------------------------------------------------------- /qupath_import.groovy: -------------------------------------------------------------------------------- 1 | 2 | // ----------- IMPORT JSON 3 | 4 | 5 | def gson = GsonTools.getInstance(true) 6 | 7 | def json = new File("d:/1L1_nuclei_anno.json").text 8 | //println json 9 | 10 | 11 | // Read the annotations 12 | def type = new com.google.gson.reflect.TypeToken>() {}.getType() 13 | def deserializedAnnotations = gson.fromJson(json, type) 14 | 15 | // Set the annotations to have a different name (so we can identify them) & add to the current image 16 | // deserializedAnnotations.eachWithIndex {annotation, i -> annotation.setName('New annotation ' + (i+1))} # --- THIS WON"T WORK IN CURRENT VERSION 17 | addObjects(deserializedAnnotations) 18 | 19 | 20 | 
-------------------------------------------------------------------------------- /qupath_import_compressed.groovy: -------------------------------------------------------------------------------- 1 | // ----------- IMPORT compressed JSON 2 | 3 | import java.util.zip.GZIPInputStream 4 | InputStream fileStream = new FileInputStream("d:/1L1_nuclei_anno.json.gz"); 5 | InputStream gzipStream = new GZIPInputStream(fileStream); 6 | Reader decoder = new InputStreamReader(gzipStream, 'ascii'); 7 | BufferedReader buffered = new BufferedReader(decoder); 8 | 9 | def gson = GsonTools.getInstance(true) 10 | 11 | // Read the annotations 12 | def type = new com.google.gson.reflect.TypeToken>() {}.getType() 13 | def deserializedAnnotations = gson.fromJson(buffered, type) 14 | 15 | // Set the annotations to have a different name (so we can identify them) & add to the current image 16 | // deserializedAnnotations.eachWithIndex {annotation, i -> annotation.setName('New annotation ' + (i+1))} # --- THIS WON"T WORK IN CURRENT VERSION 17 | addObjects(deserializedAnnotations) 18 | 19 | --------------------------------------------------------------------------------