├── README.md ├── classify_geojson_objects_in_wsi_centeroid_based_withlevel_mask.py ├── qupath_export.groovy ├── qupath_export_compressed.groovy ├── qupath_import.groovy └── qupath_import_compressed.groovy /README.md: -------------------------------------------------------------------------------- 1 | Code for the associated blog post: 2 | 3 | http://www.andrewjanowczyk.com/exporting-and-re-importing-annotations-from-qupath-for-usage-in-machine-learning/ 4 | 5 | Which discusses exporting/importing qupath annotations in geojson format, as well as manipulating them in python using a shapely object 6 | -------------------------------------------------------------------------------- /classify_geojson_objects_in_wsi_centeroid_based_withlevel_mask.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:light 5 | # text_representation: 6 | # extension: .py 7 | # format_name: light 8 | # format_version: '1.5' 9 | # jupytext_version: 1.4.1 10 | # kernelspec: 11 | # display_name: Python 3 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | # + 17 | 18 | openslidelevel=0 #level from openslide to read 19 | tilesize=10000 #size of the tile to load from openslide 20 | patchsize=32 #patch size needed by our DL model 21 | 22 | minhits=100 #the minimum number of objects needed to be present within a tile for the tile to be computed on 23 | batchsize=1024 #how many patches we want to send to the GPU at a single time 24 | nclasses=2 #number of output classes our model is providing 25 | classnames=["Other","Lymphocyte"] #the names of those classes which will appear in QuPath later on 26 | colors=[-377282,-9408287] #their associated color, see selection of different color values at the bottom of the file 27 | 28 | mask_patches=False #if we woud like to blackout the space around the object of interest, this is determined by how the model was trained 29 | 30 | 
json_fname = r'1L1_nuclei_reg.json'                 # input geojson file (plain or .gz)
json_annotated_fname = r'1L1_nuclei_reg_anno.json'  # target output geojson file (plain or .gz)
model_fname = "lymph_model.pth"                     # DL model to use
wsi_fname = "1L1_-_2019-09-10_16.44.58.ndpi"        # whole slide image to load cells from which coincide with the json file

# -

import os
os.environ['PATH'] = 'C:\\research\\openslide\\bin' + ';' + os.environ['PATH']  # can either specify openslide bin path in PATH, or add it dynamically
import openslide
from tqdm.autonotebook import tqdm
from math import ceil
import matplotlib.pyplot as plt

import geojson
from shapely.geometry import shape
from shapely.strtree import STRtree
from shapely.geometry import Point
from shapely.geometry import Polygon

# +
import torch
from torch import nn
from torchsummary import summary
import numpy as np
import cv2
import gzip

device = torch.device('cuda')


def divide_batch(l, n):
    """Yield successive slices of at most n rows from array l along its first axis."""
    for i in range(0, l.shape[0], n):
        yield l[i:i + n, ::]


# --- Load your model here
#model = LoadYourModelHere().to(device)
#checkpoint = torch.load(model_fname, map_location=lambda storage, loc: storage)  # load checkpoint to CPU and then put to device https://discuss.pytorch.org/t/saving-and-loading-torch-models-on-2-machines-with-different-number-of-gpu-devices/6666
#model.load_state_dict(checkpoint["model_dict"])
#model.eval()
#summary(model, (3, 32, 32))

# +

# Load the geojson objects exported from QuPath.
# FIX: json.loads() no longer accepts an `encoding` keyword (removed in Python 3.9) and
# GzipFile.read() returns bytes, so open the gzip member in text mode instead — this also
# mirrors the gzip.open(..., 'wt', encoding="ascii") used for the output file below.
if json_fname.endswith(".gz"):
    with gzip.open(json_fname, 'rt', encoding='ascii') as f:
        allobjects = geojson.load(f)
else:
    with open(json_fname) as f:
        allobjects = geojson.load(f)

print("done loading")
# -

# QuPath cell objects carry their nucleus polygon under "nucleusGeometry"; other objects only have "geometry"
allshapes = [shape(obj["nucleusGeometry"] if "nucleusGeometry" in obj.keys() else obj["geometry"]) for obj in allobjects]
allcenters = [s.centroid for s in allshapes]
print("done converting")

# Tag each centroid with the index of its source object so spatial-query hits map back to allobjects.
# NOTE(review): attaching .id to geometries and getting geometries back from STRtree.query()
# requires shapely 1.x; shapely 2.x returns integer indices from query() — confirm pinned version.
for i in range(len(allshapes)):
    allcenters[i].id = i

searchtree = STRtree(allcenters)
print("done building tree")

# +
osh = openslide.OpenSlide(wsi_fname)
# level_dimensions is (width, height): width -> number of tile columns, height -> number of tile rows
# FIX: the original unpacked (width, height) into (nrow, ncol), i.e. swapped
ncol, nrow = osh.level_dimensions[0]
nrow = ceil(nrow / tilesize)
ncol = ceil(ncol / tilesize)

scalefactor = int(osh.level_downsamples[openslidelevel])
paddingsize = patchsize // 2 * scalefactor  # level-0 pixels of margin so patches at tile edges stay in bounds

int_coords = lambda x: np.array(x).round().astype(np.int32)

# +
for y in tqdm(range(0, osh.level_dimensions[0][1], round(tilesize * scalefactor)), desc="outer", leave=False):
    for x in tqdm(range(0, osh.level_dimensions[0][0], round(tilesize * scalefactor)), desc=f"inner {y}", leave=False):

        # tile footprint in level-0 coordinates, used to find objects whose centroid falls inside it
        tilepoly = Polygon([[x, y], [x + tilesize * scalefactor, y],
                            [x + tilesize * scalefactor, y + tilesize * scalefactor],
                            [x, y + tilesize * scalefactor]])
        hits = searchtree.query(tilepoly)

        if len(hits) < minhits:  # skip tiles with too few objects to be worth a GPU round-trip
            continue

        tile = np.asarray(osh.read_region((x - paddingsize, y - paddingsize), openslidelevel,
                                          (tilesize + 2 * paddingsize, tilesize + 2 * paddingsize)))[:, :, 0:3]  # trim alpha

        if mask_patches:
            # rasterize every hit polygon into a binary mask so non-object pixels can be blacked out
            mask = np.zeros((tile.shape[0:2]), dtype=tile.dtype)
            exteriors = [int_coords(allshapes[hit.id].boundary.coords) for hit in hits]
            exteriors_shifted = [(ext - np.asarray([(x - paddingsize), (y - paddingsize)])) // scalefactor for ext in exteriors]
            cv2.fillPoly(mask, exteriors_shifted, 1)

        arr_out = np.zeros((len(hits), patchsize, patchsize, 3))
        id_out = np.zeros((len(hits), 1))

        # ---- get patches from hits within this tile and stick them (and their ids) into matrices
        for hit, arr, objid in zip(hits, arr_out, id_out):  # renamed id -> objid: don't shadow the builtin
            px, py = hit.coords[:][0]  # this way is faster than using hit.x and hit.y, likely because of call stack overhead
            c = int((px - x + paddingsize) // scalefactor)
            r = int((py - y + paddingsize) // scalefactor)
            patch = tile[r - patchsize // 2:r + patchsize // 2, c - patchsize // 2:c + patchsize // 2, :]

            if mask_patches:
                maskpatch = mask[r - patchsize // 2:r + patchsize // 2, c - patchsize // 2:c + patchsize // 2]
                patch = np.multiply(patch, maskpatch[:, :, None])

            arr[:] = patch
            objid[:] = hit.id

        # ---- process batch
        classids = []
        for batch_arr in tqdm(divide_batch(arr_out, batchsize), leave=False):
            batch_arr_gpu = torch.from_numpy(batch_arr.transpose(0, 3, 1, 2)).type('torch.FloatTensor').to(device) / 255

            # ---- get results
            #classids.append(torch.argmax(model.img2class(batch_arr_gpu), dim=1).detach().cpu().numpy())
            # FIX: placeholder referenced undefined name `arr_out_gpu` (NameError);
            # the per-batch size comes from the batch tensor itself
            classids.append(np.random.choice([0, 1], batch_arr_gpu.shape[0]))
        classids = np.hstack(classids)

        # write the predicted class (name + QuPath colorRGB) back onto the source objects
        for objid, classid in zip(id_out, classids):
            allobjects[int(objid)]["properties"]['classification'] = {'name': classnames[classid], 'colorRGB': colors[classid]}


# +
# # for debugging
# for i, (c, a) in enumerate(zip(classids, arr_out)):
#     plt.imshow(a / 255)
#     plt.show()
#     print(c)
#     if i > 10:
#         break

# +
# # for timing
# # %load_ext line_profiler
# # %lprun -f makeoutput makeoutput()
# makeoutput()
# -

if json_annotated_fname.endswith(".gz"):
    with gzip.open(json_annotated_fname, 'wt', encoding="ascii") as zipfile:
        geojson.dump(allobjects, zipfile)
else:
    with open(json_annotated_fname, 'w') as outfile:
        geojson.dump(allobjects, outfile)


# +

# "name": "Positive",  # add incolors
# "colorRGB": -377282

# "name": "Other",
# "colorRGB": -14336

# "name": "Stroma",
# "colorRGB": -6895466


# "name": "Necrosis",
# "colorRGB": -13487566

# "name": "Tumor",
# "colorRGB": -3670016

# "name": "Immune cells",
206 | # "colorRGB": -6268256 207 | 208 | 209 | # "name": "Negative", 210 | # "colorRGB":-9408287 211 | 212 | 213 | # + 214 | #This code to perform entire polygon with complex objects 215 | #exteriors = [int_coords(geo.coords) for hit in hits for geo in hit.boundary.geoms ] #need this modificatrion for complex structures 216 | 217 | #This code to perform by center with complex objects 218 | #exteriors = [int_coords(geo.coords) for hit in hits for geo in allshapes[hit.id].boundary.geoms ] 219 | 220 | -------------------------------------------------------------------------------- /qupath_export.groovy: -------------------------------------------------------------------------------- 1 | // --- remove measurements, not needed but makes file smaller 2 | Set annotationMeasurements = [] 3 | 4 | getDetectionObjects().each{it.getMeasurementList().getMeasurementNames().each{annotationMeasurements << it}} 5 | //println(annotationMeasurements) 6 | 7 | annotationMeasurements.each{ removeMeasurements(qupath.lib.objects.PathCellObject, it);} 8 | 9 | 10 | // write to file 11 | boolean prettyPrint = false // false results in smaller file sizes and thus faster loading times, at the cost of nice formating 12 | def gson = GsonTools.getInstance(prettyPrint) 13 | def annotations = getDetectionObjects() 14 | //println gson.toJson(annotations) // you can check here but this will be HUGE and take a long time to parse 15 | 16 | 17 | // automatic output filename, otherwise set explicitly 18 | String imageLocation = getCurrentImageData().getServer().getPath() 19 | outfname = imageLocation.split("file:/")[1]+".json" 20 | 21 | 22 | 23 | File file = new File(outfname) 24 | file.withWriter('UTF-8') { 25 | gson.toJson(annotations,it) 26 | } 27 | -------------------------------------------------------------------------------- /qupath_export_compressed.groovy: -------------------------------------------------------------------------------- 1 | // --- remove measurements, not needed but makes file 
smaller 2 | Set annotationMeasurements = [] 3 | 4 | getDetectionObjects().each{it.getMeasurementList().getMeasurementNames().each{annotationMeasurements << it}} 5 | //println(annotationMeasurements) 6 | 7 | annotationMeasurements.each{ removeMeasurements(qupath.lib.objects.PathCellObject, it);} 8 | 9 | 10 | // write to file 11 | boolean prettyPrint = false // false results in smaller file sizes and thus faster loading times, at the cost of nice formating 12 | def gson = GsonTools.getInstance(prettyPrint) 13 | def annotations = getDetectionObjects() 14 | //println gson.toJson(annotations) // you can check here but this will be HUGE and take a long time to parse 15 | 16 | 17 | // automatic output filename, otherwise set explicitly 18 | String imageLocation = getCurrentImageData().getServer().getPath() 19 | outfname = imageLocation.split("file:/")[1]+".json.gz" 20 | 21 | 22 | import java.util.zip.GZIPOutputStream 23 | FileOutputStream output = new FileOutputStream(outfname); 24 | Writer writer = new OutputStreamWriter(new GZIPOutputStream(output), "ascii") 25 | gson.toJson(annotations,writer) 26 | writer.close() 27 | 28 | -------------------------------------------------------------------------------- /qupath_import.groovy: -------------------------------------------------------------------------------- 1 | 2 | // ----------- IMPORT JSON 3 | 4 | 5 | def gson = GsonTools.getInstance(true) 6 | 7 | def json = new File("d:/1L1_nuclei_anno.json").text 8 | //println json 9 | 10 | 11 | // Read the annotations 12 | def type = new com.google.gson.reflect.TypeToken>() {}.getType() 13 | def deserializedAnnotations = gson.fromJson(json, type) 14 | 15 | // Set the annotations to have a different name (so we can identify them) & add to the current image 16 | // deserializedAnnotations.eachWithIndex {annotation, i -> annotation.setName('New annotation ' + (i+1))} # --- THIS WON"T WORK IN CURRENT VERSION 17 | addObjects(deserializedAnnotations) 18 | 19 | 20 | 
-------------------------------------------------------------------------------- /qupath_import_compressed.groovy: -------------------------------------------------------------------------------- 1 | // ----------- IMPORT compressed JSON 2 | 3 | import java.util.zip.GZIPInputStream 4 | InputStream fileStream = new FileInputStream("d:/1L1_nuclei_anno.json.gz"); 5 | InputStream gzipStream = new GZIPInputStream(fileStream); 6 | Reader decoder = new InputStreamReader(gzipStream, 'ascii'); 7 | BufferedReader buffered = new BufferedReader(decoder); 8 | 9 | def gson = GsonTools.getInstance(true) 10 | 11 | // Read the annotations 12 | def type = new com.google.gson.reflect.TypeToken>() {}.getType() 13 | def deserializedAnnotations = gson.fromJson(buffered, type) 14 | 15 | // Set the annotations to have a different name (so we can identify them) & add to the current image 16 | // deserializedAnnotations.eachWithIndex {annotation, i -> annotation.setName('New annotation ' + (i+1))} # --- THIS WON"T WORK IN CURRENT VERSION 17 | addObjects(deserializedAnnotations) 18 | 19 | --------------------------------------------------------------------------------