├── train
│   ├── requirements.txt
│   ├── hoptimus_model_backbone.py
│   ├── data_utils
│   │   ├── MHIST.py
│   │   ├── Lizard.py
│   │   └── MSI_TCGA_COAD.py
│   ├── train_msi_tcga.py
│   ├── train_mhist.py
│   ├── train_lizard.py
│   └── deepmil.py
├── pathology_overview.png
├── data
│   ├── download_lizard.sh
│   ├── download_mhist.sh
│   ├── download_tcga.sh
│   └── gdc_manifest_tcga_coad.txt
├── deploy
│   ├── prepare_instance.sh
│   ├── Dockerfile
│   ├── build_and_push.sh
│   ├── BatchInference.ipynb
│   └── inference.py
├── CODE_OF_CONDUCT.md
├── preprocessing
│   ├── Dockerfile
│   ├── build_and_push.sh
│   └── preprocessing-code
│       └── generate_features.py
├── LICENSE
├── CONTRIBUTING.md
├── infra
│   └── infra-stack.yml
├── Train.ipynb
└── README.md

--------------------------------------------------------------------------------
/train/requirements.txt:
--------------------------------------------------------------------------------
timm
evaluate
tensorboard
openpyxl
h5py
torchmetrics

--------------------------------------------------------------------------------
/pathology_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/ai-digital-pathology/main/pathology_overview.png

--------------------------------------------------------------------------------
/data/download_lizard.sh:
--------------------------------------------------------------------------------
echo "starting download!"
cd SageMaker/mnt/efs
mkdir Lizard && cd Lizard
curl 'xxxxxx-kaggle-link' -L -o 'archive.zip'
unzip archive.zip
rm archive.zip

--------------------------------------------------------------------------------
/deploy/prepare_instance.sh:
--------------------------------------------------------------------------------
sudo yum update -y
sudo amazon-linux-extras install epel -y
sudo yum install openslide-tools -y
sudo yum install python3-pip -y
sudo yum install opencv opencv-devel opencv-python -y

--------------------------------------------------------------------------------
/data/download_mhist.sh:
--------------------------------------------------------------------------------
echo "starting download!"
cd SageMaker/mnt/efs
mkdir MHIST && cd MHIST
echo "download annotations.csv"
wget https://xxxx -O annotations.csv
echo "download images.zip"
wget https://xxxxx -O images.zip
unzip images.zip
rm images.zip

--------------------------------------------------------------------------------
/data/download_tcga.sh:
--------------------------------------------------------------------------------
sudo yum update -y
sudo amazon-linux-extras install epel -y
sudo yum install openslide-tools -y

# install the GDC client to download from the TCGA repository
pip install virtualenv
git clone https://github.com/NCI-GDC/gdc-client
cd gdc-client/bin
chmod +x package
./package

--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.

--------------------------------------------------------------------------------
/preprocessing/Dockerfile:
--------------------------------------------------------------------------------
ARG REGION=us-west-2

# SageMaker PyTorch training image used as the base for this preprocessing container
FROM 763104351884.dkr.ecr.$REGION.amazonaws.com/pytorch-training:2.4.0-gpu-py311-cu124-ubuntu22.04-sagemaker

ENV PATH="/opt/ml/code:${PATH}"

RUN apt-get update && apt-get install -y \
    openslide-tools \
    && rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir openslide-python

RUN conda install -y pytorch torchvision torchaudio pytorch-cuda=12.4 -c pytorch -c nvidia
RUN conda install -y -c rapidsai -c conda-forge cucim
RUN conda install -y -c conda-forge timm h5py

COPY /preprocessing-code /opt/ml/code
ENV SAGEMAKER_SUBMIT_DIRECTORY /opt/ml/code

ENV SAGEMAKER_PROGRAM generate_features.py

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT No Attribution

Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

--------------------------------------------------------------------------------
/train/hoptimus_model_backbone.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import timm
from torchvision import transforms
from PIL import Image

class HOPTIMUSZero(nn.Module):
    def __init__(self, checkpoint=None):
        super(HOPTIMUSZero, self).__init__()
        self.model = timm.create_model("hf_hub:bioptimus/H-optimus-0", pretrained=True)
        self.model.eval()

        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(
                mean=(0.707223, 0.578729, 0.703617),
                std=(0.211883, 0.230117, 0.177517)
            ),
        ])

    def to(self, device):
        self.model = self.model.to(device)
        return self

    @torch.inference_mode()
    def forward(self, image):
        if isinstance(image, Image.Image):
            image = self.transform(image).unsqueeze(dim=0)
        image = image.to("cuda" if torch.cuda.is_available() else "cpu")

        output = self.model.forward_features(image)
        return {
            'x_norm_cls_token': output[:, 0, :],
            # Skip the CLS token and the 4 register tokens; keep only the patch tokens.
            'x_norm_patch_tokens': output[:, 5:, :]
        }

if __name__ == "__main__":
    # Initialize the model
    model = HOPTIMUSZero()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device=device)

    # Create a random input image
    input_image = torch.rand(3, 224, 224)
    input_image = transforms.ToPILImage()(input_image)

    # Perform inference
    output = model(input_image)
    print(output)

--------------------------------------------------------------------------------
/deploy/Dockerfile:
--------------------------------------------------------------------------------
ARG REGION=us-east-1

# SageMaker PyTorch image for INFERENCE (see available images: https://github.com/aws/deep-learning-containers/blob/master/available_images.md)
FROM 763104351884.dkr.ecr.$REGION.amazonaws.com/pytorch-inference:2.5.1-gpu-py311-cu124-ubuntu22.04-sagemaker

ENV PATH="/opt/ml/code:${PATH}"

RUN apt-get update && apt-get install -y \
    openslide-tools \
    libopencv-dev \
    python3-opencv \
    && rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir openslide-python opencv-python pillow tqdm

RUN conda install -y pytorch torchvision torchaudio pytorch-cuda=12.4 -c pytorch -c nvidia
RUN conda install -y -c rapidsai -c conda-forge cucim
RUN conda install -y -c conda-forge timm h5py

# /opt/ml and all subdirectories are utilized by SageMaker; we use the /code subdirectory to store our user code.
COPY /inference.py /opt/ml/model/code/inference.py
COPY /hoptimus_model_backbone.py /opt/ml/model/code/hoptimus_model_backbone.py

# this environment variable is used by the SageMaker PyTorch container to determine our user code directory.
ENV SAGEMAKER_SUBMIT_DIRECTORY /opt/ml/model/code

# this environment variable is used by the SageMaker PyTorch container to determine our program entry point
# for serving.
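# Here it points at inference.py; the predict_fn handler that inference.py exposes is
# registered through the SAGEMAKER_HANDLER variable further below.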
# For more information: https://github.com/aws/sagemaker-pytorch-container
ENV SAGEMAKER_PROGRAM inference.py

ENV SAGEMAKER_HANDLER inference:predict_fn

ENV SAGEMAKER_MODEL_SERVER_TIMEOUT=3600
ENV SAGEMAKER_MODEL_SERVER_WORKERS=1
ENV TS_DECODE_INPUT_REQUEST=true
ENV TEMP=/tmp
ENV TRANSFORMERS_CACHE=/tmp/transformers_cache
ENV SAGEMAKER_MAX_RETRY_DELAY=120
ENV SAGEMAKER_SERVING_TIME=3600

# Log the final package set for debugging.
RUN pip freeze

--------------------------------------------------------------------------------
/deploy/build_and_push.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# This script shows how to build the Docker image and push it to ECR to be ready for use
# by SageMaker.

# The argument to this script is the image name. This will be used as the image on the local
# machine and combined with the account and region to form the repository name for ECR.
image=$1

cp ../train/hoptimus_model_backbone.py hoptimus_model_backbone.py
echo "backbone model copied to code folder"

dockerfile=${2:-Dockerfile}

if [ "$image" == "" ]
then
    echo "Usage: $0 <image-name>"
    exit 1
fi

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-west-2 if none defined)
region=$(aws configure get region)
region=${region:-us-west-2}

fullname="${account}.dkr.ecr.${region}.amazonaws.com/${image}:latest"
echo "ECR image fullname: ${fullname}"

# If the repository doesn't exist in ECR, create it.
aws ecr describe-repositories --repository-names "${image}" > /dev/null 2>&1

if [ $? -ne 0 ]
then
    aws ecr create-repository --repository-name "${image}" > /dev/null
fi

# Log in to our ECR registry (get-login-password replaces the deprecated get-login)
aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "${account}.dkr.ecr.${region}.amazonaws.com"

# Log in to the ECR registry hosting the SageMaker PyTorch base image
aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "763104351884.dkr.ecr.${region}.amazonaws.com"

# Build the docker image locally with the image name and then push it to ECR
# with the full name.
docker build -f ${dockerfile} -t ${image} . --build-arg REGION=${region}
docker tag ${image} ${fullname}
docker push ${fullname}

rm hoptimus_model_backbone.py
echo "Cleanup completed, image built and pushed"

--------------------------------------------------------------------------------
/preprocessing/build_and_push.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# This script shows how to build the Docker image and push it to ECR to be ready for use
# by SageMaker.

# The argument to this script is the image name. This will be used as the image on the local
# machine and combined with the account and region to form the repository name for ECR.
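#
# Example invocation (the image name is illustrative):
#   ./build_and_push.sh wsi-preprocessing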
image=$1

cp ../train/hoptimus_model_backbone.py preprocessing-code/hoptimus_model_backbone.py
echo "backbone model copied to code folder"

dockerfile=${2:-Dockerfile}

if [ "$image" == "" ]
then
    echo "Usage: $0 <image-name>"
    exit 1
fi

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-west-2 if none defined)
region=$(aws configure get region)
region=${region:-us-west-2}

fullname="${account}.dkr.ecr.${region}.amazonaws.com/${image}:latest"
echo "ECR image fullname: ${fullname}"

# If the repository doesn't exist in ECR, create it.
aws ecr describe-repositories --repository-names "${image}" > /dev/null 2>&1

if [ $? -ne 0 ]
then
    aws ecr create-repository --repository-name "${image}" > /dev/null
fi

# Log in to our ECR registry (get-login-password replaces the deprecated get-login)
aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "${account}.dkr.ecr.${region}.amazonaws.com"

# Log in to the ECR registry hosting the SageMaker PyTorch base image
aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "763104351884.dkr.ecr.${region}.amazonaws.com"

# Build the docker image locally with the image name and then push it to ECR
# with the full name.
docker build -f ${dockerfile} -t ${image} . --build-arg REGION=${region}
docker tag ${image} ${fullname}
docker push ${fullname}

rm preprocessing-code/hoptimus_model_backbone.py
echo "Cleanup completed, image built and pushed"

--------------------------------------------------------------------------------
/train/data_utils/MHIST.py:
--------------------------------------------------------------------------------
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class MHISTDataset(Dataset):
    def __init__(self, csv_file, img_dir):
        self.annotations = pd.read_csv(csv_file)
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ])
        self.img_dir = img_dir

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        # Load image and resize
        img_name = f'{self.img_dir}/{self.annotations.iloc[idx, 0]}'
        image = Image.open(img_name).convert('RGB')
        image = self.transform(image)

        # Load label (HP = 0.0, SSA = 1.0)
        label = 0.0 if self.annotations.iloc[idx, 1] == 'HP' else 1.0

        return image, label


class MHIST():
    def __init__(self, dataset_path='/home/ec2-user/SageMaker/mnt/efs/MHIST', batch_size=16):
        csv_file = f'{dataset_path}/annotations.csv'
        img_dir = f'{dataset_path}/images'

        # Split the dataset into train and test using the 'Partition' column
        full_train_dataset = MHISTDataset(csv_file=csv_file, img_dir=img_dir)
        full_train_dataset.annotations = full_train_dataset.annotations[full_train_dataset.annotations['Partition'] == 'train']

        test_dataset = MHISTDataset(csv_file=csv_file, img_dir=img_dir)
        test_dataset.annotations = test_dataset.annotations[test_dataset.annotations['Partition'] == 'test']

        self.train_loader = DataLoader(full_train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
        self.val_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

        self.classes = ('HP', 'SSA')

if __name__ == "__main__":
    import matplotlib.pyplot as plt
    data = MHIST(batch_size=4)
    images, labels = next(iter(data.train_loader))

    # Plot the first 4 images
    fig, axes = plt.subplots(2, 2, figsize=(8, 8))

    for i, ax in enumerate(axes.flat[:4]):
        ax.imshow(images[i].permute(1, 2, 0))
        ax.set_title(str(labels[i]))
        ax.axis('off')

    plt.tight_layout()
    plt.show()

--------------------------------------------------------------------------------
/train/data_utils/Lizard.py:
--------------------------------------------------------------------------------
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torchvision.transforms.v2 as transforms

class SegmentationDataset(Dataset):
    def __init__(self, images, labels, split='train'):
        self.images = images
        self.labels = labels
        self.split = split
        self.train_transform = transforms.Compose([
            transforms.ToImage(),
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.5),
            transforms.ToDtype(torch.float32),
        ])

        self.val_transform = transforms.Compose([
            transforms.ToImage(),
            transforms.Resize((224, 224)),
            transforms.ToDtype(torch.float32),
        ])

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        # Channel 1 of the Lizard label array is the per-pixel class map.
        label = np.expand_dims(self.labels[idx, :, :, 1].astype(np.int64), axis=2)

        # Convert image and label to PyTorch tensors and resize
        if self.split == 'train':
            image, label = self.train_transform(image, label)
        elif self.split == 'val':
            image, label = self.val_transform(image, label)

        return image, torch.squeeze(label).long()

class Lizard():
    def __init__(self, dataset_path='/home/ec2-user/SageMaker/mnt/efs/Lizard', batch_size=16):
        images = np.load(f'{dataset_path}/data/images.npy')
        labels = np.load(f'{dataset_path}/data/labels.npy')

        # Split the data into train and test sets
        train_images, test_images, train_labels, test_labels = train_test_split(
            images, labels, test_size=0.2, random_state=42
        )

        # Create the train and test datasets
        train_dataset = SegmentationDataset(train_images, train_labels, split='train')
        test_dataset = SegmentationDataset(test_images, test_labels, split='val')

        self.train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=8)
        self.test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=8)
        self.id2class = {
            0: 'Background',
            1: 'Neutrophil',
            2: 'Epithelial',
            3: 'Lymphocyte',
            4: 'Plasma',
            5: 'Eosinophil',
            6: 'Connective tissue'
        }

--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# Contributing Guidelines

Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
documentation, we greatly value feedback and contributions from our community.

Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
information to effectively respond to your bug report or contribution.


## Reporting Bugs/Feature Requests

We welcome you to use the GitHub issue tracker to report bugs or suggest features.

When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:

* A reproducible test case or series of steps
* The version of our code being used
* Any modifications you've made relevant to the bug
* Anything unusual about your environment or deployment


## Contributing via Pull Requests
Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:

1. You are working against the latest source on the *main* branch.
2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
3. You open an issue to discuss any significant work - we would hate for your time to be wasted.

To send us a pull request, please:

1. Fork the repository.
2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
3. Ensure local tests pass.
4. Commit to your fork using clear commit messages.
5. Send us a pull request, answering any default questions in the pull request interface.
6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.

GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
[creating a pull request](https://help.github.com/articles/creating-a-pull-request/).


## Finding contributions to work on
Looking at the existing issues is a great way to find something to contribute to. As our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.


## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.


## Security issue notifications
If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.


## Licensing

See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.

--------------------------------------------------------------------------------
/train/data_utils/MSI_TCGA_COAD.py:
--------------------------------------------------------------------------------
import h5py
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import Dataset, DataLoader, dataloader


class MSIDataset(Dataset):
    def __init__(self, excel_file, embedding_dir, max_tiles=1000, split='train'):
        self.max_tiles = int(max_tiles)
        _data = pd.read_excel(excel_file)
        self.data = _data[_data['TCGA Project Code'] == 'COAD'][['TCGA Participant Barcode', 'MSI Status']].dropna(inplace=False)
        # Binarize the labels: MSI-L is grouped with MSS so that the downstream
        # BCEWithLogitsLoss sees {0, 1} targets.
        _label_map = {'MSS': 0, 'MSI-L': 0, 'MSI-H': 1}
        self.data['MSI Status'] = self.data['MSI Status'].map(_label_map)

        # Only keep patients for which we have a WSI embedding file
        self.valid_patient_codes = [p.stem[:12] for p in Path(embedding_dir).glob('*h5')]
        self.data = self.data[self.data['TCGA Participant Barcode'].isin(self.valid_patient_codes)]
        X_train, X_test, y_train, y_test = train_test_split(self.data['TCGA Participant Barcode'], self.data['MSI Status'], test_size=0.2, random_state=42)

        if split == 'train':
            self.data = pd.DataFrame({'TCGA Participant Barcode': X_train.reset_index(drop=True), 'MSI Status': y_train.reset_index(drop=True)})
        else:
            self.data = pd.DataFrame({'TCGA Participant Barcode': X_test.reset_index(drop=True), 'MSI Status': y_test.reset_index(drop=True)})

        self.embedding_dir = embedding_dir
        print(f"kept {len(self.data)} slides")

    def __getitem__(self, index):
        embedding_path = next(Path(self.embedding_dir).glob(f"{self.data['TCGA Participant Barcode'][index]}*.h5"))
        with h5py.File(embedding_path, 'r') as f:
            # Always convert to a float tensor; truncate to at most max_tiles tiles.
            embeddings_array = torch.from_numpy(f['feats'][:self.max_tiles]).float()
        label = float(self.data['MSI Status'][index])

        return embeddings_array, label

    def __len__(self):
        return len(self.data)


def padding_collate(batch):
    """A collate function to pad feature matrices of different lengths."""
    samples_to_pad, other_samples = [], []
    for sample in batch:
        samples_to_pad.append(sample[0])
        other_samples.append(sample[1:])

    features_dim = samples_to_pad[0].size()[-1]
    max_len = max([s.size(0) for s in samples_to_pad])
    padded_dims = (len(samples_to_pad), max_len, features_dim)

    padded_samples = samples_to_pad[0].data.new(*padded_dims).fill_(0.0)

    for i, tensor in enumerate(samples_to_pad):
        length = tensor.size(0)
        padded_samples[i, :length, ...] = tensor[:max_len, ...]
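    # Padded rows remain all-zero; the training script later rebuilds the padding
    # mask from them via `embeddings_b.sum(-1, keepdim=True) == 0.0`.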

    # Batch the remaining members of each tuple using the default collate
    other_samples = dataloader.default_collate(other_samples)

    return (padded_samples, *other_samples)


class MSI_TCGA_COAD():
    def __init__(self, dataset_path='/home/ec2-user/SageMaker/mnt/efs/TCGA-COAD-features', batch_size=16, max_tiles=1000):

        excel_file = f'{dataset_path}/liu.xlsx'

        # Split the dataset into train and test
        dataset_train = MSIDataset(excel_file, dataset_path, max_tiles=max_tiles, split='train')
        dataset_test = MSIDataset(excel_file, dataset_path, max_tiles=max_tiles, split='test')

        self.dataloader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, collate_fn=padding_collate)
        self.dataloader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=False, collate_fn=padding_collate)

--------------------------------------------------------------------------------
/deploy/BatchInference.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71db6435-13e9-46e8-afc5-04b393e76b01",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import sagemaker\n",
    "from sagemaker.transformer import Transformer\n",
    "from sagemaker.model import Model"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b11bd4f7",
   "metadata": {},
   "source": [
    "## Build and Deploy the Inference container\n",
    "\n",
    "Since this embedding extraction job relies on multiple custom libraries (openslide, opencv, ...) we will use a custom SageMaker Model Container.
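 The `build_and_push.sh` script in this folder builds that container image and pushes it to your account's ECR registry; the `wsi-embedding` image name below is only an example.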
\n", 25 | "\n", 26 | "`./build_and_push.sh wsi-embedding`" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "id": "e8a8e455-72e5-4569-94c3-3c2948728685", 33 | "metadata": { 34 | "tags": [] 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "model = Model(\n", 39 | " name=\"wsi-embeddings\",\n", 40 | " image_uri=\"xxx.dkr.ecr.us-east-1.amazonaws.com/patch:latest\",\n", 41 | " role=sagemaker.get_execution_role(),\n", 42 | ")\n", 43 | "\n", 44 | "model.create()" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "id": "749e0646-0580-4c12-bd3e-ed9f88e7bc39", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "import boto3\n", 55 | "import json\n", 56 | "\n", 57 | "def create_manifest_file(bucket_name, prefix=\"\"):\n", 58 | " \"\"\"Create a manifest file for .svs files in the S3 bucket\"\"\"\n", 59 | " s3_client = boto3.client('s3')\n", 60 | " \n", 61 | " # List all objects in the bucket with the given prefix\n", 62 | " paginator = s3_client.get_paginator('list_objects_v2')\n", 63 | " manifest_data = []\n", 64 | " \n", 65 | " for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):\n", 66 | " if 'Contents' in page:\n", 67 | " for obj in page['Contents']:\n", 68 | " if obj['Key'].endswith('.svs'):\n", 69 | " manifest_data.append({\n", 70 | " \"source\": f\"s3://{bucket_name}/{obj['Key']}\"\n", 71 | " })\n", 72 | "\n", 73 | " # Write manifest file to S3\n", 74 | " manifest_content = \"\\n\".join(json.dumps(item) for item in manifest_data)\n", 75 | " manifest_key = \"manifest.jsonl\"\n", 76 | " s3_client.put_object(\n", 77 | " Bucket=bucket_name,\n", 78 | " Key=manifest_key,\n", 79 | " Body=manifest_content.encode('utf-8')\n", 80 | " )\n", 81 | " \n", 82 | " return f\"s3://{bucket_name}/{manifest_key}\"\n", 83 | "\n", 84 | "# Create the manifest file\n", 85 | "manifest_path = create_manifest_file(\n", 86 | " bucket_name=\"pathologybenchmark-s3bucket-u7pe00xtbplu\"\n", 87 | ")\n" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "id": "9b499775-7881-4e6f-8aa6-23fd4f4f9318", 94 | "metadata": { 95 | "tags": [] 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "# Create transformer\n", 100 | "transformer = Transformer(\n", 101 | " model_name=\"wsi-embeddings\",\n", 102 | " instance_count=1,\n", 103 | " instance_type=\"ml.g4dn.xlarge\",\n", 104 | " output_path=\"s3://xxxx/embeddings/\",\n", 105 | " base_transform_job_name=\"wsi-embeddings\",\n", 106 | " accept=\"application/x-embeddings\",\n", 107 | " assemble_with=\"None\",\n", 108 | " max_concurrent_transforms=1,\n", 109 | " strategy=\"SingleRecord\",\n", 110 | " env = {'SAGEMAKER_MODEL_SERVER_TIMEOUT' : '3600',\n", 111 | " 'SAGEMAKER_SERVING_TIME':'3600'}\n", 112 | ")\n", 113 | "\n", 114 | "# Run the transform job directly with the manifest path\n", 115 | "transformer.transform(\n", 116 | " data=manifest_path,\n", 117 | " content_type=\"application/jsonlines\",\n", 118 | " split_type=\"Line\",\n", 119 | " model_client_config={'InvocationsTimeoutInSeconds':3600}\n", 120 | ")\n" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "id": "ad99505c-66e0-4d81-bd5c-f818e24b21a9", 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [] 130 | } 131 | ], 132 | "metadata": { 133 | "kernelspec": { 134 | "display_name": "conda_pytorch_p310", 135 | "language": "python", 136 | "name": "conda_pytorch_p310" 137 | }, 138 | "language_info": { 139 | "codemirror_mode": { 140 | "name": "ipython", 141 | 
"version": 3 142 | }, 143 | "file_extension": ".py", 144 | "mimetype": "text/x-python", 145 | "name": "python", 146 | "nbconvert_exporter": "python", 147 | "pygments_lexer": "ipython3", 148 | "version": "3.10.14" 149 | } 150 | }, 151 | "nbformat": 4, 152 | "nbformat_minor": 5 153 | } 154 | -------------------------------------------------------------------------------- /train/train_msi_tcga.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import torch 4 | import shutil 5 | from tqdm import tqdm 6 | import numpy as np 7 | import pandas as pd 8 | import torch.optim as optim 9 | import torch.nn.functional as F 10 | from sklearn.metrics import accuracy_score, roc_auc_score 11 | 12 | from data_utils.MSI_TCGA_COAD import MSI_TCGA_COAD 13 | from deepmil import DeepMIL 14 | 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--batch-size', type=int, default=32) 19 | parser.add_argument('--epochs', type=int, default=20) 20 | parser.add_argument('--learning-rate', type=float, default=1e-3) 21 | parser.add_argument('--max-tiles', type=float, default=10000) 22 | parser.add_argument('--data-dir', type=str, default=os.environ.get('SM_CHANNEL_TRAINING')) 23 | parser.add_argument('--model-dir', type=str, default=os.environ.get('SM_MODEL_DIR')) 24 | parser.add_argument('--output-dir', type=str, default=os.environ.get('SM_OUTPUT_DATA_DIR')) 25 | return parser.parse_args() 26 | 27 | def save_checkpoint(model, is_best=False): 28 | if not os.path.exists(args.model_dir): 29 | os.makedirs(args.model_dir) 30 | 31 | 32 | # Save to file 33 | filename = os.path.join(args.model_dir, "wsi_classification_model.pth") 34 | print("saved model...") 35 | 36 | torch.save(model.state_dict(), filename) 37 | 38 | if is_best: 39 | best_filename = os.path.join(args.model_dir, 'wsi_classification_model_best.pth') 40 | shutil.copyfile(filename, best_filename) 41 | print("saved new model...") 42 | 43 | 44 | args = parse_args() 45 | 46 | # Load Dataset 47 | dataset = MSI_TCGA_COAD(dataset_path=args.data_dir, batch_size=args.batch_size, max_tiles=args.max_tiles) 48 | dataloader_train = dataset.dataloader_train 49 | dataloader_test = dataset.dataloader_test 50 | 51 | # Define the device (GPU or CPU) 52 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 53 | 54 | # Define the model 55 | model = DeepMIL( 56 | in_features=1536, 57 | out_features=1, 58 | ).to(device) 59 | 60 | 61 | # Define the loss function and optimizer 62 | criterion = torch.nn.BCEWithLogitsLoss() 63 | optimizer = optim.Adam(model.parameters(), lr=args.learning_rate) 64 | 65 | # Training loop 66 | num_epochs = args.epochs 67 | best_acc = 0.0 68 | 69 | 70 | for epoch in range(num_epochs): 71 | avg_loss = 0 72 | model.train() 73 | pbar = tqdm(dataloader_train, total=len(dataloader_train), desc=f"Epoch {epoch+1}/{num_epochs}") 74 | for batch_idx, (embeddings_b, labels_b) in enumerate(pbar): 75 | embeddings_b, labels_b = embeddings_b.to(device), labels_b.to(device) 76 | mask_b = embeddings_b.sum(-1, keepdim=True) == 0.0 77 | mask_b = mask_b.to("cuda") 78 | 79 | optimizer.zero_grad() 80 | 81 | # Forward pass 82 | logits_b = model(embeddings_b, mask_b).squeeze() 83 | loss = criterion(logits_b, labels_b) 84 | 85 | 86 | # Backward pass and optimization 87 | loss.backward() 88 | optimizer.step() 89 | avg_loss += loss.item() 90 | 91 | pbar.set_postfix({'loss': f'{avg_loss / (batch_idx+1):.4f}'}) 92 | 93 | if (batch_idx + 1) % 10 == 0: 94 | 
print(f"Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{len(dataloader_train)}], Loss: {loss.item():.4f}") 95 | 96 | print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {avg_loss/len(dataloader_train):.4f}") 97 | 98 | # Validation 99 | model.eval() 100 | with torch.no_grad(): 101 | validation_avg_loss = 0 102 | true_labels, predicted_labels = [], [] 103 | 104 | for val_embeddings_b, val_labels_b in dataloader_test: 105 | mask_b = val_embeddings_b.sum(-1, keepdim=True) == 0.0 106 | val_embeddings_b, val_labels_b = val_embeddings_b.to("cuda"), val_labels_b.to("cuda") 107 | mask_b = mask_b.to("cuda") 108 | 109 | logits_b = model(val_embeddings_b, mask_b).squeeze() 110 | logits_b = logits_b.unsqueeze(0) if logits_b.dim() == 0 else logits_b 111 | preds_b = F.sigmoid(logits_b) 112 | 113 | #preds_b = torch.argmax(val_logits, dim=1) 114 | 115 | true_labels.extend(val_labels_b.cpu().numpy()) 116 | predicted_labels.extend(preds_b.cpu().numpy()) 117 | validation_avg_loss += criterion(preds_b, val_labels_b).item() 118 | 119 | accuracy = accuracy_score(np.array(true_labels), np.array(predicted_labels)>0.5) 120 | rocauc = roc_auc_score(np.array(true_labels), np.array(predicted_labels)) 121 | 122 | print(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {validation_avg_loss/len(dataloader_test):.4f}") 123 | print(f"Validation Accuracy: {accuracy}") 124 | print(f"Validation ROC AUC: {rocauc}") 125 | 126 | if accuracy > best_acc: 127 | best_acc = accuracy 128 | save_checkpoint(model, is_best=True) 129 | else: 130 | save_checkpoint(model, is_best=False) 131 | 132 | model.train() 133 | 134 | -------------------------------------------------------------------------------- /train/train_mhist.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | import torch.nn as nn 5 | import torch.utils.data as data 6 | from torchvision import transforms 7 | from hoptimus_model_backbone import HOPTIMUSZero 8 | from data_utils.MHIST import MHIST 9 | from torch.utils.tensorboard import SummaryWriter 10 | from tqdm import tqdm 11 | import shutil 12 | from sklearn import metrics 13 | import numpy as np 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('--batch-size', type=int, default=32) 18 | parser.add_argument('--epochs', type=int, default=20) 19 | parser.add_argument('--learning-rate', type=float, default=1e-3) 20 | parser.add_argument('--weight-decay', type=float, default=1e-4) 21 | parser.add_argument('--data-dir', type=str, default=os.environ.get('SM_CHANNEL_TRAINING')) 22 | parser.add_argument('--model-dir', type=str, default=os.environ.get('SM_MODEL_DIR')) 23 | parser.add_argument('--output-dir', type=str, default=os.environ.get('SM_OUTPUT_DATA_DIR')) 24 | parser.add_argument('--model_name', type=str, default='HOPTIMUSZero') 25 | return parser.parse_args() 26 | 27 | class LinearProbe(nn.Module): 28 | def __init__(self, output_size): 29 | super().__init__() 30 | self.encoder = HOPTIMUSZero() 31 | # Freeze the encoder weights 32 | for param in self.encoder.parameters(): 33 | param.requires_grad = False 34 | 35 | self.linear = nn.Linear(1536, output_size) 36 | 37 | def forward(self, x): 38 | with torch.inference_mode(False): 39 | features = self.encoder(x)['x_norm_cls_token'] 40 | features = features.clone().detach().requires_grad_(True) 41 | return self.linear(features) 42 | 43 | class Trainer(object): 44 | def __init__(self): 45 | args = parse_args() 46 | print(f"args: {args}") 47 | 
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print("using device: ", self.device)
        self.batch_size = args.batch_size
        self.lr = args.learning_rate
        self.weight_decay = args.weight_decay
        self.epochs = args.epochs
        self.model_dir = args.model_dir
        self.output_dir = args.output_dir
        self.loss_func = nn.BCEWithLogitsLoss()

        self.writer = SummaryWriter(log_dir=os.path.join(self.output_dir, 'tensorboard_logs'))
        self.best_acc = 0

        self.dataset = MHIST(
            dataset_path=os.environ.get('SM_CHANNEL_TRAINING', '/home/ec2-user/SageMaker/mnt/efs/MHIST/'),
            batch_size=self.batch_size
        )

        print("data access: SUCCESS")

        self.train_loader = self.dataset.train_loader
        self.val_loader = self.dataset.val_loader

        self.model = LinearProbe(output_size=1).to(self.device)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay)

    def train(self):
        for epoch in range(self.epochs):
            self.model.train()
            avg_loss = 0

            pbar = tqdm(self.train_loader, total=len(self.train_loader), desc=f"Epoch {epoch+1}/{self.epochs}")

            for i, (images, labels) in enumerate(pbar):
                self.optimizer.zero_grad()
                images = images.to(self.device)
                labels = labels.to(self.device)
                logits = self.model(images).squeeze()
                loss = self.loss_func(logits, labels)
                loss.backward()
                self.optimizer.step()

                avg_loss += loss.item()

                pbar.set_postfix({'loss': f'{avg_loss / (i+1):.4f}'})

                if i % 100 == 0:
                    self.writer.add_scalar('training loss', avg_loss / (i+1), epoch * len(self.train_loader) + i)

            print(f"Epoch {epoch+1}/{self.epochs} - Training Loss: {avg_loss / len(self.train_loader):.4f}")

            val_loss, val_acc = self.validation(epoch)

            print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}")

    def validation(self, epoch):
        self.model.eval()
        total_loss = 0
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for images, labels in self.val_loader:
                images = images.to(self.device)
                labels = labels.to(self.device)

                logits = self.model(images).squeeze()
                preds = torch.sigmoid(logits)

                loss = self.loss_func(logits, labels)
                total_loss += loss.item()

                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        predictions = np.array(all_preds)
        labels = np.array(all_labels)
        acc = metrics.accuracy_score(labels, predictions > 0.5)
        auc = metrics.roc_auc_score(labels, predictions)

        avg_loss = total_loss / len(self.val_loader)

        print(f"Validation Loss: {avg_loss:.4f}")
        print(f"Validation AUC: {auc:.4f}")
        print(f"Validation Accuracy: {acc:.4f}")

        self.writer.add_scalar('validation/loss', avg_loss, epoch)
        self.writer.add_scalar('validation/accuracy', acc, epoch)
        self.writer.add_scalar('validation/auc', auc, epoch)

        # Track the best checkpoint by validation accuracy.
        if acc > self.best_acc:
            self.best_acc = acc
            self.save_checkpoint(is_best=True)
        else:
            self.save_checkpoint(is_best=False)

        return avg_loss, acc

    def save_checkpoint(self, is_best=False):
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)

        # Save to file
        filename = os.path.join(self.model_dir, "classification_model.pth")
        torch.save(self.model.state_dict(), filename)
        print("saved model...")

        if is_best:
            best_filename = os.path.join(self.model_dir, 'classification_model_best.pth')
            shutil.copyfile(filename, best_filename)
            print("saved new best model...")

if __name__ == "__main__":
    print("Started training container. Running training script.")
    trainer = Trainer()
    trainer.train()

--------------------------------------------------------------------------------
/infra/infra-stack.yml:
--------------------------------------------------------------------------------
AWSTemplateFormatVersion: '2010-09-09'
Description: EFS and SageMaker infrastructure to fine-tune pathology foundation models on downstream tasks

Parameters:
  VpcCidr:
    Type: String
    Default: 10.0.0.0/16
    Description: The CIDR block for the VPC.
  PrivateSubnetCidr1:
    Type: String
    Default: 10.0.1.0/24
    Description: The CIDR block for the first private subnet.
  PrivateSubnetCidr2:
    Type: String
    Default: 10.0.2.0/24
    Description: The CIDR block for the second private subnet.
  PublicSubnetCidr1:
    Type: String
    Default: 10.0.10.0/24
    Description: The CIDR block for the first public subnet.
  PublicSubnetCidr2:
    Type: String
    Default: 10.0.20.0/24
    Description: The CIDR block for the second public subnet.
  NotebookInstanceType:
    Type: String
    Default: ml.t3.medium
    Description: The instance type for the SageMaker Notebook Instance.


Resources:
  VPC:
    Type: AWS::EC2::VPC
    Properties:
      CidrBlock: !Ref VpcCidr
      EnableDnsHostnames: true
      EnableDnsSupport: true

  PrivateSubnet1:
    Type: AWS::EC2::Subnet
    Properties:
      VpcId: !Ref VPC
      CidrBlock: !Ref PrivateSubnetCidr1
      AvailabilityZone: !Select [0, !GetAZs '']

  PrivateSubnet2:
    Type: AWS::EC2::Subnet
    Properties:
      VpcId: !Ref VPC
      CidrBlock: !Ref PrivateSubnetCidr2
      AvailabilityZone: !Select [1, !GetAZs '']

  PublicSubnet1:
    Type: AWS::EC2::Subnet
    Properties:
      VpcId: !Ref VPC
      CidrBlock: !Ref PublicSubnetCidr1
      AvailabilityZone: !Select [0, !GetAZs '']
      MapPublicIpOnLaunch: true

  PublicSubnet2:
    Type: AWS::EC2::Subnet
    Properties:
      VpcId: !Ref VPC
      CidrBlock: !Ref PublicSubnetCidr2
      AvailabilityZone: !Select [1, !GetAZs '']
      MapPublicIpOnLaunch: true

  InternetGateway:
    Type: AWS::EC2::InternetGateway

  VPCGatewayAttachment:
    Type: AWS::EC2::VPCGatewayAttachment
    Properties:
      VpcId: !Ref VPC
      InternetGatewayId: !Ref InternetGateway

  PublicRouteTable:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId: !Ref VPC

  PublicRoute:
    Type: AWS::EC2::Route
    DependsOn: VPCGatewayAttachment
    Properties:
      RouteTableId: !Ref PublicRouteTable
      DestinationCidrBlock: 0.0.0.0/0
      GatewayId: !Ref InternetGateway

  PublicSubnetRouteTableAssociation1:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      SubnetId: !Ref PublicSubnet1
      RouteTableId: !Ref PublicRouteTable

  PublicSubnetRouteTableAssociation2:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      SubnetId: !Ref PublicSubnet2
      RouteTableId: !Ref PublicRouteTable

  NatGateway:
    Type: AWS::EC2::NatGateway
    Properties:
      AllocationId: !GetAtt NatGatewayEIP.AllocationId
      SubnetId: !Ref PublicSubnet1

  NatGatewayEIP:
    Type: AWS::EC2::EIP
    Properties:
      Domain: vpc

  PrivateRouteTable:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId: !Ref VPC

  PrivateRoute:
    Type: AWS::EC2::Route
    Properties:
      RouteTableId: !Ref PrivateRouteTable
      DestinationCidrBlock: 0.0.0.0/0
      NatGatewayId: !Ref NatGateway

  PrivateSubnetRouteTableAssociation1:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      SubnetId: !Ref PrivateSubnet1
      RouteTableId: !Ref PrivateRouteTable

  PrivateSubnetRouteTableAssociation2:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      SubnetId: !Ref PrivateSubnet2
      RouteTableId: !Ref PrivateRouteTable

  S3VPCEndpoint:
    Type: AWS::EC2::VPCEndpoint
    Properties:
      VpcId: !Ref VPC
      ServiceName: !Sub 'com.amazonaws.${AWS::Region}.s3'
      RouteTableIds:
        - !Ref PublicRouteTable
        - !Ref PrivateRouteTable

  EFSFileSystem:
    Type: AWS::EFS::FileSystem
    Properties:
      PerformanceMode: generalPurpose
      Encrypted: True
      LifecyclePolicies:
        - TransitionToIA: 'AFTER_30_DAYS'

  EFSMountTarget1:
    Type: AWS::EFS::MountTarget
    Properties:
      FileSystemId: !Ref EFSFileSystem
      SubnetId: !Ref PrivateSubnet1
      SecurityGroups:
        - !Ref EFSSecurityGroup

  EFSMountTarget2:
    Type: AWS::EFS::MountTarget
    Properties:
      FileSystemId: !Ref EFSFileSystem
      SubnetId: !Ref PrivateSubnet2
      SecurityGroups:
        - !Ref EFSSecurityGroup

  EFSSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: Security group for EFS access
      SecurityGroupIngress:
        - IpProtocol: tcp
          FromPort: 2049
          ToPort: 2049
          SourceSecurityGroupId: !Ref SageMakerSecurityGroup
      VpcId: !Ref VPC

  SageMakerSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: Security group for SageMaker
      SecurityGroupEgress:
        - IpProtocol: -1
          CidrIp: 0.0.0.0/0
      VpcId: !Ref VPC

  SageMakerLifecycleConfig:
    Type: AWS::SageMaker::NotebookInstanceLifecycleConfig
    Properties:
      NotebookInstanceLifecycleConfigName: EFSMountConfig
      OnStart:
        - Content:
            Fn::Base64:
              !Sub |
                #!/bin/bash
                set -e

                # Mount the EFS file system
                sudo mkdir -p /home/ec2-user/SageMaker/mnt/efs
                sudo mount -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport ${EFSFileSystem}.efs.${AWS::Region}.amazonaws.com:/ /home/ec2-user/SageMaker/mnt/efs

                # Set appropriate permissions
                sudo chmod go+rw /home/ec2-user/SageMaker/mnt/efs

                echo "EFS mounted successfully"

  SageMakerNotebookInstance:
    Type: AWS::SageMaker::NotebookInstance
    Properties:
      InstanceType: !Ref NotebookInstanceType
      RoleArn: !GetAtt SageMakerRole.Arn
      VolumeSizeInGB: 50
      SubnetId: !Ref PrivateSubnet1
      SecurityGroupIds:
        - !Ref SageMakerSecurityGroup
      LifecycleConfigName: EFSMountConfig

  SageMakerRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service: sagemaker.amazonaws.com
            Action: "sts:AssumeRole"
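      # Broad managed policies keep this sample simple; scope them down for production use.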
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
        - arn:aws:iam::aws:policy/AmazonElasticFileSystemFullAccess
        - arn:aws:iam::aws:policy/CloudWatchFullAccess
        - arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess

Outputs:
  EFSFileSystemId:
    Description: The ID of the EFS file system.
    Value: !Ref EFSFileSystem

  SageMakerNotebookInstanceName:
    Description: The name of the SageMaker Notebook Instance.
    Value: !Ref SageMakerNotebookInstance

  SageMakerSubnet:
    Description: The subnet for the SageMaker Notebook Instance.
    Value: !Ref PrivateSubnet1

  SageMakerSecurityGroup:
    Description: The security group for the SageMaker Notebook Instance.
    Value: !Ref SageMakerSecurityGroup

--------------------------------------------------------------------------------
/train/train_lizard.py:
--------------------------------------------------------------------------------
## Code inspired by https://github.com/NielsRogge/Transformers-Tutorials/blob/master/DINOv2/Train_a_linear_classifier_on_top_of_DINOv2_for_semantic_segmentation.ipynb

import argparse
import os
import torch
import torch.nn as nn
from hoptimus_model_backbone import HOPTIMUSZero
from data_utils.Lizard import Lizard
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
import evaluate
import shutil
import numpy as np
import matplotlib.pyplot as plt
from torchmetrics.classification import Dice


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch-size', type=int, default=32)
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--learning-rate', type=float, default=1e-5)
    parser.add_argument('--data-dir', type=str, default=os.environ.get('SM_CHANNEL_TRAINING'))
    parser.add_argument('--model-dir', type=str, default=os.environ.get('SM_MODEL_DIR'))
    parser.add_argument('--output-dir', type=str, default=os.environ.get('SM_OUTPUT_DATA_DIR'))
    parser.add_argument('--model_name', type=str, default='HOPTIMUSZero')
    return parser.parse_args()

class SegmentationModel(nn.Module):
    def __init__(self, num_classes=7):
        # num_classes includes the background class!
        super().__init__()

        self.encoder = HOPTIMUSZero()

        # Define a convolutional segmentation head on top of the encoder's patch tokens
        self.segmentation_conv = nn.Sequential(
            nn.Upsample(scale_factor=2),
            nn.Conv2d(1536, 64, (3, 3), padding=(1, 1)),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(64, num_classes, (3, 3), padding=(1, 1)),
        )

    def forward(self, x):
        batch_size = x.shape[0]

        with torch.inference_mode(False):
            features = self.encoder(x)['x_norm_patch_tokens']

        # Detach so only the segmentation head is trained, then reshape the
        # 16x16 grid of patch tokens back into a spatial feature map.
        x = features.clone().detach().requires_grad_(True)
        x = x.permute(0, 2, 1)
        x = x.reshape(batch_size, 1536, 16, 16)
        x = self.segmentation_conv(x)
        logits = torch.nn.functional.interpolate(x, size=(224, 224), mode="bilinear", align_corners=False)
        return logits

class Trainer(object):
    def __init__(self):
        args = parse_args()
        print(f"args: {args}")

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print("using device: ", self.device)
        self.batch_size = args.batch_size
        self.lr = args.learning_rate
        self.epochs = args.epochs
        self.model_dir = args.model_dir
        self.output_dir = args.output_dir

        self.writer = SummaryWriter(log_dir='runs')
        self.best_iou = 0

        self.dataset = Lizard(dataset_path=os.environ.get('SM_CHANNEL_TRAINING', '/home/ec2-user/SageMaker/mnt/efs/Lizard/'), batch_size=self.batch_size)
        print("data access: SUCCESS")

        self.train_loader = self.dataset.train_loader
        self.val_loader = self.dataset.test_loader

        self.metric = evaluate.load('mean_iou', num_labels=len(self.dataset.id2class.keys()), ignore_index=0)
        self.dice = Dice(average='micro', ignore_index=0).to(self.device)

        self.model = SegmentationModel(num_classes=len(self.dataset.id2class.keys())).to(self.device)

        self.criterion = nn.CrossEntropyLoss(ignore_index=0)
        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr)

    def train(self):
        for epoch in range(self.epochs):
            self.model.train()
            avg_loss = 0
            avg_dice = 0

            pbar = tqdm(self.train_loader, total=len(self.train_loader), desc=f"Epoch {epoch+1}/{self.epochs}")

            for i, (images, labels) in enumerate(pbar):
                images = images.to(self.device)
                labels = labels.to(self.device)
                outputs = self.model(images)
                loss = self.criterion(outputs, labels)

                loss.backward()
                self.optimizer.step()
                avg_loss += loss.item()

                # Zero the parameter gradients for the next iteration
                self.optimizer.zero_grad()

                # Compute mean_iou metric
                predicted = outputs.argmax(dim=1)
                self.metric.add_batch(predictions=predicted.detach().cpu().numpy(), references=labels.detach().cpu().numpy())
                avg_dice += self.dice(outputs, labels).item()

                metrics = self.metric.compute(num_labels=len(self.dataset.id2class.keys()), ignore_index=0, reduce_labels=False)
                mean_iou = metrics['mean_iou']
                mean_acc = metrics['mean_accuracy']
                pbar.set_postfix({'loss': f'{avg_loss / (i+1):.4f}', 'mean_iou': f'{mean_iou:.4f}', 'mean_acc': f'{mean_acc:.4f}'})

            # Training loss
            print(f"Epoch {epoch+1}/{self.epochs} - Training Loss: {avg_loss / len(self.train_loader):.4f}, Training Dice: {avg_dice/len(self.train_loader):.4f}")

            # Validation
            val_loss, val_mean_iou, val_mean_acc, val_dice = self.validation()
            print(f"Epoch {epoch+1}/{self.epochs} - Validation Loss: {val_loss:.4f}")
            print(f"Epoch {epoch+1}/{self.epochs} - Validation Mean_IOU: {val_mean_iou:.4f}")
            print(f"Epoch {epoch+1}/{self.epochs} - Validation Mean_Accuracy: {val_mean_acc:.4f}")
            print(f"Epoch {epoch+1}/{self.epochs} - Validation Mean_DICE: {val_dice:.4f}")


    def validation(self):
        self.model.eval()
        total_val_loss = 0
        total_val_dice = 0

        with torch.no_grad():
            for images, labels in self.val_loader:
                images = images.to(self.device)
                labels = labels.to(self.device)
                outputs = self.model(images)
                loss = self.criterion(outputs, labels)

                total_val_loss += loss.item()

                # Compute mean_iou metric
                predicted = outputs.argmax(dim=1)
                self.metric.add_batch(predictions=predicted.detach().cpu().numpy(), references=labels.detach().cpu().numpy())
                total_val_dice += self.dice(outputs, labels).item()

        val_loss = total_val_loss / len(self.val_loader)
        val_dice = total_val_dice / len(self.val_loader)

        val_metrics = self.metric.compute(num_labels=len(self.dataset.id2class.keys()), ignore_index=0)
        val_mean_iou = val_metrics['mean_iou']
        val_mean_acc = val_metrics['mean_accuracy']

        if val_mean_iou > self.best_iou:
            self.best_iou = val_mean_iou
            self.save_checkpoint(is_best=True)
        else:
            self.save_checkpoint(is_best=False)

        return val_loss, val_mean_iou, val_mean_acc, val_dice

    def save_checkpoint(self, is_best=False):
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)

        # Save to file
        filename = os.path.join(self.model_dir, "segmentation_model.pth")
        torch.save(self.model.state_dict(), filename)
        print("saved model...")

        if is_best:
            best_filename = os.path.join(self.model_dir, 'segmentation_model_best.pth')
            shutil.copyfile(filename, best_filename)
            print("saved new best model...")


def main():
    trainer = Trainer()
    trainer.train()


def visualize_map(dataset, image, segmentation_map):
    """Debug utility function to plot the segmentation map."""
    # Map every class to a random color
    id2color = {k: list(np.random.choice(range(256), size=3)) for k, v in dataset.id2class.items()}

    color_seg = np.zeros((segmentation_map.shape[0], segmentation_map.shape[1], 3), dtype=np.uint8)  # height, width, 3
    for label, color in id2color.items():
        color_seg[segmentation_map == label, :] = color

    # Show image + mask
    img = np.array(image) * 0.5 + color_seg * 0.5
    img = img.astype(np.uint8)

    plt.figure(figsize=(15, 10))
    plt.imshow(img)
    plt.show()


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/preprocessing/preprocessing-code/generate_features.py:
--------------------------------------------------------------------------------
from pathlib import Path
import os
import argparse
import time
from cucim import CuImage
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import cv2
from PIL import Image
import numpy as np
from matplotlib import pyplot as plt
import random
import torch
from torch.utils.data import Dataset, DataLoader
DataLoader 15 | from torchvision import transforms 16 | from hoptimus_model_backbone import HOPTIMUSZero 17 | import h5py 18 | 19 | def parse_args(): 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('--target_mpp', type=float, default=0.5, help='Target microns per pixel') 22 | parser.add_argument('--num_process', type=int, default=4, help='Number of processes to use') 23 | parser.add_argument('--tile_size', type=int, default=224, help='Size of the tile') 24 | parser.add_argument('--tile_count', type=int, default=1000, help='Number of tiles') 25 | 26 | args = parser.parse_args() 27 | 28 | # Print all arguments 29 | print("Arguments:") 30 | for arg in vars(args): 31 | print(f" {arg}: {getattr(args, arg)}") 32 | 33 | return args 34 | 35 | def get_slide_mpp(slide: CuImage) -> float | None: 36 | """Get the slide resolution in MPP.""" 37 | if "aperio" in slide.metadata and "MPP" in slide.metadata["aperio"]: 38 | return float(slide.metadata["aperio"]["MPP"]) 39 | return None 40 | 41 | def canny_fcn(patch: np.ndarray) -> bool: 42 | """Check if a patch is a foreground patch using Canny edge detection.""" 43 | patch_img = Image.fromarray(patch) 44 | tile_to_greyscale = patch_img.convert("L") 45 | # tile_to_greyscale is a PIL.Image.Image with image mode L 46 | # Note: If you have an L mode image, that means it is 47 | # a single channel image - normally interpreted as greyscale. 48 | # The L means that it just stores the Luminance. 49 | # It is very compact, but only stores a greyscale, not colour. 50 | tile2array = np.array(tile_to_greyscale) 51 | # Hardcoded thresholds. 52 | edge = cv2.Canny(tile2array, 40, 100) 53 | # Avoid dividing by zero. 54 | edge = edge / np.max(edge) if np.max(edge) != 0 else 0 55 | edge = ( 56 | ((np.sum(np.sum(edge)) / (tile2array.shape[0] * tile2array.shape[1])) * 100) 57 | if (tile2array.shape[0] * tile2array.shape[1]) != 0 58 | else 0 59 | ) 60 | is_foreground_image = edge >= 2.0 61 | return is_foreground_image 62 | 63 | def get_tile_and_check_is_foreground( 64 | slide: CuImage, location: tuple[int, int], level: int, tile_size: int 65 | ) -> tuple[np.ndarray, tuple[int, int], bool]: 66 | tile = slide.read_region( 67 | location=location, 68 | level=level, 69 | size=(tile_size, tile_size), 70 | ) 71 | tile = np.asarray(tile) 72 | is_foreground = canny_fcn(tile) 73 | return tile, location, is_foreground 74 | 75 | def get_level_closest_to_mpp(slide: CuImage, target_mpp: float) -> int: 76 | """Get the slide level closest to the target MPP.""" 77 | slide_mpp = get_slide_mpp(slide) # Slide resolution in MPP. 78 | if slide_mpp is None: 79 | raise ValueError("Slide MPP is not available in metadata") 80 | # Get the resolutions of the pyramid. 
81 | slide_level_mpps = [ 82 | slide_mpp * float(d) for d in slide.resolutions["level_downsamples"] 83 | ] 84 | level_closest_to_mpp = int( 85 | np.argmin([np.abs(mpp - target_mpp) for mpp in slide_level_mpps]) 86 | ) 87 | return level_closest_to_mpp 88 | 89 | class SlideTileDataset(Dataset): 90 | def __init__(self, patches, transform): 91 | self.tiles = patches 92 | self.transform = transform 93 | 94 | def __len__(self): 95 | return len(self.tiles) 96 | 97 | def __getitem__(self, i: int) -> torch.Tensor: 98 | image = Image.fromarray(self.tiles[i]) 99 | image = self.transform(image) 100 | return image 101 | 102 | 103 | def main(args): 104 | slide_paths = [path for path in Path(SLIDE_PATHS).glob('**/*.svs')] 105 | print(f'Found {len(slide_paths)} slide(s)') 106 | 107 | # Create Output Directory 108 | Path(OUTDIR).mkdir(parents=True, exist_ok=True) 109 | 110 | # Load Model & preprocessing steps 111 | model = HOPTIMUSZero() 112 | model.to("cuda") 113 | model.eval() 114 | 115 | transform = transforms.Compose( 116 | [ 117 | transforms.ToTensor(), 118 | transforms.Normalize( 119 | mean=(0.707223, 0.578729, 0.703617), 120 | std=(0.211883, 0.230117, 0.177517), 121 | ), 122 | ] 123 | ) 124 | print("FM Model loaded to GPU") 125 | 126 | for idx, slide_path in enumerate(slide_paths): 127 | slide_name = Path(slide_path).stem 128 | print(f"Processing slide #{idx+1}/{len(slide_paths)}, with name {slide_name}") 129 | 130 | if Path(f'{OUTDIR}/{slide_name}.h5').exists(): 131 | print(f"Embeddings for {slide_name} already exist. Skipping slide") 132 | continue 133 | 134 | # Read image using CuCim and CuPy 135 | slide_image = CuImage(slide_path.as_posix()) 136 | 137 | try: 138 | # For simplicity, we use the pyramid level whose MPP is 139 | # closest to the target MPP. 140 | # /!\ The actual MPP used to get the tiles can therefore be quite different 141 | # from the target MPP, for instance if the slide only has the MPP 142 | # [0.25, 1.0, 2.0]. 143 | level = get_level_closest_to_mpp(slide_image, float(args.target_mpp)) 144 | except Exception: 145 | print(f"Could not find MPP for {slide_path}, going to next slide...") 146 | continue 147 | 148 | slide_dims_at_level = slide_image.resolutions["level_dimensions"][level] 149 | num_tiles_at_level = ( 150 | slide_dims_at_level[0] // args.tile_size, 151 | slide_dims_at_level[1] // args.tile_size, 152 | ) 153 | 154 | print( 155 | f"Total number of tiles found: {num_tiles_at_level[0] * num_tiles_at_level[1]}." 
156 | ) 157 | locations = [ 158 | (i * args.tile_size, j * args.tile_size) 159 | for i in range(num_tiles_at_level[0]) 160 | for j in range(num_tiles_at_level[1]) 161 | ] 162 | random.shuffle(locations) 163 | futures = [] 164 | with ThreadPoolExecutor() as executor: 165 | for location in locations: 166 | futures.append( 167 | executor.submit( 168 | get_tile_and_check_is_foreground, 169 | slide=slide_image, 170 | location=location, 171 | level=level, 172 | tile_size=args.tile_size, 173 | ) 174 | ) 175 | foreground_tiles = [] 176 | for future in as_completed(futures): 177 | tile, location, is_foreground = future.result() 178 | if is_foreground: 179 | foreground_tiles.append(tile) 180 | 181 | if len(foreground_tiles) >= args.tile_count: 182 | executor.shutdown(wait=False, cancel_futures=True) 183 | break 184 | print(f"Number of foreground tiles kept: {len(foreground_tiles)}.") 185 | 186 | dataset = SlideTileDataset(foreground_tiles, transform=transform) 187 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=128, shuffle=False) 188 | print("Starting feature extraction ...") 189 | start = time.time() 190 | features = [] 191 | with torch.no_grad(): 192 | with torch.autocast(device_type="cuda", dtype=torch.float16): 193 | for batch in tqdm(dataloader): 194 | batch = batch.to("cuda") 195 | features_b = model(batch)['x_norm_cls_token'] 196 | features.append(features_b.cpu().numpy()) 197 | 198 | features = np.concatenate(features, axis=0) 199 | end = time.time() 200 | print( 201 | f"Feature extraction done in {end-start:.2f} seconds! Features shape: {features.shape}" 202 | ) 203 | with h5py.File(f'{OUTDIR}/{slide_name}.h5', 'w') as f: 204 | f['feats'] = features 205 | print(f"Saved embeddings for {slide_name}") 206 | 207 | def debug_cuda_versions(): 208 | print("CUDA Version:") 209 | try: 210 | import pycuda.driver as cuda 211 | cuda_version = cuda.get_version() 212 | print(f"CUDA version: {cuda_version[0]}.{cuda_version[1]}") 213 | except ImportError: 214 | print("pycuda not found") 215 | 216 | # print("\nCuPy Version:") 217 | # try: 218 | # import cupy as cp 219 | # cupy_version = cp.__version__ 220 | # print(f"CuPy version: {cupy_version}") 221 | # except ImportError: 222 | # print("CuPy not found") 223 | 224 | print("\nCuCIM Version:") 225 | try: 226 | import cucim 227 | cucim_version = cucim.__version__ 228 | print(f"CuCIM version: {cucim_version}") 229 | except ImportError: 230 | print("CuCIM not found") 231 | 232 | print("\nTorch Version:") 233 | try: 234 | import torch 235 | torch_version = torch.__version__ 236 | torch_cuda_version = torch.version.cuda 237 | print(f"PyTorch version: {torch_version}") 238 | print(f"PyTorch CUDA version: {torch_cuda_version}") 239 | except ImportError: 240 | print("PyTorch not found") 241 | 242 | 243 | 244 | if __name__ == "__main__": 245 | debug_cuda_versions() 246 | SLIDE_PATHS = os.environ.get('SM_CHANNEL_DATASET', '/home/ec2-user/SageMaker/mnt/efs/TCGA-COAD') 247 | OUTDIR = os.environ.get('SM_CHANNEL_OUTPUT', '/home/ec2-user/SageMaker/mnt/efs/TCGA-COAD-acc/') 248 | args = parse_args() 249 | main(args) 250 | -------------------------------------------------------------------------------- /deploy/inference.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | import argparse 4 | import time 5 | from cucim import CuImage 6 | from concurrent.futures import ThreadPoolExecutor, as_completed 7 | from tqdm import tqdm 8 | import cv2 9 | from PIL import Image 10 | import 
numpy as np 11 | import boto3 12 | from matplotlib import pyplot as plt 13 | import random 14 | import torch 15 | import json 16 | from torch.utils.data import Dataset, DataLoader 17 | from torchvision import transforms 18 | from hoptimus_model_backbone import HOPTIMUSZero 19 | import h5py 20 | 21 | TARGET_MPP = 0.5 22 | TILE_SIZE = 224 23 | TILE_COUNT = 1000 24 | 25 | def upload_to_s3(local_path: str, s3_bucket: str, s3_key: str) -> str: 26 | """ 27 | Upload a file to S3 and return the S3 URI 28 | """ 29 | s3_client = boto3.client('s3') 30 | try: 31 | s3_client.upload_file(local_path, s3_bucket, s3_key) 32 | s3_uri = f"s3://{s3_bucket}/{s3_key}" 33 | print(f"Successfully uploaded file to {s3_uri}") 34 | return s3_uri 35 | except Exception as e: 36 | print(f"Error uploading to S3: {str(e)}") 37 | raise e 38 | 39 | def get_slide_mpp(slide: CuImage) -> float | None: 40 | """Get the slide resolution in MPP.""" 41 | if "aperio" in slide.metadata and "MPP" in slide.metadata["aperio"]: 42 | return float(slide.metadata["aperio"]["MPP"]) 43 | return None 44 | 45 | def canny_fcn(patch: np.ndarray) -> bool: 46 | """Check if a patch is a foreground patch using Canny edge detection.""" 47 | patch_img = Image.fromarray(patch) 48 | tile_to_greyscale = patch_img.convert("L") 49 | # tile_to_greyscale is a PIL.Image.Image with image mode L 50 | # Note: If you have an L mode image, that means it is 51 | # a single channel image - normally interpreted as greyscale. 52 | # The L means that it just stores the Luminance. 53 | # It is very compact, but only stores a greyscale, not colour. 54 | tile2array = np.array(tile_to_greyscale) 55 | # Hardcoded thresholds. 56 | edge = cv2.Canny(tile2array, 40, 100) 57 | # Avoid dividing by zero. 58 | edge = edge / np.max(edge) if np.max(edge) != 0 else 0 59 | edge = ( 60 | ((np.sum(np.sum(edge)) / (tile2array.shape[0] * tile2array.shape[1])) * 100) 61 | if (tile2array.shape[0] * tile2array.shape[1]) != 0 62 | else 0 63 | ) 64 | is_foreground_image = edge >= 2.0 65 | return is_foreground_image 66 | 67 | def get_tile_and_check_is_foreground( 68 | slide: CuImage, location: tuple[int, int], level: int, tile_size: int 69 | ) -> tuple[np.ndarray, tuple[int, int], bool]: 70 | tile = slide.read_region( 71 | location=location, 72 | level=level, 73 | size=(tile_size, tile_size), 74 | ) 75 | tile = np.asarray(tile) 76 | is_foreground = canny_fcn(tile) 77 | return tile, location, is_foreground 78 | 79 | def get_level_closest_to_mpp(slide: CuImage, target_mpp: float) -> int: 80 | """Get the slide level closest to the target MPP.""" 81 | slide_mpp = get_slide_mpp(slide) # Slide resolution in MPP. 82 | if slide_mpp is None: 83 | raise ValueError("Slide MPP is not available in metadata") 84 | # Get the resolutions of the pyramid. 
85 | slide_level_mpps = [ 86 | slide_mpp * float(d) for d in slide.resolutions["level_downsamples"] 87 | ] 88 | level_closest_to_mpp = int( 89 | np.argmin([np.abs(mpp - target_mpp) for mpp in slide_level_mpps]) 90 | ) 91 | return level_closest_to_mpp 92 | 93 | class SlideTileDataset(Dataset): 94 | def __init__(self, patches, transform): 95 | self.tiles = patches 96 | self.transform = transform 97 | 98 | def __len__(self): 99 | return len(self.tiles) 100 | 101 | def __getitem__(self, i: int) -> torch.Tensor: 102 | image = Image.fromarray(self.tiles[i]) 103 | image = self.transform(image) 104 | return image 105 | 106 | def extract_embeddings(slide_path, model): 107 | slide_name = Path(slide_path).stem 108 | print(f"Processing slide with name {slide_name}") 109 | 110 | # Read image using CuCim and CuPy 111 | slide_image = CuImage(slide_path) 112 | 113 | try: 114 | # For simplicity, we use the pyramid level whose MPP is 115 | # closest to the target MPP. 116 | # /!\ The actual MPP used to get the tiles can therefore be quite different 117 | # from the target MPP, for instance if the slide only has the MPP 118 | # [0.25, 1.0, 2.0]. 119 | level = get_level_closest_to_mpp(slide_image, float(TARGET_MPP)) 120 | except Exception: 121 | print(f"Could not find MPP for {slide_path}, skipping slide...") 122 | return None 123 | 124 | slide_dims_at_level = slide_image.resolutions["level_dimensions"][level] 125 | num_tiles_at_level = ( 126 | slide_dims_at_level[0] // TILE_SIZE, 127 | slide_dims_at_level[1] // TILE_SIZE, 128 | ) 129 | 130 | print( 131 | f"Total number of tiles found: {num_tiles_at_level[0] * num_tiles_at_level[1]}." 132 | ) 133 | locations = [ 134 | (i * TILE_SIZE, j * TILE_SIZE) 135 | for i in range(num_tiles_at_level[0]) 136 | for j in range(num_tiles_at_level[1]) 137 | ] 138 | random.shuffle(locations) 139 | futures = [] 140 | with ThreadPoolExecutor() as executor: 141 | for location in locations: 142 | futures.append( 143 | executor.submit( 144 | get_tile_and_check_is_foreground, 145 | slide=slide_image, 146 | location=location, 147 | level=level, 148 | tile_size=TILE_SIZE, 149 | ) 150 | ) 151 | foreground_tiles = [] 152 | for future in as_completed(futures): 153 | tile, location, is_foreground = future.result() 154 | if is_foreground: 155 | foreground_tiles.append(tile) 156 | 157 | if len(foreground_tiles) >= TILE_COUNT: 158 | executor.shutdown(wait=False, cancel_futures=True) 159 | break 160 | print(f"Number of foreground tiles kept: {len(foreground_tiles)}.") 161 | 162 | dataset = SlideTileDataset(foreground_tiles, transform=model.transform) 163 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=128, shuffle=False) 164 | print("Starting feature extraction ...") 165 | start = time.time() 166 | features = [] 167 | with torch.no_grad(): 168 | with torch.autocast(device_type="cuda", dtype=torch.float16): 169 | for batch in tqdm(dataloader): 170 | batch = batch.to("cuda") 171 | features_b = model(batch)['x_norm_cls_token'] 172 | features.append(features_b.cpu().numpy()) 173 | 174 | features = np.concatenate(features, axis=0) 175 | end = time.time() 176 | print( 177 | f"Feature extraction done in {end-start:.2f} seconds! 
Features shape: {features.shape}" 178 | ) 179 | output_path = os.path.join('/tmp/', f"{slide_name}_embeddings.h5") 180 | 181 | with h5py.File(output_path, 'w') as f: 182 | f['feats'] = features 183 | print(f"Saved embeddings for {slide_name}") 184 | return output_path 185 | 186 | def model_fn(model_dir): 187 | """Load the model for inference""" 188 | try: 189 | import urllib.request 190 | print("Testing internet connectivity...") 191 | urllib.request.urlopen('http://google.com', timeout=10) 192 | print("Internet connection available") 193 | except Exception as e: 194 | print(f"Internet connectivity test failed: {str(e)}") 195 | 196 | print("Starting model instantiation") 197 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 198 | model = HOPTIMUSZero() 199 | model = model.to(device) 200 | model.eval() 201 | print("Hoptimus model loaded") 202 | return model 203 | 204 | 205 | def input_fn(request_body, request_content_type): 206 | """Parse input data for prediction""" 207 | print(f"Received content type: {request_content_type}") 208 | if request_content_type == 'application/jsonlines': 209 | # Parse the JSON line to get the S3 path 210 | data = json.loads(request_body.decode()) 211 | s3_path = data["source"] 212 | 213 | # Download the file locally 214 | local_path = f"/tmp/{os.path.basename(s3_path)}" 215 | s3 = boto3.client('s3') 216 | 217 | # Parse the S3 URI 218 | bucket = s3_path.split('/')[2] 219 | key = '/'.join(s3_path.split('/')[3:]) 220 | 221 | print(f"Downloading {s3_path} to {local_path}") 222 | # Download the file 223 | s3.download_file(bucket, key, local_path) 224 | return local_path 225 | elif request_content_type == 'application/x-directory': 226 | # request_body will be the local path to the file 227 | return request_body 228 | else: 229 | raise ValueError(f"Unsupported content type: {request_content_type}") 230 | 231 | 232 | def predict_fn(file_path, model): 233 | """Run prediction on the input data""" 234 | try: 235 | print(f"Processing file: {file_path}") 236 | output_path = extract_embeddings(file_path, model) 237 | s3_uri = upload_to_s3(output_path, "pathologybenchmark-s3bucket-u7pe00xtbplu", "embeddings") 238 | 239 | # Delete the input file after processing 240 | if os.path.exists(file_path): 241 | os.remove(file_path) 242 | os.remove(output_path) 243 | print(f"Deleted temporary file: {file_path}") 244 | 245 | # Return just the filename 246 | return os.path.basename(output_path) 247 | 248 | except Exception as e: 249 | print(f"Error in predict_fn: {str(e)}") 250 | # Ensure file is deleted even if an error occurs 251 | if os.path.exists(file_path): 252 | os.remove(file_path) 253 | print(f"Deleted temporary file due to error: {file_path}") 254 | raise e 255 | 256 | 257 | def output_fn(prediction, accept): 258 | """Format the prediction output""" 259 | print(f"Output filename: {prediction}") 260 | return prediction 261 | 262 | 263 | if __name__ == "__main__": 264 | # Initialize the model 265 | start_time = time.time() 266 | model = model_fn("test") 267 | predict_fn("slide.svs", model) 268 | print(f"total time: {time.time()-start_time}") 269 | 270 | -------------------------------------------------------------------------------- /train/deepmil.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Owkin Inc. 3 | This source code is licensed under the GNU GENERAL PUBLIC LICENSE v3 license found in the 4 | LICENSE file in the root directory of this source tree. 
5 | 6 | Implementation of DeepMIL model, as proposed by 7 | Ilse et al. 2018, Attention-based Deep Multiple Instance Learning. 8 | """ 9 | from typing import Optional, List, Tuple, Union 10 | import torch 11 | 12 | class MLP(torch.nn.Sequential): 13 | """ 14 | MLP Module 15 | 16 | Parameters 17 | ---------- 18 | in_features: int 19 | out_features: int 20 | hidden: Optional[List[int]] = None 21 | activation: Optional[torch.nn.Module] = torch.nn.Sigmoid() 22 | bias: bool = True 23 | """ 24 | 25 | def __init__( 26 | self, 27 | in_features: int, 28 | out_features: int, 29 | hidden: Optional[List[int]] = None, 30 | activation: Optional[torch.nn.Module] = torch.nn.Sigmoid(), 31 | bias: bool = True, 32 | ): 33 | d_model = in_features 34 | layers = [] 35 | 36 | if hidden is not None: 37 | for i, h in enumerate(hidden): 38 | seq = [torch.nn.Linear(d_model, h, bias=bias)] 39 | d_model = h 40 | 41 | if activation is not None: 42 | seq.append(activation) 43 | 44 | layers.append(torch.nn.Sequential(*seq)) 45 | 46 | layers.append(torch.nn.Linear(d_model, out_features)) 47 | 48 | super(MLP, self).__init__(*layers) 49 | 50 | 51 | class TilesMLP(torch.nn.Module): 52 | """ 53 | MLP to be applied to tiles to compute scores. 54 | This module can be used in combination with a mask 55 | to prevent padding from influencing the score values. 56 | Parameters 57 | ---------- 58 | in_features: int 59 | size of each input sample 60 | out_features: int 61 | size of each output sample 62 | hidden: Optional[List[int]] = None: 63 | Number of hidden layers and their respective number of features. 64 | bias: bool = True 65 | If set to ``False``, the layer will not learn an additive bias. 66 | activation: torch.nn.Module = torch.nn.Sigmoid() 67 | MLP activation function 68 | """ 69 | 70 | def __init__( 71 | self, 72 | in_features: int, 73 | out_features: int = 1, 74 | hidden: Optional[List[int]] = None, 75 | bias: bool = True, 76 | activation: torch.nn.Module = torch.nn.Sigmoid(), 77 | ): 78 | super(TilesMLP, self).__init__() 79 | 80 | self.hidden_layers = torch.nn.ModuleList() 81 | if hidden is not None: 82 | for h in hidden: 83 | self.hidden_layers.append( 84 | MaskedLinear(in_features, h, bias=bias, mask_value="-inf") 85 | ) 86 | self.hidden_layers.append(activation) 87 | in_features = h 88 | 89 | self.hidden_layers.append(torch.nn.Linear(in_features, out_features, bias=bias)) 90 | 91 | def forward(self, x: torch.Tensor, mask: Optional[torch.BoolTensor] = None): 92 | """ 93 | Parameters 94 | ---------- 95 | x: torch.Tensor 96 | (B, N_TILES, IN_FEATURES) 97 | mask: Optional[torch.BoolTensor] = None 98 | (B, N_TILES), True for values that were padded. 99 | Returns 100 | ------- 101 | x: torch.Tensor 102 | (B, N_TILES, OUT_FEATURES) 103 | """ 104 | for layer in self.hidden_layers: 105 | if isinstance(layer, MaskedLinear): 106 | x = layer(x, mask) 107 | else: 108 | x = layer(x) 109 | return x 110 | 111 | class MaskedLinear(torch.nn.Linear): 112 | """ 113 | Linear layer to be applied tile-wise. 114 | This layer can be used in combination with a mask 115 | to prevent padding tiles from influencing the values of a subsequent 116 | activation. 
117 | Example: 118 | >>> module = Linear(in_features=128, out_features=1) # With Linear 119 | >>> out = module(slide) 120 | >>> wrong_value = torch.sigmoid(out) # Value is influenced by padding 121 | >>> module = MaskedLinear(in_features=128, out_features=1, mask_value='-inf') # With MaskedLinear 122 | >>> out = module(slide, mask) # Padding now has the '-inf' value 123 | >>> correct_value = torch.sigmoid(out) # Value is not influenced by padding as sigmoid('-inf') = 0 124 | Parameters 125 | ---------- 126 | in_features: int 127 | size of each input sample 128 | out_features: int 129 | size of each output sample 130 | mask_value: Union[str, float] 131 | value to give to the mask 132 | bias: bool = True 133 | If set to ``False``, the layer will not learn an additive bias. 134 | """ 135 | 136 | def __init__( 137 | self, 138 | in_features: int, 139 | out_features: int, 140 | mask_value: Union[str, float], 141 | bias: bool = True, 142 | ): 143 | super(MaskedLinear, self).__init__( 144 | in_features=in_features, out_features=out_features, bias=bias 145 | ) 146 | self.mask_value = mask_value 147 | 148 | def forward(self, x: torch.Tensor, mask: Optional[torch.BoolTensor] = None): 149 | """ 150 | Parameters 151 | ---------- 152 | x: torch.Tensor 153 | (B, SEQ_LEN, IN_FEATURES) 154 | mask: Optional[torch.BoolTensor] = None 155 | (B, SEQ_LEN, 1), True for values that were padded. 156 | Returns 157 | ------- 158 | x: torch.Tensor 159 | (B, SEQ_LEN, OUT_FEATURES) 160 | """ 161 | x = super(MaskedLinear, self).forward(x) 162 | if mask is not None: 163 | x = x.masked_fill(mask, float(self.mask_value)) 164 | return x 165 | 166 | def extra_repr(self): 167 | return "in_features={}, out_features={}, mask_value={}, bias={}".format( 168 | self.in_features, self.out_features, self.mask_value, self.bias is not None 169 | ) 170 | 171 | class GatedAttention(torch.nn.Module): 172 | """ 173 | Gated Attention, as defined in https://arxiv.org/abs/1802.04712. 174 | Permutation invariant Layer on dim 1. 175 | 176 | Parameters 177 | ---------- 178 | d_model: int = 128 179 | """ 180 | 181 | def __init__( 182 | self, 183 | d_model: int = 128, 184 | ): 185 | super(GatedAttention, self).__init__() 186 | 187 | self.att = torch.nn.Linear(d_model, d_model) 188 | self.gate = torch.nn.Linear(d_model, d_model) 189 | self.w = MaskedLinear(d_model, 1, "-inf") 190 | 191 | def attention( 192 | self, 193 | v: torch.Tensor, 194 | mask: Optional[torch.BoolTensor] = None, 195 | ) -> torch.Tensor: 196 | """ 197 | Gets attention logits. 198 | Parameters 199 | ---------- 200 | v: torch.Tensor 201 | (B, SEQ_LEN, IN_FEATURES) 202 | mask: Optional[torch.BoolTensor] = None 203 | (B, SEQ_LEN, 1), True for values that were padded. 204 | Returns 205 | ------- 206 | attention_logits: torch.Tensor 207 | (B, N_TILES, 1) 208 | """ 209 | 210 | h_v = self.att(v) 211 | h_v = torch.tanh(h_v) 212 | 213 | u_v = self.gate(v) 214 | u_v = torch.sigmoid(u_v) 215 | 216 | attention_logits = self.w(h_v * u_v, mask=mask) 217 | return attention_logits 218 | 219 | def forward( 220 | self, v: torch.Tensor, mask: Optional[torch.BoolTensor] = None 221 | ) -> Tuple[torch.Tensor, torch.Tensor]: 222 | """ 223 | Parameters 224 | ---------- 225 | v: torch.Tensor 226 | (B, SEQ_LEN, IN_FEATURES) 227 | mask: Optional[torch.BoolTensor] = None 228 | (B, SEQ_LEN, 1), True for values that were padded. 
229 | Returns 230 | ------- 231 | scaled_attention, attention_weights: Tuple[torch.Tensor, torch.Tensor] 232 | (B, IN_FEATURES), (B, N_TILES, 1) 233 | """ 234 | attention_logits = self.attention(v=v, mask=mask) 235 | 236 | attention_weights = torch.softmax(attention_logits, 1) 237 | scaled_attention = torch.matmul(attention_weights.transpose(1, 2), v) 238 | 239 | return scaled_attention.squeeze(1), attention_weights 240 | 241 | 242 | class DeepMIL(torch.nn.Module): 243 | """ 244 | Deep MIL classification model. 245 | https://arxiv.org/abs/1802.04712 246 | 247 | Parameters 248 | ---------- 249 | in_features: int 250 | out_features: int = 1 251 | d_model_attention: int = 128 252 | tiles_mlp_hidden: Optional[List[int]] = None 253 | mlp_hidden: Optional[List[int]] = None 254 | mlp_activation: Optional[torch.nn.Module] = torch.nn.Sigmoid() 255 | bias: bool = True 256 | """ 257 | 258 | def __init__( 259 | self, 260 | in_features: int, 261 | out_features: int = 1, 262 | d_model_attention: int = 128, 263 | tiles_mlp_hidden: Optional[List[int]] = None, 264 | mlp_hidden: Optional[List[int]] = None, 265 | mlp_activation: Optional[torch.nn.Module] = torch.nn.Sigmoid(), 266 | bias: bool = True, 267 | ): 268 | super(DeepMIL, self).__init__() 269 | 270 | self.tiles_emb = TilesMLP( 271 | in_features, 272 | hidden=tiles_mlp_hidden, 273 | bias=bias, 274 | out_features=d_model_attention, 275 | ) 276 | 277 | self.attention_layer = GatedAttention(d_model=d_model_attention) 278 | 279 | mlp_in_features = d_model_attention 280 | 281 | self.mlp = MLP( 282 | in_features=mlp_in_features, 283 | out_features=out_features, 284 | hidden=mlp_hidden, 285 | activation=mlp_activation, 286 | ) 287 | 288 | def score_model(self, x: torch.Tensor, mask: Optional[torch.BoolTensor] = None) -> torch.Tensor: 289 | """ 290 | Gets attention logits. 291 | Parameters 292 | ---------- 293 | x: torch.Tensor 294 | (B, N_TILES, FEATURES) 295 | mask: Optional[torch.BoolTensor] 296 | (B, N_TILES, 1), True for values that were padded. 297 | Returns 298 | ------- 299 | attention_logits: torch.Tensor 300 | (B, N_TILES, 1) 301 | """ 302 | tiles_emb = self.tiles_emb(x, mask) 303 | attention_logits = self.attention_layer.attention(tiles_emb, mask) 304 | return attention_logits 305 | 306 | def forward( 307 | self, x: torch.Tensor, mask: Optional[torch.BoolTensor] = None 308 | ) -> torch.Tensor: 309 | """ 310 | Parameters 311 | ---------- 312 | x: torch.Tensor 313 | (B, N_TILES, FEATURES) 314 | mask: Optional[torch.BoolTensor] 315 | (B, N_TILES, 1), True for values that were padded. 
316 | Returns 317 | ------- 318 | logits: torch.Tensor 319 | (B, OUT_FEATURES) 320 | """ 321 | tiles_emb = self.tiles_emb(x, mask) 322 | scaled_tiles_emb, _ = self.attention_layer(tiles_emb, mask) 323 | 324 | logits = self.mlp(scaled_tiles_emb) 325 | 326 | return logits -------------------------------------------------------------------------------- /Train.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "52323109-e236-419d-bb2e-bdf8897d3e40", 6 | "metadata": {}, 7 | "source": [ 8 | "# Classification Training (MHIST)" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 60, 14 | "id": "98ed6e8b-f614-46a6-a25e-904322045f94", 15 | "metadata": { 16 | "tags": [] 17 | }, 18 | "outputs": [ 19 | { 20 | "name": "stderr", 21 | "output_type": "stream", 22 | "text": [ 23 | "INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.\n", 24 | "INFO:sagemaker:Creating training-job with name: MHIST-Classification-2024-10-29-23-04-44-207\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "import sagemaker\n", 30 | "from sagemaker.pytorch import PyTorch\n", 31 | "from sagemaker.inputs import FileSystemInput\n", 32 | "\n", 33 | "# Initialize the SageMaker session\n", 34 | "sagemaker_session = sagemaker.Session()\n", 35 | "\n", 36 | "# Define the EFS file system input\n", 37 | "efs_data_input = FileSystemInput(\n", 38 | "    file_system_id='fs-0b7a195df6775de4c', # MODIFY\n", 39 | "    file_system_type='EFS',\n", 40 | "    directory_path='/MHIST',\n", 41 | "    file_system_access_mode='ro'\n", 42 | ")\n", 43 | "\n", 44 | "efs_model_input = FileSystemInput(\n", 45 | "    file_system_id='fs-0b7a195df6775de4c',\n", 46 | "    file_system_type='EFS',\n", 47 | "    directory_path='/models',\n", 48 | "    file_system_access_mode='ro'\n", 49 | ")\n", 50 | "\n", 51 | "# Configure the PyTorch estimator\n", 52 | "estimator = PyTorch(\n", 53 | "    source_dir='train',\n", 54 | "    entry_point='train_mhist.py',\n", 55 | "    role='arn:aws:iam::713881812217:role/EFS-SM-SageMakerRole-EyrRK8nNZo79',\n", 56 | "    instance_count=1,\n", 57 | "    instance_type='ml.p3.2xlarge',\n", 58 | "    subnets=['subnet-008fa8aee9db06e83'], # MODIFY\n", 59 | "    security_group_ids=['sg-09d4640079b19f275'], # MODIFY\n", 60 | "    framework_version='2.3',\n", 61 | "    py_version='py311',\n", 62 | "    hyperparameters={\n", 63 | "        'epochs': 20,\n", 64 | "        'batch-size': 32,\n", 65 | "        'learning-rate': 1e-3,\n", 66 | "        'weight-decay':1e-4,\n", 67 | "    },\n", 68 | "    metric_definitions=[\n", 69 | "        {'Name': 'TrainingLoss', 'Regex': 'Training Loss: ([0-9\\\\.]+)'},\n", 70 | "        {'Name': 'ValidationLoss', 'Regex': 'Validation Loss: ([0-9\\\\.]+)'},\n", 71 | "        {'Name': 'ValidationAccuracy', 'Regex': 'Validation Accuracy: ([0-9\\\\.]+)'}\n", 72 | "    ],\n", 73 | "    base_job_name='MHIST-Classification'\n", 74 | ")\n", 75 | "\n", 76 | "# Start the training job\n", 77 | "estimator.fit({'training': efs_data_input, 'models': efs_model_input}, wait=False)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "id": "4ab25971-f718-4c43-beb7-a33b447d9e81", 83 | "metadata": {}, 84 | "source": [ 85 | "# Segmentation Training (Lizard)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "id": "6698aeab-b11f-45f1-8fd9-1a74829da2a5", 92 | "metadata": { 93 | "tags": [] 94 | }, 95 | "outputs": [ 96 | { 97 | "name": "stderr", 98 | "output_type": "stream", 99 | 
"text": [ 100 | "INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.\n", 101 | "INFO:sagemaker:Creating training-job with name: Lizard-Segmentation-2024-11-02-17-33-17-908\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "import sagemaker\n", 107 | "from sagemaker.pytorch import PyTorch\n", 108 | "from sagemaker.inputs import FileSystemInput\n", 109 | "\n", 110 | "# Initialize the SageMaker session\n", 111 | "sagemaker_session = sagemaker.Session()\n", 112 | "\n", 113 | "# Define the EFS file system input\n", 114 | "efs_data_input = FileSystemInput(\n", 115 | " file_system_id='fs-0b7a195df6775de4c', # MODIFY\n", 116 | " file_system_type='EFS',\n", 117 | " directory_path='/Lizard',\n", 118 | " file_system_access_mode='ro'\n", 119 | ")\n", 120 | "\n", 121 | "efs_model_input = FileSystemInput(\n", 122 | " file_system_id='fs-0b7a195df6775de4c',\n", 123 | " file_system_type='EFS',\n", 124 | " directory_path='/models',\n", 125 | " file_system_access_mode='ro'\n", 126 | ")\n", 127 | "\n", 128 | "\n", 129 | "# Configure the PyTorch estimator\n", 130 | "estimator = PyTorch(\n", 131 | " source_dir='train',\n", 132 | " entry_point='train_lizard.py',\n", 133 | " role='arn:aws:iam::713881812217:role/EFS-SM-SageMakerRole-EyrRK8nNZo79',\n", 134 | " instance_count=1,\n", 135 | " instance_type='ml.g5.2xlarge',\n", 136 | " subnets=['subnet-008fa8aee9db06e83'], # MODIFY\n", 137 | " security_group_ids=['sg-09d4640079b19f275'], # MODIFY\n", 138 | " framework_version='2.3',\n", 139 | " py_version='py311',\n", 140 | " hyperparameters={\n", 141 | " 'epochs': 200,\n", 142 | " 'batch-size': 128,\n", 143 | " 'learning-rate': 1e-5\n", 144 | " },\n", 145 | " metric_definitions=[\n", 146 | " {'Name': 'TrainingLoss', 'Regex': 'Training Loss: ([0-9\\\\.]+)'},\n", 147 | " {'Name': 'ValidationLoss', 'Regex': 'Validation Loss: ([0-9\\\\.]+)'},\n", 148 | " {'Name': 'ValidationIOU', 'Regex': 'Validation Mean_IOU: ([0-9\\\\.]+)'},\n", 149 | " {'Name': 'ValidationDice', 'Regex': 'Validation Mean_DICE: ([0-9\\\\.]+)'}\n", 150 | " ],\n", 151 | " base_job_name='Lizard-Segmentation'\n", 152 | ")\n", 153 | "\n", 154 | "# Start the training job\n", 155 | "estimator.fit({'training': efs_data_input, 'models': efs_model_input}, wait=False)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "id": "8e87d6fd-2468-4ad0-b375-1b18c4dd2abb", 161 | "metadata": { 162 | "tags": [] 163 | }, 164 | "source": [ 165 | "# Feature extraction WSI" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "id": "2b38eb5c-a52b-4c2b-9e48-5c571eafb596", 171 | "metadata": {}, 172 | "source": [ 173 | "First build and push the docker image by running the `build_and_push.sh` script: \n", 174 | "* `cd preprocessing`\n", 175 | "* `bash ./build_and_push.sh cucim-tiler`\n", 176 | "\n", 177 | "This will build and push the custom Docker image to an ECR registry called `cucim-tiler` and can than be used to run the following custom Estimator" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "id": "3e5d73d2-df70-4700-8925-cdc4ddbc4912", 184 | "metadata": { 185 | "tags": [] 186 | }, 187 | "outputs": [ 188 | { 189 | "name": "stderr", 190 | "output_type": "stream", 191 | "text": [ 192 | "INFO:sagemaker:Creating training-job with name: Tile-Feature-Extraction-2024-11-18-22-43-40-962\n" 193 | ] 194 | } 195 | ], 196 | "source": [ 197 | "import sagemaker\n", 198 | "from sagemaker.estimator import Estimator\n", 199 | "from sagemaker.inputs import 
FileSystemInput\n", 200 | "\n", 201 | "# Initialize the SageMaker session\n", 202 | "sagemaker_session = sagemaker.Session()\n", 203 | "\n", 204 | "# Define the EFS file system input\n", 205 | "efs_data_input = FileSystemInput(\n", 206 | " file_system_id='fs-0b7a195df6775de4c', # MODIFY\n", 207 | " file_system_type='EFS',\n", 208 | " directory_path='/TCGA-COAD',\n", 209 | " file_system_access_mode='ro'\n", 210 | ")\n", 211 | "\n", 212 | "efs_data_output = FileSystemInput(\n", 213 | " file_system_id='fs-0b7a195df6775de4c', # MODIFY\n", 214 | " file_system_type='EFS',\n", 215 | " directory_path='/TCGA-COAD-features',\n", 216 | " file_system_access_mode='rw'\n", 217 | ")\n", 218 | "\n", 219 | "estimator = Estimator(\n", 220 | " role='arn:aws:iam::713881812217:role/EFS-SM-SageMakerRole-EyrRK8nNZo79',\n", 221 | " instance_count=1,\n", 222 | " image_uri=\"713881812217.dkr.ecr.us-west-2.amazonaws.com/cucim-tiler:latest\",\n", 223 | " instance_type='ml.g5.2xlarge',\n", 224 | " subnets=['subnet-008fa8aee9db06e83'], # MODIFY\n", 225 | " security_group_ids=['sg-09d4640079b19f275'], # MODIFY\n", 226 | " base_job_name='Tile-Feature-Extraction',\n", 227 | " metric_definitions=[\n", 228 | " {'Name': 'Slide #', 'Regex': 'Processing slide #([0-9\\\\.]+)'},\n", 229 | " ],\n", 230 | ")\n", 231 | "\n", 232 | "# Start the training job\n", 233 | "estimator.fit({'dataset': efs_data_input, 'output': efs_data_output}, wait=False)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "id": "c1358462-8399-4b01-8d87-ee197621c474", 239 | "metadata": {}, 240 | "source": [ 241 | "# WSI Prediction (Slide Level)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "id": "89bd9897-0227-4011-8935-91ea4130beb9", 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "name": "stderr", 252 | "output_type": "stream", 253 | "text": [ 254 | "INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.\n", 255 | "INFO:sagemaker:Creating training-job with name: WSI-Classification-2024-11-04-15-54-54-698\n" 256 | ] 257 | } 258 | ], 259 | "source": [ 260 | "import sagemaker\n", 261 | "from sagemaker.pytorch import PyTorch\n", 262 | "from sagemaker.inputs import FileSystemInput\n", 263 | "\n", 264 | "# Initialize the SageMaker session\n", 265 | "sagemaker_session = sagemaker.Session()\n", 266 | "\n", 267 | "# Define the EFS file system input\n", 268 | "efs_data_input = FileSystemInput(\n", 269 | " file_system_id='fs-0b7a195df6775de4c', # MODIFY\n", 270 | " file_system_type='EFS',\n", 271 | " directory_path='/TCGA-COAD-features',\n", 272 | " file_system_access_mode='ro'\n", 273 | ")\n", 274 | "\n", 275 | "efs_model_input = FileSystemInput(\n", 276 | " file_system_id='fs-0b7a195df6775de4c',\n", 277 | " file_system_type='EFS',\n", 278 | " directory_path='/models',\n", 279 | " file_system_access_mode='ro'\n", 280 | ")\n", 281 | "\n", 282 | "# Configure the PyTorch estimator\n", 283 | "estimator = PyTorch(\n", 284 | " source_dir='train',\n", 285 | " entry_point='train_msi_tcga.py',\n", 286 | " role='arn:aws:iam::713881812217:role/EFS-SM-SageMakerRole-EyrRK8nNZo79',\n", 287 | " instance_count=1,\n", 288 | " instance_type='ml.g5.2xlarge',\n", 289 | " subnets=['subnet-008fa8aee9db06e83'], # MODIFY\n", 290 | " security_group_ids=['sg-09d4640079b19f275'], # MODIFY\n", 291 | " framework_version='2.2',\n", 292 | " py_version='py310',\n", 293 | " hyperparameters={\n", 294 | " 'epochs': 100,\n", 295 | " 'batch-size': 32,\n", 296 | " 'learning-rate': 
1e-3,\n", 297 | " 'max-tiles': 1000\n", 298 | " },\n", 299 | " metric_definitions=[\n", 300 | " {'Name': 'TrainingLoss', 'Regex': 'Training Loss: ([0-9\\\\.]+)'},\n", 301 | " {'Name': 'ValidationLoss', 'Regex': 'Validation Loss: ([0-9\\\\.]+)'},\n", 302 | " {'Name': 'ValidationAccuracy', 'Regex': 'Validation Accuracy: ([0-9\\\\.]+)'}\n", 303 | " ],\n", 304 | " base_job_name='WSI-Classification',\n", 305 | ")\n", 306 | "\n", 307 | "# Start the training job\n", 308 | "estimator.fit({'training': efs_data_input, 'models': efs_model_input}, wait=False)" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "id": "de6f9a53-4c34-422e-8b09-7a3b4c16ae78", 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [] 318 | } 319 | ], 320 | "metadata": { 321 | "kernelspec": { 322 | "display_name": "conda_python3", 323 | "language": "python", 324 | "name": "conda_python3" 325 | }, 326 | "language_info": { 327 | "codemirror_mode": { 328 | "name": "ipython", 329 | "version": 3 330 | }, 331 | "file_extension": ".py", 332 | "mimetype": "text/x-python", 333 | "name": "python", 334 | "nbconvert_exporter": "python", 335 | "pygments_lexer": "ipython3", 336 | "version": "3.10.15" 337 | } 338 | }, 339 | "nbformat": 4, 340 | "nbformat_minor": 5 341 | } 342 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fine-tuning Vision Foundation-Models for Digital Pathology 2 | 3 | *Update Februrary 23rd, 2025: [H-Optimus-1](https://huggingface.co/bioptimus/H-optimus-1) was announced, and the following code can be re-used* 4 | 5 | This repository showcases how to fine-tune [H-optimus-0](https://huggingface.co/bioptimus/H-optimus-0) on patch-level and whole-slide level tasks for digital pathology using AWS: 6 | The different steps are: 7 | * Deployment of the **AWS Infrastructure** 8 | * Downloading the training datsets 9 | * Fine-tuning of patch-level models: 10 | * MHIST classification: This task uses the MHIST dataset for binary classification of colorectal polyps. The training script is located at `train/train_mhist.py`. 11 | * Lizard segmentation: This task uses the Lizard dataset for colonic nuclear instance segmentation. The training script is located at `train/train_lizard.py`. 12 | * Fine-tuning of WSI-level models: 13 | * Whole-Slide level feature extraction pipeline: This task uses a custom Docker image for tiling and feature extraction from whole-slide images. The Docker image is built and pushed using the `preprocessing/build_and_push.sh` script. 14 | * Whole-Slide level WSI prediction using Multiple Instance Learning : This task uses the features extracted from the TCGA-COAD dataset to predict MSI status at the whole-slide level. The training script is located at `train/train_msi_tcga.py` 15 | 16 | ![Pathology overview](pathology_overview.png) 17 | 18 | The `train.ipynb` notebook in this repository demonstrates how to start the different training jobs for each task. 
It includes code for: 19 | * Setting up the SageMaker session and defining EFS inputs 20 | * Configuring and launching training jobs for each task 21 | * Specifying hyperparameters and metric definitions 22 | * Using custom Docker images for feature extraction 23 | 24 | To use this notebook: 25 | * Open the `Train.ipynb` notebook in your SageMaker instance 26 | * Modify the EFS file system ID, subnet, and security group IDs as needed 27 | * Run the cells for the desired tasks 28 | 29 | 30 | # Initialization Steps: 31 | 32 | 1. Deploy the required infrastructure using the provided CloudFormation stack: ```aws cloudformation deploy --template-file infra/infra-stack.yml --stack-name EFS-SM --profile=XXXXX --capabilities CAPABILITY_IAM```. As part of the deployment, the following infrastructure is created: 33 | * **Networking Infrastructure**: A VPC with a public and a private subnet. The public subnet has internet access, but the private subnet does not. 34 | * An **Elastic File System** with a LifeCycle Configuration: The EFS is hosted on the private subnet. 35 | * A **SageMaker Notebook Instance** with a LifeCycle Configuration that mounts the EFS on the SageMaker instance at start time. 36 | * A **SageMaker Notebook Instance** Execution Role that has access to the EFS and full SageMaker permissions. 37 | 38 | 2. Retrieve the `EFSFileSystemId` and the `SageMakerNotebookInstanceName` that were created in the previous step. You can retrieve these values in the console or by running the following command: 39 | ```aws cloudformation describe-stacks --stack-name EFS-SM --query 'Stacks[0].Outputs' --profile=XXXXX``` 40 | 41 | 3. At this stage, you are ready to mount your EFS on your SageMaker notebook. For full details, you can refer to the following [blogpost](https://aws.amazon.com/blogs/machine-learning/mount-an-efs-file-system-to-an-amazon-sagemaker-notebook-with-lifecycle-configurations/) 42 | 43 | The following commands are an example of how to mount the file system on the notebook instance: 44 | ```bash 45 | cd SageMaker 46 | sudo mkdir efs 47 | sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2 fs-0f634af3c0c47b63d.efs.us-west-2.amazonaws.com:/ efs 48 | 49 | ``` 50 | 51 | 4. At this point, we are ready to download the training datasets that we will use to train our model on these specific pathology downstream tasks. 52 | 53 | * Downstream task: Binary classification of the predominant histological pattern in WSI of colorectal polyps. For this task, we use the publicly available [MHIST dataset](https://bmirds.github.io/MHIST/). MHIST is a binary task which comprises 3,152 hematoxylin and eosin (H&E)-stained Formalin Fixed Paraffin-Embedded (FFPE) fixed-size images (224 by 224 pixels) of colorectal polyps from the Department of Pathology and Laboratory Medicine at Dartmouth-Hitchcock Medical Center (DHMC). 54 | 55 | The tissue classes are: Hyperplastic Polyp (HP), Sessile Serrated Adenoma (SSA). This classification task focuses on the clinically-important binary distinction between HPs and SSAs, a challenging problem with considerable inter-pathologist variability. HPs are typically benign, while sessile serrated adenomas are precancerous lesions that can turn into cancer if left untreated and require sooner follow-up examinations. 
Histologically, HPs have a superficial serrated architecture and elongated crypts, whereas SSAs are characterized by broad-based crypts, often with complex structure and heavy serration. 56 | 57 | Submit the data request form and you will be provided with the `annotations.csv` file and the `images.zip` file. A sample download script for MHIST can be found under `download_mhist.sh`. ***Note: The URLs of the files need to be updated according to your data access grant***. This is a patch-level classification task. 58 | 59 | 60 | * Downstream task: Colonic nuclear instance segmentation. For this task, we leverage the [Lizard dataset](https://arxiv.org/abs/2108.11195) developed by Warwick University, available on Kaggle. This is a patch-level segmentation task. 61 | ```wget -O archive.zip https://www.kaggle.com/api/v1/datasets/download/aadimator/conic-challenge-dataset``` 62 | 63 | * Downstream task on **TCGA-COAD**. Install the GDC data transfer tool available [here](https://gdc.cancer.gov/access-data/gdc-data-transfer-tool) and generate a manifest file with all the WSI images from the TCGA data repository. Download the data using the [steps described in the TCGA documentation](https://docs.gdc.cancer.gov/Data_Transfer_Tool/Users_Guide/Preparing_for_Data_Download_and_Upload/) 64 | 65 | ```gdc-client download -m link/to/manifest-file.txt -d /mnt/efs/TCGA-COAD``` 66 | 67 | This dataset is used for slide-level tasks. The MSI status for the TCGA-COAD cohort is based on the findings of Liu et al., available [here](https://github.com/KatherLab/cancer-metadata/blob/main/tcga/liu.xlsx) 68 | 69 | 70 | 71 | 72 | 5. Here, we will be leveraging the foundation model **H-Optimus-0** that was trained by [BioOptimus](https://www.bioptimus.com/news/bioptimus-launches-h-optimus-0-the-worlds-largest-open-source-ai-foundation-model-for-pathology). The model weights are open-sourced and can be downloaded via HuggingFace: 73 | 74 | ```python 75 | import timm 76 | model = timm.create_model( 77 |     "hf-hub:bioptimus/H-optimus-0", pretrained=True, init_values=1e-5, dynamic_img_size=False 78 | ) 79 | ``` 80 | 81 | 82 | # MHIST Model Training steps 83 | 84 | Clone this code repository onto the SageMaker notebook instance. 85 | 86 | MHIST is a colorectal polyp classification public dataset that contains 3,152 images (224×224 pixels) presenting either hyperplastic polyp or sessile serrated adenoma at 5× magnification. This is a tile-level classification task. The dataset is defined and loaded in the `train/data_utils/MHIST.py` script. The training leverages a SageMaker PyTorch estimator with the training script `train/train_mhist.py`; the underlying fine-tuning pattern is sketched below. 
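The sketch illustrates the general pattern of patch-level fine-tuning: a small classification head on top of the backbone's class token. The `HOPTIMUSZero` wrapper and its `x_norm_cls_token` output follow `train/hoptimus_model_backbone.py` and `preprocessing-code/generate_features.py` in this repository; the head and loss shown here are illustrative and are not the exact contents of `train/train_mhist.py`.

```python
import torch
import torch.nn as nn
from hoptimus_model_backbone import HOPTIMUSZero  # backbone wrapper from this repository


class MHISTClassifier(nn.Module):
    """Illustrative patch-level classifier: frozen H-optimus-0 encoder + linear head."""

    def __init__(self, num_classes: int = 2):  # two classes: HP vs. SSA
        super().__init__()
        self.encoder = HOPTIMUSZero()
        for param in self.encoder.parameters():  # freeze the foundation model
            param.requires_grad = False
        self.head = nn.Linear(1536, num_classes)  # 1536 = ViT-g feature dimension

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        with torch.no_grad():
            cls_token = self.encoder(x)['x_norm_cls_token']  # (B, 1536)
        return self.head(cls_token)  # (B, num_classes) logits


# Usage (hypothetical shapes): images must be normalized 224x224 RGB tensors.
# model = MHISTClassifier().to("cuda")
# logits = model(images)                     # images: (B, 3, 224, 224)
# loss = nn.CrossEntropyLoss()(logits, y)    # y: (B,) class indices
```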
87 | 88 | The training can be started using the following code snippet: 89 | 90 | ```python 91 | import sagemaker 92 | from sagemaker.pytorch import PyTorch 93 | from sagemaker.inputs import FileSystemInput 94 | 95 | # Initialize the SageMaker session 96 | sagemaker_session = sagemaker.Session() 97 | 98 | # Define the EFS file system input 99 | efs_data_input = FileSystemInput( 100 |     file_system_id='fs-XXXXXX', # MODIFY 101 |     file_system_type='EFS', 102 |     directory_path='/MHIST', 103 |     file_system_access_mode='ro' 104 | ) 105 | 106 | efs_model_input = FileSystemInput( 107 |     file_system_id='fs-0b7a195df6775de4c', 108 |     file_system_type='EFS', 109 |     directory_path='/models', 110 |     file_system_access_mode='ro' 111 | ) 112 | 113 | 114 | # Configure the PyTorch estimator 115 | estimator = PyTorch( 116 |     source_dir='train', 117 |     entry_point='train_mhist.py', 118 |     role='arn:aws:iam::713881812217:role/EFS-SM-SageMakerRole-EyrRK8nNZo79', 119 |     instance_count=1, 120 |     instance_type='ml.p3.2xlarge', 121 |     subnets=['subnet-XXXX'], # MODIFY 122 |     security_group_ids=['sg-XXXXX'], # MODIFY 123 |     framework_version='2.3', 124 |     py_version='py311', 125 |     hyperparameters={ 126 |         'epochs': 10, 127 |         'batch-size': 32, 128 |         'learning-rate': 0.001 129 |     } 130 | ) 131 | 132 | # Start the training job 133 | estimator.fit({'training': efs_data_input, 'models': efs_model_input}) 134 | ``` 135 | 136 | Once the model is trained, you can review the model metrics on the test dataset in the training logs. An example of deploying a model for batch inference over whole slides is provided in the `deploy/BatchInference.ipynb` notebook together with `deploy/inference.py`. 183 | 184 | # Segmentation Training (Lizard) 185 | 186 | The Lizard dataset is used for colonic nuclear instance segmentation. The training script is located at `train/train_lizard.py`. The model couples the H-optimus-0 encoder with a small convolutional decoder that upsamples the 16×16 grid of patch tokens back to the 224×224 input resolution, as sketched below. 
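The following is a minimal sketch of that decoder's shape arithmetic. The layer stack mirrors the `SegmentationModel` in `train/train_lizard.py`; the class count of 7 is a placeholder for `len(dataset.id2class)`, which the actual script derives from the data.

```python
import torch
import torch.nn as nn

num_classes = 7  # placeholder for len(dataset.id2class) in the actual script
decoder = nn.Sequential(
    nn.Upsample(scale_factor=2),                  # 16x16 -> 32x32
    nn.Conv2d(1536, 64, (3, 3), padding=(1, 1)),
    nn.Upsample(scale_factor=2),                  # 32x32 -> 64x64
    nn.Conv2d(64, num_classes, (3, 3), padding=(1, 1)),
)

tokens = torch.randn(1, 256, 1536)  # 256 = 16x16 patch tokens from the ViT-g/14 encoder
grid = tokens.permute(0, 2, 1).reshape(1, 1536, 16, 16)
logits = nn.functional.interpolate(decoder(grid), size=(224, 224), mode="bilinear", align_corners=False)
print(logits.shape)  # torch.Size([1, 7, 224, 224]) -> per-pixel class logits
```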
Here's how to start the training job: 187 | 188 | ```python 189 | import sagemaker 190 | from sagemaker.pytorch import PyTorch 191 | from sagemaker.inputs import FileSystemInput 192 | 193 | # Initialize the SageMaker session 194 | sagemaker_session = sagemaker.Session() 195 | 196 | # Define the EFS file system inputs 197 | efs_data_input = FileSystemInput( 198 | file_system_id='fs-0b7a195df6775de4c', # MODIFY 199 | file_system_type='EFS', 200 | directory_path='/Lizard', 201 | file_system_access_mode='ro' 202 | ) 203 | 204 | efs_model_input = FileSystemInput( 205 | file_system_id='fs-0b7a195df6775de4c', 206 | file_system_type='EFS', 207 | directory_path='/models', 208 | file_system_access_mode='ro' 209 | ) 210 | 211 | # Configure the PyTorch estimator 212 | estimator = PyTorch( 213 | source_dir='train', 214 | entry_point='train_lizard.py', 215 | role='arn:aws:iam::713881812217:role/EFS-SM-SageMakerRole-EyrRK8nNZo79', 216 | instance_count=1, 217 | instance_type='ml.g5.16xlarge', 218 | subnets=['subnet-008fa8aee9db06e83'], # MODIFY 219 | security_group_ids=['sg-09d4640079b19f275'], # MODIFY 220 | framework_version='2.3', 221 | py_version='py311', 222 | hyperparameters={ 223 | 'epochs': 200, 224 | 'batch-size': 128, 225 | 'learning-rate': 1e-5 226 | }, 227 | metric_definitions=[ 228 | {'Name': 'TrainingLoss', 'Regex': 'Training Loss: ([0-9\\.]+)'}, 229 | {'Name': 'ValidationLoss', 'Regex': 'Validation Loss: ([0-9\\.]+)'}, 230 | {'Name': 'ValidationIOU', 'Regex': 'Validation Mean_IOU: ([0-9\\.]+)'}, 231 | {'Name': 'ValidationDice', 'Regex': 'Validation Mean_DICE: ([0-9\\.]+)'} 232 | ], 233 | base_job_name='Lizard-Segmentation' 234 | ) 235 | 236 | # Start the training job 237 | estimator.fit({'training': efs_data_input, 'models': efs_model_input}, wait=False) 238 | ``` 239 | 240 | # Feature Extraction 241 | 242 | This task uses a custom Docker image for tiling and feature extraction from whole-slide images. 
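One detail worth knowing before launching the job: the tiler reads each slide with cuCIM and works at the pyramid level whose resolution is closest to the requested `--target_mpp` (0.5 by default), so the effective MPP can differ noticeably from the target on slides with sparse pyramids. A small worked example of the selection logic from `generate_features.py` (the pyramid values here are hypothetical):

```python
import numpy as np

slide_mpp = 0.25                # base resolution read from the Aperio metadata
downsamples = [1.0, 4.0, 16.0]  # hypothetical pyramid downsample factors
level_mpps = [slide_mpp * d for d in downsamples]           # [0.25, 1.0, 4.0]
level = int(np.argmin([abs(m - 0.5) for m in level_mpps]))  # -> 0
print(level, level_mpps[level])  # tiles are read at 0.25 MPP, not the 0.5 target
```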
Before running the job, make sure to build and push the Docker image: 243 | ```bash 244 | cd preprocessing 245 | bash ./build_and_push.sh cucim-tiler 246 | ``` 247 | 248 | Then, you can start the feature extraction job: 249 | 250 | ```python 251 | import sagemaker 252 | from sagemaker.estimator import Estimator 253 | from sagemaker.inputs import FileSystemInput 254 | 255 | # Initialize the SageMaker session 256 | sagemaker_session = sagemaker.Session() 257 | 258 | # Define the EFS file system inputs 259 | efs_data_input = FileSystemInput( 260 |     file_system_id='fs-0b7a195df6775de4c', # MODIFY 261 |     file_system_type='EFS', 262 |     directory_path='/TCGA-COAD', 263 |     file_system_access_mode='ro' 264 | ) 265 | 266 | efs_data_output = FileSystemInput( 267 |     file_system_id='fs-0b7a195df6775de4c', # MODIFY 268 |     file_system_type='EFS', 269 |     directory_path='/TCGA-COAD-features2', 270 |     file_system_access_mode='rw' 271 | ) 272 | 273 | estimator = Estimator( 274 |     role='arn:aws:iam::713881812217:role/EFS-SM-SageMakerRole-EyrRK8nNZo79', 275 |     instance_count=1, 276 |     image_uri="713881812217.dkr.ecr.us-west-2.amazonaws.com/cucim-tiler:latest", 277 |     instance_type='ml.g5.2xlarge', 278 |     subnets=['subnet-008fa8aee9db06e83'], # MODIFY 279 |     security_group_ids=['sg-09d4640079b19f275'], # MODIFY 280 |     base_job_name='Tile-Feature-Extraction', 281 |     metric_definitions=[ 282 |         {'Name': 'Slide #', 'Regex': 'Processing slide #([0-9\\.]+)'}, 283 |     ], 284 | ) 285 | 286 | # Start the training job 287 | estimator.fit({'dataset': efs_data_input, 'output': efs_data_output}, wait=False) 288 | ``` 289 | 290 | # WSI Classification (Slide level) 291 | 292 | This task uses the features extracted from the TCGA-COAD dataset to predict MSI status at the whole-slide level. The training script is located at `train/train_msi_tcga.py`, and the attention-based MIL head for this task is implemented in `train/deepmil.py`. 
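A minimal usage sketch of that head, assuming 1,536-dimensional H-optimus-0 tile embeddings and illustrative bag sizes:

```python
import torch
from deepmil import DeepMIL  # train/deepmil.py in this repository

model = DeepMIL(in_features=1536, out_features=1, d_model_attention=128)

bag = torch.randn(4, 1000, 1536)                   # 4 slides x 1000 tile embeddings each
mask = torch.zeros(4, 1000, 1, dtype=torch.bool)   # True marks padded tiles; none here
logits = model(bag, mask)                          # (4, 1) slide-level logits
```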
Here's how to start the training job: 293 | 294 | ```python 295 | import sagemaker 296 | from sagemaker.pytorch import PyTorch 297 | from sagemaker.inputs import FileSystemInput 298 | 299 | # Initialize the SageMaker session 300 | sagemaker_session = sagemaker.Session() 301 | 302 | # Define the EFS file system input 303 | efs_data_input = FileSystemInput( 304 | file_system_id='fs-0b7a195df6775de4c', # MODIFY 305 | file_system_type='EFS', 306 | directory_path='/TCGA-COAD-features2', 307 | file_system_access_mode='ro' 308 | ) 309 | 310 | efs_model_input = FileSystemInput( 311 | file_system_id='fs-0b7a195df6775de4c', 312 | file_system_type='EFS', 313 | directory_path='/models', 314 | file_system_access_mode='ro' 315 | ) 316 | 317 | # Configure the PyTorch estimator 318 | estimator = PyTorch( 319 | source_dir='train', 320 | entry_point='train_msi_tcga.py', 321 | role='arn:aws:iam::713881812217:role/EFS-SM-SageMakerRole-EyrRK8nNZo79', 322 | instance_count=1, 323 | instance_type='ml.g4dn.4xlarge', 324 | subnets=['subnet-008fa8aee9db06e83'], # MODIFY 325 | security_group_ids=['sg-09d4640079b19f275'], # MODIFY 326 | framework_version='2.2', 327 | py_version='py310', 328 | hyperparameters={ 329 | 'epochs': 100, 330 | 'batch-size': 128, 331 | 'learning-rate': 1e-5, 332 | 'max-tiles': 6000 333 | }, 334 | metric_definitions=[ 335 | {'Name': 'TrainingLoss', 'Regex': 'Training Loss: ([0-9\\.]+)'}, 336 | {'Name': 'ValidationLoss', 'Regex': 'Validation Loss: ([0-9\\.]+)'}, 337 | {'Name': 'ValidationAccuracy', 'Regex': 'Validation Accuracy: ([0-9\\.]+)'} 338 | ], 339 | base_job_name='WSI-Classification', 340 | ) 341 | 342 | # Start the training job 343 | estimator.fit({'training': efs_data_input, 'models': efs_model_input}, wait=False) 344 | ``` 345 | 346 | -------------------------------------------------------------------------------- /data/gdc_manifest_tcga_coad.txt: -------------------------------------------------------------------------------- 1 | id filename md5 size state 2 | dd32a0d5-139c-4786-9fb7-0db929d21a12 TCGA-AA-3692-01Z-00-DX1.6e8c2370-54a7-4fce-b55c-bdb459828990.svs 33ab408ba06495e02e56f2d6ea95ffd0 258790396 released 3 | 4942dd61-3a5c-4a6a-8808-22161577a630 TCGA-AA-A00L-01Z-00-DX1.4F57C465-BC1C-4774-B003-F7F29C6A69D0.svs 231094b2747a44834898a9489b1411fd 446308504 released 4 | 588d5512-9711-4285-83b4-14bb1c3b6514 TCGA-AZ-6601-01Z-00-DX1.40681471-3104-48be-8b57-55dba1f432f8.svs 7db7346c371654bca7736f494fb56461 775233117 released 5 | 06a6a408-3198-4911-bdea-8e56cd0554a3 TCGA-CM-5863-01Z-00-DX1.2dceed07-9373-4103-be16-533dac9f283b.svs c980a718a947c487069cfd4f37b64df6 569596907 released 6 | 80d752c3-1cfa-4eb1-baf5-ec407843eec2 TCGA-AA-3842-01Z-00-DX1.8bbbd702-2b17-4c3e-a8bd-55c3ae8aaba3.svs f8dde3fb9b35c8e220e8b8a572fca998 200287850 released 7 | 3c964c2b-607a-437e-8fe2-3998850f9689 TCGA-AA-3939-01Z-00-DX1.6ceb6e8f-a469-4f42-9597-8bf853d95640.svs 4984d5b4c1d36c8bbe3e19391e37db97 223274076 released 8 | b9a36eb1-ce20-4166-8cb8-a00782229e61 TCGA-T9-A92H-01Z-00-DX3.1DE7D5ED-60F7-4645-8243-AB0C027B3ED7.svs 9c0b1b98e8ba5faa7bb1fae218701d6f 625728508 released 9 | 1badcce0-68c4-44b0-9b4e-acbb94c3fe3b TCGA-F4-6703-01Z-00-DX1.28225f5d-d880-4605-831f-f22ec0272cde.svs 2e1a413e95d956bfcaeef3d424352063 370647653 released 10 | 8884689f-5d9b-4424-8ef8-850c58c0c747 TCGA-AZ-4315-01Z-00-DX1.1a2c2771-3e59-47c3-b380-42110c545e6b.svs bccb49d75e2c9b24b91820308b596004 928770793 released 11 | a56ed047-d15e-4281-82b5-b1ab6e75b3f3 TCGA-CM-6676-01Z-00-DX1.dcc2bf23-ecaa-4952-8485-fc609af66298.svs 
8a3e617beb0161b79ced1cb87836cded 560094523 released 12 | 15d3e243-669f-40e8-8c9c-8d039929172c TCGA-AZ-6599-01Z-00-DX1.9d6aae3d-6934-4e96-8699-db41e1194f29.svs 83f2317ec0ac1c57539d49bbf07332e5 476609419 released 13 | 263abaf6-c5a1-4215-8dd5-a8b2a7bfd745 TCGA-3L-AA1B-01Z-00-DX2.17CE3683-F4B1-4978-A281-8F620C4D77B4.svs 403aa378ff8655f8328d314b758c9756 739685437 released 14 | ed5f8c30-29e3-4144-948b-b8658564f2d6 TCGA-3L-AA1B-01Z-00-DX1.8923A151-A690-40B7-9E5A-FCBEDFC2394F.svs 1f0cdb5cacd17c3dbf99f82ac41339dd 1228418533 released 15 | 023ca8d1-d8ef-492e-b542-e5e16e1ded09 TCGA-A6-6653-01Z-00-DX1.e130666d-2681-4382-9e7a-4a4d27cb77a4.svs d48cdf4cb936b05a93501b8659a7788e 244711457 released 16 | c278e965-d5e0-49bf-87f5-a9257b2b702c TCGA-D5-6920-01Z-00-DX1.e184673f-e7e9-44aa-9dae-7054bd1d0d00.svs cd233652dae58ea1f737b9f2052661fc 123085051 released 17 | 2c8b6894-a7b0-4a08-94ea-26bedbb0dffc TCGA-CA-5256-01Z-00-DX1.67cc2ca1-40df-4e76-be88-dfd93e20e017.svs f8309d55b4df8ff81d1182b265b4f2a2 325064995 released 18 | 3f582038-6f0c-460b-9ff5-d0a46282e31d TCGA-DM-A0XF-01Z-00-DX1.6FD3D3CF-A1E2-4F4E-BF02-F81B1A1061CC.svs d7f48194c78fa595e19d44685211b8a5 1198277401 released 19 | c10e5c0e-dc3c-4032-86ef-70a1a4f1195f TCGA-AA-A022-01Z-00-DX1.2673F279-5DF6-4E71-92B2-A589DD8F583B.svs f5f88cf4af9a6ba6419c380bbdaf4538 749369164 released 20 | 4d52a92f-7495-41ec-8606-5fff12413940 TCGA-AD-6889-01Z-00-DX1.5269A81E-5391-4875-8F2A-6505BC5BBFD9.svs 7dae30338c238f33d71b7318df81e04c 420566141 released 21 | 7fe47f9f-3250-4e29-ac95-42fb09bc3a53 TCGA-AA-3975-01Z-00-DX1.e73492da-b6fb-4c56-ab30-53b0b7041e04.svs b8e97bd29d9aad7f4d577f43b61dfc9c 386018496 released 22 | cea3184b-c890-49d5-b1f8-8fca4ca145dc TCGA-D5-6534-01Z-00-DX1.eb7b12b8-ad31-438f-8e1d-9bb76a560c86.svs 8bba36dfac4dacab1c2e78a2c1187369 740560187 released 23 | f7d4a000-4473-4d7b-9980-c495428eaa50 TCGA-A6-2681-01Z-00-DX1.5e11f090-a19d-4d5c-bcf6-c219b55d02bc.svs db16253e279da4c33307ec581dc9647f 163704887 released 24 | 9a36ae11-6455-424c-8a31-c63ac5c0f233 TCGA-AA-3693-01Z-00-DX1.0e18d1db-cdff-433b-a150-6c759c4dc3bb.svs 7378f4e0a0fc4d7c88666c79a54cb0ef 361059378 released 25 | 02a30d63-1da6-42d0-ab89-b4992ddcacec TCGA-DM-A1D8-01Z-00-DX1.2DD544F5-D72F-4840-B2D3-F361E032EA3B.svs 91f00a97c836822ffa79a3668b73b54b 528108609 released 26 | a60ef267-5d58-42e2-8d9f-db3d2605f912 TCGA-AA-A00U-01Z-00-DX1.E83A6B38-D472-482F-89D7-FF61FB589371.svs da2cda4df9271ecea83f024455eed867 546259320 released 27 | c7e229f4-7185-4211-979c-6cca1bbe4e0f TCGA-AD-6890-01Z-00-DX1.4778042f-f210-489c-bb76-b4fe16b0d500.svs 9e795a5b28caafe471ac28d02e39539f 73468773 released 28 | 63cf5b73-b605-4cee-b9dd-b92efbd00533 TCGA-CM-6170-01Z-00-DX1.aa9c41ea-3894-4524-a94c-f44c6c53c2d0.svs 4cac4101c5aadb027d0f463ea8588e50 420368057 released 29 | e2663d91-94f9-48dd-8680-3803ede25421 TCGA-AA-3984-01Z-00-DX1.d0cb7571-c612-4410-ac03-ebe800ad6767.svs d09ff8f062d669ccbec767ae9dbd0e20 383356784 released 30 | 85647bdf-5f5b-4bb5-b4f4-0c62d4a74457 TCGA-G4-6311-01Z-00-DX1.f1b98598-dbd8-4ba5-9ec7-5c93ccc82c81.svs d1f191747672285eb38a76333c10f465 728849151 released 31 | fb848c34-7a87-4e1b-9e4d-be73ff8a6785 TCGA-AA-A01S-01Z-00-DX1.1F2812C1-9807-4D14-8071-3FE15236EB44.svs 2ae21041497607fb37d889fd4b72f59e 1242406216 released 32 | 866108f6-0a38-4493-9eb5-864ae8b1eb4a TCGA-AA-3655-01Z-00-DX1.D78D8DBE-E74F-491D-AC9C-10E4C4E7BB02.svs 32777c50e751ddf0c18e52caf03eb2da 13432152 released 33 | 2678c27a-d474-4960-bb99-b4135a8e0353 TCGA-CM-5862-01Z-00-DX1.df57752c-5937-40f2-a48f-37a147a82139.svs 78b22f0a44b5779b4d0867ba26af6edf 648648245 released 34 
| f0c120bf-849d-4cf4-bcc5-ed2abf7d522f TCGA-QG-A5YW-01Z-00-DX1.3242285F-FA82-4A92-9D0E-951013A3C91A.svs 31a7b721e6a6ffc5db013be0ccf1c596 1520823971 released 35 | df7eff22-a7b7-470a-877b-c4cbbc7020fe TCGA-AA-3812-01Z-00-DX1.c501fc71-8370-4034-b32a-1bb7cd846881.svs 048c4d228a0d02a19c02909a91e4c2ad 320687466 released 36 | dabf61c8-5fa8-4656-bc64-42452d2f30a9 TCGA-G4-6317-01Z-00-DX1.6521a551-1516-4431-b3d7-af0a46978bcf.svs ed066ab4bb4631bca2c30bc578926499 680646981 released 37 | de83ad59-f5cb-4bcf-bcf0-4d8f00002d92 TCGA-AA-3854-01Z-00-DX1.1564d865-6653-4be1-951e-ea9fab0102a7.svs be05d2c7a8dffdb762587f82f7dbf9e4 128342020 released 38 | 79aa3803-9a5f-4589-942c-156b3fd2a62c TCGA-AA-3973-01Z-00-DX1.05cee752-3f4e-442d-a093-dcfb2b6130f0.svs 53c3f02acd1a7371da677adb8d8f0c77 283301276 released 39 | dc246a31-9d41-47a5-8fd1-27401081b7cd TCGA-AA-3673-01Z-00-DX1.a80676fa-5481-4b63-9639-dbeb31ae82d8.svs 6e2c7c83cca6870025848d9e8ae39c3e 162000642 released 40 | 158a9449-075b-4fba-a146-9d426d207b1e TCGA-CA-6717-01Z-00-DX1.08da75b7-a08f-46b3-a8c0-24f601ec4558.svs b4c3dcabea8fcd6359e841a5eb489f75 605620595 released 41 | 5f46d414-f2e3-4250-baad-474f623ce5c2 TCGA-CM-6675-01Z-00-DX1.4f2301e2-2894-484d-8a52-7be902a9861b.svs 46d88f954c135ad2bf4b5462d4e086d1 1272881239 released 42 | 3918b8a1-e2d2-4ecc-97ea-a8ad81474088 TCGA-AA-3966-01Z-00-DX1.7bc0c76e-f2d9-4abd-b63c-dad01aa4b1f7.svs f8ce516b0c71b31d4957e70454b9a7fb 440709222 released 43 | 6e24d6c9-c684-4bb6-8297-f49a6afd73d7 TCGA-G4-6626-01Z-00-DX1.20e7691e-9ef5-4278-a7d4-3967e36f24d5.svs 3f3986c3461d307baaa6ed3727346fa2 545658843 released 44 | 7bd3167c-fe4b-48ca-b02e-c94e1ebeab20 TCGA-AA-3555-01Z-00-DX1.d471efa5-7638-43e7-a2cd-93c1aed135d0.svs 530f60a2d8b8bc8129a9efe828d2a7aa 314276460 released 45 | 9d0ac4ac-d488-4162-8d1b-9d8287d0fe52 TCGA-AA-3980-01Z-00-DX1.93383cb9-59a7-431d-b268-3c3d59a1120e.svs 345b319e1d29f936afd8f518b2790a02 275358782 released 46 | 2adbf842-77f4-4fa9-a440-9ef5c53de2fd TCGA-CK-5915-01Z-00-DX1.04650539-005e-4221-90d6-49706b1d7244.svs af99c97cb71bfd00ed53de176faa51ce 1081880407 released 47 | 656ada04-4d7a-48ee-9d56-b6c0cc959874 TCGA-A6-A566-01Z-00-DX1.325BC1B7-2D0D-43CC-A23B-7D13B2DF665D.svs cc5df3e70f9e09c61e84be58bdc21f50 770376041 released 48 | 301a0478-49d9-4adf-b060-baf2b8cd8e51 TCGA-A6-5659-01Z-00-DX1.c671806f-013e-4d99-9841-cda5bd43eff1.svs 86874a0f86be9847ed818e4e5ab6d741 97034187 released 49 | 962c448f-ab98-45c8-a6cd-43a54a797371 TCGA-A6-5666-01Z-00-DX1.6f2cf971-edcb-415d-9709-feb7422cddc4.svs 43a13abf5dfeec5d1b0d3db2e5a74735 449540171 released 50 | bb860990-f9de-41cb-8e51-4a28f26aad61 TCGA-AA-3947-01Z-00-DX1.77005e19-8a8e-4b82-89e9-c81af9b41193.svs 1c96a6307c03875117059903157dfae8 701254788 released 51 | 5faaf55f-a794-4761-90c7-1b6ac528230c TCGA-AU-6004-01Z-00-DX1.12a234b3-1843-47ca-b650-37c1a631f489.svs 6840a736b84f0fb266a1bb376fe5afa2 247910497 released 52 | 920dba4e-065b-4c9d-b970-b78d033ae12a TCGA-D5-6536-01Z-00-DX1.a4528e3f-770e-4271-9943-d3a8b8bd3e9d.svs da66a91b3cf8e611c1116107a1ca6d29 923873229 released 53 | 5fe3d9b5-c3f6-46d7-a06d-84e395017cf6 TCGA-AA-3663-01Z-00-DX1.9AEDC003-2062-4876-8993-A5CEE4DDE1A9.svs 0dacd8bed84bd4071745f470d47ec4d0 12572176 released 54 | f60984ad-aef4-454d-acc2-28886772f55b TCGA-AA-A02W-01Z-00-DX1.3D9DD408-C389-411D-B4AC-6DC531D35BAD.svs 7190821a0aa5bace72f24d0bef30e055 415303056 released 55 | 00d69f69-3120-4b0c-8985-aab4edf9bc4b TCGA-D5-5539-01Z-00-DX1.9c46fe78-2adb-4f49-9141-cda135c2c90b.svs 4905e69c6baaa348290db8265a76b106 503817113 released 56 | b9496405-b480-4dda-9ee9-1aeff378f535 
TCGA-AZ-6600-01Z-00-DX1.9afe2f8f-bcfe-43df-a83b-6c183f226757.svs 2ae4e01418919f36011bd09aab3a77dc 979053525 released 57 | 4f8edf30-a7bc-4a4a-a8bd-3af91fbddce2 TCGA-A6-2684-01Z-00-DX1.be127778-e160-4ae3-9e5a-13a16eae2e7a.svs 3fcafb900bc76bc9f58f3b3b670241a7 172005717 released 58 | 49df964d-62da-429f-8700-3a5955ea4ef3 TCGA-CM-6166-01Z-00-DX1.52eaa124-7ab5-4aaf-b074-7f89a4c53804.svs 5a503da3fdd9cd6fd80444f9ed1ee561 661268139 released 59 | 565a064e-24e6-4981-b6d0-03e15fff255d TCGA-AA-3846-01Z-00-DX1.d6233d91-4d33-424a-99c1-8173fbeb5090.svs c0241857a29380768d8d9f0191c1d2a6 122294768 released 60 | 6fcd943f-d10f-460b-8f28-7ce08818acff TCGA-AA-3855-01Z-00-DX1.f305ce6c-e87c-4e68-b31e-2e6e8b52606f.svs b2ff85bf49757cc7d71b846cf5454ed5 327551540 released 61 | 08ef38c6-068b-420d-b629-d7778492c0a9 TCGA-AA-3664-01Z-00-DX1.bd07e7ef-0acb-43d8-a4f6-15b3442d2ed5.svs 259c7b6aac8235fe1d1f2245f7863d59 458131871 released 62 | cea26052-2806-45cb-a27d-e1e6bdaaf3e7 TCGA-G4-6320-01Z-00-DX1.09f11d38-4d47-44c9-b8d6-4d4910c6280e.svs 017058ac76f4ee4affc800c363fa5f13 537617393 released 63 | 754bec03-2e61-4652-b373-3e1f66001809 TCGA-AA-3877-01Z-00-DX1.36902310-bc0b-4437-9f86-6df85703e0ad.svs 76b48eae726e542797f8ffb658916dd2 337475202 released 64 | ac864132-d7f0-4587-9841-f78e89e72acf TCGA-DM-A280-01Z-00-DX1.e7cfcec1-e284-4b94-80c2-cbb3186f7c6f.svs e5c75a43e1e26afbd5f8c4a0c17460e2 1788647013 released 65 | c471b932-b3ef-428c-8349-abff0a6d375a TCGA-AA-A02J-01Z-00-DX1.1326204B-9264-482C-9F75-795DD085C0DF.svs 3570c5b905c5ca02f77c2add3c8bed4f 851913400 released 66 | 2e1217f8-442e-431e-9036-1e159627a9ae TCGA-A6-2675-01Z-00-DX1.d37847d6-c17f-44b9-b90a-84cd1946c8ab.svs d5786a4c7cf4ccf7a002538c4e0b42fb 39144385 released 67 | f6c772bc-2678-4808-9821-0cd9405b868b TCGA-AZ-4308-01Z-00-DX1.804e054d-e206-4b34-a992-86317ef515d9.svs 964ff1de9d35eef0de1bc1facf1b5162 543862337 released 68 | e592fc48-a4b8-4e73-9307-4fc462e6eda5 TCGA-NH-A8F8-01Z-00-DX1.0C13D583-0BCE-44F7-A4E6-5994FE97B99C.svs 19e6ba56dfde003fd873284083c7a985 1535400125 released 69 | a7aa17d3-4cc1-4547-9da8-7106c45b148b TCGA-AA-3519-01Z-00-DX1.82e03504-31d8-43d5-8d3f-01d9016af0fe.svs f52812a247a3db3aebe6578c92039737 343920848 released 70 | c280054b-b07d-4bfb-a32e-4fbe60831acd TCGA-D5-6922-01Z-00-DX1.6c11a531-71a3-45ff-b01a-49719b6a195c.svs 8ce8aec110ac1cc3a7f520f20408cf8b 122202341 released 71 | 2680fe2f-eef9-404e-a6a3-cdec178460a8 TCGA-5M-AAT4-01Z-00-DX1.725C46CA-9354-43AC-AA81-3E5A66354D6B.svs 54c5c2873378ff9ca9080f5aebfef590 2385119307 released 72 | 4750c7b9-15c5-41c8-91cf-127b1cfbc78c TCGA-AA-A00E-01Z-00-DX1.ABFCAF2D-287A-445F-9F32-BD00D1B385C3.svs 4460f7d2cacd5c92e17b4f8f238531c8 910656942 released 73 | ca97b61f-6cd6-4439-96ba-009841691943 TCGA-AA-A010-01Z-00-DX1.AA174B84-1EF2-47EA-85A6-516B3328325D.svs 72d9b55bfad87f67840581311be1edb8 1111139508 released 74 | b3f39a90-2d3b-4e5a-af64-a935157fd796 TCGA-AM-5820-01Z-00-DX1.365538bf-65ff-4fcd-8862-90627484431c.svs 6e795238dfb46a826c34109575409430 340881103 released 75 | 83f6ffcc-0c38-457c-b041-b19d1541ce07 TCGA-G4-6293-01Z-00-DX1.62ed5ed9-a79a-487a-bd6f-1f3f0571d44d.svs 62e2df70c862057a0add84d500096253 688844627 released 76 | e4fadd85-32b8-4ac0-a5d5-8f09c5c60f1f TCGA-D5-6898-01Z-00-DX1.d2c5ab3b-da4d-4fa4-b535-4339d59515e7.svs 93833dd36a0a600c38cf7f5733c1649e 229509973 released 77 | 8dc9441e-aa26-4718-bee4-77b28f1a401a TCGA-AD-5900-01Z-00-DX1.ff1dbf00-d9c5-45a2-9732-07b46f4e1471.svs d124f266cf49558c8a2fe639d4bb2fe5 89411607 released 78 | 675fc406-3949-4456-bf24-1bcacd22b24f 
TCGA-DM-A1D0-01Z-00-DX1.1EE92F9A-3DAA-4C1E-9A17-F9E0D31BE0C1.svs cbc25bd0d61bb7861fe3e42e15035e58 1447014483 released 79 | 2efd0746-94de-4e08-b7f9-e9babfeb84c3 TCGA-CM-4747-01Z-00-DX1.e0fac451-1322-464e-b718-174e9db33f39.svs 4e8c95284ffb2ae1afdc9aabd9ed6bb9 553303925 released 80 | 5aa1b035-0a9e-4245-abba-2ad841f6955a TCGA-AA-3548-01Z-00-DX1.41949ab5-79f2-4729-9d54-c0fca1daf124.svs 1d052698ba7dd7fbaef27d8c2327cb6d 343954138 released 81 | 98cf0323-7ab5-4057-963f-f927f9619a28 TCGA-CK-5914-01Z-00-DX1.dfa6d814-6ddb-4058-a236-d57303cbfbe9.svs d760ccb105cfe0befb8ac2d60453abe7 715692731 released 82 | d8f3f990-3177-484e-91c5-7ecf33c82c79 TCGA-F4-6808-01Z-00-DX1.c5c94635-21de-4edb-a903-4a2c914a5795.svs 0fed4728f500381b58857a7e322440f3 715966745 released 83 | 43d122bb-54c5-42f2-9f1e-4fbc108f4c4a TCGA-5M-AAT5-01Z-00-DX1.548E7CEB-48FB-4037-A616-39AB025E7A73.svs 417f6e60f37fe80ffc3e9afcd4c41499 2497921503 released 84 | 32409f33-aef7-4e0c-bf57-efac8614835c TCGA-D5-6931-01Z-00-DX1.c1d00654-b5ff-4485-90a6-97ae9e7bd7fa.svs 4ec097d7538f76a28fbe64ea4c5d2f2f 277779255 released 85 | b1a1c99c-e3f9-4190-a0b8-06dddbc1ed79 TCGA-AA-A00F-01Z-00-DX1.7E748515-2D18-4061-AF9A-E1446E44E7B8.svs 1e3dd0900b932ae652e912d58e72fb83 265520030 released 86 | 88a6a5e3-0bd3-4d18-82de-2c4186e7175a TCGA-AA-3875-01Z-00-DX1.016712a0-4226-4086-857c-3d6d85f186e3.svs a420228c76239436983eca637ff9287d 232153032 released 87 | ca9a9065-36d2-4cbe-83a3-4bf13e297e91 TCGA-CK-4948-01Z-00-DX1.cd6ecbec-9136-4ce7-9a96-eb1ac975b30f.svs 4143b5acf41a8ea9f1852e68dda9b3f3 550828635 released 88 | b2e3452c-dc6b-4b67-b12a-6c9c1925242e TCGA-AA-3520-01Z-00-DX1.3e2b41c2-eb21-4f68-8946-92b59cc1f969.svs 4e488eb899fedf45d500bdffd431e230 196534970 released 89 | e45b588f-c7db-4cea-a78c-bfecd1a46da6 TCGA-A6-2671-01Z-00-DX1.13d1a0d9-78cd-4cfc-b670-34a79ebe52ee.svs 982ef534e965db5653e74ff4c3abf052 162512031 released 90 | 775c999e-8aaa-4c3a-aed5-af619532866d TCGA-NH-A8F7-01Z-00-DX1.5CB8911D-07C3-4EF2-A97D-A62B441CF79E.svs 1bb6f9a7d57ff42af0c94e5342ca87dc 1149778127 released 91 | 3b20cf7f-b717-45d9-be7a-657df3c6f579 TCGA-A6-6650-01Z-00-DX1.92f39e59-8784-4dfd-a06f-804bebcdfb26.svs 8bc55732636c0eb40e66cc4d85ee3628 257083599 released 92 | 372f70b7-4525-4b76-8f0d-33f3e0a190fe TCGA-D5-6929-01Z-00-DX1.5e555bba-87b2-440c-b6d4-e6fec3f7bf3a.svs 8f8b390a8167c7074fa6dfbf7e553551 1184647143 released 93 | 45847d26-d445-4bcd-a9c8-5beb47cc5f8f TCGA-AZ-6607-01Z-00-DX1.b0a25161-7e13-42d6-9271-ecc5ecce2232.svs a79242ac84ed01a901ac1b47d4a9ba19 907148471 released 94 | 05d5bb0d-6a62-4967-92c0-f4e9bc53a0b4 TCGA-CA-5796-01Z-00-DX1.88141789-4240-4ab7-8db1-e4cb7ee1ebda.svs 6298fafb882a801f49ff9ba370ae6e57 762804109 released 95 | d226ff07-fec0-4d46-b55d-9a5a1d2c90e6 TCGA-DM-A0X9-01Z-00-DX1.C7FC2C17-12CC-4F10-B54F-7C29379D834E.svs 7a5488c3fde89370962d933817d70513 1518384173 released 96 | ba605d84-5a10-4c72-bb0d-a64c94e6deca TCGA-AA-3852-01Z-00-DX1.d662015f-398d-4a98-b384-46221070da2f.svs 825be31aed201f0262d60f48bdae18c4 192941206 released 97 | d9984adc-6367-49bc-9a7a-f9787db08be4 TCGA-DM-A28E-01Z-00-DX1.4381ffe6-3918-4fdd-b192-000f2b737b22.svs 51f790060e2a1fd1810d1866bc57ddc1 1389916197 released 98 | c1ecc7cd-ec3c-4fc7-b61e-d3ac09492dec TCGA-CM-6163-01Z-00-DX1.012a7433-73bb-4584-957b-f92c8877a114.svs dafe6b2a424cd41fe3fbce5211c1628e 846287119 released 99 | dffdc56e-f1f5-4a1b-a16b-561462b3b740 TCGA-4N-A93T-01Z-00-DX1.82E240B1-22C3-46E3-891F-0DCE35C43F8B.svs 101243b373ac1cb67b393c510872855b 938870973 released 100 | 3df01e75-ed41-46d9-bfd7-e09f1e935810 
TCGA-D5-5541-01Z-00-DX1.2cd0a69e-879e-47aa-8035-1f9732ec4760.svs 53b1ab98bf634c631254025cd8e00d31 656913971 released 101 | 2100a74b-8905-4848-b51a-5d7764871620 TCGA-CM-5861-01Z-00-DX1.b900abc0-ecca-48e1-98ba-fbc99a6dae3e.svs cb4c67ed591d06b0c548f68dec09e636 582337081 released 102 | 5ad0faff-3ea0-46da-8b6b-a7c70d61fbec TCGA-5M-AATE-01Z-00-DX1.483FFD2F-61A1-477E-8F94-157383803FC7.svs 1cb06cd98f9e9c7687b2aee3e9772f49 2762599587 released 103 | 064f3779-aeb1-4be1-9b86-76edc95a5c28 TCGA-D5-6535-01Z-00-DX1.0d7485ff-cf98-4c86-8a61-c7364f41b8b0.svs fd9c494b1c5ebb1028efc16e418158bf 800214603 released 104 | 88237eee-faf0-41eb-bc75-b89affd0c3cb TCGA-AA-3681-01Z-00-DX1.576342cf-0f40-404a-b3c5-b33103f86777.svs bbf9db6ebe06247edb8098f65aac5a1d 226334092 released 105 | f3e1b482-623f-473b-961f-e8ed85dea2c0 TCGA-D5-6532-01Z-00-DX1.a28f2969-31ae-408f-99f5-5428e183e123.svs 69e7ac626f9ddf0a619aa6f257f54add 926652863 released 106 | 477a0c14-bb53-4ef1-9441-d303aafb92dd TCGA-AY-A69D-01Z-00-DX1.94582A46-7470-4265-8371-23BC246431EE.svs 7f9eb3af4e0c66d79f7a585cc0e6743f 633994446 released 107 | ba3eab18-7d88-4b4a-ad56-9ce542a3de85 TCGA-F4-6806-01Z-00-DX1.483d0fc7-220c-4b62-8c9e-a1004ce7450c.svs 11f5b3f56b05a9eb694bb053478175a4 548674957 released 108 | a781f94d-72ef-4ee0-98e1-4ec63f509c77 TCGA-D5-5537-01Z-00-DX1.14709d4c-eba0-48d0-87b8-5f34f74429d6.svs 415646c83a3a03b3e53ab2df952116fa 908864705 released 109 | e51eb325-b534-42d8-882a-653161d32487 TCGA-A6-6142-01Z-00-DX1.e923ce20-d3c3-4d21-9e7c-d999a3742f9b.svs 6c8042ed2b3c1f674e01d745ddbe6147 213078619 released 110 | 88c5cb51-b42f-4fc1-b22b-8bb4a111e91b TCGA-F4-6569-01Z-00-DX1.accbe317-9a4d-49b9-b9c9-4d2bb1301f67.svs d73dca27c2373bc5ec4dbd418bb0fa33 374009183 released 111 | a3f603c8-b0f6-4323-99f6-2f0772daf78a TCGA-F4-6463-01Z-00-DX1.a3fa6fb4-ce9d-4f0d-b5f7-3c9da7322cd0.svs c63ffbfde84ed8a44bdbe657e9b3fcab 223150941 released 112 | acc689e0-67f9-4bae-b5d5-96dae5efebc1 TCGA-AA-A02F-01Z-00-DX1.6E214530-87AE-4E9F-89A9-E35BA9C69BB0.svs 4eced63b6ff937b0a7be4bd2a718c06e 1546348562 released 113 | 94eceace-42de-4085-b01d-2545ea5a4073 TCGA-CM-4748-01Z-00-DX1.e6307e86-29c5-4018-a94a-77fae9b08123.svs d897652981fd9b82b54fb1198e3df0fb 487390101 released 114 | 7d6c3f4d-7267-4a08-b1bd-57213e2ff226 TCGA-AA-3989-01Z-00-DX1.792D9A97-E06B-4F8C-8181-C4E5BD8B9A59.svs e3ccd834f466d8245d3fd9622974cff4 275555708 released 115 | a42f83a9-22ec-4666-8147-a72df2cdb033 TCGA-AZ-4682-01Z-00-DX1.abca2345-ed4a-4f64-af1d-0e60f81b1288.svs d65ea49d36c63a20543a0a524a5c2fe2 562501073 released 116 | c177e031-8d19-4ca1-939d-efac58cf9e87 TCGA-G4-6295-01Z-00-DX1.9e7ae22f-daac-42cb-a879-bcf505d1c725.svs e0838432632e755176a2ddac792af8ac 671892485 released 117 | a0aaad6c-33ac-42b3-b725-1bfce7406c42 TCGA-AY-A54L-01Z-00-DX1.BD4039B4-D732-418B-9CC9-064095A1F06F.svs 1689af05a80d29699a8efb22e079776c 880212948 released 118 | 03d1d2c0-b00c-4b48-af63-70ca7300ed15 TCGA-A6-5657-01Z-00-DX1.d0cab3dd-8758-4a3e-8bb2-7cd9411dbeb6.svs 7cb4be8df5d6d1f48c89c84597b7849d 186636821 released 119 | 43610e35-774c-43da-9053-742e286fa52c TCGA-DM-A282-01Z-00-DX1.65f620ea-37be-4d3c-a993-a2bfb552108c.svs 7eec115e5089991839c6322e89027674 1697125009 released 120 | f6109ea3-6f3e-477d-9353-c08dcce93454 TCGA-D5-6531-01Z-00-DX1.32241731-5890-424e-96d5-b897e770f03c.svs 6046a4a5880234533841044ce0fe410b 526998525 released 121 | c2fdb147-2e4a-48bb-ac70-230b485f5caf TCGA-DM-A1D6-01Z-00-DX1.BCDA4D6C-8424-477D-9FAF-907206D2DDD6.svs fbc21800e146214dcda7997d3a1e7a35 1425785301 released 122 | 4a3acf15-462d-44a3-b158-5dbda7a29860 
TCGA-DM-A1DA-01Z-00-DX1.00001FEF-3B63-4C6F-952A-1D5F6F51CD22.svs 7538caeb0bb639f7308fee11e8af5414 1287326161 released 123 | e4423a3b-d74a-4b89-93bc-27ca11d2c6ee TCGA-CM-6165-01Z-00-DX1.d59faefa-f647-4617-9dcf-4fd6ab45b4e6.svs f4986690f7c91ccb8d6a14fde2d6cd1b 864212875 released 124 | 1ca35a8e-3911-4247-a906-832f3223e192 TCGA-AA-3660-01Z-00-DX1.CCD0F50D-9991-4CC2-AC77-AD1F78D8CFEB.svs cd7f52bfb5ba5094c41a9f195c1cf8eb 8893474 released 125 | b5d1e455-5c25-4ad1-87b5-b56776d17baf TCGA-AA-A00W-01Z-00-DX1.24770462-BD63-4881-9AE3-9198E9093AD9.svs 9812001cb6374bb768308c118b71d893 594956978 released 126 | 3fa4a855-ff4c-4f82-a248-229b4668d3ae TCGA-A6-5664-01Z-00-DX1.622f6650-1926-4fa2-b42b-74122d9a68a4.svs bdfe0950ef98b0e125b382d259482e60 294223087 released 127 | ad571159-611a-446e-8a43-4a5c0a76aea4 TCGA-D5-6530-01Z-00-DX1.5c4bbcd1-51ba-467d-93f9-f2a9e7c5e010.svs 7914cc81abe54091880adcc13c99704d 1318639179 released 128 | 96b86ad8-69b1-4ce5-9382-6175af5db999 TCGA-DM-A28G-01Z-00-DX1.5e8602bd-31e1-4813-8214-cd56280defe5.svs d35270dd392bcdf411cfab212c4385f2 1544264469 released 129 | 1e1d8d98-9965-4c8d-92dd-c88cfc6cce2c TCGA-G4-6298-01Z-00-DX1.83055d52-71f7-46ec-be53-11d86b19b4cf.svs cc867ab4c30c4f02feda07cdf4d9fa2b 1152726443 released 130 | 23055574-65ae-4a76-9968-7dfcb5336cf7 TCGA-AZ-4681-01Z-00-DX1.e468c1a1-e251-4521-82fe-526c9c5f8190.svs 62ebd4d9aec0b56187f3ab2a0939ec32 617106897 released 131 | 8d436468-3965-434a-b014-7279824352dd TCGA-AA-3543-01Z-00-DX1.20129c52-157d-4d66-809f-d21694683c8d.svs 5c468a2c99013391ad8580c3ae913d5b 227482082 released 132 | 7447c422-1367-410c-8b26-b692d91b8844 TCGA-AA-3860-01Z-00-DX1.a63df9ca-6141-4bdc-8545-719fd9ae0aa5.svs 938dd55be8e55eea39ca477ab292317f 266013204 released 133 | e1072fe7-d1c9-4a70-be92-cbb111223d98 TCGA-G4-6310-01Z-00-DX1.b88472d4-3adc-4e4d-b0f2-0dc195a3d7df.svs a25ddd2e68bd8134b0fe8c21b5eb2197 664957525 released 134 | 4b490e3e-5fa9-4656-9a37-9aec8c01133b TCGA-AA-3516-01Z-00-DX1.0a9d9207-6dc2-44b9-89ea-16418430c484.svs de390e63015c78a44c72a84573d2dc15 293872406 released 135 | ae63964e-1799-4c8e-ad46-09707f1fcae8 TCGA-AA-3527-01Z-00-DX1.20035f6a-71f8-4d1f-a356-64001be9c2dd.svs e34e7c35389d51dd615be34e48cdaf51 227398282 released 136 | bffacf34-4942-496d-9c5d-d36294d80a9d TCGA-AA-3844-01Z-00-DX1.bf88ce1f-0601-40c8-813e-4e3df51bd2f0.svs f862d5ed5cee7403c50100a6c460ae66 279867456 released 137 | 3058f5a6-b21a-4424-bd13-58fed2f4f959 TCGA-AA-3522-01Z-00-DX1.bd54ca55-9036-4167-b8b9-14f4209b7e4d.svs 865b4ae4cc023ea8e608602280c7f24b 200704956 released 138 | d82635df-31d1-4110-8acb-aadd88e9ba74 TCGA-AA-3976-01Z-00-DX1.d2519da8-bc55-4dde-9839-9fa51ecff1b3.svs 1b6ffeb1ae58e642bf74cfecc163b082 171826916 released 139 | f42d95c6-a82d-41fe-a76e-3248592e0bc3 TCGA-AA-3554-01Z-00-DX1.53ea377e-6671-47bb-a2b6-b136d9686144.svs b30ac9fd0c931b9b5dce44874847028b 388479088 released 140 | 92b15918-9499-4664-a91e-c774ecfb2a22 TCGA-AA-3870-01Z-00-DX1.76e57cf5-6c8c-4b75-a8db-29d4522b66cb.svs e176ce1525420ea708a784dd833cb7df 212693754 released 141 | 83bfc9a9-7f16-493d-87bf-988dec60c82c TCGA-AA-3950-01Z-00-DX1.2a81cf11-4c16-4e9e-8809-6f63152060da.svs dff6b5e3e154cba1469b02bbd6512d9d 268804188 released 142 | a72573a6-2462-4185-b4e8-a081c95d8fd7 TCGA-AA-3713-01Z-00-DX1.8148ACEB-7C1E-4D29-B908-F3729657EA4F.svs 56ff0355289e8be02bf44ed0fae8a857 8573530 released 143 | b2cba6c5-173a-42cc-b0aa-93100222d08b TCGA-AA-A024-01Z-00-DX1.5F24A31C-2F11-4768-9906-7BAB578C742D.svs 596e409d5b962024dbd39e21c63cdb5d 1122599358 released 144 | 227fecea-0d2a-4eaf-b32a-1129887d6926 
TCGA-AA-3561-01Z-00-DX1.1b5a2925-53f9-470f-a62c-cc2e5d5abb58.svs 0671eb70be5ee24bc5d5ad9f3d2889ba 484855906 released 145 | 3da5c30e-042f-4dca-965e-0852ca2d8987 TCGA-AA-3811-01Z-00-DX1.369d7946-911e-4e97-8ae7-32ef12b6b106.svs f7d531a7bd252df867122f1e9d740446 300952708 released 146 | a8903dd1-b3ec-4b10-89f3-142fab914157 TCGA-NH-A5IV-01Z-00-DX1.1A6F9F12-C00E-47F2-9400-541BA230EFBE.svs 66b04c9fd051644e030620ff438914cc 1564882941 released 147 | 6037aa4b-7c77-4ac5-b96f-807be2dd7e82 TCGA-A6-6652-01Z-00-DX1.30916007-088e-48bd-abf8-519f34e2c37a.svs 21eedadf83f0e5262f70b28bce347f2a 93316571 released 148 | 5fb8d491-f1b7-4284-b982-a965b12edcd3 TCGA-AD-A5EJ-01Z-00-DX1.FA56CEAF-8B70-45EF-A2C9-8AA7BEB3D88A.svs 5ba8fb9f633a52ad3dcb2482b75b07c3 149648896 released 149 | 03e74334-057d-47f1-9651-4a270ca1f943 TCGA-CK-4950-01Z-00-DX1.03dcc4c2-2b63-45a2-8561-bf18193202b5.svs a462bb3c46235080a172fe6bca1dd4c4 684652353 released 150 | 9c45927e-dd3f-46a7-9773-3ef6874ad7d0 TCGA-D5-6529-01Z-00-DX1.3b2ef23a-29b5-4a22-893c-6114d8244e68.svs f95553420b0989a7d763b4ca5077c90f 657402683 released 151 | 2db970a8-1f49-4b6e-b8be-639cef516156 TCGA-CM-5341-01Z-00-DX1.af4f75ff-3971-4639-8ef4-918ef4b29df0.svs c5d98952cd1089c5fea5164240dd4475 599301711 released 152 | 2e5f3ab3-6945-4600-8720-122c77dd28e4 TCGA-AZ-6603-01Z-00-DX1.e39d6a8c-a738-4d63-b094-11be49fac828.svs 36411fa8ec6d1a5ee322df8946a8cd34 691887763 released 153 | 940f86d9-850e-4959-b3e7-ea63b1d2588a TCGA-F4-6809-01Z-00-DX1.5ab8333f-0c77-4685-8701-4130a93e6f3a.svs 2ff187413ea759bfbe1fa25c92fe5ad1 530791857 released 154 | e5d2f743-a51f-4ba7-a23d-e87a069a7ee1 TCGA-5M-AAT6-01Z-00-DX1.8834C952-14E3-4491-8156-52FC917BB014.svs 6af6aff02d391d623297b524183d0c40 1766078883 released 155 | cd7bddd5-0f09-4891-a11a-b18cae6b9b53 TCGA-AA-3819-01Z-00-DX1.cd674efa-b953-4721-9468-ec6ad8b6f567.svs 25be495b11fe29fefab7d3d94ca7fc5b 237808674 released 156 | a1c865f1-70bd-417a-ad95-b7cb28601bb3 TCGA-CM-5344-01Z-00-DX1.586a3060-8c97-4619-b5b0-ad2d0d2b62cb.svs 68fecd48c0a31f94ddc8635ea844aacd 1003492961 released 157 | 363a0302-d11f-423a-adc6-e324dd65312b TCGA-A6-6648-01Z-00-DX1.88b9a490-0bed-43f3-bd74-1bf2810f6884.svs c2d5d1773a2d3619ba2a75254a7fae85 92767481 released 158 | 86baacba-f6e0-45fa-a355-7213b35cab64 TCGA-DM-A28A-01Z-00-DX1.05b565c5-efa0-41be-a7e3-46f9166ddb7b.svs de08a3f365aa7a66896b46233a684034 1517270555 released 159 | 8d11f723-0be2-4f35-b83a-1d674410e9e5 TCGA-AA-3866-01Z-00-DX1.f93457c3-abaa-4268-84e2-394d7c1aa523.svs c2b8345f171925d90ac7aead760656e4 846678230 released 160 | a661753c-59b0-4e68-87c9-ac2dd5d17051 TCGA-QG-A5Z1-01Z-00-DX1.F3157C57-0F35-42D3-9CA5-C72D93F1BF89.svs df070b04ee1b82d0808919b9011b93e1 877640615 released 161 | 3f7b713c-e8b9-480d-a006-69ff3df858ff TCGA-QG-A5Z1-01Z-00-DX2.2CE72B6A-557F-43BD-BA4C-B252E14E46EF.svs 104e4c21db218eaaff15c64b586eb023 1512832207 released 162 | ec1f4d29-4b67-420f-9bba-9138b5d64d4e TCGA-AA-3544-01Z-00-DX1.96850cbf-2305-4b65-8f06-db801af51cc3.svs 32f627f5a4f2ce15062628d6bf5fae75 361541756 released 163 | 98d93e91-2ad8-4346-a2c1-801703222daa TCGA-AA-3850-01Z-00-DX1.49b55930-74fd-4103-9151-7b906a18be02.svs edc000931e0dfec891de0d1ff2705b31 258576732 released 164 | 5794786f-1632-4ec6-8a92-2430c9564260 TCGA-CK-4952-01Z-00-DX1.0e98c7b4-5f80-485d-a191-ad93564b5f96.svs b08f705d3b8cc62e037e71ffed196ad9 488415319 released 165 | 998d120d-443f-4a3b-97fe-6e9088c86366 TCGA-AA-3529-01Z-00-DX1.99453fef-afe8-4a43-a64f-df2d48ef9e55.svs 13574b403e06d6422ee4c06423d29aa1 625109552 released 166 | 95f9ca4b-edc6-43cd-8d05-ff92a4ddca49 
TCGA-AZ-4684-01Z-00-DX1.1c29deb2-b0e2-4788-a3e8-83ecab7f9208.svs 588dfccaef03769a74a2b49ab2e2700f 829664311 released 167 | acf9470e-04dd-481b-9510-97a3a149f02b TCGA-AA-3848-01Z-00-DX1.bb018b1c-7748-4865-b00f-42edc35b5047.svs 3e99112599c0077672c4e00d3bd1f898 369381732 released 168 | 30ea4f08-2d34-466d-86b8-eb89ad03fe1d TCGA-A6-6138-01Z-00-DX1.11a4fad6-dfa3-4236-b714-bf1023b54622.svs de8141786593c8b64bc024d75ec71ef0 193818641 released 169 | cf213c6d-3805-45cc-a27e-69904a088a29 TCGA-AA-3549-01Z-00-DX1.2fe99d54-c61b-4867-bafe-efe4f291c429.svs ff93fa409218357399aa85175c9c2f94 315603106 released 170 | 32268fd4-3c70-4906-97ac-c480f76fd00c TCGA-CM-5349-01Z-00-DX1.d893eb9a-0321-4052-acfc-8c9a6e463921.svs 85d2ed43ed13bf9f86780a298c9e4153 713342183 released 171 | d22cced9-3126-4072-a418-5a3863df2b7e TCGA-AA-3532-01Z-00-DX1.00c7a378-a7c5-4fb4-9f53-6197be81c2eb.svs f3b6dbc3cb9d02103f21be812de49515 198732672 released 172 | f60ff5bf-405c-482f-9bac-12056d3f4215 TCGA-CM-6169-01Z-00-DX1.0381c243-02b8-4f1d-840c-19ef44d4b92c.svs 9c2f6c60967769593731c971fcf9ad62 813164435 released 173 | 625ae478-a0a5-492b-8e8e-a0763008f960 TCGA-AA-A00Q-01Z-00-DX1.D427F78B-6640-400B-B8F8-B5568B1C4321.svs 62f008860f93cc3702adcd43df557bd0 640523462 released 174 | 645ac325-9832-46ab-9832-9652bf6add98 TCGA-CM-6674-01Z-00-DX1.4a08b16a-788e-43dc-85d2-baff6e911de2.svs 64b00c4b881d6c419da6ddf80b24bdd2 1104971675 released 175 | dbfe866c-fc72-4423-9667-9f03c6f76123 TCGA-CM-4751-01Z-00-DX1.F72E1883-5293-4351-A8DC-C4EA5D8F797C.svs 90ad5f04025a107e9de3fa1ab2900919 741961715 released 176 | 6a00516b-dbb2-4500-a75a-b4684b658a1d TCGA-AA-3872-01Z-00-DX1.eb3732ee-40e3-4ff0-a42b-d6a85cfbab6a.svs 5b172bc6ec283084e50afc5d9ab14d98 186766650 released 177 | 219576f3-3c71-44f8-a23d-addde6ef33dc TCGA-AA-A03J-01Z-00-DX1.4E57E86E-ADEE-4837-9F91-E9CA141F7ACC.svs db61fd927424a68521bcfac3423da062 1837160324 released 178 | 86901b57-8a9d-45de-bdb4-4ce5501704ee TCGA-QL-A97D-01Z-00-DX1.6B48E95D-BE3C-4448-A1AF-6988C00B7AF1.svs 9244f2e801e50611d13e2591139ae128 1560348469 released 179 | 103810e8-c721-4242-b729-270097a23469 TCGA-A6-5665-01Z-00-DX1.3ad2c249-d138-4037-a59b-4747ce2b789a.svs 73157eae5c5559cc546e475af4ad3af7 168725275 released 180 | faee5ad2-300c-481f-ac93-86d074775f04 TCGA-A6-6137-01Z-00-DX1.f50ab63c-05b0-49ea-9ceb-ed10cd6dc027.svs 2cab785a0b0bd2fc8fbfba2de2ecf2f7 321958511 released 181 | 2bbf3cea-c7cb-4410-9151-7281132bdae5 TCGA-AD-6899-01Z-00-DX1.646f5e1a-212f-4b15-8689-8b55f7ba8c47.svs 8507a9fba5a1c6f4341867105392477b 49599403 released 182 | 8e176cfe-6a85-4429-808d-af8e8e56e734 TCGA-A6-3807-01Z-00-DX1.c3de2064-4f8d-4786-9ff9-2e0f44293717.svs e2ae70b4cb08e8ec44b177121efa44b6 368808405 released 183 | bcc16f92-3489-4c23-a8de-3a79fab77086 TCGA-D5-6537-01Z-00-DX1.f81ccf91-7ce6-4ccc-8278-cc05f639aca7.svs 78803f93622cb49be0a0a41034f5872b 717296673 released 184 | 9158e34e-8866-4e37-9c29-ecd388d31f91 TCGA-CM-4752-01Z-00-DX1.ac26d5ca-f554-4766-a4c3-f90a8c327dd4.svs 676be167c64c74a97b4aac5bd0844ce1 537532599 released 185 | 35edf06a-803c-4aef-bdb7-5965b86f69e9 TCGA-AA-3815-01Z-00-DX1.d6823390-73c1-431f-b480-0954f4df8224.svs bc00cdb93d95b930aa3692203b31c64e 544691952 released 186 | d4b04202-16cd-4aba-8dc5-47a3ee4d04a3 TCGA-DM-A1D7-01Z-00-DX1.4F3CF25D-A350-4A92-A891-7FFE40BE2710.svs 577b8931e780ae5176c6ae8baec2a719 908569445 released 187 | 39a4f424-780f-44fe-8d60-1e2ffe8ce6c6 TCGA-A6-2672-01Z-00-DX1.e2a845c8-6d77-4120-9f43-abec84a66c1c.svs 1f03db6486272ede6a6346618ef0363f 160280869 released 188 | 871c2b48-1461-40b9-a517-7a67474b22fa 
TCGA-G4-6299-01Z-00-DX1.22701e3b-7bfb-45ad-9382-842b2da0387a.svs c0bf39fa47ce712eea8bfddbede9dab6 895608509 released 189 | 45292bb4-733b-4636-ade7-4a8c6d6bfc16 TCGA-D5-6923-01Z-00-DX1.ad5211f6-32a3-42c6-8594-93cb4432b5f6.svs a6f7a2440eea439b7c2af1182efade6c 132695627 released 190 | b8e25c65-06e3-4dc7-8b8c-7a0dad244425 TCGA-AA-3542-01Z-00-DX1.db284d9a-bde5-471c-ac37-5f3216d0f077.svs fea80a45a77ae531a4a14e3a748df73e 288551414 released 191 | 98b7c182-b122-429c-af4f-45be155b56f8 TCGA-AA-3856-01Z-00-DX1.973974e7-fcfe-4866-bc0c-50645c6c304b.svs 5dac1fb3e6936e42cd158cf425eda4cf 619340700 released 192 | 001b7d97-9425-43c3-a9a3-a36cb3d2a591 TCGA-A6-2686-01Z-00-DX1.0540a027-2a0c-46c7-9af0-7b8672631de7.svs 180d8f24d037b1e5e70dcd3dbc360b2d 497633503 released 193 | d5b766d5-9f09-46a2-84c4-2eec944efc7f TCGA-CM-6171-01Z-00-DX1.74d4391e-3dbc-4ad4-b188-3b11ac65e6d8.svs 2935f6e2e504c5a5361c96264ac715b2 905712995 released 194 | ce9a87a9-c6e2-43b3-949f-1c6c2922523e TCGA-A6-3808-01Z-00-DX1.2b5a7ba3-133c-48be-87c6-199c4af208a0.svs 49e3b2f3251f49370a1d59d685552353 224700101 released 195 | 62a7f12b-05e5-4a82-9bef-dd901d2ff6d1 TCGA-DM-A28M-01Z-00-DX1.055b2d62-8a1e-4bdf-a49e-123ad0de657b.svs a5ffd61bd47c2b1d1393b40c16671c3a 1582538259 released 196 | aaab8c27-dbc8-4221-9b14-97834e3b756f TCGA-AA-3684-01Z-00-DX1.c6be6ea4-fa92-4499-b458-85c3a8b1e3b6.svs 526c5bb13594b929d7c4391c537af9e0 975409112 released 197 | 3e405d04-ccc8-4757-9a70-d30f449e2548 TCGA-QG-A5Z2-01Z-00-DX2.F2352352-8F00-4BB3-8A62-8D1C1E374F95.svs 4f9f2e23c9286a9dee507651eb89abb1 1610012065 released 198 | fa3a810e-09c6-4342-8923-1fe70546d9de TCGA-AA-3492-01Z-00-DX1.32D79909-71D5-4843-847E-AECA5DBC963D.svs 0aa83b987910cba90490d5dbeaf113bf 9013124 released 199 | 9cea4fba-125a-43ca-bc06-49d5b24daa4f TCGA-CK-6751-01Z-00-DX1.df9e123a-c44c-4cc5-82de-ba7c4dbcb444.svs 6b3928920972eb63bf82ea132a5010f7 506907171 released 200 | df076968-bcb9-4d39-918d-a6775020cd60 TCGA-F4-6855-01Z-00-DX1.41ed5985-be19-4dce-aab6-de3be0f1dcca.svs 428da2ad4faa0da19c59baf59d115432 129167575 released 201 | 20c10ff9-8505-451d-adb0-aacfe649ad79 TCGA-AZ-6606-01Z-00-DX1.aa79b3d8-8ff1-4171-96dc-94bc7d073d93.svs 0d79ea4b874240252d05dfa5eeee771a 879253119 released 202 | bd7699c4-4b91-4e1f-9114-f449cf7b2889 TCGA-G4-6315-01Z-00-DX1.2c3c17b0-c118-42b1-b1c9-7cc984e47f6c.svs 482b83a79b232a70fe0a34e101ea063e 856957481 released 203 | 7c653c1e-b9ea-41db-a4d0-8d409769e22b TCGA-AA-3956-01Z-00-DX1.9438cc23-9537-424c-9a70-852919548387.svs 16b249432585b1b6abec4d2ff793698a 277749588 released 204 | 28515fea-02da-4b19-9554-6cd9cc55a3d2 TCGA-DM-A28F-01Z-00-DX1.3ebf56a0-8f55-4681-bc7f-1e20d802a966.svs 6c110524ecb529b329c49e592fff45f8 1030100829 released 205 | 395f7bb8-037f-4126-98a9-2f48442d0606 TCGA-AA-A00A-01Z-00-DX1.B51DF257-6110-404C-9B20-A3C3453788F6.svs c4cfb7eb9b017640d4d17dd284d3b125 854744872 released 206 | ea9736d6-504b-458a-9361-605ce755aef6 TCGA-NH-A6GC-01Z-00-DX1.29073D7E-5EEF-4BBA-96BE-DC8C69924C42.svs 68f5d89a916fc57c947c322c8e0ff54f 1300049075 released 207 | 7ef2a641-5cec-4743-a2a5-4416268f09fe TCGA-A6-2679-01Z-00-DX1.8df66ef4-d9e5-41db-836d-f0afe46d6b5a.svs d7700edb52c52814478433cd90398228 128003621 released 208 | ec5b592f-8203-451a-8491-5998c12a5355 TCGA-DM-A288-01Z-00-DX1.716efd68-52d4-4049-b9a6-480700579e74.svs 53d0e63c560c572079018fe616f194c1 1796899455 released 209 | 8472de58-9075-4534-b00b-3a87ba2158da TCGA-AD-6963-01Z-00-DX1.7df2e133-5f24-4c0a-b7f5-5a65fe3420c9.svs d7b7a08c5e7b931aa7b04222c90b1b2c 60853347 released 210 | e79cfeea-cbff-46fa-8538-7f2527af70aa 
TCGA-AY-6386-01Z-00-DX1.2B976983-5724-4335-8B47-9B44271B0A01.svs cee0d2ad8a7b2557eb1973474a81513d 356941631 released 211 | c4d2c58a-4de5-452b-907b-40a61972aa59 TCGA-AA-3851-01Z-00-DX1.cefbb22e-6b16-41b2-b732-452bf2efe425.svs 1ac240e36c5d1f2f06bddbf0db8288b8 301924912 released 212 | c62ad85b-43f6-44f5-9924-bfafb73aeafe TCGA-F4-6460-01Z-00-DX1.92a182ea-f22a-4d74-bfb6-34d3cd757dce.svs bc5ab38f610fabd0fe556955bb52a2b8 723106957 released 213 | 65d4d30f-b02d-43b1-be7e-ea8da14e0f88 TCGA-D5-6541-01Z-00-DX1.b342c06b-8c59-4218-82f5-388568037e41.svs e4364da76aa7f8b4076e0db450a397e0 817110635 released 214 | 223b9587-088a-4fbf-a750-9ce0d51a016c TCGA-A6-A5ZU-01Z-00-DX2.221EC8DE-4029-4ED2-8D84-95647BD39E03.svs 6110a9afc038e306c284f0c52cfc877d 747436481 released 215 | 5f586d76-05de-458a-a62c-4989fa79ddcb TCGA-AA-3982-01Z-00-DX1.2d2e4de6-5b8e-4fbb-a9a8-4fbb48b5492a.svs 51f40512c89b64c07714a924cacdc87d 315995056 released 216 | 9ed5b8f5-f800-46b3-b5db-252bf79c2890 TCGA-AM-5821-01Z-00-DX1.0851bd43-6c23-4db3-a50a-7c9fb5080150.svs 08dd236ba96e6c2657015c6ba3059c0b 433253901 released 217 | 8e8a9f9c-b5f9-4104-a005-35656843fd27 TCGA-NH-A6GA-01Z-00-DX1.33AFBF24-84BD-4E21-8A2D-A565AD3E4376.svs 55b7ed7aa8505cbe6b8c0b124ffe05b0 1207460853 released 218 | aa21c004-1a95-43df-8523-a64f6e9e73ea TCGA-G4-6297-01Z-00-DX1.3e37fa8b-c10e-4e44-9933-5bcbbe088fe0.svs 96f20217bda40d648f7782fe79a4621b 948949867 released 219 | 6096f752-6794-425b-95da-8e2df23d11dd TCGA-G4-6322-01Z-00-DX1.8676af67-716a-4052-a5e7-024b2e29c492.svs c54a37602210613e2ce7c49f88cb0a17 916930917 released 220 | ed7c20f8-e2a0-4a07-baaf-5002433d8c0c TCGA-CM-4746-01Z-00-DX1.c83b9795-bf45-4100-9052-a7e485e0f770.svs 2a7d75bead5646ae234e0a70dc30013c 1064067427 released 221 | fbde56ab-b131-436e-8a6d-24c1e05217ca TCGA-CM-4743-01Z-00-DX1.f54a6355-5623-498c-96b9-2ff1de6576c6.svs 613c0862d52ca48e177e44d64c311799 1236833149 released 222 | 62ef2f7f-8870-484b-9af4-0c2e277006b2 TCGA-AA-A01Z-01Z-00-DX1.9724B55C-C5D9-4C8B-AA05-76C21BA1F046.svs ce4d7fb33b0b970b6cbba86fade23d09 1003283578 released 223 | deaaac57-4ce2-417d-8f5b-ecd9d3303f94 TCGA-AA-3517-01Z-00-DX1.dac0f9a3-fa10-42e7-acaf-e86fff0829d2.svs 9557af5c293e319cafd45493e18ef8c8 326690588 released 224 | 21b6d67b-524b-4172-a741-32790e6d9742 TCGA-NH-A50V-01Z-00-DX1.408BA0A6-E569-4464-A8CB-D6553A4DF9E0.svs e1d9f4401c05d55d4726c7aaff0a1f48 1504733163 released 225 | f14a4926-7285-4e18-bf48-5fd7f27c6823 TCGA-AA-3968-01Z-00-DX1.54b76478-a822-49b5-8286-dcbbb2fba2f8.svs 81d1e14bf2e4ce3a2eff599ebabb8c52 268215310 released 226 | b9afc2c7-c1eb-4bdd-b883-6f4423528c36 TCGA-AA-3678-01Z-00-DX1.4bc052e9-e5b0-4328-afe5-8d19fd2c386c.svs 36b177ac7e704ccb0f3f61d9ed88f28e 145841476 released 227 | 3a278a6f-e7d3-4507-903f-50a0ea8aa113 TCGA-CM-6167-01Z-00-DX1.7adf00e3-6768-46bb-814c-b2f04c472cc8.svs 128efb5632e5e0ad4382b97576b919ee 728688673 released 228 | 1b2d9c2e-5f7e-44a7-bef5-9c6ffda97429 TCGA-CM-4744-01Z-00-DX1.527ead53-bd55-4321-adea-079bf5e2e8a5.svs 9e2576a9d0bc154e09f59c24bbf02f9f 1072119169 released 229 | e6088bd0-2ef5-44f1-9111-62e10b8922ab TCGA-D5-7000-01Z-00-DX1.fb08c430-2c8c-486b-a39d-7d28c5eae189.svs 3ae8a55addfe120bc4238441c8ec44fb 97777595 released 230 | 2b2ba75b-ae19-49d4-b14d-1f49a0fa9613 TCGA-AA-3534-01Z-00-DX1.a49495e0-93b2-41fa-9229-6375533578b5.svs 66c00bcb00202fc03b6cb8f108cc6d71 206804284 released 231 | b93a9f8d-bf8b-4a31-83a5-cb63eebc131f TCGA-DM-A1DB-01Z-00-DX1.092D3ABD-7DFE-4193-B049-B3C3617706B0.svs e30f5a0be672fa2fda9dffcdb7f74495 1919176771 released 232 | 9d4f11ce-4e20-4e4f-92c6-cd039ca7973d 
TCGA-A6-2685-01Z-00-DX1.c69e23f4-34c9-41ff-a037-44bf7bbf33cd.svs 8a505e9d239d2aad4028487067c5334b 436523169 released 233 | 1ce88052-f889-4630-871d-09de5c5ad369 TCGA-NH-A6GB-01Z-00-DX1.AD90C375-54ED-4EE4-A537-59A2E3FE4BCD.svs 7d1bd80092cf7bcf6ec091090e82f6e2 1958128915 released 234 | dcbe590f-5960-402f-a095-d9be5ac773e8 TCGA-D5-6928-01Z-00-DX1.f8a8fb91-c23e-418e-b9a3-500af1402ce1.svs df4429cf8352e45b5e5e120f778fb35a 127650421 released 235 | b47a7445-1a54-41ed-940e-bebccd87288f TCGA-AA-A00N-01Z-00-DX1.79D0A833-8411-486B-9BED-7B5E203D02F2.svs b195d10bafa574f900d9fe00c1b08335 426997170 released 236 | 41e7dc14-d609-4047-a89d-53f63eb25fee TCGA-AA-3818-01Z-00-DX1.80d3eeeb-9a4d-4211-90e8-605a4b809a63.svs 2ccba1e0784629625634c8fcb746b5f6 259827032 released 237 | 0eced714-3cec-43d5-a956-72f7ab15cb94 TCGA-G4-6588-01Z-00-DX1.0747172e-f630-4b7c-9341-55078585ae00.svs 4fb662d507df3eb6e845f10967689741 809239347 released 238 | 27ac8b0a-9daa-4fe6-8bd7-067f18f4a389 TCGA-AA-3672-01Z-00-DX1.6cc142eb-e77f-4c09-a6ac-e85470221812.svs 343e6ab936cba29dab7521b18265adac 507400886 released 239 | 2d3b690e-06a8-43ad-9235-dc5eef98dd19 TCGA-A6-2682-01Z-00-DX1.be71dca0-b9b7-40be-a6c6-9d053c7886a6.svs bc63dd6e5f6732173386a987ad9e9bdf 316352773 released 240 | a02c0560-9e02-4e4c-ba2e-62211f785c3e TCGA-AA-3525-01Z-00-DX1.b6079f23-6ad2-41fb-885f-d7c68450c8d5.svs 2e47ed959eb61332bbd411b13c560f3e 371668746 released 241 | 20c54d6e-a4cb-4503-aafb-a70b8c342b9a TCGA-AA-3560-01Z-00-DX1.7ca786bd-777e-4b33-8778-fc5e2b061ff1.svs 60f8aa474db0bb9606840ae721f682e4 198771958 released 242 | 0dc16a72-93b4-4c47-b58b-dec3979085b9 TCGA-CK-5912-01Z-00-DX1.23a955f3-a1ed-4cb3-8e49-cbb3f789f3f5.svs 2c911bb3d50a2a69c0b9237299a0b98c 1081516431 released 243 | 2c2dc5d7-b7ff-4043-811d-3b24c1ae4184 TCGA-G4-6314-01Z-00-DX1.bea21980-9584-4382-9de3-4c5114edb10d.svs e58b2e7b712ff1d66df041ab224b5804 740128083 released 244 | 13ea8b59-6628-4faa-8047-6628704c4636 TCGA-AA-3986-01Z-00-DX1.db60e495-c0eb-416c-b65b-55ce62ed10b0.svs 67ab4b2b5a07ff03fc09f7471184e48c 668432404 released 245 | 0e5316aa-c726-46e3-881b-c2bf918c1231 TCGA-DM-A285-01Z-00-DX1.219e2829-8ffd-4b51-adce-cfd48293191b.svs 83303298a57a7e5f385b72f9d68e4a76 1513819103 released 246 | d65c5d21-6333-4a9e-9a2a-139a122a3c8a TCGA-AZ-4313-01Z-00-DX1.5e7ecf69-d1fd-4997-9dcc-ab8e9f10b423.svs 4e9750d2fd3ce0704429cb413feeed12 664882033 released 247 | 10efa9de-0389-4ecf-8297-cce33b2dc6c9 TCGA-A6-4105-01Z-00-DX1.228b02a5-04fa-4392-bf03-b297c19665c3.svs 2da9e4b84c4ba902dd18e24721080cff 420811207 released 248 | 683ec752-46a5-40cd-9bda-b6b1c771e955 TCGA-CM-4750-01Z-00-DX1.250ea50f-3aae-4fcd-9ba9-25cf2115525f.svs 2f72575f40bd4c3313e0fb73716cdb20 1255908505 released 249 | aa80bf43-a394-4d17-a14c-f8d67a9e7af6 TCGA-G4-6586-01Z-00-DX1.f19ef98f-9540-4b8d-bd13-5891e79b2576.svs a9ef7f89c61ab9ad3d37e39a33ee2a44 676419285 released 250 | c0cfb588-39ba-48f1-a653-b3974026e011 TCGA-AY-4071-01Z-00-DX1.6C426E14-2DBD-4009-A6B5-B8B7B8F5888C.svs 69a75ab0ed488b80dc28b67f1c376f3d 112945741 released 251 | cdcd8072-7739-496e-ac05-6df73e3cd940 TCGA-AA-3511-01Z-00-DX1.F66F89C7-147D-4EE9-A482-61C3033EF443.svs e5bfa77c987236ab2579f00ddaaa3394 10442472 released 252 | 2e23c9a5-0e47-4b5a-9359-b73ed2dd1916 TCGA-AA-A00Z-01Z-00-DX1.47847702-E46E-40AA-9BA6-2ED1912D1E73.svs 18d5094c88efac02fe4461023e6d4c5f 303800466 released 253 | 8b1a3ed9-480f-4dae-ba4f-393a839e62c4 TCGA-CA-5255-01Z-00-DX1.77310ae2-9c5f-48c4-9754-c5b30d287089.svs f29005665faa147cf2a0884222c2c36a 546549735 released 254 | f373057a-ac13-4a03-9194-58c2c67ccee5 
TCGA-A6-2680-01Z-00-DX1.7b77c0fb-f51d-4d16-ae77-f7615b1d0b87.svs 27099598b0a39fb2a54f13872b99e540 172748717 released 255 | ef497638-68a8-40f4-b321-61bd559c010b TCGA-G4-6627-01Z-00-DX1.f23c6977-d0cb-4bc8-b373-4b1b73c331cc.svs ba32875453b4f63b75eed38e8c41c9fe 900624421 released 256 | 492e492d-6ab5-4fa7-97e1-9b1553d4337b TCGA-AA-3526-01Z-00-DX1.82876320-2866-4ffa-81d7-3278f7150fc3.svs 229f6d193a2880e067e0d99d41a319c5 356122786 released 257 | ef52da91-023b-45be-ac86-22f4e051ff2c TCGA-AA-A01D-01Z-00-DX1.A6FE424C-9BF8-4605-8A78-7BD7B83BEC61.svs 4918f190719586db6f82714ac7dd4e79 831954692 released 258 | ace9aef0-0144-4e2b-b5d5-454024a7c591 TCGA-A6-2678-01Z-00-DX1.bded5c5c-555a-492a-91c7-151492d0ee5e.svs cd7238a283455f419d80531e568aa00b 179013807 released 259 | ca33b5ee-d075-4ea9-8490-3167beedd69b TCGA-F4-6570-01Z-00-DX1.57a15bf3-d9a1-4da7-b71a-7b992a275bdf.svs a5319945a8633a3cfdf97bea5ca37859 577698187 released 260 | 8a78f6fa-6940-4692-8992-104378fc3ee8 TCGA-AA-3558-01Z-00-DX1.c4af1f52-2e81-4d66-9561-ce446dcace03.svs 0fc3d689f26cff315501f92aeaec2d78 460745654 released 261 | 600dc15d-8c77-4eba-a62a-47551d2213d6 TCGA-QG-A5YX-01Z-00-DX1.28125B5A-B696-44AE-8A86-72E2CF7B9A6A.svs c08ce8883ce2bd7d67c366623ca82d81 1271440499 released 262 | f8ec6630-7642-4e41-9975-7f171710fde2 TCGA-AA-3833-01Z-00-DX1.d27bd30c-bba2-4621-8157-feb28ba2e241.svs 565896dfee0a7efed0565fb4058ce03c 406575192 released 263 | 09b18cbc-7c10-4b9f-af6b-4dd8c2e8218a TCGA-A6-6654-01Z-00-DX1.ed491b61-7c44-4275-879b-22f8007b5ff1.svs eb104314956b3e95561d9443f91d5453 348456783 released 264 | 66c0dab4-d5c5-4d36-b2fc-4fc8c9aa963a TCGA-CM-5860-01Z-00-DX1.95f23758-00b7-4602-b4ef-944130528f36.svs 83469d2f1597165b44a979ddc4d3bcc9 761795059 released 265 | e21c8190-ca2b-424e-811d-98cd24814bda TCGA-AA-A01X-01Z-00-DX1.7433F54C-2A79-467A-8FEA-638AE48F42A0.svs f85442f5c1a548fa1537271989cef385 1196945248 released 266 | 337025b3-97a6-4913-a4d2-3494f810884d TCGA-AA-3695-01Z-00-DX1.be93a101-7b57-4140-bd82-86c65e65ca27.svs d9a8e1ff12d24d97efdd7c8010440fca 412433004 released 267 | da15fd05-3763-417e-b269-da9b819ea617 TCGA-CM-6162-01Z-00-DX1.806a99a3-cda2-4dde-8d13-d22912b44d49.svs d0c92ce8ad5d903afadc9ea08bb33859 733243201 released 268 | d101e777-82d9-4e5a-b6bf-62392a0511eb TCGA-A6-6140-01Z-00-DX1.f34d99be-25dd-4811-9155-0dbb53e849ac.svs 4691cc35c25895cceca7491f01fb4fb3 273385337 released 269 | ed500fa9-70b1-4d52-9e73-d8d512448901 TCGA-AD-6901-01Z-00-DX1.0a69c0b5-6238-4c1a-bbbd-ea743bf6fc98.svs 019662e78859da5c7cddc01af02fd5a1 117149885 released 270 | b2cdcbeb-a7ab-4950-a41b-4747902feb56 TCGA-AA-3821-01Z-00-DX1.019b3b1d-5c05-4be6-af25-6ee63475897e.svs 2ee18bfe9470c8cadefed2e44785d0e8 328261828 released 271 | e1a7d9a2-48db-41f7-8548-ca8ac936d22d TCGA-DM-A28H-01Z-00-DX1.daf607db-57d0-4685-8dd1-f6d0a9ee0435.svs 76539e406222d93221fd5a0d830b8f3a 1283222375 released 272 | e72187ad-1c39-4191-9379-e6d8392a6af9 TCGA-CM-6164-01Z-00-DX1.ccf5ce96-b732-4c35-b177-d3dbe2ed89cb.svs 12148cab50cd64e97c8ba63a719bf93d 607355251 released 273 | 9fd2abcc-4fd7-40a1-b042-7d48f62a4282 TCGA-G4-6306-01Z-00-DX1.962227ca-b0d6-4cf4-afea-8f7c2f9b2477.svs dfd330470fc45a8e6ccb4f695c99ddd8 638495709 released 274 | 05b07eb8-5db7-4755-8a0c-900059c0e3af TCGA-CA-6718-01Z-00-DX1.9774472f-a29a-4b2b-8e50-ccbf9e5f9686.svs c1a4203a9bb666d2458333dcc3f43752 681487273 released 275 | 2c275682-06bf-4321-9135-890af2807717 TCGA-G4-6304-01Z-00-DX1.cf678a4f-5448-4d6a-a36f-cd1aec4d48a7.svs 297562c9b4faf4d7d1bafe83f138e057 1372925325 released 276 | 17cfcc8c-49a4-48ce-a5e1-4a3c582ce198 
TCGA-AA-3837-01Z-00-DX1.5692e5c0-6dc2-45be-a5f5-00a907c5c824.svs 0e54e1ae55b27a30e023fcfc647098b8 272986122 released 277 | 70012428-8df8-4eb2-8d28-7d0b2a88d1d7 TCGA-A6-3810-01Z-00-DX1.2940ca70-013a-4bc3-ad6a-cf4d9ffa77ce.svs af5a0d02ce2cd62e6639fe0be2575ddb 58846445 released 278 | 21471300-f78e-4174-bdc9-7a4076c5f1b7 TCGA-CM-5868-01Z-00-DX1.70f2e193-248d-4bf9-a875-49c314223f70.svs f881b5ed47335ced17f9ffaea1d2c598 900598393 released 279 | 11489c71-5516-4ca3-8256-4c91ac223e20 TCGA-D5-6926-01Z-00-DX1.3830423a-3587-432b-9a6c-84f838e49fe6.svs d996b3eba51fd363f296a715afac4923 239278833 released 280 | 16a5bceb-dba9-4c63-92bf-a63c5901d0b6 TCGA-CK-4947-01Z-00-DX1.b257d5fd-b97b-4987-b088-77f044ca7fe2.svs df8ce055036b39f4aad8fb3a059a24f0 906600575 released 281 | 2fd860a1-534f-49f7-8169-ece8748480ac TCGA-AA-3538-01Z-00-DX1.60d0b039-25d6-4b71-a36f-5b2764a983ef.svs 7d43cda4a4e5e8faea9f2699746c25f2 151974892 released 282 | 2704d972-b63d-4db8-b1e4-56ba9721d6a6 TCGA-AZ-4614-01Z-00-DX1.c1488dfe-528f-4dd4-b5f8-da81fbb4ec0b.svs 6dfc1821a5673cd2260428f5ba63f1c2 1021307135 released 283 | df3708ed-f6bd-464a-9717-360ab5877e10 TCGA-AA-A00J-01Z-00-DX1.BA85D337-6687-44A6-A8DD-0CE889134BA0.svs 7ed512b7f933e751bbcd8848a00724b0 1927041470 released 284 | 2b02baad-7143-4fea-9f28-ff2d4efc4044 TCGA-D5-6932-01Z-00-DX1.d18111de-f0f5-4637-8534-a2b4396cbb41.svs d4bb72b15b1a9b9cfdc6d3c89b6edf71 228188255 released 285 | 9b44570d-51dd-4cc4-8ec5-37ede80aca31 TCGA-AA-3994-01Z-00-DX1.ca18c0cb-88b4-4a31-be1f-cca57dfadabc.svs f0b25828508f97a18ac3857f37e92b5f 212368348 released 286 | 3db6186a-d812-4d9d-a7e8-a7f0c0aa140a TCGA-SS-A7HO-01Z-00-DX1.D20B9109-F984-40DE-A4F1-2DFC61002862.svs 1afa8847f484afa25c3a1fbdbbc89b54 2333626110 released 287 | 0e720a10-f234-41ae-b885-dc32e86778db TCGA-AA-3949-01Z-00-DX1.23748e80-0d7e-4238-8b29-f74cddae8596.svs a4ecd4ede226ae6c2e4ce1ffe659e314 260091780 released 288 | 17eb2473-7018-474e-ad62-e95be30edcb1 TCGA-D5-6540-01Z-00-DX1.4ca9e502-959b-4fa8-a748-5fd0878e5c3f.svs e43391da19059b866de232d164500fd6 564150921 released 289 | 87c3fcd8-8a5c-4797-9dc3-193077082874 TCGA-AA-A01R-01Z-00-DX1.5D2BEC13-8F61-49D4-A96E-4C6C44BD5A38.svs 25ef2f81a1155a92058905227a7c9879 1835252848 released 290 | 33a4ddef-d89c-4a9d-822d-33d3cf9527e4 TCGA-AA-3845-01Z-00-DX1.20682536-a009-4184-a40b-cb889f37ad32.svs 76a21f14bd3570a0035ed1e92b1387a8 223572372 released 291 | d467f892-035c-4906-abea-c6143c8f8434 TCGA-A6-4107-01Z-00-DX1.89bf3dd5-72a6-49cc-9857-df2c36884029.svs 998bf802b1f9b871d98cea88690a028a 159935975 released 292 | 53772800-a81b-4652-a891-6a9898de8283 TCGA-AA-3679-01Z-00-DX1.b3445f8e-b143-4f24-9edd-8abdcb6b139b.svs f7e02f3bd3a45f6720937b18bb62a106 305678090 released 293 | 37d511f7-036f-4750-b40e-8ef00a8ecfde TCGA-AA-3955-01Z-00-DX1.e0eea910-db79-4797-b9db-bb8bfe35306d.svs 2e5901dd78073eadc1a4a6fe9b0326d3 219818402 released 294 | 75689647-c068-4d5a-bd1e-fc984cbf71a3 TCGA-AA-A004-01Z-00-DX1.2576461E-7FA3-4CC6-8CC3-58D8E88CE04D.svs 13756b346f6e247e9201b2f75aa89744 882367214 released 295 | 0b9fadfc-f3ba-4af2-899c-bf804369fd55 TCGA-A6-A567-01Z-00-DX1.F941874E-9BF7-4E8B-908C-41A638D62275.svs a66099fb106c5d9c7b38d2d876a2e972 850944145 released 296 | 567492c8-a39d-4a87-a569-b1a6537a1815 TCGA-AA-3514-01Z-00-DX1.9e135da2-436e-47e3-9dbf-b2f577677828.svs 7a58d1813b20484daf35343884176b93 291846830 released 297 | 7bc0aeaa-e051-48fa-9d7e-afd6838713e3 TCGA-AA-3710-01Z-00-DX1.78082263-b7e9-4281-aa55-2da5f80e4499.svs 16f6b69860e017945d18d940425a5896 416103630 released 298 | 930bd45f-64f8-48a9-a4f3-e14c383ce6b6 
TCGA-CA-5254-01Z-00-DX1.cd986205-1db0-457b-9a28-75bed56376cb.svs db6e4e06768cd6431bc23706e3fbe119 470697071 released 299 | 808c0782-1739-4c70-a26f-8fe8c4ae8f64 TCGA-CA-6715-01Z-00-DX1.d5db8085-f91a-4eee-b15f-61960af713af.svs 1be848428371eb539483412adce1b6a4 824600629 released 300 | c60a846f-6646-492f-802d-1bfb349d9303 TCGA-D5-6930-01Z-00-DX1.fbf9468b-67c6-413d-a188-707ee2ab9b95.svs 183ff3d962c27a7f53fd04bcc2b3cda0 141114419 released 301 | 741c96f0-28ee-4a7b-9720-3afa20f69229 TCGA-AZ-4615-01Z-00-DX1.ecabbbb1-c1ed-4f60-b44f-b07eaa177208.svs 14b5d4c70484eb6a3ecced3c255974e0 886605099 released 302 | 3e1fc385-7eef-4d96-9735-a5c039286ee4 TCGA-A6-5660-01Z-00-DX1.b254e383-a889-4b73-8f91-8580c8285754.svs a11b2bac8d718565b19400fe6c4b69cd 202973171 released 303 | 4e9db2f2-606c-47ac-9861-a6efde2766d0 TCGA-F4-6704-01Z-00-DX1.f7cb6641-a6f3-40b7-a5cb-aaf604f73d0f.svs 1558e9e41e3fa7ecd8ad333784062e4a 217797945 released 304 | 0bdf189f-bfe3-4587-8303-d1905a3822e4 TCGA-F4-6856-01Z-00-DX1.2872c7b5-b94d-4147-ad90-69f88668135a.svs 051f44f8a4b665d08c5891de71deb3b3 413042397 released 305 | 31565365-1404-46ff-b4f6-a5c5294c9ba6 TCGA-A6-5661-01Z-00-DX1.bad2d858-11b4-4b9c-a720-daaae592cf48.svs f53e6c9afcad8e8d3dee383e8bff8c79 168764181 released 306 | 4b7e4329-0b52-4304-9bf4-ebefa608a471 TCGA-CK-4951-01Z-00-DX1.abdbb15c-fd40-4a55-bf54-5668b3d4ea13.svs 907a29f6e6a7afc646c21efa4bb187d3 466097743 released 307 | f4e7f126-b2e2-4e0e-b7cc-4bf4df4656f9 TCGA-QG-A5Z2-01Z-00-DX1.51896C31-235E-48EF-90F7-FC05350CA564.svs 27f4279a3d7aa930faa37ed471fc8301 1295303325 released 308 | 0c59c32a-ce9a-4557-be21-90d72ddc34b4 TCGA-AU-3779-01Z-00-DX1.4134005A-8A79-46DC-8737-B3C8AAC2DFCA.svs 7ceabfc298c637ca5182027dd5255d01 216962640 released 309 | 0acc3fcf-1209-4824-8d25-e4d0c0f2ac1b TCGA-A6-2674-01Z-00-DX1.d301f1f5-6f4a-49e6-9c93-f4e8b7f616b8.svs a666025465245698744ae068cd0c6d1e 103852457 released 310 | 94d2b166-580e-4362-b6e8-99fbb104dd2c TCGA-CM-6168-01Z-00-DX1.96af6eb2-9d51-4671-baf8-1a73d0c66869.svs 7f724c7b5dfe55dc9ca592ae48a2fb87 950412267 released 311 | 128f36a1-0433-4533-87c3-743f13a586fe TCGA-AA-A01T-01Z-00-DX1.0C795296-87D6-4B90-9363-CF5CD7A2924D.svs 97f13d83dfde8b102db62cc44bd9e8ca 1034121582 released 312 | 5fa14139-2603-4f30-a508-ec94432dc9e6 TCGA-AA-3667-01Z-00-DX1.28dc3612-1c43-4727-a134-698cc4315dc3.svs ffa499c90d821cab321fd477c0eb7823 325517958 released 313 | 3bcf18c0-708e-45ca-bd05-99ceca10dbfe TCGA-G4-6309-01Z-00-DX1.3eb20fdc-eb86-4bd5-8194-76b02d4fa472.svs e3dc737523d839d50de38ad808e68283 685147629 released 314 | adfeb295-e16d-4d70-a2d4-77537f553ade TCGA-AA-3666-01Z-00-DX1.fe976853-fbde-46e8-b915-3c98440c9315.svs 700e6d8717ec5b5c09790f387975a24b 443279054 released 315 | 9486ce7d-c24d-4efe-955f-5610b76669aa TCGA-AY-6196-01Z-00-DX1.747B47B0-386A-4BA9-A8B7-274F1699D15E.svs 902433d502f7eb2efa2b0a797be826a2 753956247 released 316 | cae38e3c-38b7-40ef-9a59-2a60c5aeb1c0 TCGA-AA-3930-01Z-00-DX1.065c480c-9ac3-4d98-a351-cb320b6a5ba0.svs c8bd28683a292d46e4bc17cd0d598339 188799232 released 317 | f69aeb1a-626a-4dfa-bf5f-60ff674f26aa TCGA-QG-A5YV-01Z-00-DX1.9B7FD3EA-D1AB-44B3-B728-820939EF56EA.svs 948869d55d75f30a6a684d2d54d0fa64 2029458767 released 318 | aa8f15ca-4f2a-44ed-baf0-2288fab2b63b TCGA-CK-5913-01Z-00-DX1.64d267c5-895f-4274-9d98-bfd2d338aee2.svs f2bb85b6543a7bd921da3b92e062d07d 1094685675 released 319 | 3c258af5-5104-48f2-97e0-e5e61c27d0d3 TCGA-NH-A6GC-06Z-00-DX1.5F90CBDB-794B-498D-B75F-60EBDE17B22A.svs 152cdae3f4c87a9f33ecc1c6ff1a66cf 1307807919 released 320 | 50e5358f-6e30-4d47-8142-15a0cb821c49 
TCGA-G4-6323-01Z-00-DX1.f97c27a6-9fbe-4a81-b4aa-020c25279449.svs f26dff2bc5a5e3d2b372a5d77e324e80 497904457 released 321 | ae3fe372-c84a-40af-aa78-71004e45e951 TCGA-AA-A01F-01Z-00-DX1.A09E4A5B-1DD2-472C-B387-91803FEE514A.svs 739a8a13c52433d0403d55b6edb041f0 283821418 released 322 | 38d409ab-73c7-4c38-91bc-e86ee83a404c TCGA-AA-A01V-01Z-00-DX1.1AC200C2-F577-421D-B91F-F0A7251C3D90.svs 237db216032141ecc6b266d3f43c01f5 1697474132 released 323 | 7c617b9e-ba32-45b4-9fa1-51d3af5dc1da TCGA-G4-6303-01Z-00-DX1.5819f041-d9b8-4fd2-bf52-65677be31df1.svs e368809221ec41b7ad7d132092e384a1 448618155 released 324 | 176f2cac-68c9-42fd-bc5f-7d096ecdd069 TCGA-AA-3977-01Z-00-DX1.08ffa326-08fd-4215-9bf7-81fcf33b4f5a.svs a2af43d3c3e0935dd79f9dc05595c1a9 144131888 released 325 | 9fc46b76-6d18-4268-9254-7476ae14f0d2 TCGA-A6-5667-01Z-00-DX1.1973b80d-b6b8-4ed8-9bc1-3aef51fbd9e6.svs 2a5ed1c50491b237828b6d97c1655def 85349859 released 326 | 25d00401-5200-417f-8955-ddba65eebdb7 TCGA-AA-A02E-01Z-00-DX1.04D47621-9DCF-437C-A4D6-44D17579FE6D.svs 6ce648cb975dd66d7884252867d0fcde 638400086 released 327 | daff3278-bdad-4db0-96c8-290918dd99ac TCGA-AA-3495-01Z-00-DX1.67DEE36B-724E-4B4F-B3A9-B4E8CCCEFA80.svs 19b86037a8f2e4ee7a4deaef583c63bc 10496896 released 328 | 5858e780-9a2b-4020-bc48-924d974d9344 TCGA-CA-5797-01Z-00-DX1.6549a80e-4b68-4147-949b-6149ab680313.svs 1e9449990d78917a3a586407691f37d6 367291891 released 329 | 544b2a2e-17e4-4fde-b6ca-696d6dde973e TCGA-A6-6141-01Z-00-DX1.34b5db5c-74df-47d9-bb89-beec93ded868.svs 52fbf13ce86531d57acaf1c3aa0ace2d 127865907 released 330 | ae37fe26-235b-465b-89c2-dd3b30473da2 TCGA-CM-6172-01Z-00-DX1.a5d23c88-a173-46a2-b8dd-6d873b8216c7.svs af4f243456646f4a9c0c2b79a1ccf3ed 565840317 released 331 | db59d95f-12ab-4633-84b9-5a535bba48aa TCGA-DM-A1HA-01Z-00-DX1.E56FC26A-DDB9-4121-9E79-5009FB23CCEB.svs 083ca57bcc95b532139bf6b00f46a339 1319195465 released 332 | e95e537b-3824-421d-9003-4a6e39a0a342 TCGA-AA-A02H-01Z-00-DX1.5343879F-6C5D-48B3-8D78-D895ED118F42.svs 6ac36397200182c5556800890d667e6b 824996070 released 333 | af585d94-6f4d-44f7-ad96-6ada1e9fcad6 TCGA-CM-6679-01Z-00-DX1.b3a899df-256a-4546-9428-a6dd2695b2cf.svs 594ca8c0be1689e4ce8caec7dd1d4c10 1037864379 released 334 | 766bd15e-1002-440a-9517-8c5a5ffa73ce TCGA-G4-6302-01Z-00-DX1.78bd777d-4b82-44de-9ef1-c4c641364015.svs c93c117dd6bd705b5c5be3c890d3d29a 453412639 released 335 | eef92134-10aa-4c76-9cbb-5f721b9b39e6 TCGA-AA-3841-01Z-00-DX1.10f98d09-027f-4495-9ad8-7c8dc49a07d7.svs 8320bb1d379f0fe241a14c02cd5763eb 386590228 released 336 | 13943a8e-e3d3-4db4-9000-bc3a16666b56 TCGA-A6-A56B-01Z-00-DX1.52FE9FA5-05F1-49EA-98BE-887CF7B3A52F.svs 85b748958f42319f6281ba4a89c9ed2e 686462025 released 337 | f1d67a87-f8cd-49e7-803b-e686e5aee85f TCGA-WS-AB45-01Z-00-DX1.1FD99E7A-830F-40DC-98CD-53C62C678AC6.svs efadc855b8556ab5a389bf2161b802f8 1165858209 released 338 | 2d961af6-9f08-4db7-92b2-52b2380cd022 TCGA-CK-6747-01Z-00-DX1.7824596c-84db-4bee-b149-cd8f617c285f.svs bb38bdc8c4904f2360508405093105c1 841386755 released 339 | 0bd355d5-4062-42ba-88e4-30af0dd8f1e6 TCGA-AA-3941-01Z-00-DX1.7c445fb8-53ee-4813-9ca8-8f7c3cc0bdde.svs decfaf6f8be35dfc074e05aac811b9c7 434976358 released 340 | 4c4f4942-530b-4043-84b9-785b4b3e9dba TCGA-A6-3809-01Z-00-DX1.c26f03e8-c285-4a66-925d-ae9cba17d7b3.svs f6a152cc11d4144dc341b23f7744c88c 172517035 released 341 | 6198072f-e4be-4fbd-8802-2a03df6ca140 TCGA-AZ-6605-01Z-00-DX1.4d634c3e-a1c0-42e1-a4d6-5980eddfa0ca.svs b39d98248cb6b03551a53e369ea6a7ac 864007365 released 342 | 623ae7a5-3efb-4db1-96c3-619e8f1ebcff 
TCGA-DM-A1D4-01Z-00-DX1.38346604-2BAF-44F8-BD96-5BF58253C6AD.svs fd53d9424ad2b890450f290a5aeddde3 854473239 released
343 | ef9e8336-1099-42ae-aa5e-56ef59d3da3a TCGA-CA-6719-01Z-00-DX1.590fea56-aae4-4108-9169-a67ec8cd95b7.svs d6cc5c6e75568555cd50b071a4431c78 610172865 released
344 | 5c7c8d94-86ce-4db2-8d05-e7d215c1bf2f TCGA-CK-6748-01Z-00-DX1.1dd76660-7858-470c-a27b-36586b788125.svs e632382da400642ade246517fd967ee0 955476269 released
345 | eebf2fbf-1fbe-4784-a37f-29434f8c00dc TCGA-NH-A50U-01Z-00-DX1.CA30EE72-7149-44E0-9082-AC1922ADDB09.svs d5bee7021a126c526c224db0e9f9d2d4 1238355055 released
346 | 01f2d311-0825-4c67-af09-cf1a5ae6b1af TCGA-F4-6459-01Z-00-DX1.80a78213-1137-4521-9d60-ac64813dec4c.svs 46bd67c21bb83bcadad500df58c4b02e 788730317 released
347 | 04d586ad-4f74-453f-a9c6-f8bd134ae11c TCGA-4T-AA8H-01Z-00-DX1.A46C759C-74A2-4724-B6B5-DECA0D16E029.svs 88f16f34c8c5cf0857da7c2b55bb3764 2145045839 released
348 | eb3a8b3e-fcd9-446b-b851-f6880ad9b3f5 TCGA-D5-6538-01Z-00-DX1.fab8da8e-1e0a-4fb2-987c-9792c05d5a3a.svs cb794652967c1deea59e348bb270afa6 691200189 released
349 | dd4e7430-4e6a-4f87-b857-bab4438051ff TCGA-CM-5348-01Z-00-DX1.2ad0b8f6-684a-41a7-b568-26e97675cce9.svs 849c422e76ee6ae5da2bb052e99a9e78 747746979 released
350 | 25e4ffbf-32ed-407c-ac26-fcd5aa122e52 TCGA-CM-6677-01Z-00-DX1.e3428c0a-a194-4e38-b105-8244701fcc71.svs 2081870e61285b6ef56fc29111e7c234 1036643329 released
351 | 098102b9-668a-4743-b8b8-b0f1e1bd345c TCGA-AD-A5EK-01Z-00-DX1.709B3557-3E24-4CAB-8CD6-604C9438BC12.svs 8789cc8d537fd9248c8dfec608a6ad18 332315158 released
352 | b83a00d6-84ff-462b-ae9e-091c9fa40b7a TCGA-A6-2677-01Z-00-DX1.dc0903dc-fef2-47ca-8f04-1ef25a4d8338.svs 056a392d77655305fcba2f4c58b9e358 143704055 released
353 | 2d08a6f8-70b3-4c3e-aa8f-1bc325f37282 TCGA-G4-6294-01Z-00-DX1.29a1716d-8875-49f0-8a83-f416221623b3.svs 5ffa774bd6c439f192f317879661e926 796787795 released
354 | 325ffce4-1a0e-4bcc-bfc3-1a9959c26aee TCGA-AA-3967-01Z-00-DX1.b80c87c9-00f4-44f6-bc59-19d2b94942ac.svs adebd2cebe18701fe5db796ccb3823d9 263867890 released
355 | 28fa386a-97e2-4144-8ab0-29ebe01a7774 TCGA-AA-3979-01Z-00-DX1.e63b4db2-dc9b-4afb-a288-89a905beacd0.svs cabe5300975784139bc69d139ff9a176 294535960 released
356 | 9bf65c40-afed-49c7-9a5f-60309cd687c6 TCGA-A6-6649-01Z-00-DX1.9439bce8-5715-4d76-a5d8-c6cbe1b79435.svs 7dd602f157f148a0971f8b64a016dd31 149890641 released
357 | 20a11849-e57a-47ad-854c-757e17cc277b TCGA-AA-3867-01Z-00-DX1.dbc11b4b-732c-4b0a-aaef-ba94b0218fe6.svs 92e09946b141030b1bf3b325dc5f834e 195279618 released
358 | d5609bba-5096-42e3-9003-40a5f46cc0d5 TCGA-AD-6548-01Z-00-DX1.4e047481-8926-48e6-9eba-46597c4cc396.svs 23e815e60efe15858eaff4a90d8bec0d 95284133 released
359 | e0057043-fae2-4b6b-8298-1d06e48100e3 TCGA-AA-3562-01Z-00-DX1.e07893e6-646d-41b5-be51-9c19d51f6743.svs ee851ee5d5140ad73a12d38647ab7174 507082138 released
360 | c9352fff-715b-4e2e-a7c3-3afcd2536d20 TCGA-G4-6321-01Z-00-DX1.20bd4687-4b24-4666-a722-d42b9731136e.svs d992cddcb41bbf5db7696238464846dc 553593089 released
361 | 10b38301-6ac8-40a9-ae81-b2ecf64a5d3c TCGA-AA-3531-01Z-00-DX1.19cdaa4b-5a53-4198-90da-5800827d90bf.svs 95f46e4100b7a99d8c8db993df7533b7 729212438 released
362 | ae0b360c-6eb7-49ae-8cdb-ccaacfec7cfe TCGA-AA-3858-01Z-00-DX1.6336815f-9887-4f74-a15d-78e7f6cacb59.svs 28eb6fb92863985198c96c69a57040aa 76257166 released
363 | 8780a5fa-9824-457f-9f2c-5b2d00f7063d TCGA-F4-6807-01Z-00-DX1.84bfb631-af3d-45e7-a7db-730844a53625.svs cfdfc0af520d8c0d5e7a42bd7426a932 729259219 released
364 | bb9f6b33-f007-4915-9b73-05f847db0964 TCGA-T9-A92H-01Z-00-DX2.43894C88-2096-4932-9E9D-17BDCACF988C.svs 577adb75be53d83868dd3d69dcca6d77 648256730 released
365 | 5783bad7-1d94-4257-a234-af0330b96ad9 TCGA-AZ-4616-01Z-00-DX1.0a0f6eaa-4db6-4479-a9df-f09387f555b1.svs cd3aa1c6f793431275f8ac9d6b3bd576 785672629 released
366 | 84ac5304-9d03-4e0f-93cd-5be1064a033a TCGA-AY-A8YK-01Z-00-DX1.89E3C546-0425-449B-A6FB-1C35168EA7EB.svs b1effb5068f42fc75d2d34e50dc9a3d2 605800784 released
367 | 74a7137c-ea54-477d-a505-2cf147f3cf24 TCGA-AA-A029-01Z-00-DX1.36BA3129-431D-4AE5-98E6-BA064D0B5062.svs 6906bf8d08a47ad5d56b1a617834d563 1404898144 released
368 | 3962ba26-19f7-46be-af73-e39b37a51c64 TCGA-AA-3712-01Z-00-DX1.00E0ACE2-8CC5-4063-9C65-3CDD7F21B189.svs 0eafd121078c0d89769e4da2203b09d3 8597952 released
369 | c34291fd-7152-470b-a025-df01f9de4cd7 TCGA-CM-6680-01Z-00-DX1.68fe763e-d8f4-44ca-8604-4da4e57cee06.svs c00ca5a1ac197266cab8e980eed455bd 989642281 released
370 | c946aab2-ea76-4974-bcf7-1f29afbc2a6f TCGA-AA-3861-01Z-00-DX1.1735d004-51bd-447a-add4-05f0c583c6ca.svs f20c587393ad0367ed8fbbb21eb9691b 291059298 released
371 | 4a644f46-af06-44ac-a7c6-f13258d70357 TCGA-AA-3971-01Z-00-DX1.348b1114-a9d5-4a37-8dfd-3bc8738fec35.svs 0a96825b278ba18ccb01eac5c5627af9 547416550 released
372 | c78c413d-d6e8-434f-811c-069ce6162bf1 TCGA-CA-6716-01Z-00-DX1.fd53102c-7f2b-47f1-b4d1-5693e81a0478.svs 7cdd6dec650cc2642715c628dfb3b182 685217797 released
373 | a83b7423-32c7-45e0-953d-91566c6eb432 TCGA-F4-6854-01Z-00-DX1.a4d18769-1632-41e4-b49d-4a88d36e21ab.svs f45c64f7158b98b0ac21840d07d0f585 291382125 released
374 | a7a52fb9-3775-4211-9d9d-6506f9db54f4 TCGA-AA-A00O-01Z-00-DX1.6787693F-6E3E-479A-A4DE-44186659285F.svs bb9fb5dfacf5bd282bafb911eda94f96 354929216 released
375 | 22c257cf-df9e-441a-a21c-d78b2b528087 TCGA-AA-A01P-01Z-00-DX1.D7AAA4F0-C956-4346-8948-DADACDFB3B69.svs a2b15eaeb61e396a7c7fe995787d940a 1690505016 released
376 | 97d5443f-9bcd-4ae9-96a1-68d76d2ecb68 TCGA-AA-A00K-01Z-00-DX1.25BD5724-7D30-4885-A3F9-D94FAED92984.svs 5450b843ceb3f95d1458d8f16b8d6898 741434230 released
377 | 16dd80c6-0980-48a3-af88-4bf03cf71fce TCGA-A6-6651-01Z-00-DX1.09ad2d69-d71d-4fa2-9504-80557a053db4.svs 7fc14985bd62c98b49f3136ebc4365ae 271258745 released
378 | a45a2571-20b3-4f8d-b7fb-a210ec8fec52 TCGA-CM-6161-01Z-00-DX1.552104aa-6fd7-4d53-918b-fe67d359815c.svs b4ebc59a1beb3641f28f6cc9149b212f 1012516645 released
379 | 4be5d625-9594-4bee-b6b2-2827ce65b473 TCGA-AY-5543-01Z-00-DX1.f3614d19-8391-49cc-a0e4-932e717696d3.svs de780a5c577e404e05b312d772aed862 583606263 released
380 | 48e8fd12-2a4a-4d83-905c-bd6f602af0be TCGA-F4-6461-01Z-00-DX1.f7da24ac-4a3c-4814-8d11-3138a954e0df.svs 026dff2ff5346cbbbc64f6dd961e1073 823013559 released
381 | cd316e1d-8468-4aff-aa55-685dab5f9b19 TCGA-CK-5916-01Z-00-DX1.726a78b1-e64f-4dd6-8f7e-e43e98f1f453.svs 7787d8fbc1a50d0e6c0af9ce94c41d86 1180333133 released
382 | 7046e623-8670-4b8e-b1da-e2b91021b520 TCGA-A6-A5ZU-01Z-00-DX1.8E7A136C-46C4-4233-A747-EBDC4F3227FB.svs fe4bf70e6ebfb79b1e15b1cca6f20e5c 237676301 released
383 | 115cba1c-9950-4067-a213-3f7963b51960 TCGA-AA-3869-01Z-00-DX1.ef8b8cf0-5de5-4475-ac37-286d58604a0f.svs a46bc829df411d195f13c866c1725046 240964558 released
384 | eedce645-a8d2-4fa7-9a68-db5e96fa4920 TCGA-NH-A50T-01Z-00-DX1.4624B690-C0DE-42BD-852C-6EBABF40255F.svs 6f9b069a96a3a72b0a3d276139afc84a 1631868451 released
385 | 1f590735-088d-4821-85d1-599b77725221 TCGA-D5-5540-01Z-00-DX1.4e4c69ca-f736-4db3-a401-c4f814d975dd.svs a360f7d22d64a79644df7c2b8fe31c12 536715265 released
386 | b984bce8-16ea-4564-8dda-c119f02020d7 TCGA-AA-A01Q-01Z-00-DX1.4432694B-F24B-4942-91FD-27DEF1D84921.svs 9181b12ae601fd4776acf791d0adec95 2276985914 released
387 | 57c2d0c6-5dfa-4c95-982c-6c189d9b9ad6 TCGA-CM-5864-01Z-00-DX1.2cb87875-6cae-4d8e-9c93-4a83941c0ca9.svs 8225d8734d1845c7c1bd08ca8b86cce4 816045533 released
388 | 021821c9-68f3-4be3-97ec-4ade5b4aaa60 TCGA-AY-4070-01Z-00-DX1.dd650ac6-8480-4fd8-85b8-15a7840a5933.svs a298a95d847578d42744d31375665de5 73685251 released
389 | b8085eff-8407-4fd5-b2ba-70fa649aa07d TCGA-AA-3488-01Z-00-DX1.EDF60198-F7AB-45BB-9A1B-C2E2FA141989.svs 7419e53add8dd7b359e82cb52fc98ec8 8169406 released
390 | 6d30682e-a80b-4554-8b77-16f3ffd5314f TCGA-AA-A01G-01Z-00-DX1.8A288E53-BA38-4BAC-81B5-2E0E41EA0D85.svs 29f312a2b2b9482a6444f7a7c7f274a2 385414306 released
391 | b6c0ea5f-32d3-4210-b85b-9cb435e8167e TCGA-AA-3494-01Z-00-DX1.E275AF20-3AA7-4191-BD1F-FFE744CA6A2F.svs 24045d59ba9c9c989cb23fa6a349c790 9102328 released
392 | 8e58762c-e960-4319-b329-09ea285fc8c1 TCGA-AA-A02R-01Z-00-DX1.B332C84C-EE97-4855-B773-9B5CBFA45096.svs 797ebc74d34098e7c707a3391cd3304f 784086292 released
393 | bc747420-7064-416a-a0f9-bc866553971a TCGA-AZ-5403-01Z-00-DX1.1c557fea-6627-48e9-abb9-79da22c40cef.svs 552e67cde8e1e355e8d2a3aeb587e29b 873110079 released
394 | 3ac10c45-4096-4839-982d-3019e77587db TCGA-AA-3509-01Z-00-DX1.EAE46823-3132-486F-8C2A-C0F548A08335.svs cd95f2c4c2c918c3d5250f7092ab9fa0 11994936 released
395 | 68f65912-47da-44b1-8f4c-a6107428fba6 TCGA-4N-A93T-01Z-00-DX2.875E7F95-A6D4-4BEB-A331-F9D8080898C2.svs 24bd028468f4a70dd6169c0a2cfe402d 1594624613 released
396 | f1a82927-49c3-4fd1-963a-1e24e1d8e9ab TCGA-AA-A01I-01Z-00-DX1.D24F43B2-F46E-4F7F-85A0-91F3A04E0785.svs c4696764f9521c3170e834bcd26980b0 355705024 released
397 | 0b55ba87-0994-4679-9d1c-6cc3ddfd3078 TCGA-A6-5662-01Z-00-DX1.82569684-1c31-4346-af9b-c296a020f624.svs 4c15ecb676dc05e99167ee33c63a20ed 94426693 released
398 | ce04c99c-0acd-4b24-b385-fbb400528285 TCGA-AA-A02Y-01Z-00-DX1.D1EE29E8-A27E-4035-807B-324A63239116.svs 744436dbc7fe88c6295c3674500b5610 778158250 released
399 | 9fa06e23-f31e-488e-9263-5823cd89e777 TCGA-AZ-6598-01Z-00-DX1.1fc4cd61-4524-413b-b36d-ad438785bc06.svs bcf7ab0678d762485004915250a894a9 783755825 released
400 | 5630d41c-57aa-4049-b4d5-a2a7b84b7b4a TCGA-DM-A0XD-01Z-00-DX1.DAFA56D4-85CB-4FB1-B5BB-E993CA522FF8.svs 7c209affd85518a3579a7b8120069a49 268034261 released
401 | 38c8505b-bff5-43ea-8a8d-0911a84469e4 TCGA-AA-3696-01Z-00-DX1.947f2c09-dfe9-4fdb-bf1a-9bf46d67f617.svs c1b9598e05160016cf6ceb1155bef3da 199444326 released
402 | 2fcfc546-8fa2-4f4f-92e1-12b4f6434a51 TCGA-CK-6746-01Z-00-DX1.0aae8eec-1e82-494b-8779-d79fea8bec0c.svs d65b76a220e98802b5928fc387086f11 696389971 released
403 | 262569d9-5fb0-4e7b-923c-a8285ced66b0 TCGA-AA-A01K-01Z-00-DX1.2E147232-BC3C-48CC-B75E-43E6AA4A0BF8.svs 203210a2f5a6154866cb5c0d9625eea5 469163238 released
404 | 35c1e23c-8adb-4a7f-865f-be1d6e10b3ef TCGA-AA-3489-01Z-00-DX1.AE299B70-B14C-4FFE-B1F9-38B2EB267FA9.svs 4d0517059a282253dd048d176eb40e32 7610002 released
405 | cc712259-7cd4-4d5c-968e-8101e6430885 TCGA-D5-6927-01Z-00-DX1.ff21d627-dbb8-4200-937b-f8be8b86b6d4.svs 648384c16ea05b0a1b27cdee96981fc7 243402799 released
406 | 94098492-5d24-44cb-84b9-4765a66b48be TCGA-AA-3831-01Z-00-DX1.0565c3fc-fe21-4b34-ae1b-626a46edaa9e.svs 6455cf1539c097126479a1cd3a823d6c 382342020 released
407 | a8b0f094-f4cc-42d0-9168-41b5691765ff TCGA-AA-A017-01Z-00-DX1.E7B1384E-3C57-4CE5-B85E-3B8FD328B5A2.svs 54e31b1b4bceddd8a0a4f934e15c4d29 566921464 released
408 | 73844452-7492-4521-b6e6-0560220d3468 TCGA-D5-6924-01Z-00-DX1.a198456a-cf26-4cf3-a07a-edde8a4a710f.svs eb4bbd33736fc67adae638c9a93f0c06 223768941 released
409 | 8db3d085-f23d-4270-a52a-cbf1e6296381 TCGA-AA-3864-01Z-00-DX1.f6992bc7-ba05-4c30-9500-8f7b07b30f9a.svs d4b91ecc26276d57c2fefff788ec3322 258401346 released
410 | 2463c8f1-f8e8-4fab-881f-8875c2ec663b TCGA-AZ-5407-01Z-00-DX1.5218a617-9817-44f4-8f00-8e9e3d04bd70.svs e3e726eaa0de7651da32c97a41422451 332652095 released
411 | 29b005db-4568-4685-8a33-7b0c331f6ae3 TCGA-AA-3518-01Z-00-DX1.5aad7b19-2900-4f1c-9312-f8d8c4725449.svs 7782dd4c0cb17998ccca2f49e4c41a92 434527634 released
412 | 5c5928b2-a799-4159-b895-191ee8ebdba5 TCGA-AA-A00R-01Z-00-DX1.7520405C-E7DD-46A4-BB68-ADFA511AEA64.svs 347af311ff655ce123b5d2ac57883a46 431269154 released
413 | f94393a0-f0a4-44bb-9350-417c63cba839 TCGA-AA-3685-01Z-00-DX1.57ef312b-70e0-46f5-b847-0e0ac32f1824.svs 96bb87adb252d2b7d2f1f16133807d87 339477890 released
414 | cde87510-8e8f-48cb-ad4a-6a25771ad569 TCGA-A6-A565-01Z-00-DX1.42172A22-6F86-4661-BF2A-78815B721503.svs c95bf4edeb5e29235065f68d613eacb3 232172845 released
415 | e329f6c8-9bb9-4717-83ce-ae378a509b83 TCGA-AA-3553-01Z-00-DX1.45a24cd4-6eb2-4f20-9453-05476da34cdd.svs deb9c1934a54664efcf8905198403332 347790740 released
416 | a3891d0c-2e0e-4d9c-9fb7-61384da0938f TCGA-CM-6678-01Z-00-DX1.b0e06829-c119-4131-a2ec-22d41d8d6068.svs 3c4d743780b435011668ba2ec08329a8 525522273 released
417 | bf04fde6-7ddf-42c2-9390-ab39db4e63ea TCGA-F4-6805-01Z-00-DX1.9927edbb-2801-4988-b113-1fdfd31a72a0.svs d57676123902c920e6a45237d0328b67 333170911 released
418 | 5a2d15f8-078a-41df-9610-63a7ce6b1195 TCGA-DM-A1D9-01Z-00-DX1.C286F663-142A-4F8E-BFCD-56E33F73F7E8.svs b37035ee13bc34fda614faaec7231de3 1101041371 released
419 | 80335604-0905-4719-a4a0-4f8e23884435 TCGA-D5-6539-01Z-00-DX1.fe2a2e60-1db0-4019-9920-99416b34f05e.svs 2d4373cbc81a8782485ac0d04dcfe397 472493909 released
420 | d32f8922-be9a-4841-9131-7d966c18e27d TCGA-AA-3680-01Z-00-DX1.9eef1b8f-c3c1-486f-83e7-a88182ce892a.svs 1cf7e142ff15cab566257ad4ab7e5902 355788840 released
421 | b00fb1a3-02c5-458e-8f5e-559bcc9dedf0 TCGA-A6-5656-01Z-00-DX1.8a8ebf52-8217-4288-8886-7eefa6cdfdca.svs 9a0ac925e9202cc89da1b61f1cfc12a4 76721811 released
422 | 69554aaa-2fc4-4b2a-b169-2d7d5a804c12 TCGA-AY-6197-01Z-00-DX1.AD42F96E-6583-4AB8-A6BD-C8334EA9DE14.svs df7bf8bf38a273119dc0e1aebaccfbf4 582326955 released
423 | e5315d4a-f1fa-452c-87ca-0a26668da9da TCGA-D5-6533-01Z-00-DX1.a4b5096e-88cc-4797-b8d5-1a9cf1e74a55.svs c99324aef6fc612248a26d8f6c3e2601 1043046443 released
424 | 011da0ba-03eb-41cd-ae90-2dca944410e5 TCGA-A6-2683-01Z-00-DX1.0dfc5d0a-68f4-45e1-a879-0428313c6dbc.svs f3a937afcf00e57e3a43f0069d30833c 196816423 released
425 | 06c18d96-66e2-4f68-834c-3da4bc9878da TCGA-AA-3814-01Z-00-DX1.15a569dc-30d6-4bef-908b-6183df4e9e01.svs f8ec5bca8b6f7f9c7a45e385274a37b6 134641764 released
426 | f77be3ea-4595-49a6-b41c-de074b132bf8 TCGA-AD-6895-01Z-00-DX1.7FB1FBC6-683B-4285-89D1-A7A20F07A9D4.svs 8215e85b50f8a6db314fe4a6b494e99d 41088411 released
427 | 0ecb2d7b-0a69-46ce-80d6-1be65cec3cc7 TCGA-AA-3524-01Z-00-DX1.b1aae264-87be-4514-8f9d-25660b39caa7.svs 2be367752573aed43b3c62c0f90aabe9 218205050 released
428 | 76c04cfc-4bf8-486e-ad34-c60da2d3d64b TCGA-AA-3506-01Z-00-DX1.08CFD143-1A4C-4262-831E-79D9C4BBF453.svs 0bdf643d51430ace076583e7716e142d 10839534 released
429 | 776eb2f6-2fbc-49e0-93a2-162528750d28 TCGA-AA-3970-01Z-00-DX1.712c069a-aeaf-498b-80fa-7bb481b13825.svs f10987db05667b11704690d186a15677 239274648 released
430 | a7afe2d0-7290-4aeb-9840-7a44f0e76bd1 TCGA-AA-3715-01Z-00-DX1.24d6e746-ad61-4587-a2b9-8903331b279c.svs 3f75cf83ce4f5fd892ec7161f13c942d 477031190 released
431 | 5f4f67bf-5b13-4aa4-929b-9aa096bb89d5 TCGA-AA-3697-01Z-00-DX1.AAB8DB74-F76D-4D0A-A50E-E7F97504A3C4.svs 822740217a3c8b6cc37f481b5d6907f8 10081986 released
432 | 8bb9896d-ef4a-4050-9699-dafe404724a6 TCGA-AA-3952-01Z-00-DX1.6a51a689-74cb-4204-9f7a-f5e3fc55fb2d.svs 5ce699ef00dd294be823d81e8ab2d4d4 150102558 released
433 | b7c23d21-c9b8-4d40-b32d-01a7081e5189 TCGA-AA-3675-01Z-00-DX1.9afbbb26-2574-46af-8154-5f39bab6f01a.svs ed3533ce12164b7fbe87173999d4ce14 202032336 released
434 | aaeba14c-59c5-4123-bb25-e4d3d590bccf TCGA-D5-5538-01Z-00-DX1.7187bda7-9217-4395-a5e9-647357bc1c3a.svs 1c109ae57910f520533b47023f85cb9d 952661967 released
435 | ac5cf94c-5829-4da1-80be-8d24eb798058 TCGA-AA-3862-01Z-00-DX1.67a0bc0d-1fe0-4c90-bb2d-5b12224cc846.svs c1a690f05869bee51882d35bd0718ec7 261221416 released
436 | 2078ca83-55e7-4f53-83d6-82e1682cb493 TCGA-G4-6307-01Z-00-DX1.fff548a0-8bc8-428e-a4ce-3a5d0b3f060b.svs 824a749ebde80308e80c5c77d84b4f3b 722061487 released
437 | 39c5d9fb-92de-4cec-b690-92e27924f759 TCGA-AA-A02K-01Z-00-DX1.732DD8F9-A21A-4E97-A779-3400A6C3D19D.svs 5713131cca5c85f9662dc6fe23d3b564 1037402762 released
438 | fac4903a-786f-4976-a7c4-6fce3f27e83f TCGA-AA-A03F-01Z-00-DX1.8E1A83FE-2C85-4444-A8FC-D0691817968A.svs d6318aa3639258c7cb36432dfac2d8af 665111554 released
439 | 56b13994-61dd-4f3b-b26f-faacf93a339d TCGA-AD-6965-01Z-00-DX1.0330727c-42f5-4a08-a35d-af81eda1d0f1.svs f20ab48932746f68fb7069d8651b854a 81686773 released
440 | 6dffd67f-8ea7-4216-8bea-c58885e6f4e6 TCGA-AA-A02O-01Z-00-DX1.CB9BE08B-D78B-46B3-8339-A3DADD24439F.svs 57c555ada5e71909d4555068b503c2ef 1037851442 released
441 | 91b1621c-62be-4bc3-8b7e-1cae7f38467d TCGA-AA-A00D-01Z-00-DX1.A4358CDC-9B7E-4802-BF1C-741F533BBD96.svs d3c6ea55bbbbbffffae50f5ab1806e9c 676981030 released
442 | b339b9d1-af19-46e3-94cf-eb21c391da0e TCGA-AA-3972-01Z-00-DX1.a60c2c2b-71ea-4cd6-a56b-c8e409a181ac.svs fb3f1aacde4f99b6cb91e1719e9cdbae 145650676 released
443 | 3b88b579-a27a-4ac4-8533-48dded7d11c7 TCGA-AA-3530-01Z-00-DX1.298325a7-f5f8-4b5e-b3da-dd9dab6e4820.svs e42be462d3859ccc2b6aa4d573358799 490887204 released
444 | 2225f73e-f46f-4927-b23a-5a647f9fff6a TCGA-AZ-6608-01Z-00-DX1.40d9f93f-f7d8-4138-9af1-bb579c53194b.svs 09f441018fe1e2e6f98a8a079d3b01ad 1021912177 released
445 | 6fccd4e0-9cc3-4abe-b216-e1049284a4a4 TCGA-AA-3662-01Z-00-DX1.625F1BCC-5E59-411E-AE23-6F43CE6122B2.svs 3d53e0cb4dcd5f52e1965e9252385813 9915016 released
446 | dcc94838-617f-4e8f-ba5e-e0f1c553df75 TCGA-AA-3552-01Z-00-DX1.84133d42-9a39-44b5-a1ec-a5382650c939.svs 6cea14c6d2bc32581944f89c23c9b866 288649760 released
447 | 69b4eafa-a83c-4e5a-8855-95de51585362 TCGA-AA-3688-01Z-00-DX1.642ce194-6dc0-4a96-aa79-674f48966df3.svs d6ed143f3296241eaf54f182f4ba0403 306717586 released
448 | 3042cadb-953f-4294-8b34-4a43d8b6e984 TCGA-G4-6625-01Z-00-DX1.0fa26667-2581-4f96-a891-d78dbc3299b4.svs 8bde526c0bee086cf2a22161c19aacd9 100860407 released
449 | 5ba7738a-dd63-4c39-b48d-7b9c28be1d7d TCGA-AD-6888-01Z-00-DX1.47AE342C-4577-4D8B-9048-0B106C5960E7.svs daddd8af9fcd8de15a48e2cf0bc11581 172226543 released
450 | efd81ef1-0260-4e2c-ad3e-64733b3072a2 TCGA-AA-3496-01Z-00-DX1.B109A6F3-02E0-4181-B69A-00CBA758C074.svs 5f91a86ba11b95295f83138d14be30e1 8600540 released
451 | 896cee98-ffaa-4aec-8aa1-f067482fedd5 TCGA-AA-3521-01Z-00-DX1.9d6be975-7be1-4f6c-99db-5101369c6624.svs 684f23f65c117e2014c43baa0fa4816b 532409280 released
452 | eb10274f-9337-4ef0-9762-5430ede6d6f9 TCGA-AD-6964-01Z-00-DX1.83AF88B9-C59B-48C6-A739-85ACB8F8ECA9.svs 79b4e25e233030ecdf3eb4e8fe2f9877 293608971 released
453 | 507cdbcd-48ec-4fad-ab69-bf0ca8635f62 TCGA-G4-6628-01Z-00-DX1.d67973d1-9544-47e1-9ecb-e9d8d7f310e6.svs 04580917cd4410efda60f1d39250868b 915453785 released
454 | 4610d8e4-ddc8-41e3-a92f-9cc32c8c8e8a TCGA-AA-3556-01Z-00-DX1.63a74b91-44e8-4ffd-8737-bcf6992183c3.svs 0a4f391274f3fd7ad0947c3e80f6ae39 106976814 released
455 | 42b1d9e6-a4c8-4625-b443-2bb2a5f69b02 TCGA-A6-2676-01Z-00-DX1.c465f6e0-b47c-48e9-bdb1-67077bb16c67.svs f5ef4b33c21b4ed62f847429bb6bf4db 193490107 released
456 | 0595727b-58bb-45e8-8a54-ce1e8e9df83a TCGA-T9-A92H-01Z-00-DX1.9BA130C5-CAEF-4452-BB1F-61963B0DA3C5.svs 4fa21a93270bc82721a8e1cc55506c80 718633496 released
457 | d74e8e4d-25c4-4e11-911c-9e77abb72854 TCGA-DM-A28K-01Z-00-DX1.766edac4-a5c8-45ef-aff6-73e308c1f442.svs 72b10ac65f6a230e7603574cb4ff05e0 1688054259 released
458 | 83e92efd-308c-424d-a818-6400ff676b16 TCGA-AA-A01C-01Z-00-DX1.C69C8FC3-04A2-46B8-8577-7A7F082248CB.svs 5a41f6b7a70b5ee2d85fb7e7970147c5 465560132 released
459 | 9b05cf34-c548-482f-9e41-a77e2ba5a253 TCGA-AA-3510-01Z-00-DX1.B4FCE76A-3B60-4D7D-9F3E-973AE17EA8E2.svs 689522752425b575316f1d26f7ad3bab 9950312 released
460 | 5af6b02b-f142-41f7-a1a4-1fd61dc73f3e TCGA-AY-A71X-01Z-00-DX1.68F9BC0F-1D60-4AEF-9083-509387038F03.svs 529813f3f323a7f80739fb3740212c31 2153405756 released
461 |
--------------------------------------------------------------------------------