├── .gitignore ├── docker └── Dockerfile ├── .github └── workflows │ ├── Dockerfile │ └── publish.yml ├── scripts ├── video │ ├── boltz-pre-pull.yaml │ ├── csi-pvc-and-pod.yaml │ ├── boltz-cache-populate-job.yaml │ ├── boltz-multi-job.yaml │ └── run-full-deploy.sh ├── boltz-pre-pulling-job.yaml ├── upload_data_to_pvc.sh ├── download_results_from_pvc.sh ├── boltz-multi-job.yaml └── boltz-cache-download-job.yaml ├── data ├── yamls_001 │ ├── s_110.yaml │ ├── s_1138.yaml │ ├── s_112.yaml │ └── s_1162.yaml ├── yamls_016 │ ├── s_854.yaml │ ├── s_94.yaml │ ├── s_975.yaml │ └── s_996.yaml ├── yamls_002 │ ├── s_1166.yaml │ ├── s_1211.yaml │ ├── s_1240.yaml │ └── s_1168.yaml ├── yamls_004 │ ├── s_1656.yaml │ ├── s_1683.yaml │ ├── s_1575.yaml │ └── s_1684.yaml ├── yamls_005 │ ├── s_1750.yaml │ ├── s_1745.yaml │ ├── s_1758.yaml │ └── s_1700.yaml ├── yamls_015 │ ├── s_758.yaml │ ├── s_771.yaml │ ├── s_745.yaml │ └── s_81.yaml ├── yamls_003 │ ├── s_1375.yaml │ ├── s_1427.yaml │ ├── s_1329.yaml │ └── s_1378.yaml ├── yamls_007 │ ├── s_1902.yaml │ ├── s_1879.yaml │ ├── s_1880.yaml │ └── s_1851.yaml ├── yamls_010 │ ├── s_279.yaml │ ├── s_269.yaml │ ├── s_311.yaml │ └── s_274.yaml ├── yamls_011 │ ├── s_345.yaml │ ├── s_377.yaml │ ├── s_376.yaml │ └── s_360.yaml ├── yamls_014 │ ├── s_627.yaml │ ├── s_62.yaml │ ├── s_656.yaml │ └── s_7.yaml ├── yamls_008 │ ├── s_203.yaml │ ├── s_1948.yaml │ ├── s_1915.yaml │ └── s_1929.yaml ├── yamls_009 │ ├── s_224.yaml │ ├── s_248.yaml │ ├── s_206.yaml │ └── s_238.yaml ├── yamls_012 │ ├── s_42.yaml │ ├── s_441.yaml │ ├── s_401.yaml │ └── s_44.yaml ├── yamls_013 │ ├── s_49.yaml │ ├── s_483.yaml │ ├── s_489.yaml │ └── s_508.yaml └── yamls_006 │ ├── s_1789.yaml │ ├── s_1796.yaml │ ├── s_1803.yaml │ └── s_1802.yaml ├── README.md ├── LICENSE ├── blogpost-boltz2-mk8s.md └── tutorial.md /.gitignore: -------------------------------------------------------------------------------- 1 | # macOS 2 | .DS_Store 3 | 4 | # Jupyter 5 | .ipynb_checkpoints/ 6 | 
-------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:12.8.1-cudnn-runtime-ubuntu22.04 2 | 3 | # Install git, pip3, and uv. DEBIAN_FRONTEND=noninteractive stops apt from prompting during the build; the apt package lists and the pip download cache are dropped in the same layer so they do not inflate the image. 4 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y git python3-pip && rm -rf /var/lib/apt/lists/* && pip3 install --no-cache-dir uv 5 | 6 | WORKDIR /app 7 | # NOTE(review): this clones the default branch at build time, so two builds can contain different Boltz code; consider pinning a tag or commit. 8 | RUN git clone https://github.com/jwohlwend/boltz.git /app/boltz 9 | 10 | WORKDIR /app/boltz 11 | # Editable install with the "cuda" extra; the spec is quoted so the shell cannot treat [cuda] as a glob pattern. 12 | RUN uv pip install --system -e ".[cuda]" 13 | 14 | ENTRYPOINT ["boltz"] -------------------------------------------------------------------------------- /.github/workflows/Dockerfile: -------------------------------------------------------------------------------- 1 | # NOTE(review): this base image is CUDA 12.1.1 / cuDNN 8, while docker/Dockerfile uses CUDA 12.8.1; confirm which one the published image is meant to use. 2 | FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04 3 | 4 | # Install git, pip3, and uv. DEBIAN_FRONTEND=noninteractive stops apt from prompting during the build; the apt package lists and the pip download cache are dropped in the same layer so they do not inflate the image. 5 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y git python3-pip && rm -rf /var/lib/apt/lists/* && pip3 install --no-cache-dir uv 6 | 7 | WORKDIR /app 8 | # NOTE(review): this clones the default branch at build time, so two builds can contain different Boltz code; consider pinning a tag or commit. 9 | RUN git clone https://github.com/jwohlwend/boltz.git /app/boltz 10 | 11 | WORKDIR /app/boltz 12 | # Editable install with the "cuda" extra; the spec is quoted so the shell cannot treat [cuda] as a glob pattern. 13 | RUN uv pip install --system -e ".[cuda]" 14 | 15 | ENTRYPOINT ["boltz"] 16 | -------------------------------------------------------------------------------- /scripts/video/boltz-pre-pull.yaml: -------------------------------------------------------------------------------- 1 | # DaemonSet that starts one idle pod per node so each node pulls the Boltz image ahead of the real jobs. 2 | apiVersion: apps/v1 3 | kind: DaemonSet 4 | metadata: 5 | name: boltz-pre-pull 6 | labels: 7 | app: boltz-pre-pull 8 | spec: 9 | selector: 10 | matchLabels: 11 | app: boltz-pre-pull 12 | template: 13 | metadata: 14 | labels: 15 | app: boltz-pre-pull 16 | spec: 17 | tolerations: 18 | - operator: "Exists" # Tolerate every taint so the pod is scheduled on tainted nodes too 19 | containers: 20 | - name: prepull 21 | image: $BOLTZ_IMAGE # Placeholder: Kubernetes does not expand shell variables, so substitute it (e.g. with envsubst) before applying 22 | imagePullPolicy: IfNotPresent 23 | command: ["sleep", "infinity"] # Keep the pod running until the DaemonSet is deleted 24 | -------------------------------------------------------------------------------- /scripts/boltz-pre-pulling-job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: DaemonSet 3 | 
metadata: 4 | name: boltz-pre-pulling 5 | spec: 6 | selector: 7 | matchLabels: 8 | app: boltz-pre-pulling 9 | template: 10 | metadata: 11 | labels: 12 | app: boltz-pre-pulling 13 | spec: 14 | tolerations: 15 | - operator: "Exists" # Run on all nodes, including tainted ones 16 | containers: 17 | - name: prepull 18 | image: ghcr.io/nebius-academy/boltz2-mk8s:latest # NOTE(review): mutable "latest" tag, so nodes may end up with different builds; consider pinning a version or digest 19 | command: ["sleep", "infinity"] # Keep running until manually deleted 20 | restartPolicy: Always # Always is the only restart policy a DaemonSet pod template accepts 21 | -------------------------------------------------------------------------------- /scripts/video/csi-pvc-and-pod.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: csi-pvc 5 | spec: 6 | accessModes: 7 | - ReadWriteMany # Volume may be mounted read-write by many nodes at once 8 | resources: 9 | requests: 10 | storage: 64Gi # Requested capacity 11 | storageClassName: csi-mounted-fs-path-sc # NOTE(review): this storage class must already exist in the target cluster; confirm 12 | --- 13 | kind: Pod 14 | apiVersion: v1 15 | metadata: 16 | name: my-csi-app 17 | spec: 18 | containers: 19 | - name: my-csi-app 20 | image: busybox # NOTE(review): no tag, so this resolves to busybox:latest; consider pinning 21 | volumeMounts: 22 | - mountPath: "/data" # The claim below is mounted at /data inside the container 23 | name: my-csi-volume 24 | command: [ "sleep", "1000000" ] # Idle for 1,000,000 s (about 11.5 days) so the pod stays up with the volume mounted 25 | securityContext: 26 | allowPrivilegeEscalation: false 27 | privileged: false 28 | volumes: 29 | - name: my-csi-volume 30 | persistentVolumeClaim: 31 | claimName: csi-pvc # Refers to the PersistentVolumeClaim named csi-pvc declared at the top of this file 32 | -------------------------------------------------------------------------------- /data/yamls_001/s_110.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: 
"AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'COCC(c1cccc(C#N)c1)NCC1CC1' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_016/s_854.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CC(c1c(cc(cc1OC)c1cccc2c1CCO2)OC)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_002/s_1166.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | 
- protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1CC1(CC(N)=O)CNC(c1c(c2ccccc2)nco1)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_004/s_1656.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'Cc1cc(C(NC2CCCN(C2)S(C)(=O)=O)=O)no1' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_004/s_1683.yaml: 
-------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CNC(C1CCCCN1S(c1ccccc1c1ccno1)(=O)=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_005/s_1750.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CCS(NC1CCN(CC1)CC1CC2(C1)COCCO2)(=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | 
-------------------------------------------------------------------------------- /data/yamls_015/s_758.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C(C(COc1ccccc1C(F)(F)F)O)n1cnc(C#N)n1' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_015/s_771.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CN(Cc1nnc2ccccn12)C(CSc1cccc(c1)OC)=O' 10 | 
properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_001/s_1138.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1CC1COc1cccc(c1)C(N1CC[C@H](C1)C(O)=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_002/s_1211.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | 
smiles: 'Cc1ccc(CNCc2nnnn2c2cccc(c2)[Br])c2c1cccn2' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_002/s_1240.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1CCN(C(C1)c1nc(c2ncccn2)no1)C(c1cccnc1)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_003/s_1375.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 
6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CN1C(=C(C#N)C(N(C)C1=O)=O)N1CCC(CO)C(C1)O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_003/s_1427.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CNC(Nc1ccc(cc1)c1cccc(c1)C1CCS(N1)(=O)=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_004/s_1575.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: 
"AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1CC(CN(C1)c1cc(ncn1)N1CCOc2c(C1)cccc2F)CO' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_005/s_1745.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CN1C(c2cc(c3nc(C4CCOC4)no3)sc2N(C)C1=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_007/s_1902.yaml: -------------------------------------------------------------------------------- 1 | 
version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CC(NC1(CCCC1)C(NCC#CCNC(c1cccnn1)=O)=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_010/s_279.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1Cc2cc(ccc2C1C(O)=O)c1cc(ccc1[Cl])S(N)(=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_011/s_345.yaml: 
-------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CC1CCC(CN1C(c1c(C)c2C(CCCc2[nH]1)=O)=O)C(O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_011/s_377.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C(C(c1ccoc1)O)NC(c1cccc(c1)Nc1c2ccccc2ncn1)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | 
-------------------------------------------------------------------------------- /data/yamls_014/s_627.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CC(c1cc(no1)[Br])NCC(COc1cccc2c1cc[nH]2)O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_002/s_1168.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 
'C(Cc1ccc(cc1)O)CNC(c1cc2c(c(cs2)C(O)=O)[nH]1)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_003/s_1329.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1CCN(C(C1)CC(N)=O)C(c1cc(c2ccccc2[Cl])on1)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_003/s_1378.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 
6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CN(Cc1ncc(n1C)[Cl])C1=C(C#N)C(N(C)C(N1C)=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_004/s_1684.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CC1CN(CC(C)N1)C(C(NC(Cc1ncc[nH]1)c1ccccc1)=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_005/s_1758.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: 
"AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'COc1cc(ccc1C1(CN(CCC(Nc2ccccc2OC)=O)C1)O)[Br]' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_008/s_203.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'Cc1cc(c(cc1NC(c1ccc(cc1)n1cnnc1)=O)C(O)=O)[Br]' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_009/s_224.yaml: -------------------------------------------------------------------------------- 1 | 
version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CCc1nnc(c2ccc(cc2)NC(CNC(c2cc3CCCc3s2)=O)=O)o1' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_009/s_248.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1CC(CCN(C1)C(c1cccc2c1CCS2(=O)=O)=O)(c1ccccc1)O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_010/s_269.yaml: 
-------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CN(Cc1ccccc1S(N)(=O)=O)c1cc(c2CCCc2n1)C(F)(F)F' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_010/s_311.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CCn1cc(c2ccccc2)nc1NC(c1cc2CCS(Cc2nc1)(=O)=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | 
-------------------------------------------------------------------------------- /data/yamls_011/s_376.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CC(C(C)NC(c1ccn(c2ccc(C)cc2)n1)=O)NC(c1cn[nH]c1)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_012/s_42.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 
'CC(c1ccc(cc1)c1nn[nH]n1)NC(c1cnn(C)c1Cc1ccccc1)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_012/s_441.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CN(CCCc1cc(c2ccc(cc2)F)n(C)n1)c1c(C#N)c2CCCc2nn1' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_013/s_49.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: 
"AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CC1CCCN(C1)c1cc(ccn1)C(NCC1CCn2c(C1)c(cn2)C(N)=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_014/s_62.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1CCN2C(C1)=NN(CC(Nc1cccc3CN(Cc13)c1ccccn1)=O)C2=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_014/s_656.yaml: 
-------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1[C@H](c2ccc(cc2)[Cl])[C@H](CN1C(CO)=O)C(O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_015/s_745.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'Cc1ccc(c(c1)[Cl])S(Nc1ncn(Cc2ccc(C#N)cc2)n1)(=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | 
-------------------------------------------------------------------------------- /data/yamls_015/s_81.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1CC(Nc2ccc(cc2C1)NC1CCCN(Cc2ccc(cc2F)F)C1=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_016/s_94.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 
'CC(C)C(N1C[C@H](c2cccc(c2)[Br])[C@H](C1)C(O)=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_016/s_975.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1CN(C[C@H]1NC(c1cnc[nH]1)=O)C(c1cccc(CO)c1)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_016/s_996.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: 
"AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C(C(NCC(c1ccccc1[Cl])N1CCOCC1)=O)c1c[nH]c2ccccc12' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_001/s_112.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'Cc1cnc(C)c(n1)N1C[C@H](c2cccc(c2)[Br])[C@H](C1)C(O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_006/s_1789.yaml: 
-------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1C[C@@H]2C(N(C(N2C1)=O)c1cccc(c1)N1C(CCC1=O)=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_006/s_1796.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1CC(Nc2ccc(cc2C1)NC([C@H]1C[C@@H]1c1cccc(c1)O)=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | 
-------------------------------------------------------------------------------- /data/yamls_007/s_1879.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'Cc1cccc2C(N(CCC(Nc3ccc(cc3)c3nnc4CCCCCn34)=O)C=Nc12)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_008/s_1948.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 
'CC(C)Cc1ccc2cc(C(Nc3cccc(c3)N3C(NN=N3)=O)=O)[nH]c2c1' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_009/s_206.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C(C(Nc1ccc(cc1)C(N1CC(Nc2ccccc12)=O)=O)=O)c1cnn2ccccc12' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_010/s_274.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: 
"AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CN1C([C@H](CNc2ccccc12)NC(c1cc(ccc1F)S(C)(=O)=O)=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_011/s_360.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'COc1cccc(c1)[C@H](CO)NC(c1c(cn(c2ccc(cc2F)F)n1)O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_012/s_401.yaml: 
-------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1CC2CN(CC1N2C(c1cc(C2CC2)[nH]n1)=O)C(c1cc(ccc1F)O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_012/s_44.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1C2(CC1(C2)c1ccccc1)C(Nc1ccc2c(c1)c(C(N)=O)n[nH]2)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Boltz-2 Inference on Nebius MK8s with GPU and Shared Filesystem 2 | 3 | This repository contains scripts and Kubernetes manifests for running **[Boltz-2](https://github.com/jwohlwend/boltz)** — an open-source biomolecular foundation model — on **Nebius AI Cloud** using a GPU-enabled Kubernetes (MK8s) cluster and a shared filesystem. 4 | 5 | Boltz-2 predicts both **3D protein–ligand complex structures** and **binding affinities**, enabling fast *in silico* screening for drug discovery. 6 | 7 | [**Check the full, step-by-step tutorial here**](https://github.com/Nebius-Academy/boltz2-mk8s/blob/main/tutorial.md) 8 | 9 | --- 10 | 11 | ## Features 12 | - Automated setup of a Nebius MK8s cluster with GPU nodes. 13 | - Shared filesystem (PVC) for storing input and output data. 14 | - Inference with parallel GPU jobs. 15 | - Scripts for uploading input data and downloading results from PVC.
16 | -------------------------------------------------------------------------------- /data/yamls_005/s_1700.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1CC(CCC=C1)C(N1C[C@@H](C(O)=O)[C@@H](C1)c1ccc(cc1)[Cl])=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_006/s_1803.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 
'C1CCc2nnc(c3ccc(c(c3)NC(C34CCC(N3c3ccccc3S4)=O)=O)F)n2CC1' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_007/s_1880.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1CCN(C1)S(c1ccc2c(CCCN2C(CC2c3ccccc3C(=O)O2)=O)c1)(=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_008/s_1915.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: 
"AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1C[C@H](C[C@@H]1C(Nc1cccc(c1)NC(c1cc2cnccc2cc1F)=O)=O)O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_008/s_1929.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1CC(NC1C(NC[C@@H]1CCN(C1)C(c1ccc2CNC(Nc2c1)=O)=O)=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_013/s_483.yaml: 
-------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CN(CC(N[C@@]1(C[C@@H](C1)O)c1c(cccc1F)F)=O)c1ccc(C#N)cc1F' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_013/s_489.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'CC1(C)CN(CC1N(C)C(c1c2CCCCc2n(C)n1)=O)C([C@@H]1C[C@H]1C#N)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | 
-------------------------------------------------------------------------------- /data/yamls_014/s_7.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'Cc1cc(C)nc2c1c(nn2C)OCC(N1CCC[C@@H]2[C@@H](C(C)(C)[C@H]12)O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_001/s_1162.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 
'C1CC[C@@H]2C[C@@H](C[C@@H]2C1)NC(c1cc2c(c(cs2)C(O)=O)[nH]1)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_006/s_1802.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1COc2ccc(cc2OC1)S(Nc1ccc(cc1)NC(CC1c2ccccc2C(=O)O1)=O)(=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_007/s_1851.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: 
"AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C1=CSc2nc(cn12)c1ccc(cc1)NC(c1ccc2C(c3ccccc3S(c2c1)(=O)=O)=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_009/s_238.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'C(C1C(N(C2CC3(C2)CNC(=O)O3)C(=Nc2ccc(cc2)O)S1)=O)C(Nc1cccc(c1)O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /data/yamls_013/s_508.yaml: 
-------------------------------------------------------------------------------- 1 | version: 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" 6 | msa: /data/msa/uniref.a3m 7 | - ligand: 8 | id: B 9 | smiles: 'Cc1cccc(c1)[C@@H]1CN(C[C@H]1C(O)=O)C([C@H]1[C@H]2C[C@H]2C(N1)=O)=O' 10 | properties: 11 | - affinity: 12 | binder: B 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Nebius Academy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /scripts/video/boltz-cache-populate-job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: boltz-cache-populate 5 | spec: 6 | backoffLimit: 2 7 | template: 8 | spec: 9 | restartPolicy: OnFailure 10 | containers: 11 | - name: downloader 12 | image: ubuntu:22.04 13 | command: ["/bin/bash", "-c"] 14 | args: 15 | - apt-get update -y && apt-get install -y wget tar ca-certificates || true; 16 | mkdir -p /data/.boltz; 17 | cd /data/.boltz; 18 | wget -nc https://huggingface.co/boltz-community/boltz-1/resolve/main/ccd.pkl; 19 | wget -nc https://huggingface.co/boltz-community/boltz-2/resolve/main/mols.tar; 20 | if [ -f mols.tar ] && [ ! 
-d mols ]; then tar -xf mols.tar && touch .mols_extracted; fi; 21 | wget -nc https://model-gateway.boltz.bio/boltz2_conf.ckpt || true; 22 | wget -nc https://model-gateway.boltz.bio/boltz2_aff.ckpt || true; 23 | volumeMounts: 24 | - name: my-pvc 25 | mountPath: /data 26 | volumes: 27 | - name: my-pvc 28 | persistentVolumeClaim: 29 | claimName: csi-pvc 30 | -------------------------------------------------------------------------------- /scripts/upload_data_to_pvc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | PVC_NAME="boltz-fs-pvc" 5 | LOCAL_BASE="data" # Local directory to upload 6 | POD_NAME="pvc-uploader" # Temporary pod name 7 | 8 | # Create a temporary pod with the PVC mounted 9 | kubectl run "$POD_NAME" \ 10 | --image=ubuntu:22.04 \ 11 | --restart=Never \ 12 | --overrides=" 13 | { 14 | \"spec\": { 15 | \"volumes\": [ 16 | { \"name\": \"data\", \"persistentVolumeClaim\": { \"claimName\": \"$PVC_NAME\" } } 17 | ], 18 | \"containers\": [ 19 | { 20 | \"name\": \"shell\", 21 | \"image\": \"ubuntu:22.04\", 22 | \"command\": [\"sleep\", \"infinity\"], 23 | \"volumeMounts\": [{\"name\": \"data\", \"mountPath\": \"/data\"}] 24 | } 25 | ] 26 | } 27 | } 28 | " >/dev/null 29 | 30 | # Always delete the temporary pod on exit, even when a later step fails under `set -e` 31 | trap 'kubectl delete pod "$POD_NAME" --ignore-not-found' EXIT 32 | 33 | # Wait until the pod is ready 34 | kubectl wait --for=condition=Ready pod/"$POD_NAME" --timeout=120s 35 | 36 | # Archive the local folder and extract it inside the PVC 37 | tar -C "$LOCAL_BASE" -cf - . 
\ 38 | | kubectl exec -i "$POD_NAME" -- tar -C /data -xf - 39 | 40 | # Verify uploaded contents 41 | kubectl exec "$POD_NAME" -- ls -lah /data 42 | -------------------------------------------------------------------------------- /scripts/download_results_from_pvc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | PVC_NAME="boltz-fs-pvc" 5 | POD_NAME="pvc-downloader" 6 | 7 | # Create a temporary pod with the PVC mounted 8 | kubectl run "$POD_NAME" \ 9 | --image=ubuntu:22.04 \ 10 | --restart=Never \ 11 | --overrides=" 12 | { 13 | \"spec\": { 14 | \"volumes\": [ 15 | { \"name\": \"data\", \"persistentVolumeClaim\": { \"claimName\": \"$PVC_NAME\" } } 16 | ], 17 | \"containers\": [ 18 | { 19 | \"name\": \"pvc-downloader\", 20 | \"image\": \"ubuntu:22.04\", 21 | \"command\": [\"sleep\", \"infinity\"], 22 | \"volumeMounts\": [{\"name\": \"data\", \"mountPath\": \"/data\"}] 23 | } 24 | ] 25 | } 26 | } 27 | " >/dev/null 28 | 29 | # Always delete the temporary pod on exit, even when a later step fails under `set -e` 30 | trap 'kubectl delete pod "$POD_NAME" --ignore-not-found' EXIT 31 | 32 | # Wait until the pod is ready 33 | kubectl wait --for=condition=Ready pod/"$POD_NAME" --timeout=120s 34 | 35 | # Create a tarball inside the pod 36 | TMP_TAR="/tmp/results.tgz" 37 | kubectl exec "$POD_NAME" -- \ 38 | bash -lc "tar czf \"$TMP_TAR\" -C /data results" 39 | 40 | # Copy the tarball to the local machine 41 | kubectl cp "$POD_NAME:$TMP_TAR" ./results.tgz 42 | 43 | # Extract locally 44 | tar -xzf results.tgz 45 | rm results.tgz 46 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Build and publish to GHCR 2 | 3 | # Run on push to main (or specify the branch you need) 4 | on: 5 | push: 6 | branches: [ main ] 7 | 8 | permissions: 9 | contents: read 10 | 
packages: write 
11 | 12 | jobs: 13 | build-and-push: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout repository 17 | uses: actions/checkout@v4 18 | 19 | - name: Set up QEMU (for multi-arch, optional) 20 | uses: docker/setup-qemu-action@v2 21 | 22 | - name: Set up Docker Buildx 23 | uses: docker/setup-buildx-action@v3 24 | 25 | - name: Log in to GHCR 26 | uses: docker/login-action@v2 27 | with: 28 | registry: ghcr.io 29 | username: ${{ github.actor }} 30 | password: ${{ secrets.GITHUB_TOKEN }} 31 | 32 | - name: Build and push image 33 | uses: docker/build-push-action@v4 34 | with: 35 | context: . 36 | file: ./docker/Dockerfile 37 | push: true 38 | # image name: ghcr.io/OWNER/REPO:tag 39 | # github.repository_owner -> org (e.g. Nebius-Academy); hard-coded in lowercase below 40 | # github.event.repository.name -> repo (e.g. boltz-runner) 41 | tags: | 42 | ghcr.io/nebius-academy/${{ github.event.repository.name }}:latest 43 | # For multi-arch builds (if needed), uncomment: 44 | # platforms: linux/amd64,linux/arm64 45 | -------------------------------------------------------------------------------- /scripts/boltz-multi-job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: boltz-runner 5 | spec: 6 | completions: 16 # Total tasks (001..016) 7 | parallelism: 16 # Run all tasks in parallel 8 | completionMode: Indexed # Indexing 0..15 available via annotation 9 | template: 10 | spec: 11 | containers: 12 | - name: boltz-runner 13 | image: ghcr.io/nebius-academy/boltz2-mk8s:latest 14 | env: 15 | - name: JOB_INDEX 16 | valueFrom: 17 | fieldRef: 18 | fieldPath: metadata.annotations['batch.kubernetes.io/job-completion-index'] 19 | command: ["bash","-lc"] 20 | args: 21 | - | 22 | set -e 23 | idx=$(printf '%03d' $(( JOB_INDEX + 1 ))) # Format index with leading zeros 24 | d="/data/yamls_${idx}" 25 | echo "▶ Using directory: ${d}" 26 | boltz predict "${d}" --cache /data/.boltz 
--use_msa_server --out_dir /data/results/ 27 | 
resources: 28 | limits: 29 | nvidia.com/gpu: 1 30 | volumeMounts: 31 | - name: data-volume 32 | mountPath: /data 33 | - name: shm-volume 34 | mountPath: /dev/shm 35 | restartPolicy: Never 36 | volumes: 37 | - name: data-volume 38 | persistentVolumeClaim: 39 | claimName: boltz-fs-pvc 40 | - name: shm-volume 41 | emptyDir: 42 | medium: Memory 43 | imagePullSecrets: 44 | - name: ghcr-credentials 45 | -------------------------------------------------------------------------------- /scripts/video/boltz-multi-job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: boltz-runner 5 | spec: 6 | completions: 16 # Total tasks (001..016) 7 | parallelism: 16 # Run all tasks in parallel 8 | completionMode: Indexed # Indexing 0..15 available via annotation 9 | template: 10 | spec: 11 | restartPolicy: OnFailure 12 | containers: 13 | - name: boltz-runner 14 | image: $BOLTZ_IMAGE 15 | env: 16 | - name: JOB_INDEX 17 | valueFrom: 18 | fieldRef: 19 | fieldPath: metadata.annotations['batch.kubernetes.io/job-completion-index'] 20 | - name: NUM_WORKERS 21 | value: "2" 22 | - name: OMP_NUM_THREADS 23 | value: "1" 24 | - name: MKL_NUM_THREADS 25 | value: "1" 26 | command: ["bash","-lc"] 27 | args: 28 | - | 29 | set -e 30 | idx=$(printf '%03d' $(( JOB_INDEX + 1 ))) 31 | d="/data/yamls_${idx}" 32 | mkdir -p /data/results 33 | echo "Using directory: ${d}" 34 | boltz predict "${d}" --cache /data/.boltz --out_dir /data/results/ 35 | resources: 36 | limits: 37 | nvidia.com/gpu: 1 38 | volumeMounts: 39 | - name: my-csi-volume 40 | mountPath: /data 41 | - name: dshm 42 | mountPath: /dev/shm 43 | volumes: 44 | - name: my-csi-volume 45 | persistentVolumeClaim: 46 | claimName: csi-pvc 47 | - name: dshm 48 | emptyDir: 49 | medium: Memory 50 | sizeLimit: 8Gi 51 | -------------------------------------------------------------------------------- /scripts/boltz-cache-download-job.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: boltz-cache-download 5 | namespace: default 6 | spec: 7 | backoffLimit: 0 8 | template: 9 | spec: 10 | restartPolicy: Never 11 | containers: 12 | - name: downloader 13 | image: ubuntu:22.04 14 | command: ["/bin/bash","-lc"] 15 | args: 16 | - | 17 | set -euo pipefail 18 | BOLTZ_DIR=/data/.boltz 19 | mkdir -p "$BOLTZ_DIR" 20 | cd "$BOLTZ_DIR" 21 | 22 | # Install tools if not present 23 | if ! command -v wget >/dev/null 2>&1; then 24 | apt-get update -y 25 | apt-get install -y wget ca-certificates tar 26 | fi 27 | 28 | echo "📥 Download ccd.pkl" 29 | if [[ ! -s ccd.pkl ]]; then 30 | wget -nc https://huggingface.co/boltz-community/boltz-1/resolve/main/ccd.pkl 31 | else 32 | echo " ↪ already exists, skip." 33 | fi 34 | 35 | echo "📥 Download mols.tar" 36 | if [[ ! -s mols.tar && ! -f .mols_extracted ]]; then 37 | wget -nc https://huggingface.co/boltz-community/boltz-2/resolve/main/mols.tar 38 | else 39 | echo " ↪ archive present or already extracted, skip download." 40 | fi 41 | 42 | echo "📦 Extract mols.tar (once)" 43 | if [[ ! -f .mols_extracted ]]; then # marker is written only after tar finishes, so a half-extracted mols/ from an interrupted run is repaired 44 | tar --no-same-owner --no-same-permissions \ 45 | -xf mols.tar \ 46 | --checkpoint=2000 \ 47 | --checkpoint-action=echo="…extracted %u files" 48 | touch .mols_extracted 49 | else 50 | echo " ↪ already extracted, skip." 51 | fi 52 | 53 | echo "📥 Download boltz2_conf.ckpt" 54 | if [[ ! -s boltz2_conf.ckpt ]]; then 55 | wget --tries=3 --timeout=10 https://model-gateway.boltz.bio/boltz2_conf.ckpt || \ 56 | wget -nc https://huggingface.co/boltz-community/boltz-2/resolve/main/boltz2_conf.ckpt 57 | else 58 | echo " ↪ already exists, skip." 59 | fi 60 | 61 | echo "📥 Download boltz2_aff.ckpt" 62 | if [[ ! 
-s boltz2_aff.ckpt ]]; then 63 | wget --tries=3 --timeout=10 https://model-gateway.boltz.bio/boltz2_aff.ckpt || \ 64 | wget -nc https://huggingface.co/boltz-community/boltz-2/resolve/main/boltz2_aff.ckpt 65 | else 66 | echo " ↪ already exists, skip." 67 | fi 68 | 69 | echo "✅ Done. Listing contents:" 70 | ls -lah "$BOLTZ_DIR" 71 | volumeMounts: 72 | - name: data 73 | mountPath: /data 74 | volumes: 75 | - name: data 76 | persistentVolumeClaim: 77 | claimName: boltz-fs-pvc 78 | -------------------------------------------------------------------------------- /scripts/video/run-full-deploy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | # 1. MK8S cluster 5 | 6 | # 1.1 create registry, service account, MK8S cluster and node group 7 | 8 | git clone https://github.com/Nebius-Academy/boltz2-mk8s.git 9 | cd boltz2-mk8s 10 | 11 | # 1.2 set environment variables 12 | 13 | export PROJECT_ID= 14 | export REGION_ID= 15 | export NB_REGISTRY_ID= 16 | export CLUSTER_ID= 17 | export MOUNT_TAG= 18 | export NB_REGISTRY_PATH=$(echo $NB_REGISTRY_ID | cut -d- -f2) 19 | 20 | # 1.3 configure Nebius CLI, docker login with short-lived access token, connect `kubectl` with the cluster 21 | 22 | nebius config set parent-id $PROJECT_ID 23 | 24 | nebius iam get-access-token | \ 25 | docker login cr.$REGION_ID.nebius.cloud \ 26 | --username iam \ 27 | --password-stdin 28 | 29 | nebius mk8s cluster get-credentials --id $CLUSTER_ID --external 30 | 31 | # 2. Docker container 32 | 33 | # 2.1 build -> tag/push -> pre-pull -> check status 34 | 35 | docker build -t boltz-runner -f docker/Dockerfile . 
36 | 37 | export BOLTZ_IMAGE=cr.$REGION_ID.nebius.cloud/$NB_REGISTRY_PATH/boltz-runner:v1.0.0 38 | docker tag boltz-runner $BOLTZ_IMAGE 39 | docker push $BOLTZ_IMAGE 40 | 41 | envsubst '$BOLTZ_IMAGE' < scripts/video/boltz-pre-pull.yaml | kubectl apply -f - 42 | 43 | if kubectl rollout status daemonset/boltz-pre-pull --timeout=30m; then 44 | echo "✅ COMPLETED" 45 | kubectl delete daemonset/boltz-pre-pull 46 | else 47 | echo "❌ FAILED" 48 | exit 1 49 | fi 50 | 51 | # 3. mount shared filesystem & upload data 52 | 53 | # 3.1 install Container Storage Interface driver 54 | 55 | helm pull oci://cr.eu-north1.nebius.cloud/mk8s/helm/csi-mounted-fs-path --version 0.1.3 56 | 57 | helm upgrade csi-mounted-fs-path ./csi-mounted-fs-path-0.1.3.tgz --install \ 58 | --set dataDir=/mnt/$MOUNT_TAG/csi-mounted-fs-path-data/ 59 | 60 | rm csi-mounted-fs-path-0.1.3.tgz 61 | 62 | # 3.2 mounting shared filesystem to pods 63 | 64 | kubectl apply -f scripts/video/csi-pvc-and-pod.yaml 65 | kubectl wait --for=condition=Ready pod/my-csi-app --timeout=10m # cp/exec below need a running container 66 | 67 | # 3.3 upload data 68 | kubectl cp ./data/. my-csi-app:/data 69 | 70 | kubectl exec -it my-csi-app -- ls -lah /data 71 | 72 | kubectl apply -f scripts/video/boltz-cache-populate-job.yaml 73 | 74 | kubectl exec -it my-csi-app -- ls -lah /data/.boltz || true # progress peek only; the directory may not exist yet 75 | 76 | # 3.4 check status 77 | 78 | if kubectl wait --for=condition=complete job/boltz-cache-populate --timeout=30m; then 79 | echo "✅ COMPLETED" 80 | kubectl delete job boltz-cache-populate --wait=false || true 81 | kubectl delete pods -l job-name=boltz-cache-populate || true 82 | else 83 | echo "❌ FAILED" 84 | exit 1 85 | fi 86 | 87 | # 4. 
run boltz-2 and download results 88 | 89 | # 4.1 run boltz-2 90 | 91 | envsubst '$BOLTZ_IMAGE' < scripts/video/boltz-multi-job.yaml | kubectl apply -f - 92 | 93 | kubectl get pods 94 | kubectl logs jobs/boltz-runner || true # progress peek only; containers may still be starting 95 | kubectl exec -it my-csi-app -- ls -lah /data/results || true # progress peek only; the directory may not exist yet 96 | 97 | echo "Waiting for boltz-runner job to complete..." 
98 | kubectl wait --for=condition=complete job/boltz-runner --timeout=-1s 99 | completed=$(kubectl get pods -l job-name=boltz-runner --no-headers | grep 'Completed' | wc -l) 100 | echo "✅ COMPLETED: $completed/16 pods" 101 | 102 | # 4.2 download results 103 | 104 | kubectl cp my-csi-app:/data/results ./results -c my-csi-app -------------------------------------------------------------------------------- /blogpost-boltz2-mk8s.md: -------------------------------------------------------------------------------- 1 | # Running Boltz-2 inference at scale in Nebius AI Cloud 2 | 3 | ## Abstract 4 | 5 | Boltz-2 is an open-source biomolecular foundation model that predicts protein–ligand 3D structures and binding affinities with near-FEP accuracy while running orders of magnitude faster. This article shows how Nebius AI Cloud – using Managed Kubernetes, GPU node group, and shared filesystem – provides a practical, reproducible blueprint for running Boltz-2 from single-GPU experiments to scalable multi-node screening pipelines. 6 | 7 | ## Introduction 8 | 9 | Accurately modeling biomolecular interactions is one of the central challenges in biology and drug discovery. Proteins, nucleic acids and small molecules form complex, often dynamic assemblies whose structural details determine biological function and therapeutic effect. Among these properties, binding affinity – the strength of interaction between a small molecule and its protein target – is a primary determinant of a compound’s potency and a crucial filter in hit discovery and lead optimization. 10 | 11 | *In-silico* prediction of binding affinity remains difficult despite its importance in drug design. Atomistic approaches like free-energy perturbation (FEP) can near experimental accuracy, but their heavy compute demands and requirement for expert handling make them unsuitable for high-throughput screening. 
Faster heuristics such as molecular docking trade speed for precision and frequently lack the ranking power required for confident decision making. 12 | 13 | [Boltz-2](https://www.biorxiv.org/content/10.1101/2025.06.14.659707v1) is a structural-biology foundation model that combines high-quality structure prediction and affinity estimation. It uses a co-folding trunk for protein–ligand complex prediction, a dedicated affinity module (PairFormer + prediction heads), and controllability features – e.g., conditioning on experimental method (X-ray / NMR / MD), pocket/distance steering, and multimeric templates – to improve robustness. These advances let Boltz-2 produce structure and affinity outputs that align well with experiments while running orders of magnitude faster than FEP, enabling high-throughput ranking and screening of hundreds of thousands of compounds per day on parallel high-performance computing. 14 | 15 | Boltz-2 already works in real pipelines: retrospective and prospective tests show it helps hit-to-lead optimization, large-scale hit discovery, and generative design loops that are later validated with targeted FEP. It produces experimentally relevant hypotheses at scales that were previously impractical for physics-based methods. However, model advances alone don’t solve the engineering problems of production inference. At scale, Boltz-2 depends on large, low-latency datasets – ligand libraries, the Chemical Component Dictionary (CCD), MSA caches, and the like – so you need solid operational patterns: efficient data locality and caching, parallel job orchestration with GPU-aware scheduling, fault tolerance and reproducibility for long runs, and cost-aware lifecycle management to avoid idle expensive resources. 
16 | 17 | Drawing on the companion [tutorial](https://github.com/dashabalashova/boltz2-mk8s/blob/main/tutorial.md) (which contains tested, runnable commands and manifests), this article focuses on the infrastructure and workflow patterns required to run Boltz-2 reliably at scale on Nebius AI Cloud. Specifically, we cover: cluster orchestration and job scheduling; [Managed Service for Kubernetes®](https://nebius.com/services/managed-kubernetes) to reduce operational burden; GPU node groups sized for Boltz-2’s memory and throughput; and a shared filesystem for centralized model caches, ligand libraries, and outputs. The remainder of the article translates Boltz-2’s scientific requirements into a concrete operational blueprint you can reuse for both exploratory experiments and production-grade screening pipelines. 18 | 19 | --- 20 | 21 | ## Resource requirements and scaling 22 | 23 | Boltz-2 has about 1 billion trainable parameters. In addition to the model weights, it requires a large cache (ligand libraries and the Chemical Component Dictionary, CCD). 24 | 25 | Running inference needs GPUs with high memory capacity. For this benchmark we use [NVIDIA L40S](https://www.nvidia.com/en-us/data-center/l40s/) cards with 48 GB of VRAM. Boltz-2 uses ~11 GB for structure prediction and ~7–8 GB for affinity prediction, which leaves spare capacity for batching and multiple concurrent jobs. 26 | 27 | Assuming ~40–60 seconds per protein–ligand prediction, 16 GPUs running in parallel would yield on the order of 1,000 predictions per hour (≈960–1,440 depending on per-prediction runtime). This is why we run Boltz-2 on a multi-node Kubernetes cluster instead of a single GPU VM. For small workloads (just a few molecules), a standalone GPU VM or Jupyter session is sufficient. At scale, however, Kubernetes orchestration and shared storage become essential. 28 | 29 | --- 30 | 31 | ## Managed Service for Kubernetes® 32 | 33 | Kubernetes is the backbone of distributed AI in Nebius AI Cloud. 
It ensures that all components – compute nodes, storage volumes, and jobs – are orchestrated automatically. 34 | 35 | For Boltz-2 inference, Kubernetes handles: 36 | 37 | - Job scheduling – distributing protein–ligand tasks evenly across GPUs 38 | - Resilience – if a pod fails, it is automatically restarted 39 | - Parallelism – hundreds of inference jobs can run concurrently 40 | - Resource management – GPU, CPU, and RAM allocations are tracked cluster-wide 41 | 42 | Without orchestration, researchers would need to manually launch and monitor hundreds of jobs. Kubernetes makes large-scale biomolecular inference manageable and predictable. 43 | 44 | Nebius provides Managed Service for Kubernetes® – a fully managed control plane that takes away the operational overhead of setting up, patching, and scaling clusters. 45 | 46 | Why it matters for Boltz-2: 47 | - No need to manually install GPU drivers – they come preconfigured 48 | - Node groups can be created with one command and scaled up or down depending on workload 49 | - Security and IAM are integrated with the cloud platform 50 | 51 | This allows research teams to focus on drug discovery experiments rather than infrastructure plumbing. 52 | 53 | --- 54 | 55 | ## Infrastructure and workflow 56 | 57 | This section describes the compute and data platform required to run Boltz-2 at scale on Nebius AI Cloud, and the repeatable workflow for packaging, launching, and cleaning up inference runs. For tested, runnable commands and Kubernetes manifests see the companion [tutorial](https://github.com/dashabalashova/boltz2-mk8s/blob/main/tutorial.md). 58 | 59 | ### GPU node groups with NVIDIA L40S 60 | 61 | Inference runs on GPU node groups – collections of worker nodes equipped with NVIDIA L40S GPUs. 
Each node in our configuration has: 62 | - 2× L40S GPUs 63 | - 64 vCPUs 64 | - 384 GB RAM 65 | - 64 GB fast SSD boot disk 66 | 67 | These specifications ensure that the model and its cache fit comfortably, while leaving room for parallel batches. GPU node groups are billed for as long as they exist, so delete them once your jobs have finished. 68 | 69 | ### Shared filesystem for large datasets 70 | 71 | A critical enabler is the shared filesystem, mounted across all nodes. Boltz-2 requires: 72 | - Ligand libraries 73 | - Chemical Component Dictionary (CCD) 74 | - Multiple sequence alignments (MSAs) 75 | - Input YAML batch files 76 | 77 | With a shared filesystem: 78 | - All nodes can read from the same dataset without duplicating it locally 79 | - Prediction results are written back to a common location 80 | - Workflows remain synchronized, even across dozens of nodes 81 | 82 | In Nebius AI Cloud this is implemented with a network SSD filesystem, attached through the CSI driver and exposed to Kubernetes as a PersistentVolumeClaim (PVC). 83 | 84 | ### Workflow overview 85 | 86 | Running Boltz-2 on Nebius follows a simple, repeatable workflow: 87 | 1. Set up the environment – install CLI tools to manage cloud resources. 88 | 2. Package the model runner – build a container image with Boltz-2 code and dependencies, push it to Nebius Container Registry. 89 | 3. Create a Kubernetes cluster with GPU nodes – launch a Managed Kubernetes cluster with a GPU node group. Attach a shared filesystem. 90 | 4. Upload input data – place YAML job batches and MSAs into the shared PVC. 91 | 5. Pre-load model cache – pre-download ligand libraries and CCD data into the shared filesystem. 92 | 6. Run inference jobs – launch multiple parallel jobs via Kubernetes, each processing a batch of inputs. 93 | 7. Collect results – gather predictions (structures and affinities) from the shared filesystem. 94 | 8. Clean up resources – delete GPU node groups, PVCs, and registries to stop billing. 
95 | 96 | --- 97 | 98 | ## Conclusion 99 | 100 | Boltz-2 makes high-throughput, experimentally relevant *in silico* drug screening practical when paired with the right platform and operational patterns. Using Managed Kubernetes to orchestrate GPU node groups and a shared filesystem for centralized model caches and ligand libraries provides a reliable, scalable, and cost-aware foundation. The workflow we present is reproducible and directly applicable to both exploratory experiments and production-grade screening pipelines. 101 | -------------------------------------------------------------------------------- /tutorial.md: -------------------------------------------------------------------------------- 1 | # Guide: Boltz-2 inference on Managed Service for Kubernetes® cluster with shared filesystem 2 | 3 | [Boltz-2](https://github.com/jwohlwend/boltz) is an open-source biomolecular foundation model for predicting both complex 3D structures and binding affinities. It enables accurate and fast *in silico* screening for drug discovery, approaching the accuracy of physics-based free-energy perturbation (FEP) methods while running up to 1000x faster. 4 | 5 | This guide explains how to set up a Managed Service for [Kubernetes](https://kubernetes.io/)® cluster and a shared filesystem in Nebius AI Cloud, and how to run Boltz-2 inference on them. 6 | 7 | **Resource requirements, scaling and cost** 8 | 9 | Boltz-2 is a large biomolecular foundation model with about 1 billion trainable parameters. In addition to the weights, it requires a substantial model cache (ligand libraries, Canonical Components Dictionary data). Running inference requires powerful GPUs with high memory capacity: in this guide, we use NVIDIA L40S GPUs with 48 GB of VRAM, which provide both sufficient capacity and high throughput.
In practice, GPU memory usage is moderate compared to the total card size: structure prediction requires ~11 GB and affinity prediction ~7-8 GB, leaving spare capacity on the L40S for batching, parallel jobs, and stable large-scale runs. 10 | 11 | The typical Boltz-2 inference time is 40-60 seconds per protein–ligand pair. With multiple GPUs, throughput scales almost linearly: for example, 16 parallel tasks yield ~1,000 pairs per hour. This is why the guide provisions a multi-node GPU cluster rather than a single GPU. For small workloads (a few molecules), a single GPU VM is sufficient, and inference can be run directly from Jupyter or the command line. Kubernetes becomes useful at scale — hundreds or thousands of pairs — where parallel execution and shared storage simplify management. 12 | 13 | GPU nodes incur costs as soon as they are created and running, and charges stop only after the node group is deleted. Storage (filesystems, PVCs) and container registries also accumulate charges while they exist. Be sure to delete all resources (see [Clean up](#8-clean-up-optional)) when finished to avoid unnecessary costs. 14 | 15 | **General note on applicability** 16 | 17 | This guide is written for Boltz-2, but the majority of the workflow is actually model-agnostic and can be reused for other machine learning models running on Kubernetes. Steps such as setting up the environment and installing CLI tools, creating a GPU-enabled Kubernetes cluster with a shared filesystem, uploading data to the PVC, pre-pulling container images, launching inference jobs in parallel, collecting results, and cleaning up cloud resources are all universal. 18 | 19 | What is specific to Boltz-2 are the details of the Dockerfile and runner image (since they depend on the Boltz-2 codebase and dependencies), the input data (YAML batches and MSA files), the model cache (Canonical Components Dictionary and ligand libraries), and the particular Kubernetes job YAMLs provided in the repository. 
20 | 21 | For any other model, you would keep the same general structure of the workflow, but adapt these model-specific pieces: the container build, the input and output data format, how weights or caches are downloaded, and the exact resource requirements. 22 | 23 | **Reference** 24 | ``` 25 | @article{Passaro2025.06.14.659707, 26 | title = {Boltz-2: Towards Accurate and Efficient Binding Affinity Prediction}, 27 | author = {Passaro, Saro and Corso, Gabriele and Wohlwend, Jeremy and Reveiz, Mateo and Thaler, Stephan and Somnath, Vignesh Ram and Getz, Noah and Portnoi, Tally and Roy, Julien and Stark, Hannes and Kwabi-Addo, David and Beaini, Dominique and Jaakkola, Tommi and Barzilay, Regina}, 28 | elocation-id = {2025.06.14.659707}, 29 | year = {2025}, 30 | doi = {10.1101/2025.06.14.659707}, 31 | journal = {bioRxiv} 32 | } 33 | ``` 34 | 35 | --- 36 | 37 | ## 1. Prepare your environment 38 | 39 | In this section, you will install and configure all the necessary command-line tools to manage Nebius AI Cloud resources and the Kubernetes cluster from your local environment. 40 | 41 | ### Install command line interfaces and tools 42 | 43 | Install the required command line interfaces (CLIs) and tools using the copy-and-paste commands provided in the following steps: 44 | 45 |
46 | Ubuntu (x86-64) 47 | 48 | ```bash 49 | sudo apt-get install jq 50 | curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" 51 | sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl 52 | curl https://baltocdn.com/helm/signing.asc | gpg --dearmor | sudo tee /usr/share/keyrings/helm.gpg > /dev/null 53 | sudo apt-get install apt-transport-https --yes 54 | echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/helm.gpg] https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list 55 | sudo apt-get update 56 | sudo apt-get install helm 57 | curl -sSL https://storage.eu-north1.nebius.cloud/cli/install.sh | bash 58 | nebius profile create 59 | ``` 60 |
61 | 62 |
63 | Ubuntu (ARM64) 64 | 65 | ```bash 66 | sudo apt-get install jq 67 | curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/arm64/kubectl" 68 | sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl 69 | curl https://baltocdn.com/helm/signing.asc | gpg --dearmor | sudo tee /usr/share/keyrings/helm.gpg > /dev/null 70 | sudo apt-get install apt-transport-https --yes 71 | echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/helm.gpg] https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list 72 | sudo apt-get update 73 | sudo apt-get install helm 74 | curl -sSL https://storage.eu-north1.nebius.cloud/cli/install.sh | bash 75 | nebius profile create 76 | ``` 77 |
78 | 79 |
80 | macOS (Apple silicon) 81 | 82 | ```bash 83 | brew install jq 84 | curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/darwin/arm64/kubectl" 85 | chmod +x ./kubectl 86 | sudo mv ./kubectl /usr/local/bin/kubectl 87 | sudo chown root: /usr/local/bin/kubectl 88 | brew install helm 89 | curl -sSL https://storage.eu-north1.nebius.cloud/cli/install.sh | bash 90 | nebius profile create 91 | ``` 92 |
93 | 94 |
95 | macOS (Intel) 96 | 97 | ```bash 98 | brew install jq 99 | curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/darwin/amd64/kubectl" 100 | chmod +x ./kubectl 101 | sudo mv ./kubectl /usr/local/bin/kubectl 102 | sudo chown root: /usr/local/bin/kubectl 103 | brew install helm 104 | curl -sSL https://storage.eu-north1.nebius.cloud/cli/install.sh | bash 105 | nebius profile create 106 | ``` 107 |
108 | 109 |
110 | 111 | These commands will install the following tools: 112 | 113 | - [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl) — the Kubernetes command-line interface. 114 | - [jq](https://jqlang.org/download/) — a lightweight JSON processor, used here to parse JSON output from the Nebius AI Cloud CLI and extract resource IDs for other commands. 115 | - [helm](https://helm.sh/docs/intro/install/) — a package manager for Kubernetes that simplifies deployment and management of applications by packaging them into reusable charts. 116 | - [Nebius AI Cloud CLI](https://docs.nebius.com/cli/quickstart) — the command-line interface for managing all Nebius AI Cloud resources. 117 | 118 | The last command, `nebius profile create`, opens the Nebius AI Cloud web console sign-in screen in your browser. Sign in to complete the initialization. 119 | 120 | Run the following commands to verify that all the required CLIs and tools are installed correctly: 121 | 122 | ```bash 123 | kubectl version --client 124 | jq --version 125 | helm version 126 | nebius version 127 | ``` 128 | 129 | After that, save your project ID in the CLI configuration: 130 | 131 | 1. Copy your project ID from the [Project settings](https://console.nebius.com/settings/) page in the web console. 132 | 2. Run the following command, replacing `<project_ID>` with your actual project ID: 133 | ```bash 134 | nebius config set parent-id <project_ID> 135 | ``` 136 | 137 | Note: on the [Project settings](https://console.nebius.com/settings/) page, you can also create new projects. Click the project name in the top navigation bar, select **Create project**, set a name and parameters, and save. Each project will have its own unique project ID. 138 | 139 | --- 140 | 141 | ## 2. Build and push Boltz-2 runner image 142 | 143 | In this section, you will package the Boltz-2 runner code into a Docker image, upload it to the Container Registry, and make it available for deployment in Kubernetes.
144 | 145 | Run the following command from the project root to build the Docker image defined in `docker/Dockerfile`: 146 | 147 | ```bash 148 | sudo docker build -t boltz-runner -f docker/Dockerfile . 149 | ``` 150 | 151 | Copy the region ID from the [Project settings](https://console.nebius.com/settings/) page and substitute it for `<region_ID>` in the commands below. Then create a new registry, tag the `boltz-runner` Docker image with the correct registry path, and push it. 152 | 153 | ```bash 154 | export REGION_ID=<region_ID> 155 | export NB_REGISTRY_PATH=$(nebius registry create \ 156 | --name boltz-registry \ 157 | --format json | jq -r ".metadata.id" | cut -d- -f 2) 158 | docker tag boltz-runner:latest \ 159 | cr.$REGION_ID.nebius.cloud/$NB_REGISTRY_PATH/boltz-runner:latest 160 | docker push cr.$REGION_ID.nebius.cloud/$NB_REGISTRY_PATH/boltz-runner:latest 161 | ``` 162 | 163 | --- 164 | 165 | ## 3. Create cluster and set up PersistentVolumeClaim 166 | 167 | In this section, you will create a GPU-enabled Kubernetes cluster and set up a shared network filesystem. A PersistentVolumeClaim (PVC) named `boltz-fs-pvc` will provide persistent storage for all Boltz-2 jobs, allowing pods to share and retain both the input data (YAMLs, MSAs) and the prediction results across their lifecycle. 168 | 169 | ### Set up variables for cluster configuration 170 | 171 | ```bash 172 | FS_NAME="boltz-fs" 173 | CLUSTER_NAME="boltz-cluster" 174 | NODE_GROUP_NAME="boltz-nodegroup" 175 | SA_NAME="boltz-sa" 176 | NODE_USERNAME="user" 177 | ``` 178 | 179 | ### Get default subnet ID 180 | 181 | The cluster’s control plane and nodes will use IP addresses from the default subnet. 182 | 183 | ```bash 184 | export NB_SUBNET_ID=$(nebius vpc subnet list --format json | jq -r '.items[0].metadata.id') 185 | ``` 186 | 187 | ### Create network SSD filesystem 188 | 189 | **Parameters:** 190 | - `--size-gibibytes 32` — total storage capacity of the shared filesystem (**32 GiB**).
191 | - `--block-size-bytes 4096` — block size (**4 KiB**), a common default for general-purpose workloads. 192 | 193 | ```bash 194 | export NB_FS_ID=$(nebius compute filesystem create \ 195 | --name $FS_NAME \ 196 | --size-gibibytes 32 \ 197 | --type network_ssd \ 198 | --block-size-bytes 4096 \ 199 | --format json | jq -r ".metadata.id") 200 | ``` 201 | 202 | ### Create cluster 203 | 204 | ```bash 205 | export NB_CLUSTER_ID=$(nebius mk8s cluster create \ 206 | --name $CLUSTER_NAME \ 207 | --control-plane-subnet-id $NB_SUBNET_ID \ 208 | '{"spec": { "control_plane": { "endpoints": {"public_endpoint": {}}}}}' \ 209 | --format json | jq -r '.metadata.id') 210 | ``` 211 | 212 | ### Generate kubeconfig for kubectl 213 | 214 | The following command downloads and configures the kubeconfig file so that `kubectl` can connect to your newly created cluster. The `--external` flag ensures that the public control plane endpoint is used, and `--force` overwrites any existing configuration for this cluster. 215 | 216 | ```bash 217 | nebius mk8s cluster get-credentials --id $NB_CLUSTER_ID --external --force 218 | ``` 219 | 220 | ### Create user with SSH access and auto-mount shared filesystem 221 | 222 | First, ensure you have an SSH public key. If you already have `~/.ssh/id_ed25519.pub`, you can skip this step. If not, generate one with the following command, replacing `<your_email>` with a label for the key (for example, your email address): 223 | 224 | ```bash 225 | ssh-keygen -t ed25519 -C "<your_email>" 226 | ``` 227 | 228 | Next, define the `cloud-init` configuration: 229 | 230 | ```bash 231 | SSH_KEY=$(cat ~/.ssh/id_ed25519.pub) 232 | CLOUD_INIT=$(cat <<EOF 233 | users: 234 |   - name: $NODE_USERNAME 235 |     sudo: ALL=(ALL) NOPASSWD:ALL 236 |     shell: /bin/bash 237 |     ssh_authorized_keys: 238 |       - $SSH_KEY 239 | runcmd: 240 |   - sudo mkdir -p /mnt/data 241 |   - sudo mount -t virtiofs csi-storage /mnt/data 242 |   - echo csi-storage /mnt/data "virtiofs" "defaults" "0" "2" >> /etc/fstab 243 | EOF 244 | ) 245 | ``` 246 | 247 | Notes: 248 | - `csi-storage` — the mount tag assigned to the shared filesystem when it is attached to the node group (the `mount_tag` value in `--template-filesystems`); it specifies which shared filesystem to mount; 249 | - `virtiofs` — the filesystem type used for high-performance, low-latency sharing between the node and network storage; 250 | - `/etc/fstab` entry ensures that the filesystem is automatically re-mounted if the node restarts.
251 | 252 | ### Create service account 253 | 254 | Create a service account for the node group and grant it editor permissions by adding it to the Editor IAM group. 255 | 256 | Copy the Editor group ID from the [IAM](https://console.nebius.com/iam/) page in the web console and replace `<editor_group_ID>` with it in the command below to add the service account to that group. 257 | 258 | ```bash 259 | NB_SA_ID=$(nebius iam service-account create \ 260 | --name $SA_NAME --format json | jq -r '.metadata.id') 261 | 262 | nebius iam group-membership create \ 263 | --parent-id <editor_group_ID> \ 264 | --member-id $NB_SA_ID 265 | ``` 266 | 267 | ### Create node group and add it to cluster 268 | 269 | ```bash 270 | nebius mk8s node-group create \ 271 | --name $NODE_GROUP_NAME \ 272 | --parent-id $NB_CLUSTER_ID \ 273 | --fixed-node-count 2 \ 274 | --template-filesystems "[{\"attach_mode\": \"READ_WRITE\", \"mount_tag\": \"csi-storage\", \"existing_filesystem\": {\"id\": \"$NB_FS_ID\"}}]" \ 275 | --template-service-account-id $NB_SA_ID \ 276 | --template-cloud-init-user-data "$CLOUD_INIT" \ 277 | --template-resources-platform "gpu-l40s-d" \ 278 | --template-resources-preset "2gpu-64vcpu-384gb" \ 279 | --template-boot-disk-type network_ssd \ 280 | --template-boot-disk-size-bytes 68719476736 \ 281 | --template-network-interfaces "[{\"public_ip_address\": {}, \"subnet_id\": \"$NB_SUBNET_ID\"}]" \ 282 | --template-gpu-settings-drivers-preset cuda12 283 | ``` 284 | 285 | What it does: 286 | - creates the GPU node group with exactly two nodes (`--fixed-node-count 2`); 287 | - attaches the shared filesystem created earlier (`mount_tag: csi-storage`); 288 | - configures nodes using the `CLOUD_INIT` script; 289 | - uses the `gpu-l40s-d` platform with the `2gpu-64vcpu-384gb` preset, which gives each node: 290 | - 2x NVIDIA L40S GPUs; 291 | - 64 vCPUs; 292 | - 384 GB RAM; 293 | - boots from a 64 GiB network SSD disk (`--template-boot-disk-size-bytes 68719476736`), which stores the OS, Docker images, temporary files, and any data not placed in shared storage; 294 | - preconfigures
CUDA 12 GPU drivers (`--template-gpu-settings-drivers-preset cuda12`). 295 | 296 | ### Install CSI driver 297 | 298 | Install the `csi-mounted-fs-path` driver, which allows Kubernetes to mount the shared filesystem into pods. 299 | 300 | ```bash 301 | helm pull oci://cr.eu-north1.nebius.cloud/mk8s/helm/csi-mounted-fs-path --version 0.1.3 302 | helm upgrade csi-mounted-fs-path ./csi-mounted-fs-path-0.1.3.tgz --install --set dataDir="/mnt/data/csi-mounted-fs-path-data/" 303 | ``` 304 | 305 | ### Create PersistentVolumeClaim 306 | 307 | Create a PersistentVolumeClaim named `boltz-fs-pvc` that requests 32 GiB of shared storage using the `csi-mounted-fs-path-sc` StorageClass. This PVC will be used as a shared filesystem between Boltz-2 jobs. 308 | 309 | ```bash 310 | kubectl apply -f - <