├── .coveragerc ├── .github └── workflows │ ├── docs.yaml │ ├── main.yaml │ └── pr.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── CITATION.cff ├── LICENSE.txt ├── NOTICE ├── README.md ├── docker-compose.yml ├── dockerfiles ├── base │ ├── Dockerfile │ ├── env.yml │ └── requirements.txt ├── main │ └── Dockerfile └── test │ └── Dockerfile ├── docs ├── api │ └── index.md ├── apidoc.py ├── citation.md ├── conf.py ├── contribution │ ├── development.md │ ├── documentation.md │ ├── index.md │ ├── pipeline.md │ ├── release.md │ └── workflow.png ├── dataset.md ├── evaluation.md ├── examples │ ├── 1_download.ipynb │ ├── 2_query_filter_index.ipynb │ ├── 3_access_system_files.ipynb │ ├── 4_align_mask_crop.ipynb │ ├── 5_dataset_and_loader.ipynb │ ├── 6_custom_split.ipynb │ ├── index.md │ ├── mlsb_challenge.md │ └── split_plots │ │ ├── chain_composition.png │ │ ├── domain_classifications.png │ │ ├── ligand_types.png │ │ ├── molecular_descriptors.png │ │ ├── plinder_clusters.png │ │ ├── priorities.png │ │ └── split_proportions.png ├── index.md ├── static │ ├── assets │ │ └── general │ │ │ ├── plinder_data_hierarchy.png │ │ │ ├── plinder_icon.png │ │ │ └── plinder_logo.png │ └── plinder.css ├── tablegen.py ├── tutorial │ ├── api.ipynb │ ├── dataset.md │ └── index.md └── viewcode.py ├── environment.yml ├── flows ├── configs │ ├── split_configs │ │ ├── batch_0 │ │ │ ├── 1.yaml │ │ │ ├── 2.yaml │ │ │ ├── 3.yaml │ │ │ ├── 4.yaml │ │ │ ├── 5.yaml │ │ │ └── 6.yaml │ │ ├── batch_1 │ │ │ ├── 1.yaml │ │ │ ├── 2.yaml │ │ │ ├── 3.yaml │ │ │ ├── 4.yaml │ │ │ ├── 5.yaml │ │ │ └── 6.yaml │ │ ├── batch_2 │ │ │ ├── 0_5.yaml │ │ │ └── 1_5.yaml │ │ ├── batch_3 │ │ │ ├── 1.yaml │ │ │ ├── 10.yaml │ │ │ ├── 11.yaml │ │ │ ├── 12.yaml │ │ │ ├── 13.yaml │ │ │ ├── 14.yaml │ │ │ ├── 2.yaml │ │ │ ├── 3.yaml │ │ │ ├── 5.yaml │ │ │ ├── 6.yaml │ │ │ ├── 7.yaml │ │ │ └── 9.yaml │ │ ├── batch_4 │ │ │ ├── 4.yaml │ │ │ └── 8.yaml │ │ ├── batch_5 │ │ │ ├── 10.yaml │ │ │ ├── 9.yaml │ │ │ ├── batch_11.yaml │ │ │ 
└── batch_12.yaml │ │ └── batch_6 │ │ │ ├── 10.yaml │ │ │ ├── 2.yaml │ │ │ ├── 7.yaml │ │ │ └── 9.yaml │ └── v2 │ │ ├── download_and_make_dbs.yaml │ │ ├── make_batch_scores_only.yaml │ │ ├── make_components.yaml │ │ ├── make_entries_ligands.yaml │ │ ├── make_leakage.yaml │ │ ├── make_protein_scores.yaml │ │ ├── make_splits.yaml │ │ ├── split_multi_graph.yaml │ │ └── split_single_graph.yaml ├── data_ingest.py ├── data_ingest_report.py ├── docker.py ├── proc.py ├── report.py └── split_eval.py ├── mypy.ini ├── pyproject.toml ├── pytest.ini ├── requirements_data.txt ├── scripts └── write_data_dicionary.py ├── src └── plinder │ ├── __init__.py │ ├── _version.py │ ├── core │ ├── __init__.py │ ├── index │ │ ├── __init__.py │ │ ├── system.py │ │ └── utils.py │ ├── loader │ │ ├── __init__.py │ │ ├── dataset.py │ │ ├── featurizer.py │ │ ├── transforms.py │ │ └── utils.py │ ├── scores │ │ ├── __init__.py │ │ ├── clusters.py │ │ ├── index.py │ │ ├── ligand.py │ │ ├── links.py │ │ ├── protein.py │ │ └── query.py │ ├── split │ │ ├── __init__.py │ │ ├── plot.py │ │ └── utils.py │ ├── structure │ │ ├── __init__.py │ │ ├── atoms.py │ │ ├── contacts.py │ │ ├── diffdock_utils.py │ │ ├── models.py │ │ ├── smallmols_similarity.py │ │ ├── smallmols_utils.py │ │ ├── structure.py │ │ ├── superimpose.py │ │ ├── surgery.py │ │ └── vendored.py │ └── utils │ │ ├── __init__.py │ │ ├── config.py │ │ ├── constants.py │ │ ├── cpl.py │ │ ├── dataclass.py │ │ ├── dec.py │ │ ├── gcs.py │ │ ├── io.py │ │ ├── load_systems.py │ │ ├── log.py │ │ ├── schemas.py │ │ └── unpack.py │ ├── data │ ├── __init__.py │ ├── _version.py │ ├── clusters.py │ ├── column_descriptions │ │ ├── __init__.py │ │ ├── entry.tsv │ │ ├── entry_validation.tsv │ │ ├── extra.tsv │ │ ├── ligand_interacting_ligand_chains.tsv │ │ ├── ligand_interacting_ligand_chains_validation.tsv │ │ ├── ligand_neighboring_ligand_chains.tsv │ │ ├── ligand_neighboring_ligand_chains_validation.tsv │ │ ├── ligand_protein_chains.tsv │ │ ├── 
ligand_protein_chains_validation.tsv │ │ ├── ligands.tsv │ │ ├── posebusters_checks.tsv │ │ ├── qc.tsv │ │ ├── similarity_clusters.tsv │ │ ├── system.tsv │ │ ├── system_ligand_chains.tsv │ │ ├── system_ligand_chains_validation.tsv │ │ ├── system_ligand_validation.tsv │ │ ├── system_pocket.tsv │ │ ├── system_pocket_validation.tsv │ │ ├── system_protein_chains.tsv │ │ └── system_protein_chains_validation.tsv │ ├── common │ │ ├── __init__.py │ │ ├── _version.py │ │ ├── constants.py │ │ └── log.py │ ├── databases.py │ ├── docs.py │ ├── final_structure_qc.py │ ├── get_system_annotations.py │ ├── leakage.py │ ├── pipeline │ │ ├── __init__.py │ │ ├── config.py │ │ ├── io.py │ │ ├── mpqueue.py │ │ ├── pipeline.py │ │ ├── tasks.py │ │ ├── transform.py │ │ └── utils.py │ ├── save_linked_structures.py │ ├── splits.py │ ├── structure │ │ ├── __init__.py │ │ ├── atoms.py │ │ └── contacts.py │ └── utils │ │ ├── __init__.py │ │ └── annotations │ │ ├── __init__.py │ │ ├── aggregate_annotations.py │ │ ├── get_ligand_validation.py │ │ ├── get_similarity_scores.py │ │ ├── interaction_utils.py │ │ ├── interface_gap.py │ │ ├── ligand_utils.py │ │ ├── mmpdb_utils.py │ │ ├── protein_utils.py │ │ ├── rdkit_utils.py │ │ ├── save_utils.py │ │ ├── static_files │ │ ├── artifacts_badlist.csv │ │ ├── cofactors.json │ │ ├── dates.csv │ │ ├── ligand_list.tsv │ │ └── prdcc.chemlib │ │ └── utils.py │ ├── eval │ ├── __init__.py │ └── docking │ │ ├── __init__.py │ │ ├── make_plots.py │ │ ├── stratify_test_set.py │ │ ├── utils.py │ │ └── write_scores.py │ └── methods │ └── __init__.py ├── tests ├── __init__.py ├── conftest.py ├── core │ ├── test_atoms.py │ ├── test_core_config.py │ ├── test_core_scores.py │ ├── test_core_system.py │ ├── test_data_loader.py │ ├── test_dataclass.py │ ├── test_gcs.py │ ├── test_index_utils.py │ ├── test_smallmols_utils.py │ ├── test_split_plot.py │ ├── test_superimpose.py │ └── test_transforms.py ├── data │ ├── pipeline │ │ ├── test_config.py │ │ ├── test_end_to_end.py │ 
│ ├── test_io.py │ │ ├── test_pipeline.py │ │ ├── test_tasks.py │ │ ├── test_transform.py │ │ └── test_utils.py │ ├── test_clusters.py │ ├── test_docs.py │ ├── test_plinder_data.py │ └── test_save_linked_structures.py ├── test_annotations.py ├── test_data │ ├── 2g.zip │ ├── 7nac.json │ ├── components.cif │ ├── components.cif.gz │ ├── components.parquet │ ├── ecod_mini.tsv │ ├── eval │ │ ├── fingerprints │ │ │ └── ligands_per_system.parquet │ │ ├── index │ │ │ └── annotation_table.parquet │ │ ├── predicted_poses │ │ │ ├── 1a3b__1__1.B__1.D │ │ │ │ ├── rank1.sdf │ │ │ │ └── rank1_named.sdf │ │ │ └── 1ai5__1__1.A_1.B__1.D │ │ │ │ └── rank1.sdf │ │ ├── predictions.csv │ │ ├── results.csv │ │ ├── scores │ │ │ └── search_db=holo │ │ │ │ └── small_score.parquet │ │ ├── splits │ │ │ └── split.parquet │ │ └── systems │ │ │ ├── a3.zip │ │ │ └── ai.zip │ ├── kinase_ligand_ccd_codes.parquet │ ├── kinase_uniprotac.parquet │ ├── mini_all_entries.json │ ├── mini_score_dataset.parquet │ ├── mini_score_seq_dataset.parquet │ ├── mini_structure_checks_report.tsv │ ├── mini_system_files_new │ │ ├── 1fbh__1__1.A_1.B__1.E_1.F │ │ │ ├── chain_mapping.json │ │ │ ├── ligand_files │ │ │ │ ├── 1.E.sdf │ │ │ │ └── 1.F.sdf │ │ │ ├── sequences.fasta │ │ │ ├── system.cif │ │ │ └── system.pdb │ │ ├── 1fbz__1__1.A__1.C │ │ │ ├── chain_mapping.json │ │ │ ├── ligand_files │ │ │ │ └── 1.C.sdf │ │ │ ├── sequences.fasta │ │ │ ├── system.cif │ │ │ └── system.pdb │ │ └── 3fbp__1__1.A_1.B__1.C │ │ │ ├── chain_mapping.json │ │ │ ├── ligand_files │ │ │ └── 1.C.sdf │ │ │ ├── sequences.fasta │ │ │ ├── system.cif │ │ │ └── system.pdb │ ├── mmp │ │ ├── mini_clusters │ │ │ └── cluster=components │ │ │ │ ├── directed=False │ │ │ │ └── metric=protein_fident_weighted_sum │ │ │ │ │ └── threshold=95.parquet │ │ │ │ └── directed=True │ │ │ │ └── metric=pocket_fident │ │ │ │ └── threshold=100.parquet │ │ ├── mmp_mini_data.tsv │ │ ├── mmp_test_pocket_fident_weighted_sum__1.0__strong__component.csv │ │ ├── 
mmp_test_protein_fident_weighted_sum__0.95__weak__component.csv │ │ └── tiny_mmp_index.csv.gz │ ├── panther_classifications_mini.tar.gz │ ├── panther_raw.tar.gz │ ├── pdb_seqres.txt.gz │ ├── plinder │ │ └── mount │ │ │ ├── clusters │ │ │ └── subdir │ │ │ │ └── clusters.parquet │ │ │ ├── entries │ │ │ ├── 9h.zip │ │ │ ├── av.zip │ │ │ ├── ng.zip │ │ │ └── v2.zip │ │ │ ├── fingerprints │ │ │ └── ligands_per_system.parquet │ │ │ ├── index │ │ │ └── annotation_table.parquet │ │ │ ├── ligand_scores │ │ │ └── ligand_scores.parquet │ │ │ ├── links │ │ │ └── kind=apo │ │ │ │ └── links.parquet │ │ │ ├── manifest │ │ │ └── manifest.parquet │ │ │ ├── mmp │ │ │ └── plinder_mmp_series.parquet │ │ │ ├── scores │ │ │ ├── search_db=apo │ │ │ │ ├── pocket_lddt.parquet │ │ │ │ └── protein_fident_qcov_weighted_sum.parquet │ │ │ ├── search_db=holo │ │ │ │ ├── pocket_lddt.parquet │ │ │ │ └── protein_fident_qcov_weighted_sum.parquet │ │ │ └── search_db=pred │ │ │ │ ├── pocket_lddt.parquet │ │ │ │ └── protein_fident_qcov_weighted_sum.parquet │ │ │ ├── splits │ │ │ └── split.parquet │ │ │ ├── strat │ │ │ └── val_vs_test_data │ │ │ │ ├── max_similarities__test_vs_val__pli_unique_qcov.parquet │ │ │ │ ├── max_similarities__test_vs_val__pocket_fident_qcov.parquet │ │ │ │ ├── max_similarities__test_vs_val__pocket_lddt.parquet │ │ │ │ ├── max_similarities__test_vs_val__pocket_lddt_qcov.parquet │ │ │ │ ├── max_similarities__test_vs_val__pocket_qcov.parquet │ │ │ │ ├── max_similarities__test_vs_val__protein_fident_weighted_sum.parquet │ │ │ │ ├── max_similarities__test_vs_val__protein_lddt_weighted_sum.parquet │ │ │ │ ├── max_similarities__test_vs_val__protein_seqsim_weighted_sum.parquet │ │ │ │ ├── max_similarities__test_vs_val__tanimoto_similarity_max.parquet │ │ │ │ └── test_set.parquet │ │ │ └── systems │ │ │ ├── 9h.zip │ │ │ ├── av.zip │ │ │ ├── ng.zip │ │ │ └── v2.zip │ ├── smiles_from_nextgen_bonds_data.csv │ ├── split_challenge │ │ ├── eval_metrics_config.yaml │ │ ├── test_lig_profile.csv 
│ │ ├── test_source_file.csv │ │ └── test_submission.csv │ ├── split_plot_split.parquet │ ├── system_instance_dataframe │ │ ├── instance_dataframe7.csv │ │ ├── plinder_final_dir_structure │ │ │ ├── apo │ │ │ │ ├── 6S8O__1__1.A.cif │ │ │ │ ├── 6S8O__1__1.A.pdb │ │ │ │ ├── 7OS1__1__1.A.cif │ │ │ │ └── 7OS1__1__1.A.pdb │ │ │ ├── holo │ │ │ │ ├── 2Y4I__1__1.B__1.E_1.F__ATP_MG │ │ │ │ │ ├── chain_mapping.json │ │ │ │ │ ├── ligand_files │ │ │ │ │ │ ├── 1.E.sdf │ │ │ │ │ │ └── 1.F.sdf │ │ │ │ │ ├── sequences.fasta │ │ │ │ │ ├── system.cif │ │ │ │ │ └── system.pdb │ │ │ │ └── 8BCH__1__1.A__1.B__QA9 │ │ │ │ │ ├── chain_mapping.json │ │ │ │ │ ├── ligand_files │ │ │ │ │ └── 1.B.sdf │ │ │ │ │ ├── sequences.fasta │ │ │ │ │ ├── system.cif │ │ │ │ │ └── system.pdb │ │ │ └── predicted │ │ │ │ ├── AF-O75643.cif │ │ │ │ └── AF-O75643.pdb │ │ └── system_dataframe7.csv │ ├── test_kinase_klifs.csv │ ├── validation │ │ ├── 1qz5_validation.xml.gz │ │ ├── 2dty_validation.xml.gz │ │ ├── 2e84_validation.xml.gz │ │ ├── 2ixb_validation.xml.gz │ │ ├── 2leb_validation.xml.gz │ │ ├── 2y4i_validation.xml.gz │ │ ├── 4fxd_validation.xml.gz │ │ ├── 5lwx_validation.xml.gz │ │ ├── 6fx1_validation.xml.gz │ │ ├── 6lu7_validation.xml.gz │ │ ├── 6m92_validation.xml.gz │ │ ├── 8a7u_validation.xml.gz │ │ └── 8pn3_validation.xml.gz │ └── xx │ │ ├── output │ │ ├── 4ci1-assembly.cif │ │ └── 5a7w-assembly.cif │ │ ├── pdb_00001ngx │ │ └── pdb_00001ngx_xyz-enrich.cif.gz │ │ ├── pdb_00001ppc │ │ └── pdb_00001ppc_xyz-enrich.cif.gz │ │ ├── pdb_00001qz5 │ │ ├── 1qz5-assembly.cif │ │ ├── pdb_00001qz5_xyz-enrich.cif │ │ └── pdb_00001qz5_xyz-enrich.cif.gz │ │ ├── pdb_00002dty │ │ └── pdb_00002dty_xyz-enrich.cif.gz │ │ ├── pdb_00002e84 │ │ └── pdb_00002e84_xyz-enrich.cif.gz │ │ ├── pdb_00002gdo │ │ └── pdb_00002gdo_xyz-enrich.cif.gz │ │ ├── pdb_00002hyy │ │ └── pdb_00002hyy_xyz-enrich.cif.gz │ │ ├── pdb_00002ixb │ │ └── pdb_00002ixb_xyz-enrich.cif.gz │ │ ├── pdb_00002leb │ │ └── pdb_00002leb_xyz-enrich.cif.gz │ │ ├── 
pdb_00002p1q │ │ └── pdb_00002p1q_xyz-enrich.cif.gz │ │ ├── pdb_00002y4i │ │ └── pdb_00002y4i_xyz-enrich.cif.gz │ │ ├── pdb_00003cyh │ │ └── pdb_00003cyh_xyz-enrich.cif.gz │ │ ├── pdb_00003cz3 │ │ └── pdb_00003cz3_xyz-enrich.cif.gz │ │ ├── pdb_00003g32 │ │ └── pdb_00003g32_xyz-enrich.cif.gz │ │ ├── pdb_00003grt │ │ └── pdb_00003grt_xyz-enrich.cif.gz │ │ ├── pdb_00003ot7 │ │ └── pdb_00003ot7_xyz-enrich.cif.gz │ │ ├── pdb_00004ci1 │ │ ├── 4ci1-assembly.cif │ │ ├── pdb_00004ci1_xyz-enrich.cif │ │ └── pdb_00004ci1_xyz-enrich.cif.gz │ │ ├── pdb_00004fxd │ │ └── pdb_00004fxd_xyz-enrich.cif.gz │ │ ├── pdb_00004jvn │ │ └── pdb_00004jvn_xyz-enrich.cif.gz │ │ ├── pdb_00004nhc │ │ └── pdb_00004nhc_xyz-enrich.cif.gz │ │ ├── pdb_00004qyf │ │ └── pdb_00004qyf_xyz-enrich.cif.gz │ │ ├── pdb_00004tz4 │ │ └── pdb_00004tz4_xyz-enrich.cif.gz │ │ ├── pdb_00005a7w │ │ ├── 5a7w-assembly.cif │ │ ├── 5a7w_A_35M.sdf │ │ ├── 5a7w_A_hyd.pdb │ │ ├── 5a7w_interactions.txt │ │ └── pdb_00005a7w_xyz-enrich.cif.gz │ │ ├── pdb_00005fkw │ │ └── pdb_00005fkw_xyz-enrich.cif.gz │ │ ├── pdb_00005lwx │ │ └── pdb_00005lwx_xyz-enrich.cif.gz │ │ ├── pdb_00006f6r │ │ └── pdb_00006f6r_xyz-enrich.cif.gz │ │ ├── pdb_00006fx1 │ │ └── pdb_00006fx1_xyz-enrich.cif.gz │ │ ├── pdb_00006i41 │ │ └── pdb_00006i41_xyz-enrich.cif.gz │ │ ├── pdb_00006lu7 │ │ └── pdb_00006lu7_xyz-enrich.cif.gz │ │ ├── pdb_00006m92 │ │ └── pdb_00006m92_xyz-enrich.cif.gz │ │ ├── pdb_00006ntj │ │ └── pdb_00006ntj_xyz-enrich.cif.gz │ │ ├── pdb_00006u6k │ │ └── pdb_00006u6k_xyz-enrich.cif.gz │ │ ├── pdb_00007az3 │ │ └── pdb_00007az3_xyz-enrich.cif.gz │ │ ├── pdb_00007bqu │ │ └── pdb_00007bqu_xyz-enrich.cif.gz │ │ ├── pdb_00007gj7 │ │ └── pdb_00007gj7_xyz-enrich.cif.gz │ │ ├── pdb_00007gl9 │ │ └── pdb_00007gl9_xyz-enrich.cif.gz │ │ └── pdb_00008pn3 │ │ └── pdb_00008pn3_xyz-enrich.cif.gz ├── test_eval.py └── test_final_structure_checks.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | 
[run] 2 | branch = true 3 | parallel = true 4 | relative_files = true 5 | source = plinder 6 | 7 | [report] 8 | show_missing = true 9 | precision = 4 10 | -------------------------------------------------------------------------------- /.github/workflows/docs.yaml: -------------------------------------------------------------------------------- 1 | name: Documentation 2 | on: 3 | push: 4 | tags: 5 | - 'v[0-9]+.[0-9]+.[0-9]+' 6 | workflow_dispatch: 7 | 8 | permissions: 9 | id-token: write 10 | contents: read 11 | pages: write 12 | 13 | concurrency: 14 | group: pages 15 | cancel-in-progress: false 16 | 17 | jobs: 18 | deploy: 19 | environment: 20 | name: github-pages 21 | url: ${{ steps.deployment.outputs.page_url }} 22 | name: Deploy docs 23 | runs-on: ubuntu-latest 24 | steps: 25 | - name: Checkout repo 26 | uses: actions/checkout@v4 27 | - name: Setup micromamba 28 | uses: mamba-org/setup-micromamba@v1 29 | with: 30 | environment-file: environment.yml 31 | create-args: python=3.10 32 | init-shell: bash 33 | cache-downloads: true 34 | cache-environment: true 35 | post-cleanup: all 36 | - name: Build docs 37 | shell: bash -el {0} 38 | id: build 39 | run: | 40 | which python 41 | python -m pip install six 42 | python -m pip install '.[docs,loader]' 43 | sphinx-build docs build/docs 44 | - name: Upload build artifact 45 | uses: actions/upload-pages-artifact@v3 46 | with: 47 | path: build/docs 48 | - name: Deploy to GitHub Pages 49 | id: deployment 50 | uses: actions/deploy-pages@v4 51 | -------------------------------------------------------------------------------- /.github/workflows/main.yaml: -------------------------------------------------------------------------------- 1 | name: Package 2 | permissions: 3 | contents: write 4 | id-token: write 5 | packages: write 6 | on: 7 | push: 8 | branches: 9 | - main 10 | concurrency: 11 | group: ${{ github.workflow }}-${{ github.ref }} 12 | jobs: 13 | publish: 14 | name: Package 15 | if: ${{ ! 
contains(toJSON(github.event.commits.*.message), '[skip ci]') }} 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Checkout repo 19 | uses: actions/checkout@v4 20 | with: 21 | fetch-depth: 0 22 | - name: Setup python 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: "3.10" 26 | - name: Configure docker 27 | run: echo ${{ secrets.GITHUB_TOKEN }} | docker login ghcr.io -u ${{ github.repository_owner }} --password-stdin 28 | - name: Install build and tag requirements 29 | run: python -m pip install build semver 30 | - name: Get version bump 31 | id: get-tag 32 | run: echo "bump=$(python flows/docker.py bump)" >> $GITHUB_OUTPUT 33 | - name: Pull, build, tag, bump base image 34 | if: steps.get-tag.outputs.bump != '' 35 | run: python flows/docker.py pull --build --promote 36 | - name: Build image 37 | if: steps.get-tag.outputs.bump != '' 38 | run: python flows/docker.py build 39 | - name: Run tests against new image and save images 40 | if: steps.get-tag.outputs.bump != '' 41 | run: | 42 | python flows/docker.py test --push 43 | if ! test -f reports/.coverage; then 44 | echo "Missing test coverage reports (reports/.coverage). Not all tests have passed?" 45 | exit 1 46 | else 47 | echo "Test coverage report found!" 48 | fi 49 | - name: Build distribution 50 | run: python -m build 51 | - name: Publish package distributions to PyPI 52 | if: steps.get-tag.outputs.bump != '' 53 | uses: pypa/gh-action-pypi-publish@release/v1 54 | - name: Save git tag 55 | if: steps.get-tag.outputs.bump != '' 56 | run: git push origin ${{ steps.get-tag.outputs.bump }} 57 | - name: Copy and surgery coverage 58 | if: steps.get-tag.outputs.bump != '' 59 | run: | 60 | cp reports/.coverage . 
61 | sqlite3 .coverage "update file set path='src/' || substr(path, 40);" 62 | - name: Post coverage comment 63 | if: steps.get-tag.outputs.bump != '' 64 | uses: py-cov-action/python-coverage-comment-action@v3 65 | with: 66 | GITHUB_TOKEN: ${{ github.token }} 67 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | exclude: | 4 | (?x)^( 5 | docs/.*png| 6 | tests/test_data/.*| 7 | src/plinder/data/utils/annotations/static_files/dates.csv| 8 | private/.*ipynb 9 | )$ 10 | repos: 11 | - repo: https://github.com/pre-commit/pre-commit-hooks 12 | rev: v3.2.0 13 | hooks: 14 | - id: trailing-whitespace 15 | - id: end-of-file-fixer 16 | - id: check-yaml 17 | - id: check-added-large-files 18 | - repo: local 19 | hooks: 20 | - id: ruff-format 21 | name: ruff-format 22 | entry: bash -c 'ruff format --force-exclude --preview src tests' 23 | language: system 24 | types: [python] 25 | - id: ruff-linter 26 | name: ruff-linter 27 | entry: bash -c 'ruff check --fix src tests' 28 | language: system 29 | types: [python] 30 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | PLINDER - The Protein Ligand INteractions Dataset and Evaluation Resource 2 | Copyright (c) 2024, Plinder Development Team 3 | 4 | The PLINDER project is a collaboration between the 5 | University of Basel, SIB Swiss Institute of Bioinformatics, 6 | VantAI, NVIDIA, and MIT CSAIL. 7 | 8 | If you find this software useful, please cite: 9 | 10 | Durairaj, Janani, Yusuf Adeshina, Zhonglin Cao, Xuejin Zhang, Vladas Oleinikovas, Thomas Duignan, Zachary McClure, et al. 
“PLINDER: The Protein-Ligand Interactions Dataset and Evaluation Resource.” bioRxiv, July 17, 2024, 2024.07.17.603955. https://doi.org/10.1101/2024.07.17.603955. 11 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | base: 4 | image: ${IMAGE_REPO:-ghcr.io/plinder-org}/plinder-base:${BASE_TAG:-latest} 5 | build: 6 | context: ./dockerfiles/base 7 | 8 | plinder: 9 | image: ${IMAGE_REPO:-ghcr.io/plinder-org}/plinder:${BUILD_TAG:-latest} 10 | build: 11 | context: . 12 | dockerfile: ./dockerfiles/main/Dockerfile 13 | args: 14 | BASE_IMAGE: ${IMAGE_REPO:-ghcr.io/plinder-org}/plinder-base 15 | BASE_TAG: ${BASE_TAG:-latest} 16 | depends_on: 17 | - base 18 | 19 | test: 20 | image: ${IMAGE_REPO:-ghcr.io/plinder-org}/plinder:${BUILD_TAG:-latest} 21 | depends_on: 22 | - plinder 23 | volumes: 24 | - ./tests/:/app/tests/ 25 | - ./pytest.ini:/app/pytest.ini 26 | - ./.coveragerc:/app/.coveragerc 27 | - ./reports/:/app/reports/ 28 | - ./examples/:/app/examples/ 29 | command: /bin/bash -c "python -m pytest -n auto -v && cp .coverage reports/.coverage" 30 | -------------------------------------------------------------------------------- /dockerfiles/base/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=linux/amd64 mambaorg/micromamba:git-c160e88-jammy 2 | 3 | USER root 4 | ENV DEBIAN_FRONTEND=noninteractive 5 | 6 | # Install OS dependencies 7 | RUN apt-get --allow-releaseinfo-change update \ 8 | && apt-get -y install --no-install-recommends \ 9 | apt-utils \ 10 | dialog 2>&1 \ 11 | && apt-get install -y --no-install-recommends \ 12 | git \ 13 | gpg \ 14 | wget \ 15 | man-db \ 16 | procps \ 17 | tree \ 18 | unzip \ 19 | gcc \ 20 | build-essential \ 21 | lsb-release \ 22 | curl \ 23 | vim \ 24 | exuberant-ctags \ 25 | apt-transport-https \ 26 | ca-certificates \ 27 | 
gnupg \ 28 | sudo \ 29 | libgl1-mesa-glx \ 30 | libxrender1 \ 31 | rsync \ 32 | libtiff-dev \ 33 | && apt-get autoclean \ 34 | && apt-get autoremove \ 35 | && rm -rf /var/lib/apt/lists/* 36 | 37 | # Install rust toolchain 38 | RUN curl https://sh.rustup.rs -sSf | sh -s -- -y 39 | ENV PATH=/root/.cargo/bin:${PATH} 40 | 41 | # Install conda env 42 | COPY env.yml /tmp/env.yml 43 | RUN micromamba install -y -f /tmp/env.yml && \ 44 | micromamba clean --all --yes && \ 45 | rm /tmp/env.yml 46 | 47 | # Install gsutil without full gcloud sdk 48 | RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" | \ 49 | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list \ 50 | && curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | \ 51 | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - \ 52 | && apt-get update -y \ 53 | && apt-get install google-cloud-cli -y \ 54 | && apt-get autoclean \ 55 | && apt-get autoremove \ 56 | && rm -rf /var/lib/apt/lists/* 57 | 58 | ENV BASH_ENV=/usr/local/bin/_activate_current_env.sh 59 | 60 | COPY requirements.txt /tmp/requirements.txt 61 | RUN --mount=type=secret,id=INDEX_URL \ 62 | PIP_KEYRING_PROVIDER=subprocess python -m pip install \ 63 | --extra-index-url "$(cat /run/secrets/INDEX_URL)" \ 64 | --no-cache-dir -r /tmp/requirements.txt && \ 65 | rm /tmp/requirements.txt 66 | 67 | ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"] 68 | 69 | CMD ["/bin/bash"] 70 | -------------------------------------------------------------------------------- /dockerfiles/base/env.yml: -------------------------------------------------------------------------------- 1 | name: base 2 | channels: 3 | - metalcycling 4 | - conda-forge 5 | - aivant 6 | - defaults 7 | - bioconda 8 | dependencies: 9 | - python=3.10.* 10 | - pyopenssl=23.2.0 11 | - requests=2.25.1 12 | - google-cloud-storage 13 | - gcsfs 14 | - reduce 15 | - aivant::openstructure=2.8.0 16 | - boost=1.82 17 | - mmseqs2 18 | - foldseek 
19 | - plip=2.3.0 20 | - pip: 21 | - keyrings.google-artifactregistry-auth==1.1.2 22 | -------------------------------------------------------------------------------- /dockerfiles/main/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_IMAGE 2 | ARG BASE_TAG 3 | FROM --platform=linux/amd64 ${BASE_IMAGE}:${BASE_TAG} 4 | ARG MAMBA_DOCKERFILE_ACTIVATE=1 5 | ARG DEPENDENCY_BLOCKS=test 6 | 7 | WORKDIR /app 8 | COPY requirements_data.txt requirements_data.txt 9 | RUN python -m pip install -r requirements_data.txt 10 | COPY src src 11 | COPY pyproject.toml pyproject.toml 12 | RUN --mount=source=.git,target=.git,type=bind \ 13 | --mount=type=secret,id=INDEX_URL \ 14 | PIP_KEYRING_PROVIDER=subprocess python -m pip install \ 15 | --extra-index-url "$(cat /run/secrets/INDEX_URL)" \ 16 | --no-cache-dir '.['"${DEPENDENCY_BLOCKS}"']' 17 | -------------------------------------------------------------------------------- /dockerfiles/test/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_IMAGE 2 | ARG BASE_TAG 3 | FROM --platform=linux/amd64 ${BASE_IMAGE}:${BASE_TAG} 4 | ARG MAMBA_DOCKERFILE_ACTIVATE=1 5 | COPY src src 6 | COPY pyproject.toml pyproject.toml 7 | RUN --mount=source=.git,target=.git,type=bind \ 8 | python -m pip install --no-cache-dir . 9 | -------------------------------------------------------------------------------- /docs/api/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | sd_hide_title: true 3 | --- 4 | 5 | # Python API 6 | 7 | ## API reference 8 | 9 | The ``plinder.core`` package provides utilities for interacting with the PLINDER dataset. 10 | 11 | It contains an additional ``plinder.core.scores`` sub-package for querying parquet collections. 
12 | 13 | :::{toctree} 14 | :maxdepth: 1 15 | :hidden: 16 | :glob: 17 | 18 | ./*/index 19 | ::: 20 | -------------------------------------------------------------------------------- /docs/citation.md: -------------------------------------------------------------------------------- 1 | # Citation 2 | 3 | If you find PLINDER useful, please cite: 4 | 5 | > Janani Durairaj, Yusuf Adeshina, Zhonglin Cao, Xuejin Zhang, Vladas Oleinikovas, 6 | Thomas Duignan, Zachary McClure, Xavier Robin, Gabriel Studer, Daniel Kovtun, 7 | Emanuele Rossi, Guoqing Zhou, Srimukh Veccham, Clemens Isert, Yuxing Peng, 8 | Prabindh Sundareson, Mehmet Akdel, Gabriele Corso, Hannes Stärk, Gerardo Tauriello, 9 | Zachary Carpenter, Michael Bronstein, Emine Kucukbenli, Torsten Schwede, Luca Naef,\ 10 | "PLINDER: The protein-ligand interactions dataset and evaluation resource",\ 11 | _bioRxiv_ 12 | July 2024; 13 | doi: [10.1101/2024.07.17.603955](https://doi.org/10.1101/2024.07.17.603955) 14 | 15 | You may also get a useful summary from the 16 | [ICML'24 ML4LMS poster submission](https://openreview.net/forum?id=7UvbaTrNbP). 17 | -------------------------------------------------------------------------------- /docs/contribution/documentation.md: -------------------------------------------------------------------------------- 1 | # Documentation 2 | 3 | The *PLINDER* documentation resides in the `docs` directory. 4 | The documents are written in *Markdown* files rendered with [*Sphinx*](https://www.sphinx-doc.org) and [*MyST*](https://myst-parser.readthedocs.io). 5 | The [Python API Tutorial](/tutorial/api) uses a [*Jupyter*](https://jupyter.org/) 6 | notebook rendered with [*MyST-NB*](https://myst-nb.readthedocs.io) 7 | 8 | ## Building 9 | 10 | Building the documentation requires some extra requirements that are specified 11 | in the `pyproject.toml`. 
12 | 13 | ```console 14 | $ pip install -e ".[docs]" 15 | ``` 16 | 17 | To build the documentation run 18 | 19 | ```console 20 | $ sphinx-build docs build/docs 21 | ``` 22 | -------------------------------------------------------------------------------- /docs/contribution/index.md: -------------------------------------------------------------------------------- 1 | # Contributor guide 2 | 3 | The PLINDER project is a community effort, launched by the University of Basel, 4 | SIB Swiss Institute of Bioinformatics, VantAI, NVIDIA, MIT CSAIL, and will be regularly 5 | updated. 6 | We highly welcome contributions! 7 | 8 | This guide gives an introduction about how to maintain and improve `plinder` as a 9 | developer. 10 | 11 | # Code organization 12 | 13 | This code is split into 4 sub-packages 14 | 15 | - `plinder.core`: Provides core data structures for interacting with 16 | and loading the dataset. 17 | Parts of it are exposed as the [public API](/api/index). 18 | - `plinder.data`: Contains core code for generating the PLINDER dataset. 19 | - `plinder.eval`: Offers evaluation harness for the dataset that takes as input 20 | predicted and ground truth structures in a pre-determined folder structure and 21 | returns a leaderboard-ready set of entries. 22 | Parts of it are user-facing via [CLI scripts](/evaluation). 23 | - `plinder.methods`: Implements methods in the leaderboard that leverage 24 | PLINDER primitives for training and running. 25 | 26 | :::{toctree} 27 | :maxdepth: 1 28 | :hidden: 29 | 30 | pipeline 31 | development 32 | documentation 33 | release 34 | ::: 35 | -------------------------------------------------------------------------------- /docs/contribution/release.md: -------------------------------------------------------------------------------- 1 | # Release 2 | 3 | Code is contributed via pull requests on *GitHub*. 4 | A new `plinder` version is released on each pull request merge. 
5 | As a consequence, each merge automatically induces a 6 | [Semantic Version](https://semver.org/) bump. 7 | The version bumping semantics are controlled via the commit history since the previous 8 | release: 9 | 10 | - If `bumpversion skip` is present in the commit message, the version will not be bumped 11 | - If `bumpversion major` is present in the commit message, the major version will be bumped 12 | - If `bumpversion minor` is present in the commit message, the minor version will be bumped 13 | - If `bumpversion patch` is present in the commit message (or nothing is found), the patch version will be bumped 14 | 15 | :::{note} 16 | The CI workflow will use the **most recent** match in the commit history to make its decision. 17 | ::: 18 | 19 | Each new version release automatically triggers updates on the following platforms: 20 | 21 | - A new *PyPI* release is created. 22 | - A new *Docker* image is pushed to the 23 | [registry](https://github.com/plinder-org/plinder/pkgs/container/plinder). 24 | - This documentation website is updated. 25 | -------------------------------------------------------------------------------- /docs/contribution/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/docs/contribution/workflow.png -------------------------------------------------------------------------------- /docs/examples/index.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | The following examples show advanced applications of *PLINDER* to real-world problems. 4 | Although they address a specific question, they can be easily adapted for 5 | your own purposes. 
6 | 7 | :::{toctree} 8 | :maxdepth: 1 9 | :glob: 10 | 11 | ./* 12 | ::: 13 | -------------------------------------------------------------------------------- /docs/examples/split_plots/chain_composition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/docs/examples/split_plots/chain_composition.png -------------------------------------------------------------------------------- /docs/examples/split_plots/domain_classifications.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/docs/examples/split_plots/domain_classifications.png -------------------------------------------------------------------------------- /docs/examples/split_plots/ligand_types.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/docs/examples/split_plots/ligand_types.png -------------------------------------------------------------------------------- /docs/examples/split_plots/molecular_descriptors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/docs/examples/split_plots/molecular_descriptors.png -------------------------------------------------------------------------------- /docs/examples/split_plots/plinder_clusters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/docs/examples/split_plots/plinder_clusters.png -------------------------------------------------------------------------------- /docs/examples/split_plots/priorities.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/docs/examples/split_plots/priorities.png -------------------------------------------------------------------------------- /docs/examples/split_plots/split_proportions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/docs/examples/split_plots/split_proportions.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | sd_hide_title: true 3 | html_theme.sidebar_secondary.remove: true 4 | --- 5 | 6 | # PLINDER documentation 7 | 8 | 9 | ![plinder](/static/assets/general/plinder_logo.png){w=40em align=center} 10 | 11 | **PLINDER**, short for **p**rotein **l**igand **in**teractions **d**ataset and 12 | **e**valuation **r**esource, is a comprehensive, annotated, high-quality dataset and 13 | resource for training and evaluation of protein-ligand docking algorithms: 14 | 15 | - \> 400k PLI systems across > 11k SCOP domains and > 50k unique small molecules 16 | - 500+ annotations for each system, including protein and ligand properties, quality, 17 | matched molecular series and more 18 | - Automated curation pipeline to keep up with the PDB 19 | - 14 PLI metrics and over 20 billion similarity scores 20 | - Unbound \(_apo_\) and _predicted_ AlphaFold2 structures linked to _holo_ systems 21 | - _train-val-test_ splits and ability to tune splitting based on the learning task 22 | - Robust evaluation harness to simplify and standardize performance comparison between models.
23 | 24 | 25 | ::::::{grid} 1 1 2 2 26 | 27 | :::::{grid-item-card} 28 | :link: tutorial/dataset 29 | :link-type: doc 30 | 31 | ::::{grid} 2 32 | 33 | :::{grid-item} 34 | :columns: 3 35 | :class: main-button 36 | 37 | ::: 38 | 39 | :::{grid-item} 40 | :columns: 9 41 | **Dataset access** 42 | 43 | Access the PLI systems and their annotations directly via the files 44 | ::: 45 | :::: 46 | ::::: 47 | 48 | 49 | :::::{grid-item-card} 50 | :link: tutorial/api 51 | :link-type: doc 52 | 53 | ::::{grid} 2 54 | 55 | :::{grid-item} 56 | :columns: 3 57 | :class: main-button 58 | 59 | ::: 60 | 61 | :::{grid-item} 62 | :columns: 9 63 | **Python API** 64 | 65 | Use the dedicated Python package to explore the data 66 | ::: 67 | :::: 68 | ::::: 69 | 70 | :::::: 71 | 72 | % TODO: re-add `contribution/index` 73 | 74 | :::{toctree} 75 | :maxdepth: 1 76 | :hidden: 77 | 78 | tutorial/index 79 | dataset 80 | api/index 81 | evaluation 82 | examples/index 83 | contribution/index 84 | citation 85 | ::: 86 | -------------------------------------------------------------------------------- /docs/static/assets/general/plinder_data_hierarchy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/docs/static/assets/general/plinder_data_hierarchy.png -------------------------------------------------------------------------------- /docs/static/assets/general/plinder_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/docs/static/assets/general/plinder_icon.png -------------------------------------------------------------------------------- /docs/static/assets/general/plinder_logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/docs/static/assets/general/plinder_logo.png -------------------------------------------------------------------------------- /docs/static/plinder.css: -------------------------------------------------------------------------------- 1 | html { 2 | --pst-font-family-base: "Geologica", "Sans-Serif"; 3 | --pst-font-family-heading: "Montserrat", "Sans-Serif"; 4 | --pst-font-weight-heading: 700; 5 | } 6 | 7 | html[data-theme="light"] { 8 | --pst-color-primary: #da5da4; 9 | --pst-color-primary-highlight: #da5da4; 10 | --pst-color-secondary: #5c7ec0; 11 | --pst-color-secondary-highlight: #5c7ec0; 12 | --pst-color-accent: #a05ba4; 13 | --pst-color-inline-code: #a05ba4; 14 | --pst-color-inline-code-links: #a05ba4; 15 | --pst-color-table-row-hover-bg: #dddad7; 16 | } 17 | 18 | html[data-theme="dark"] { 19 | --pst-color-primary: #da5da4; 20 | --pst-color-primary-highlight: #da5da4; 21 | --pst-color-secondary: #5c7ec0; 22 | --pst-color-secondary-highlight: #5c7ec0; 23 | --pst-color-accent: #a05ba4; 24 | --pst-color-inline-code: #a05ba4; 25 | --pst-color-inline-code-links: #a05ba4; 26 | --pst-color-table-row-hover-bg: #dddad7; 27 | } 28 | 29 | 30 | /* Separation between methods in the API reference */ 31 | .py.method, .py.attribute{ 32 | border-top: 1px solid #cccccc; 33 | padding-top: 30px; 34 | padding-bottom: 30px; 35 | } 36 | 37 | /* The tutorial buttons on the home page */ 38 | .main-button { 39 | padding-right: 20px; 40 | align-self: center; 41 | } 42 | 43 | /* The tutorial button icons on the home page */ 44 | svg.main-button-icon path { 45 | fill: var(--pst-color-primary); 46 | } 47 | 48 | /* The cross/check marks in the annotation table*/ 49 | svg.marks { 50 | height: 32px; 51 | } 52 | 53 | .dt-container table,.pst-scrollable-table-container table { 54 | font-size: small; 55 | } 56 | 57 | /* The annotation table column containing the cross/check marks */ 58 | table.dataTable 
def linkcode_resolve(domain: str, info: dict[str, Any]) -> str | None:
    """
    Resolve the GitHub URL of the source code for a documented object.

    This is the hook used by the ``sphinx.ext.linkcode`` extension.
    See https://www.sphinx-doc.org/en/master/usage/extensions/linkcode.html.

    Parameters
    ----------
    domain : str
        The Sphinx domain of the documented object.
        Only the ``"py"`` domain is supported.
    info : dict
        Must contain the keys ``"module"`` (importable module name) and
        ``"fullname"`` (attribute name looked up on that module).

    Returns
    -------
    str or None
        The URL pointing to the line range of the object's source on GitHub,
        or ``None`` if no link can be provided.
    """
    # Cheap guard first: nothing else is needed for non-Python objects
    if domain != "py":
        return None

    package = import_module(info["module"])
    try:
        attr = getattr(package, info["fullname"])
    except AttributeError:
        # The attribute is not defined within PLINDER or is part of a class
        # -> do not provide a link
        return None

    module = inspect.getmodule(attr)
    if module is None:
        # The defining module cannot be determined -> no file to link to
        return None

    try:
        source_lines, first = inspect.getsourcelines(attr)
    except (TypeError, OSError):
        # TypeError: the attribute is some special object, e.g. a 'partial' object
        # OSError: the source code cannot be retrieved
        return None
    last = first + len(source_lines) - 1

    # The link targets the tag of the documented version, not the main branch
    base_url = f"https://github.com/plinder-org/plinder/blob/v{plinder.__version__}/src/"
    # NOTE(review): assumes every module maps to '<dotted/path>.py' below 'src/';
    # a package's '__init__.py' would not match this scheme - confirm if needed
    module_path = module.__name__.replace(".", "/")
    return base_url + f"{module_path}.py#L{first}-L{last}"
15 | val_cluster_metric: pocket_qcov 16 | val_cluster_threshold: 50 17 | val_cluster_directed: false 18 | num_representatives: 5 19 | min_community_size: 2 20 | max_leakage_count: 200 21 | mms_unique_quality_count: 2 22 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_0/2.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_qcov 4 | threshold: 20 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 5 13 | min_community_size: 2 14 | max_leakage_count: 200 15 | mms_unique_quality_count: 2 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_0/3.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: protein_seqsim_weighted_max 4 | threshold: 30 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 5 13 | min_community_size: 2 14 | max_leakage_count: 200 15 | mms_unique_quality_count: 2 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_0/4.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: protein_seqsim_weighted_max 4 | threshold: 30 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: 
false 12 | num_representatives: 5 13 | min_community_size: 2 14 | max_leakage_count: 200 15 | mms_unique_quality_count: 2 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_0/5.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pli_qcov 4 | threshold: 30 5 | depth: 2 6 | - metric: protein_lddt_weighted_sum 7 | threshold: 55 8 | depth: 1 9 | sampling_cluster_metric: pli_qcov 10 | sampling_cluster_threshold: 70 11 | sampling_cluster_directed: true 12 | val_cluster_metric: pocket_qcov 13 | val_cluster_threshold: 50 14 | val_cluster_directed: false 15 | num_representatives: 5 16 | min_community_size: 2 17 | max_leakage_count: 200 18 | mms_unique_quality_count: 2 19 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_0/6.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_lddt 4 | threshold: 50 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 5 13 | min_community_size: 2 14 | max_leakage_count: 200 15 | mms_unique_quality_count: 2 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_1/1.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pli_qcov 4 | threshold: 30 5 | depth: 2 6 | - metric: pocket_qcov 7 | threshold: 50 8 | depth: 2 9 | - metric: protein_lddt_weighted_sum 10 | threshold: 70 11 | depth: 1 12 | sampling_cluster_metric: pli_qcov 13 | sampling_cluster_threshold: 70 14 | sampling_cluster_directed: true 15 | 
val_cluster_metric: pocket_qcov 16 | val_cluster_threshold: 50 17 | val_cluster_directed: false 18 | num_representatives: 5 19 | min_community_size: 2 20 | max_leakage_count: 300 21 | mms_unique_quality_count: 2 22 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_1/2.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_qcov 4 | threshold: 20 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 5 13 | min_community_size: 2 14 | max_leakage_count: 300 15 | mms_unique_quality_count: 2 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_1/3.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: protein_seqsim_weighted_max 4 | threshold: 30 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 5 13 | min_community_size: 2 14 | max_leakage_count: 300 15 | mms_unique_quality_count: 2 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_1/4.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: protein_seqsim_weighted_max 4 | threshold: 30 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 
12 | num_representatives: 5 13 | min_community_size: 2 14 | max_leakage_count: 300 15 | mms_unique_quality_count: 2 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_1/5.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pli_qcov 4 | threshold: 30 5 | depth: 2 6 | - metric: protein_lddt_weighted_sum 7 | threshold: 55 8 | depth: 1 9 | sampling_cluster_metric: pli_qcov 10 | sampling_cluster_threshold: 70 11 | sampling_cluster_directed: true 12 | val_cluster_metric: pocket_qcov 13 | val_cluster_threshold: 50 14 | val_cluster_directed: false 15 | num_representatives: 5 16 | min_community_size: 2 17 | max_leakage_count: 300 18 | mms_unique_quality_count: 2 19 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_1/6.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_lddt 4 | threshold: 50 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 5 13 | min_community_size: 2 14 | max_leakage_count: 300 15 | mms_unique_quality_count: 2 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_2/0_5.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pli_qcov 4 | threshold: 30 5 | depth: 2 6 | - metric: protein_lddt_weighted_sum 7 | threshold: 55 8 | depth: 1 9 | sampling_cluster_metric: pli_qcov 10 | sampling_cluster_threshold: 70 11 | sampling_cluster_directed: true 12 | val_cluster_metric: pocket_qcov 13 | val_cluster_threshold: 50 14 | 
val_cluster_directed: false 15 | num_representatives: 5 16 | min_community_size: 2 17 | max_leakage_count: 200 18 | mms_unique_quality_count: 2 19 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_2/1_5.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pli_qcov 4 | threshold: 30 5 | depth: 2 6 | - metric: protein_lddt_weighted_sum 7 | threshold: 55 8 | depth: 1 9 | sampling_cluster_metric: pli_qcov 10 | sampling_cluster_threshold: 70 11 | sampling_cluster_directed: true 12 | val_cluster_metric: pocket_qcov 13 | val_cluster_threshold: 50 14 | val_cluster_directed: false 15 | num_representatives: 5 16 | min_community_size: 2 17 | max_leakage_count: 300 18 | mms_unique_quality_count: 2 19 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_3/1.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pli_qcov 4 | threshold: 30 5 | depth: 2 6 | - metric: pocket_qcov 7 | threshold: 50 8 | depth: 2 9 | - metric: protein_lddt_weighted_sum 10 | threshold: 70 11 | depth: 1 12 | sampling_cluster_metric: pli_qcov 13 | sampling_cluster_threshold: 70 14 | sampling_cluster_directed: true 15 | val_cluster_metric: pocket_qcov 16 | val_cluster_threshold: 50 17 | val_cluster_directed: false 18 | num_representatives: 5 19 | min_sample_cluster_size: 2 20 | max_leakage_count: 400 21 | mms_unique_quality_count: 3 22 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_3/10.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pli_qcov 4 | threshold: 30 5 | depth: 2 6 | - metric: pocket_qcov 7 | threshold: 50 8 | depth: 2 9 | - metric: protein_lddt_weighted_sum 10 | 
threshold: 70 11 | depth: 1 12 | sampling_cluster_metric: pli_qcov 13 | sampling_cluster_threshold: 70 14 | sampling_cluster_directed: true 15 | val_cluster_metric: pocket_qcov 16 | val_cluster_threshold: 50 17 | val_cluster_directed: false 18 | num_representatives: 10 19 | min_sample_cluster_size: 2 20 | max_leakage_count: 400 21 | mms_unique_quality_count: 3 22 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_3/11.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_qcov 4 | threshold: 20 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_lddt 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 5 13 | min_sample_cluster_size: 2 14 | max_leakage_count: 400 15 | mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_3/12.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_qcov 4 | threshold: 20 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_lddt 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 10 13 | min_sample_cluster_size: 2 14 | max_leakage_count: 400 15 | mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_3/13.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_qcov 4 | threshold: 20 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 
| sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 5 13 | min_sample_cluster_size: 2 14 | max_leakage_count: 300 15 | mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_3/14.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_qcov 4 | threshold: 20 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 5 13 | min_sample_cluster_size: 2 14 | max_leakage_count: 500 15 | mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_3/2.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_qcov 4 | threshold: 20 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 5 13 | min_sample_cluster_size: 2 14 | max_leakage_count: 400 15 | mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_3/3.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: protein_seqsim_weighted_max 4 | threshold: 30 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | 
val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 5 13 | min_sample_cluster_size: 2 14 | max_leakage_count: 400 15 | mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_3/5.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_lddt 4 | threshold: 50 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 5 13 | min_sample_cluster_size: 2 14 | max_leakage_count: 400 15 | mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_3/6.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_qcov 4 | threshold: 20 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 10 13 | min_sample_cluster_size: 2 14 | max_leakage_count: 400 15 | mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_3/7.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: protein_seqsim_weighted_max 4 | threshold: 30 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 10 13 | 
min_sample_cluster_size: 2 14 | max_leakage_count: 400 15 | mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_3/9.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_lddt 4 | threshold: 50 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 10 13 | min_sample_cluster_size: 2 14 | max_leakage_count: 400 15 | mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_4/4.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pli_qcov 4 | threshold: 30 5 | depth: 2 6 | - metric: protein_lddt_weighted_sum 7 | threshold: 55 8 | depth: 1 9 | sampling_cluster_metric: pli_qcov 10 | sampling_cluster_threshold: 70 11 | sampling_cluster_directed: true 12 | val_cluster_metric: pocket_qcov 13 | val_cluster_threshold: 50 14 | val_cluster_directed: false 15 | num_representatives: 5 16 | min_sample_cluster_size: 2 17 | max_leakage_count: 400 18 | mms_unique_quality_count: 3 19 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_4/8.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pli_qcov 4 | threshold: 30 5 | depth: 2 6 | - metric: protein_lddt_weighted_sum 7 | threshold: 55 8 | depth: 1 9 | sampling_cluster_metric: pli_qcov 10 | sampling_cluster_threshold: 70 11 | sampling_cluster_directed: true 12 | val_cluster_metric: pocket_qcov 13 | val_cluster_threshold: 50 14 | val_cluster_directed: false 15 
| num_representatives: 10 16 | min_sample_cluster_size: 2 17 | max_leakage_count: 400 18 | mms_unique_quality_count: 3 19 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_5/10.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_lddt 4 | threshold: 50 5 | depth: 1 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 10 13 | min_sample_cluster_size: 2 14 | max_leakage_count: 400 15 | mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_5/9.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_lddt 4 | threshold: 50 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 10 13 | min_sample_cluster_size: 2 14 | max_leakage_count: 400 15 | mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_5/batch_11.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_lddt 4 | threshold: 50 5 | depth: 1 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pli_qcov 10 | val_cluster_threshold: 70 11 | val_cluster_directed: true 12 | num_representatives: 10 13 | min_sample_cluster_size: 2 14 | max_leakage_count: 400 15 | 
mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_5/batch_12.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_lddt 4 | threshold: 50 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pli_qcov 10 | val_cluster_threshold: 70 11 | val_cluster_directed: true 12 | num_representatives: 10 13 | min_sample_cluster_size: 2 14 | max_leakage_count: 400 15 | mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_6/10.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pli_qcov 4 | threshold: 20 5 | depth: 2 6 | - metric: pocket_qcov 7 | threshold: 50 8 | depth: 2 9 | - metric: protein_lddt_weighted_sum 10 | threshold: 70 11 | depth: 1 12 | sampling_cluster_metric: pli_qcov 13 | sampling_cluster_threshold: 70 14 | sampling_cluster_directed: true 15 | val_cluster_metric: pocket_qcov 16 | val_cluster_threshold: 50 17 | val_cluster_directed: false 18 | num_representatives: 5 19 | min_sample_cluster_size: 2 20 | max_leakage_count: 400 21 | mms_unique_quality_count: 3 22 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_6/2.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_qcov 4 | threshold: 20 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 5 13 | min_sample_cluster_size: 2 14 | 
max_leakage_count: 400 15 | mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_6/7.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: protein_seqsim_weighted_max 4 | threshold: 30 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 5 13 | min_sample_cluster_size: 2 14 | max_leakage_count: 400 15 | mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/configs/split_configs/batch_6/9.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_lddt 4 | threshold: 50 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 5 13 | min_sample_cluster_size: 2 14 | max_leakage_count: 400 15 | mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/configs/v2/download_and_make_dbs.yaml: -------------------------------------------------------------------------------- 1 | ingest: 2 | run_specific_stages: download_rcsb_files,download_alternative_datasets,make_dbs 3 | plinder_mount: /plinder 4 | plinder_release: 2024-06 5 | force_update: true 6 | scatter: 7 | two_char_batch_size: 4 8 | -------------------------------------------------------------------------------- /flows/configs/v2/make_batch_scores_only.yaml: -------------------------------------------------------------------------------- 1 | ingest: 2 | 
run_specific_stages: make_batch_scores 3 | plinder_mount: /plinder 4 | plinder_release: 2024-06 5 | scorer: 6 | sub_databases: holo,apo,pred 7 | scatter: 8 | run_batch_searches_batch_size: 10050 9 | make_batch_scores_batch_size: 90 10 | -------------------------------------------------------------------------------- /flows/configs/v2/make_components.yaml: -------------------------------------------------------------------------------- 1 | ingest: 2 | run_specific_stages: make_components_and_communities,make_mmp_index 3 | plinder_mount: /plinder 4 | plinder_release: 2024-06 5 | scatter: 6 | skip_existing_clusters: False 7 | -------------------------------------------------------------------------------- /flows/configs/v2/make_entries_ligands.yaml: -------------------------------------------------------------------------------- 1 | ingest: 2 | run_specific_stages: make_entries,structure_qc,make_system_archives,make_ligands,compute_ligand_fingerprints,make_ligand_scores 3 | plinder_mount: /plinder 4 | plinder_release: 2024-06 5 | scatter: 6 | two_char_batch_size: 1 7 | annotation_batch_size: 120 8 | make_ligands_batch_size: 220 9 | skip_existing_entries: true 10 | skip_missing_annotations: true 11 | -------------------------------------------------------------------------------- /flows/configs/v2/make_leakage.yaml: -------------------------------------------------------------------------------- 1 | ingest: 2 | run_specific_stages: make_leakage 3 | plinder_mount: /plinder 4 | plinder_release: 2024-06 5 | -------------------------------------------------------------------------------- /flows/configs/v2/make_protein_scores.yaml: -------------------------------------------------------------------------------- 1 | ingest: 2 | run_specific_stages: make_sub_dbs,run_batch_searches,make_batch_scores 3 | plinder_mount: /plinder 4 | plinder_release: 2024-06 5 | scorer: 6 | sub_databases: holo,apo,pred 7 | scatter: 8 | run_batch_searches_batch_size: 10050 9 | 
make_batch_scores_batch_size: 111 10 | -------------------------------------------------------------------------------- /flows/configs/v2/make_splits.yaml: -------------------------------------------------------------------------------- 1 | ingest: 2 | run_specific_stages: make_splits 3 | plinder_mount: /plinder 4 | plinder_release: 2024-06 5 | scatter: 6 | split_config_dir: gs://plinder-collab-bucket/metaflow/config/split/v2/ 7 | -------------------------------------------------------------------------------- /flows/configs/v2/split_multi_graph.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pli_qcov 4 | threshold: 20 5 | depth: 2 6 | - metric: pocket_qcov 7 | threshold: 50 8 | depth: 2 9 | - metric: protein_lddt_weighted_sum 10 | threshold: 70 11 | depth: 1 12 | sampling_cluster_metric: pli_qcov 13 | sampling_cluster_threshold: 70 14 | sampling_cluster_directed: true 15 | val_cluster_metric: pocket_qcov 16 | val_cluster_threshold: 50 17 | val_cluster_directed: false 18 | num_representatives: 10 19 | min_sample_cluster_size: 2 20 | max_leakage_count: 400 21 | mms_unique_quality_count: 3 22 | -------------------------------------------------------------------------------- /flows/configs/v2/split_single_graph.yaml: -------------------------------------------------------------------------------- 1 | split: 2 | graph_configs: 3 | - metric: pocket_lddt 4 | threshold: 50 5 | depth: 2 6 | sampling_cluster_metric: pli_qcov 7 | sampling_cluster_threshold: 70 8 | sampling_cluster_directed: true 9 | val_cluster_metric: pocket_qcov 10 | val_cluster_threshold: 50 11 | val_cluster_directed: false 12 | num_representatives: 10 13 | min_sample_cluster_size: 2 14 | max_leakage_count: 400 15 | mms_unique_quality_count: 3 16 | -------------------------------------------------------------------------------- /flows/data_ingest_report.py: 
logging.basicConfig(level=logging.INFO)
LOG = logging.getLogger("proc")


class Proc(Popen):
    """
    An opinionated Popen with an execute method
    to mirror communicate that handles input as strings
    and logs the output line by line as the process runs.

    By default stdin and stdout are pipes, stderr is merged into
    stdout and all pipes are opened in text mode. Output lines are
    only logged; they are not stored on the instance.
    """

    def __init__(
        self,
        *args,
        stdin=PIPE,
        stdout=PIPE,
        stderr=STDOUT,
        text=True,
        **kwargs,
    ):
        super().__init__(
            *args,
            stdin=stdin,
            stdout=stdout,
            stderr=stderr,
            text=text,
            **kwargs,
        )
        # remember how stderr was requested so execute() knows whether
        # there is a separate stderr stream to read
        self._stderr = stderr
        # squelch the static type checkers that
        # see these pipes as None since we're using
        # PIPEs for everything
        self.stdin: io.TextIOWrapper
        self.stdout: io.TextIOWrapper
        self.stderr: io.TextIOWrapper

    def execute(
        self,
        *,
        inputs: Optional[str] = None,
    ) -> None:
        """
        Like Popen.communicate but tail output

        The command is logged first, then `inputs` (if any) is written
        to the child's stdin, stdin is closed, and every output line is
        logged until the stream is exhausted. The process is waited on
        when the method returns. Any exception raised while feeding or
        reading the child is logged and the child is killed; the
        exception is not re-raised.

        Parameters
        ----------
        inputs : str
            a command to pass to the subprocess

        """
        if isinstance(self.args, list):
            LOG.info(" ".join(map(str, self.args)))
        else:
            LOG.info(self.args)

        streams = [("stdout", "stdout"), ("stderr", "stderr")]
        if self._stderr == STDOUT:
            streams = [("stdout", "stream")]
        with self:
            try:
                if inputs is not None:
                    self.stdin.write(inputs)
                # Always signal EOF: a child that reads stdin would otherwise
                # block forever while we block reading its output.
                if self.stdin is not None:
                    self.stdin.close()
                for attr, stream in streams:
                    pipe = getattr(self, attr)
                    if pipe is None:
                        # the caller opted out of piping this stream
                        continue
                    for line in pipe:
                        LOG.info(f"{stream}: {line.rstrip()}")
            except Exception:
                # best effort: record why we gave up, then stop the child
                LOG.exception("error while communicating with subprocess, killing it")
                self.kill()
--cov-report="term-missing:skip-covered" 9 | --cov-fail-under=60 10 | --color=yes 11 | --durations=5 12 | doctest_optionflags = 13 | NORMALIZE_WHITESPACE 14 | ELLIPSIS 15 | testpaths = 16 | tests 17 | filterwarnings = 18 | ignore::DeprecationWarning 19 | ignore::UserWarning 20 | -------------------------------------------------------------------------------- /requirements_data.txt: -------------------------------------------------------------------------------- 1 | networkit == 11.0.0 2 | tabulate 3 | pdb-validation @ git+https://git.scicore.unibas.ch/schwede/ligand-validation.git 4 | mmpdb @ git+https://github.com/rdkit/mmpdb.git 5 | https://download.pytorch.org/whl/cpu/torch-2.5.1%2Bcpu-cp310-cp310-linux_x86_64.whl#sha256=7f91a2200e352745d70e22396bd501448e28350fbdbd8d8b1c83037e25451150 6 | -------------------------------------------------------------------------------- /scripts/write_data_dicionary.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pathlib import Path 3 | 4 | import pandas as pd 5 | 6 | # TODO: is this function used anywhere? 
7 | df = pd.concat([ 8 | pd.read_csv(path, sep='\t') for path in 9 | Path('src/plinder-data/column_descriptions').rglob('*.tsv') 10 | ]).reset_index(drop=True) 11 | with open('src/plinder-data/data_dictionary.md', 'w') as f: 12 | f.write(df.to_markdown() + '\n') 13 | -------------------------------------------------------------------------------- /src/plinder/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | from pathlib import Path 4 | 5 | from ._version import _get_version 6 | 7 | _root = Path(__file__).parent 8 | __version__ = _get_version() 9 | -------------------------------------------------------------------------------- /src/plinder/_version.py: -------------------------------------------------------------------------------- 1 | def _get_version() -> str: 2 | try: 3 | from setuptools_scm import get_version as scm_get_version 4 | 5 | version: str = scm_get_version( 6 | root="../..", 7 | relative_to=__file__, 8 | ) 9 | except (ImportError, LookupError): 10 | try: 11 | from importlib.metadata import PackageNotFoundError 12 | from importlib.metadata import version as importlib_version 13 | 14 | try: 15 | version = importlib_version(__name__.split(".")[0]) 16 | except PackageNotFoundError: 17 | version = "unknown" 18 | except (ImportError, ModuleNotFoundError): 19 | version = "unknown" 20 | 21 | return version 22 | -------------------------------------------------------------------------------- /src/plinder/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | """ 4 | The plinder.core package collects useful functions and classes for interacting 5 | with the PLINDER dataset. 
class StructureTransform:
    """
    Base class for callables that map a Structure to a Structure.

    Calling an instance delegates to `transform`, which subclasses
    must implement.
    """

    def __call__(self, structure: Structure) -> Structure:
        return self.transform(structure)

    def transform(self, structure: Structure) -> Structure:
        """Return the transformed structure; must be overridden."""
        raise NotImplementedError

    def __repr__(self) -> str:
        return self.__class__.__name__


class SelectAtomTypes(StructureTransform):
    """
    Keep only the atoms whose `atom_name` is one of `atom_types`.

    Parameters
    ----------
    atom_types : list[str], default=None
        atom names to keep; when omitted a fresh ["CA"] list is used
    """

    def __init__(self, atom_types: "list[str] | None" = None) -> None:
        # A list literal as the default would be shared by every instance
        # created without arguments, so build a new one per instance.
        self.atom_types = ["CA"] if atom_types is None else atom_types

    def transform(self, structure: Structure) -> Structure:
        return structure.filter("atom_name", self.atom_types)  # type: ignore
@timeit
def query_clusters(
    *,
    columns: list[str] | None = None,
    filters: FILTERS = None,
) -> pd.DataFrame | None:
    """
    Query the cluster database.

    The query is built by `make_query` against the configured clusters
    dataset using CLUSTER_SCHEMA. If no query could be built, a hint
    is logged and None is returned.

    Parameters
    ----------
    columns : list[str], default=None
        the columns to return
    filters : list[tuple[str, str, str]]
        the filters to apply

    Returns
    -------
    df : pd.DataFrame | None
        the cluster results
    """
    settings = get_config()
    clusters_path = cpl.get_plinder_path(rel=settings.data.clusters)
    statement = make_query(
        schema=CLUSTER_SCHEMA,
        dataset=clusters_path,
        filters=filters,
        columns=columns,
        nested=True,
        allow_no_filters=True,
    )
    if statement is not None:
        return sql(statement).to_df()
    LOG.warning("try minimally passing filters=[('metric', '==', 'pli_qcov')]")
    return None
24 | 25 | Parameters 26 | ---------- 27 | columns : list[str], default=None 28 | the columns to return 29 | filters : list[tuple[str, str, str]] 30 | the filters to apply 31 | 32 | Returns 33 | ------- 34 | df : pd.DataFrame | None 35 | the index results 36 | """ 37 | cfg = get_config() 38 | dataset = cpl.get_plinder_path(rel=f"{cfg.data.index}/{cfg.data.index_file}") 39 | if columns is None: 40 | columns = ["system_id", "entry_pdb_id"] 41 | if "system_id" not in columns and "*" not in columns: 42 | columns = ["system_id"] + columns 43 | # START patch-1 44 | # TODO-1: remove this patch after binding_affinity is fixed 45 | if "system_has_binding_affinity" in columns or "ligand_binding_affinity" in columns: 46 | raise ValueError( 47 | "columns containing binding_affinity have been removed until bugfix" 48 | "see: https://github.com/plinder-org/plinder/issues/94" 49 | ) 50 | # END patch-1 51 | query = make_query( 52 | dataset=dataset, 53 | columns=columns, 54 | filters=filters, 55 | allow_no_filters=True, 56 | ) 57 | assert query is not None 58 | df = sql(query).to_df() 59 | # START patch-2 60 | # TODO-2: remove this patch after entry_release_date is fixed 61 | if "entry_release_date" in df.columns: 62 | from importlib import resources 63 | 64 | df_fixed_time = pd.read_csv( 65 | resources.files("plinder") / "data/utils/annotations/static_files/dates.csv" 66 | )[["entry_release_date", "entry_pdb_id"]] 67 | if "entry_pdb_id" not in df.columns: 68 | # hacky fix - assuming standard pdb names - to be removed 69 | df["entry_pdb_id"] = df.system_id.apply(lambda x: x[:4]) 70 | df = df.drop("entry_release_date", axis=1).merge( 71 | df_fixed_time, on="entry_pdb_id" 72 | ) 73 | # END patch-2 74 | if splits is None: 75 | splits = ["train", "val"] 76 | split = cpl.get_plinder_path(rel=f"{cfg.data.splits}/{cfg.data.split_file}") 77 | split_df = pd.read_parquet(split) 78 | split_dict = dict(zip(split_df["system_id"], split_df["split"])) 79 | df["split"] = df["system_id"].map(lambda 
@timeit
def query_links(
    *,
    columns: list[str] | None = None,
    filters: FILTERS = None,
) -> pd.DataFrame:
    """
    Query the linked systems dataset

    Two on-disk layouts are handled. The "new" layout is detected by
    the presence of a parquet file inside a `kind=apo` directory; in
    that case `kind` is read as a column. Otherwise `kind` is derived
    from the name of the file each row came from. Legacy
    `apo_links.parquet` / `pred_links.parquet` files found directly in
    the dataset directory are deleted.

    Parameters
    ----------
    columns : list[str], default=None
        the columns to return; the list passed in is never modified
    filters : list[tuple[str, str, str]]
        the filters to apply

    Returns
    -------
    df : pd.DataFrame
        the linked systems results
    """
    cfg = get_config()
    dataset = cpl.get_plinder_path(rel=cfg.data.links)
    new = any((path.parent.stem == "kind=apo" for path in dataset.rglob("*.parquet")))
    if (dataset / "apo_links.parquet").is_file():
        LOG.warning("found old apo links, removing")
        (dataset / "apo_links.parquet").unlink()
    if (dataset / "pred_links.parquet").is_file():
        LOG.warning("found old pred links, removing")
        (dataset / "pred_links.parquet").unlink()
    # Build a new list instead of appending so the caller's `columns`
    # argument is left untouched.
    if not new and columns and "filename" not in columns:
        # bugfix: necessary for determining the "kind" below
        columns = [*columns, "filename"]
    elif columns and "kind" not in columns:
        columns = [*columns, "kind"]
    query = make_query(
        dataset=dataset,
        filters=filters,
        nested=new,
        columns=columns or ["*"],
        allow_no_filters=True,
        include_filename=not new,
    )
    assert query is not None
    df = sql(query).to_df()
    if not new:
        # e.g. ".../apo_links.parquet" -> "apo"
        df["kind"] = df["filename"].apply(lambda x: Path(x).stem.split("_links")[0])
        df.drop(columns=["filename"], inplace=True)
    return df
/src/plinder/core/structure/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/src/plinder/core/structure/__init__.py -------------------------------------------------------------------------------- /src/plinder/core/structure/models.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | from pydantic import BaseModel, ConfigDict 4 | 5 | 6 | class Base(BaseModel): 7 | model_config = ConfigDict( 8 | extra="forbid", validate_assignment=True, use_enum_values=True 9 | ) 10 | 11 | 12 | class ChainConfig(Base): 13 | """Preparation configuration settings. 14 | 15 | Default configuration for preparation of normalized monomers, 16 | ready for use in PPI generation tasks. 17 | """ 18 | 19 | decoy_receptor: list[str] = ["R"] 20 | decoy_ligand: list[str] = ["L"] 21 | native_receptor: list[str] = ["R"] 22 | native_ligand: list[str] = ["L"] 23 | 24 | 25 | class BackboneDefinition(str, Enum): 26 | # Biotite (standard) C, CA, N 27 | biotite = "biotite" 28 | # DockQ definition: C, CA, N, O 29 | dockq = "dockq" 30 | -------------------------------------------------------------------------------- /src/plinder/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | -------------------------------------------------------------------------------- /src/plinder/core/utils/dataclass.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable, Mapping 2 | from dataclasses import fields, is_dataclass 3 | from typing import Any 4 | 5 | import pandas as pd 6 | from biotite.structure.atoms import AtomArray 7 | 8 | 9 | def atom_array_summary_markdown_repr(array: 
def stringify_dataclass(
    obj: Any, indent: int = 4, _indents: int = 0, verbose_atom_array: bool = False
) -> str:
    """Pretty repr (or print) a (possibly deeply-nested) dataclass.
    Each new block will be indented by `indent` spaces (default is 4).

    Strings are quoted, objects that are neither dataclasses, mappings
    nor iterables are rendered with `str`, and anything exposing a
    `shape` attribute is summarised by its type and shape. With
    `verbose_atom_array=True` an AtomArray, at any nesting depth, is
    rendered as a markdown table instead.

    `_indents` is the current nesting depth and is meant for the
    recursive calls only.

    https://stackoverflow.com/questions/66807878/pretty-print-dataclasses-prettier-with-line-breaks-and-indentation
    """
    if isinstance(obj, str):
        return f"'{obj}'"

    if not is_dataclass(obj) and not isinstance(obj, (Mapping, Iterable)):
        return str(obj)

    if hasattr(obj, "shape"):
        if isinstance(obj, AtomArray) and verbose_atom_array:
            return "\n" + atom_array_summary_markdown_repr(obj)

        return f"{type(obj)} with shape {obj.shape}"

    this_indent = indent * _indents * " "
    next_indent = indent * (_indents + 1) * " "
    # dicts, lists, and tuples will re-assign this
    start, end = f"{type(obj).__name__}(", ")"

    def render(child: Any) -> str:
        # forward every option so nested values are formatted the same
        # way as the top-level object
        return stringify_dataclass(child, indent, _indents + 1, verbose_atom_array)

    if is_dataclass(obj):
        body = "\n".join(
            f"{next_indent}{field.name}={render(getattr(obj, field.name))},"
            for field in fields(obj)
        )

    elif isinstance(obj, Mapping):
        if isinstance(obj, dict):
            start, end = "{", "}"

        body = "\n".join(
            f"{next_indent}{render(key)}: {render(value)},"
            for key, value in obj.items()
        )

    else:  # is Iterable
        if isinstance(obj, list):
            start, end = "[", "]"
        elif isinstance(obj, tuple):
            start = "("

        body = "\n".join(f"{next_indent}{render(item)}," for item in obj)

    return f"{start}\n{body}\n{this_indent}{end}"
T = TypeVar("T")


def retry(func: Callable[..., T]) -> Callable[..., T]:
    """
    Retry `func` up to five times with exponential backoff.

    After a failed attempt number i (1-based) the wrapper logs the
    error and sleeps 2**i seconds before trying again. No sleep
    happens after the last attempt. If every attempt fails, a generic
    Exception is raised, chained to the last error. Only subclasses of
    Exception trigger a retry.
    """

    @wraps(func)
    def inner(*args: Any, **kwargs: Any) -> T:
        name = func.__name__
        mod = func.__module__
        log = setup_logger(".".join([mod, name]))
        retries = 5
        exc: Optional[Exception] = None
        for i in range(1, retries + 1):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                exc = e
                if i == retries:
                    # nothing left to retry, so do not waste time sleeping
                    log.error(f"failed: {repr(e)}, giving up after {retries} attempts")
                    break
                wait = 2**i
                log.error(f"failed: {repr(e)}, retry in: {wait}s")
                sleep(wait)
        raise Exception(f"Timeout error {exc}") from exc

    return inner
# TODO: not used currently
def load_systems(
    *,
    system_ids: Optional[Union[str, list[str]]] = None,
    pdb_ids: Optional[Union[str, list[str]]] = None,
    two_char_codes: Optional[Union[str, list[str]]] = None,
    cfg: Optional[DictConfig] = None,
) -> dict[str, Any]:
    """
    Build PlinderSystem objects for the requested selection.

    `expand_config_context` decides which kind of identifier was
    given. System ids are used directly; PDB ids and two character
    codes are looked up in the manifest through its `entry_pdb_id`
    column (the two character code being characters 1-2 of that id).

    Parameters
    ----------
    system_ids : str | list[str], default=None
        system ids to load
    pdb_ids : str | list[str], default=None
        PDB ids whose systems should be loaded
    two_char_codes : str | list[str], default=None
        two character codes whose systems should be loaded
    cfg : DictConfig, default=None
        the plinder-core config

    Returns
    -------
    dict[str, Any]
        mapping of system id to PlinderSystem
    """
    kind, items = expand_config_context(
        system_ids=system_ids, pdb_ids=pdb_ids, two_char_codes=two_char_codes, cfg=cfg
    )
    if kind == "system_ids":
        return {system_id: PlinderSystem(system_id=system_id) for system_id in items}
    manifest = get_manifest()
    if kind == "pdb_ids":
        # NOTE(review): this branch previously read manifest["pdb_id"] while
        # the two-char branch used "entry_pdb_id"; aligned on the latter,
        # confirm against the manifest schema.
        keys = manifest["entry_pdb_id"]
    else:
        # Compute the code on the side rather than adding a column to the
        # frame handed out by get_manifest().
        keys = manifest["entry_pdb_id"].str[1:3]
    systems = {}
    for item in items:
        for system_id in manifest.loc[keys == item, "system_id"].to_list():
            systems[system_id] = PlinderSystem(system_id=system_id)
    return systems
LOGGING_FORMAT: str = "%(asctime)s | %(name)s:%(lineno)d | %(levelname)s : %(message)s"
try:
    DEFAULT_LOGGING_LEVEL: int = int(os.getenv("PLINDER_LOG_LEVEL", "20"))
except ValueError:
    # A non-numeric PLINDER_LOG_LEVEL falls back to INFO.
    DEFAULT_LOGGING_LEVEL = logging.INFO


class PlinderLoggingError(Exception):
    pass


def setup_logger(
    logger_name: str | None = None,
    log_level: int = DEFAULT_LOGGING_LEVEL,
    log_file: str | None = None,
    propagate: bool = False,
) -> logging.Logger:
    """
    Setup logger for the module name as the logger name by default
    for easy tracing of what's happening in the code

    Handlers are only created when they are actually going to be attached:
    a stream handler when the logger has no handlers yet, and a file
    handler when ``log_file`` is given and no file handler is attached yet.
    Repeated calls therefore neither duplicate handlers nor open (and leak)
    additional log files.

    Parameters
    ----------
    logger_name : str
        Name of the logger
    log_level : int
        Log level
    log_file: str | None
        optional log file to write to
    propagate : bool
        propagate log events to parent loggers, default = False

    Returns
    -------
    logging.Logger:
        logger object

    Examples
    --------
    >>> logger = setup_logger("some_logger_name")
    >>> logger.name
    'some_logger_name'
    >>> logger.level
    20
    >>> logger = setup_logger(log_level=logging.DEBUG)
    >>> logger.name
    'log.py'
    >>> logger.level
    10
    """

    if logger_name is None:
        # Get module name as the logger name, this is copied from:
        # https://stackoverflow.com/questions/13699283/how-to-get-the-callers-filename-method-name-in-python
        frame = inspect.stack()[1]
        module = inspect.getmodule(frame[0])
        file_path = __file__ if module is None else module.__file__
        logger_name = os.path.basename(file_path) if file_path is not None else "log"

    # Set up logger with the given logger name
    logger = logging.getLogger(logger_name)
    # Check if logging level has been set externally otherwise first pass logger.level == 0 (NOTSET)
    set_level = not bool(logger.level)
    if set_level:
        logger.setLevel(log_level)
    formatter = logging.Formatter(LOGGING_FORMAT)

    if not logger.handlers:
        handler = logging.StreamHandler()
        if set_level:
            handler.setLevel(log_level)
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    # logging.FileHandler opens its file on construction, so only build one
    # when it will be attached; otherwise the open handle would be leaked.
    if log_file is not None and not any(
        isinstance(h, logging.FileHandler) for h in logger.handlers
    ):
        file_handler = logging.FileHandler(log_file)
        file_handler.setFormatter(formatter)
        if set_level:
            file_handler.setLevel(log_level)
        logger.addHandler(file_handler)
    logger.propagate = propagate

    return logger
pa.string()), 31 | ("component", pa.dictionary(pa.int32(), pa.string())), 32 | ("metric", pa.dictionary(pa.int32(), pa.string())), 33 | ("directed", pa.dictionary(pa.int32(), pa.string())), 34 | ("threshold", pa.int8()), 35 | ] 36 | ) 37 | 38 | 39 | CLUSTER_SCHEMA = pa.schema( 40 | [ 41 | ("system_id", pa.string()), 42 | ("label", pa.string()), 43 | ("metric", pa.string()), 44 | ("cluster", pa.string()), 45 | ("directed", pa.bool_()), 46 | ("threshold", pa.int8()), 47 | ] 48 | ) 49 | 50 | 51 | TANIMOTO_SCORE_SCHEMA = pa.schema( 52 | [ 53 | pa.field("query_ligand_id", pa.int32()), 54 | pa.field("target_ligand_id", pa.int32()), 55 | pa.field("tanimoto_similarity_max", pa.int8()), 56 | ] 57 | ) 58 | 59 | 60 | CLUSTER_DATASET_SCHEMA = pa.schema( 61 | [ 62 | ("metric", pa.string()), 63 | ("directed", pa.bool_()), 64 | ("threshold", pa.int8()), 65 | ("system_id", pa.string()), 66 | ("component", pa.string()), 67 | ] 68 | ) 69 | 70 | 71 | SPLIT_DATASET_SCHEMA = pa.schema( 72 | [ 73 | ("system_id", pa.string()), 74 | ("split", pa.string()), 75 | ("cluster", pa.string()), 76 | ("cluster_for_val_split", pa.string()), 77 | ] 78 | ) 79 | 80 | 81 | # subject to criteria used in save_linked_structures.py 82 | # TODO: this schema is now out of date since addition of 83 | # scores.json contents but it now contains >50 columns 84 | STRUCTURE_LINK_SCHEMA = pa.schema( 85 | [ 86 | ("query_system", pa.string()), 87 | ("target_system", pa.string()), 88 | ("protein_qcov_weighted_sum", pa.float32()), 89 | ("protein_fident_weighted_sum", pa.float32()), 90 | ("pocket_fident", pa.float32()), 91 | ("target_id", pa.string()), 92 | ("sort_score", pa.float32()), 93 | ] 94 | ) 95 | -------------------------------------------------------------------------------- /src/plinder/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | from 
# plinder.data cannot work without its compiled dependencies, so fail at
# import time with installation guidance instead of a bare import error.
try:
    import ost  # noqa
    import networkit  # noqa
except ImportError as exc:
    # ModuleNotFoundError is a subclass of ImportError, so catching
    # ImportError alone covers both; chain the cause to keep the original
    # failure (which module, and why) visible in the traceback.
    raise ImportError(
        dedent(
            """\
            plinder.data requires the OpenStructureToolkit >= 2.8.0 (ost) and networkit == 11.0.0 to be installed.
            Please refer to the documentation for installation instructions and current limitations.
            See details here:

            https://plinder-org.github.io/plinder/contribution/development.html#creating-the-conda-environment
            """
        )
    ) from exc
16 | """ 17 | 18 | import warnings 19 | 20 | 21 | def _get_version() -> str: 22 | try: 23 | # Our first choice would be to get the version from setuptools_scm if it 24 | # is installed (only way that works with editable installs) 25 | from setuptools_scm import get_version as scm_get_version 26 | 27 | version: str = scm_get_version(root="../..", relative_to=__file__) 28 | except (ImportError, LookupError): 29 | from importlib.metadata import PackageNotFoundError 30 | from importlib.metadata import version as importlib_version 31 | 32 | try: 33 | # Our second choice is to try to get the version from importlib 34 | version = importlib_version("plinder") 35 | except PackageNotFoundError: 36 | # We will land here if our package isn't actually installed 37 | warnings.warn("Neither our package nor setuptools_scm are installed") 38 | version = "unknown" 39 | version = "unknown" 40 | return version 41 | -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/src/plinder/data/column_descriptions/__init__.py -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/entry.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | entry_pdb_id str RCSB PDB ID. See https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_entry.id.html 3 | entry_release_date str RCSB structure release date. See https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_database_PDB_rev.date_original.html 4 | entry_oligomeric_state str | None Author's provided description of quaternary structure in RCSB. 
See https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_pdbx_struct_assembly.oligomeric_details.html 5 | entry_determination_method str | None RCSB method of structure determination. See https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_exptl.method.html 6 | entry_keywords str | None RCSB keywords describing the structure. See https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_struct_keywords.pdbx_keywords.html 7 | entry_pH str | None pH at which structure is solved. See https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_exptl_crystal_grow.pH.html 8 | entry_resolution float | None RCSB structure resolution. See https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_refine.ls_d_res_high.html 9 | -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/entry_validation.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | entry_validation_resolution float Resolution of the PDB entry 3 | entry_validation_rfree float The similarity between the observed structure-factor amplitudes and those calculated from the model. Rfree should be higher than R because it is calculated using reflections not used in the refinement. See https://www.wwpdb.org/validation/XrayValidationReportHelp 4 | entry_validation_r float The similarity between the observed structure-factor amplitudes and those calculated from the model. See https://www.wwpdb.org/validation/XrayValidationReportHelp 5 | entry_validation_clashscore float The Molprobity Clashscore is an approximation of the overall severity of the clashes in a structure, which is defined as the number of clashes per 1000 atoms (including hydrogens). 
See https://www.wwpdb.org/validation/XrayValidationReportHelp 6 | entry_validation_percent_rama_outliers float | None The percentage of Ramachandran outliers with respect to the total number of residues in the entry for which the outlier assessment is available. See https://www.wwpdb.org/validation/XrayValidationReportHelp 7 | entry_validation_percent_rota_outliers float | None The percentage of residues with an unusual sidechain conformation with respect to the total number of residues for which the assessment is available. See https://www.wwpdb.org/validation/XrayValidationReportHelp 8 | entry_validation_data_completeness float | None The number of expected diffraction spots is a function of data resolution and the space group. This metric describes the number of recorded reflections as a percentage of the number expected. See https://www.wwpdb.org/validation/XrayValidationReportHelp 9 | entry_validation_percent_RSRZ_outliers float | None The percentage Real-Space R-value Z-score outliers with respect to the total number of residues for which RSRZ was computed. See https://www.wwpdb.org/validation/XrayValidationReportHelp 10 | entry_validation_atom_count int Number of atoms in the asymmetric unit of the PDB entry 11 | entry_validation_molprobity float | None Overall Molprobity "effective resolution", a single-score validation number based on the correlation of multiple criteria with crystallographic resolution. 
as described here: https://github.com/rlabduke/MolProbity/blob/6e7512e85bdea23f7ffb16e606d1f9a0abf6e5d4/cmdline/molparser.py#L662 12 | entry_validation_mean_b_factor float Mean B-value calculated over all modelled atoms 13 | entry_validation_median_b_factor float Median B-value calculated over all modelled atoms 14 | entry_validation_pdbx_resolution float See https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_refine.ls_d_res_high.html 15 | entry_validation_pdbx_reflns_resolution float | None See https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_reflns.d_resolution_high.html 16 | entry_validation_meanI_over_sigI_obs float | None Each reflection has an intensity (I) and an uncertainty in measurement (σ(I)), thus this metric describes the signal-to-noise ratio 17 | entry_validation_r_minus_rfree float The difference between r and rfree 18 | -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/extra.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | uniqueness str Uniqueness identifier differentiating systems which are simple crystal symmetries in a biounit 3 | biounit_num_ligands int Number of ligands within systems in the biounit 4 | biounit_num_unique_ccd_codes int Number of unique CCD codes within systems in the biounit 5 | biounit_num_proper_ligands int Number of proper ligands within systems in the biounit 6 | system_ligand_has_lipinski bool Whether the system has a lipinski ligand 7 | system_ligand_has_cofactor bool Whether the system has a cofactor ligand 8 | system_ligand_has_fragment bool Whether the system has a fragment ligand 9 | system_ligand_has_oligo bool Whether the system has an oligomer ligand 10 | system_ligand_has_artifact bool Whether the system has an artifact ligand 11 | system_ligand_has_other bool Whether the system has a ligand labelled as other 12 | system_ligand_has_covalent bool Whether 
the system has a covalent ligand 13 | system_ligand_has_invalid bool Whether the system has an invalid ligand 14 | system_ligand_has_ion bool Whether the system has an ion 15 | system_protein_chains_total_length int Total length of all protein chains in the system 16 | system_unique_ccd_codes str Unique CCD codes in the system 17 | system_proper_unique_ccd_codes str Unique CCD codes of proper ligands in the system 18 | entry_pass_validation_criteria bool Whether the entry passes crystal validation criteria 19 | system_pass_validation_criteria bool Whether the system passes crystal validation criteria 20 | ligand_interactions list[str] Interactions of the ligand 21 | ligand_auth_id str Author chain ID of the ligand 22 | -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/ligand_interacting_ligand_chains.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | ligand_interacting_ligand_chains_asym_id list[str] Chain asymmetric id 3 | ligand_interacting_ligand_chains_auth_id list[str] Chain author id 4 | ligand_interacting_ligand_chains_entity_id list[str] Chain entity id 5 | ligand_interacting_ligand_chains_length list[int] SEQRES length 6 | ligand_interacting_ligand_chains_num_unresolved_residues list[int] Number of unresolved residues (SEQRES length - len(residues)) 7 | -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/ligand_interacting_ligand_chains_validation.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | ligand_interacting_ligand_chains_validation_num_residues list[int] Number of residues in the list 3 | ligand_interacting_ligand_chains_validation_num_processed_residues list[int] Number of processed residues in the list 4 | ligand_interacting_ligand_chains_validation_percent_processed_residues 
list[float] Percentage of processed residues in the list 5 | ligand_interacting_ligand_chains_validation_average_rsr list[float] Average Real-Space R-value across all residues in the list 6 | ligand_interacting_ligand_chains_validation_average_rsrz list[float] Average Real-Space R-value Z-score across all residues in the list 7 | ligand_interacting_ligand_chains_validation_average_rscc list[float] Average Real-Space Correlation Coefficient across all residues in the list 8 | ligand_interacting_ligand_chains_validation_average_occupancy list[float] Average occupancy across all residues in the list 9 | ligand_interacting_ligand_chains_validation_percent_rsr_under_threshold list[float] Percentage of residues with RSR under the threshold 10 | ligand_interacting_ligand_chains_validation_percent_rscc_over_threshold list[float] Percentage of residues with RSCC over the threshold 11 | ligand_interacting_ligand_chains_validation_percent_occupancy_over_threshold list[float] Percentage of residues with occupancy over the threshold 12 | ligand_interacting_ligand_chains_validation_average_b_factor list[float] Average B factor across all residues in the list 13 | ligand_interacting_ligand_chains_validation_unknown_residue_count list[int] Number of unknown residues in the list 14 | ligand_interacting_ligand_chains_validation_atom_count list[int] Number of atoms across all residues in the list 15 | ligand_interacting_ligand_chains_validation_heavy_atom_count list[int] Number of heavy atoms across all residues in the list 16 | ligand_interacting_ligand_chains_validation_num_unresolved_heavy_atoms list[int] Number of unresolved heavy atoms across all residues in the list 17 | ligand_interacting_ligand_chains_validation_max_alt_count list[int] The highest number of configurations in a single residue in the list 18 | ligand_interacting_ligand_chains_validation_percent_outliers_chirality list[float] Percent outliers for chirality 19 | 
ligand_interacting_ligand_chains_validation_percent_outliers_clashes list[float] Percent outliers for clashes 20 | ligand_interacting_ligand_chains_validation_percent_outliers_density list[float] Percent outliers for density 21 | ligand_interacting_ligand_chains_validation_percent_outliers_geometry list[float] Percent outliers for geometry 22 | -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/ligand_neighboring_ligand_chains.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | ligand_neighboring_ligand_chains_asym_id list[str] Chain asymmetric id 3 | ligand_neighboring_ligand_chains_auth_id list[str] Chain author id 4 | ligand_neighboring_ligand_chains_entity_id list[str] Chain entity id 5 | ligand_neighboring_ligand_chains_length list[int] SEQRES length 6 | ligand_neighboring_ligand_chains_num_unresolved_residues list[int] Number of unresolved residues (SEQRES length - len(residues)) 7 | -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/ligand_neighboring_ligand_chains_validation.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | ligand_neighboring_ligand_chains_validation_num_residues list[int] Number of residues in the list 3 | ligand_neighboring_ligand_chains_validation_num_processed_residues list[int] Number of processed residues in the list 4 | ligand_neighboring_ligand_chains_validation_percent_processed_residues list[float] Percentage of processed residues in the list 5 | ligand_neighboring_ligand_chains_validation_average_rsr list[float] Average Real-Space R-value across all residues in the list 6 | ligand_neighboring_ligand_chains_validation_average_rsrz list[float] Average Real-Space R-value Z-score across all residues in the list 7 | ligand_neighboring_ligand_chains_validation_average_rscc 
list[float] Average Real-Space Correlation Coefficient across all residues in the list 8 | ligand_neighboring_ligand_chains_validation_average_occupancy list[float] Average occupancy across all residues in the list 9 | ligand_neighboring_ligand_chains_validation_percent_rsr_under_threshold list[float] Percentage of residues with RSR under the threshold 10 | ligand_neighboring_ligand_chains_validation_percent_rscc_over_threshold list[float] Percentage of residues with RSCC over the threshold 11 | ligand_neighboring_ligand_chains_validation_percent_occupancy_over_threshold list[float] Percentage of residues with occupancy over the threshold 12 | ligand_neighboring_ligand_chains_validation_average_b_factor list[float] Average B factor across all residues in the list 13 | ligand_neighboring_ligand_chains_validation_unknown_residue_count list[int] Number of unknown residues in the list 14 | ligand_neighboring_ligand_chains_validation_atom_count list[int] Number of atoms across all residues in the list 15 | ligand_neighboring_ligand_chains_validation_heavy_atom_count list[int] Number of heavy atoms across all residues in the list 16 | ligand_neighboring_ligand_chains_validation_num_unresolved_heavy_atoms list[int] Number of unresolved heavy atoms across all residues in the list 17 | ligand_neighboring_ligand_chains_validation_max_alt_count list[int] The highest number of configurations in a single residue in the list 18 | ligand_neighboring_ligand_chains_validation_percent_outliers_chirality list[float] Percent outliers for chirality 19 | ligand_neighboring_ligand_chains_validation_percent_outliers_clashes list[float] Percent outliers for clashes 20 | ligand_neighboring_ligand_chains_validation_percent_outliers_density list[float] Percent outliers for density 21 | ligand_neighboring_ligand_chains_validation_percent_outliers_geometry list[float] Percent outliers for geometry 22 | -------------------------------------------------------------------------------- 
/src/plinder/data/column_descriptions/ligand_protein_chains.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | ligand_protein_chains_asym_id list[str] Chain asymmetric id 3 | ligand_protein_chains_auth_id list[str] Chain author id 4 | ligand_protein_chains_entity_id list[str] Chain entity id 5 | ligand_protein_chains_length list[int] SEQRES length 6 | ligand_protein_chains_num_unresolved_residues list[int] Number of unresolved residues (SEQRES length - len(residues)) 7 | -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/ligand_protein_chains_validation.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | ligand_protein_chains_validation_num_residues list[int] Number of residues in the list 3 | ligand_protein_chains_validation_num_processed_residues list[int] Number of processed residues in the list 4 | ligand_protein_chains_validation_percent_processed_residues list[float] Percentage of processed residues in the list 5 | ligand_protein_chains_validation_average_rsr list[float] Average Real-Space R-value across all residues in the list 6 | ligand_protein_chains_validation_average_rsrz list[float] Average Real-Space R-value Z-score across all residues in the list 7 | ligand_protein_chains_validation_average_rscc list[float] Average Real-Space Correlation Coefficient across all residues in the list 8 | ligand_protein_chains_validation_average_occupancy list[float] Average occupancy across all residues in the list 9 | ligand_protein_chains_validation_percent_rsr_under_threshold list[float] Percentage of residues with RSR under the threshold 10 | ligand_protein_chains_validation_percent_rscc_over_threshold list[float] Percentage of residues with RSCC over the threshold 11 | ligand_protein_chains_validation_percent_occupancy_over_threshold list[float] Percentage of residues 
with occupancy over the threshold 12 | ligand_protein_chains_validation_average_b_factor list[float] Average B factor across all residues in the list 13 | ligand_protein_chains_validation_unknown_residue_count list[int] Number of unknown residues in the list 14 | ligand_protein_chains_validation_atom_count list[int] Number of atoms across all residues in the list 15 | ligand_protein_chains_validation_heavy_atom_count list[int] Number of heavy atoms across all residues in the list 16 | ligand_protein_chains_validation_num_unresolved_heavy_atoms list[int] Number of unresolved heavy atoms across all residues in the list 17 | ligand_protein_chains_validation_max_alt_count list[int] The highest number of configurations in a single residue in the list 18 | ligand_protein_chains_validation_percent_outliers_chirality list[float] Percent outliers for chirality 19 | ligand_protein_chains_validation_percent_outliers_clashes list[float] Percent outliers for clashes 20 | ligand_protein_chains_validation_percent_outliers_density list[float] Percent outliers for density 21 | ligand_protein_chains_validation_percent_outliers_geometry list[float] Percent outliers for geometry 22 | -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/qc.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | all_ligand_chains_present bool all the ligand chains detected for the system are present in the system CIF, PDB and SDF files 3 | all_protein_chains_present bool all the protein chains detected for the system are present in the system CIF, PDB and SDF files 4 | complex_loadbable_via_biotite bool the complex can successfully be loaded with the biotite library 5 | ligand_is_obabel_loadable bool the ligand can successfully be loaded with the OpenBabel library 6 | ligand_is_obabel_loadable_with_rdkit_fix bool the ligand can successfully be loaded with the OpenBabel library 
after fixing valence and chirality issues 7 | ligand_is_rdkit_loadable bool the ligand can successfully be loaded with the RDKit library 8 | ligand_is_rdkit_loadable_with_fix bool the ligand can successfully be loaded with the RDKit library after fixing valence and chirality issues 9 | ligand_loadbable_via_biotite bool the ligand can successfully be loaded with the biotite library 10 | ligand_matches_smiles_atom_num bool the number of atoms in the saved ligand matches the expected number from the resolved SMILES 11 | ligand_positions_correct bool the positions of the saved ligand atoms match the expected positions from the PDB entry 12 | ligand_protein_neighbor_still_preserved_complex bool the neighboring protein chains of the saved complex match what was annotated from the PDB entry 13 | ligand_rdkit_validation list[str] list of RDKit fixes performed 14 | ligand_molvs_validation float [DESCRIPTION] 15 | -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/system.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | system_biounit_id str Biounit ID 3 | system_protein_chains_asym_id list[str] Interacting protein chains of the system 4 | system_id_no_biounit str ID of the system without the biounit 5 | system_ligand_chains list[str] Ligand chains of the system 6 | system_num_pocket_residues int Number of pocket residues of the system 7 | system_proper_num_pocket_residues int Number of pocket residues of the system excluding ions and artifacts 8 | system_num_interactions int Number of interactions of the system 9 | system_proper_num_interactions int Number of interactions of the system excluding ions and artifacts 10 | system_num_unique_interactions int Number of unique interactions of the system 11 | system_proper_num_unique_interactions int Number of unique interactions of the system excluding ions and artifacts 12 | system_num_covalent_ligands int Number of covalent ligands of the system 13 | 
system_proper_num_covalent_ligands int Number of covalent ligands of the system excluding ions and artifacts 14 | system_id str ID of the system 15 | system_type str Type of the system (one of: holo, ion, artifact) 16 | system_has_kinase_inhibitor bool Whether the system has a kinase inhibitor 17 | system_has_binding_affinity bool Whether any ligand in the system has a binding affinity from BindingDB 18 | system_num_protein_chains int Number of interacting protein chains of the system 19 | system_proper_num_protein_chains int Number of interacting protein chains of the system excluding ions and artifacts 20 | system_num_ligand_chains int Number of ligand chains of the system 21 | system_proper_num_ligand_chains int Number of ligand chains of the system excluding ions and artifacts 22 | system_num_crystal_contacted_residues int Number of residues from other symmetry mates which are in contact with any ligand in the system. 23 | system_num_atoms_with_crystal_contacts int Number of atoms in the system ligands which are in contact with residues from other symmetry mates. 24 | system_num_heavy_atoms int | None Number of heavy atoms in the system ligands 25 | system_num_resolved_heavy_atoms int | None Number of resolved heavy atoms in the system ligands 26 | system_ligand_max_qed float Maximum QED of the system ligands 27 | system_ligand_max_molecular_weight float Maximum molecular weight of the system ligands 28 | system_proper_ligand_max_molecular_weight float Maximum molecular weight of the system ligands excluding ions and artifacts 29 | system_num_unresolved_heavy_atoms int | None Number of unresolved heavy atoms in the system ligands 30 | system_fraction_atoms_with_crystal_contacts float | None Fraction of atoms in the system ligands which are in contact with residues from other symmetry mates. 
31 | -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/system_ligand_chains.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | system_ligand_chains_asym_id list[str] Chain asymmetric id 3 | system_ligand_chains_auth_id list[str] Chain author id 4 | system_ligand_chains_entity_id list[str] Chain entity id 5 | system_ligand_chains_length list[int] SEQRES length 6 | system_ligand_chains_num_unresolved_residues list[int] Number of unresolved residues (SEQRES length - len(residues)) 7 | -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/system_ligand_chains_validation.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | system_ligand_chains_validation_num_residues list[int] Number of residues in the list 3 | system_ligand_chains_validation_num_processed_residues list[int] Number of processed residues in the list 4 | system_ligand_chains_validation_percent_processed_residues list[float] Percentage of processed residues in the list 5 | system_ligand_chains_validation_average_rsr list[float] Average Real-Space R-value across all residues in the list 6 | system_ligand_chains_validation_average_rsrz list[float] Average Real-Space R-value Z-score across all residues in the list 7 | system_ligand_chains_validation_average_rscc list[float] Average Real-Space Correlation Coefficient across all residues in the list 8 | system_ligand_chains_validation_average_occupancy list[float] Average occupancy across all residues in the list 9 | system_ligand_chains_validation_percent_rsr_under_threshold list[float] Percentage of residues with RSR under the threshold 10 | system_ligand_chains_validation_percent_rscc_over_threshold list[float] Percentage of residues with RSCC over the threshold 11 | 
system_ligand_chains_validation_percent_occupancy_over_threshold list[float] Percentage of residues with occupancy over the threshold 12 | system_ligand_chains_validation_average_b_factor list[float] Average B factor across all residues in the list 13 | system_ligand_chains_validation_unknown_residue_count list[int] Number of unknown residues in the list 14 | system_ligand_chains_validation_atom_count list[int] Number of atoms across all residues in the list 15 | system_ligand_chains_validation_heavy_atom_count list[int] Number of heavy atoms across all residues in the list 16 | system_ligand_chains_validation_num_unresolved_heavy_atoms list[int] Number of unresolved heavy atoms across all residues in the list 17 | system_ligand_chains_validation_max_alt_count list[int] The highest number of configurations in a single residue in the list 18 | system_ligand_chains_validation_percent_outliers_chirality list[float] Percent outliers for chirality 19 | system_ligand_chains_validation_percent_outliers_clashes list[float] Percent outliers for clashes 20 | system_ligand_chains_validation_percent_outliers_density list[float] Percent outliers for density 21 | system_ligand_chains_validation_percent_outliers_geometry list[float] Percent outliers for geometry 22 | -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/system_ligand_validation.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | system_ligand_validation_num_residues int Number of residues in the list 3 | system_ligand_validation_num_processed_residues int Number of processed residues in the list 4 | system_ligand_validation_percent_processed_residues float Percentage of processed residues in the list 5 | system_ligand_validation_average_rsr float Average Real-Space R-value across all residues in the list 6 | system_ligand_validation_average_rsrz float Average Real-Space R-value Z-score 
across all residues in the list 7 | system_ligand_validation_average_rscc float Average Real-Space Correlation Coefficient across all residues in the list 8 | system_ligand_validation_average_occupancy float Average occupancy across all residues in the list 9 | system_ligand_validation_percent_rsr_under_threshold float Percentage of residues with RSR under the threshold 10 | system_ligand_validation_percent_rscc_over_threshold float Percentage of residues with RSCC over the threshold 11 | system_ligand_validation_percent_occupancy_over_threshold float Percentage of residues with occupancy over the threshold 12 | system_ligand_validation_average_b_factor float Average B factor across all residues in the list 13 | system_ligand_validation_unknown_residue_count int Number of unknown residues in the list 14 | system_ligand_validation_atom_count int Number of atoms across all residues in the list 15 | system_ligand_validation_heavy_atom_count int Number of heavy atoms across all residues in the list 16 | system_ligand_validation_num_unresolved_heavy_atoms int Number of unresolved heavy atoms across all residues in the list 17 | system_ligand_validation_max_alt_count int The highest number of configurations in a single residue in the list 18 | system_ligand_validation_percent_outliers_chirality float Percent outliers for chirality 19 | system_ligand_validation_percent_outliers_clashes float Percent outliers for clashes 20 | system_ligand_validation_percent_outliers_density float Percent outliers for density 21 | system_ligand_validation_percent_outliers_geometry float Percent outliers for geometry 22 | -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/system_pocket.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | system_pocket_CATH str CATH domain for the pocket 3 | system_pocket_ECOD str ECOD domain for the pocket 4 | 
system_pocket_ECOD_t_name str ECOD_t_name domain for the pocket 5 | system_pocket_Pfam str Pfam domain for the pocket 6 | system_pocket_SCOP2 str SCOP2 domain for the pocket 7 | system_pocket_SCOP2B str SCOP2B domain for the pocket 8 | system_pocket_PANTHER str PANTHER domain for the pocket 9 | system_pocket_UniProt str UniProt domain for the pocket 10 | system_pocket_kinase_name str kinase_name domain for the pocket 11 | -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/system_pocket_validation.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | system_pocket_validation_num_residues int Number of residues in the list 3 | system_pocket_validation_num_processed_residues int Number of processed residues in the list 4 | system_pocket_validation_percent_processed_residues float Percentage of processed residues in the list 5 | system_pocket_validation_average_rsr float Average Real-Space R-value across all residues in the list 6 | system_pocket_validation_average_rsrz float Average Real-Space R-value Z-score across all residues in the list 7 | system_pocket_validation_average_rscc float Average Real-Space Correlation Coefficient across all residues in the list 8 | system_pocket_validation_average_occupancy float Average occupancy across all residues in the list 9 | system_pocket_validation_percent_rsr_under_threshold float Percentage of residues with RSR under the threshold 10 | system_pocket_validation_percent_rscc_over_threshold float Percentage of residues with RSCC over the threshold 11 | system_pocket_validation_percent_occupancy_over_threshold float Percentage of residues with occupancy over the threshold 12 | system_pocket_validation_average_b_factor float Average B factor across all residues in the list 13 | system_pocket_validation_unknown_residue_count int Number of unknown residues in the list 14 | 
system_pocket_validation_atom_count int Number of atoms across all residues in the list 15 | system_pocket_validation_heavy_atom_count int Number of heavy atoms across all residues in the list 16 | system_pocket_validation_num_unresolved_heavy_atoms int Number of unresolved heavy atoms across all residues in the list 17 | system_pocket_validation_max_alt_count int The highest number of configurations in a single residue in the list 18 | system_pocket_validation_percent_outliers_chirality float Percent outliers for chirality 19 | system_pocket_validation_percent_outliers_clashes float Percent outliers for clashes 20 | system_pocket_validation_percent_outliers_density float Percent outliers for density 21 | system_pocket_validation_percent_outliers_geometry float Percent outliers for geometry 22 | -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/system_protein_chains.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | system_protein_chains_asym_id list[str] Chain asymmetric id 3 | system_protein_chains_auth_id list[str] Chain author id 4 | system_protein_chains_entity_id list[str] Chain entity id 5 | system_protein_chains_length list[int] SEQRES length 6 | system_protein_chains_num_unresolved_residues list[int] Number of unresolved residues (SEQRES length - len(residues)) 7 | -------------------------------------------------------------------------------- /src/plinder/data/column_descriptions/system_protein_chains_validation.tsv: -------------------------------------------------------------------------------- 1 | Name Type Description 2 | system_protein_chains_validation_num_residues list[int] Number of residues in the list 3 | system_protein_chains_validation_num_processed_residues list[int] Number of processed residues in the list 4 | system_protein_chains_validation_percent_processed_residues list[float] Percentage of processed 
residues in the list 5 | system_protein_chains_validation_average_rsr list[float] Average Real-Space R-value across all residues in the list 6 | system_protein_chains_validation_average_rsrz list[float] Average Real-Space R-value Z-score across all residues in the list 7 | system_protein_chains_validation_average_rscc list[float] Average Real-Space Correlation Coefficient across all residues in the list 8 | system_protein_chains_validation_average_occupancy list[float] Average occupancy across all residues in the list 9 | system_protein_chains_validation_percent_rsr_under_threshold list[float] Percentage of residues with RSR under the threshold 10 | system_protein_chains_validation_percent_rscc_over_threshold list[float] Percentage of residues with RSCC over the threshold 11 | system_protein_chains_validation_percent_occupancy_over_threshold list[float] Percentage of residues with occupancy over the threshold 12 | system_protein_chains_validation_average_b_factor list[float] Average B factor across all residues in the list 13 | system_protein_chains_validation_unknown_residue_count list[int] Number of unknown residues in the list 14 | system_protein_chains_validation_atom_count list[int] Number of atoms across all residues in the list 15 | system_protein_chains_validation_heavy_atom_count list[int] Number of heavy atoms across all residues in the list 16 | system_protein_chains_validation_num_unresolved_heavy_atoms list[int] Number of unresolved heavy atoms across all residues in the list 17 | system_protein_chains_validation_max_alt_count list[int] The highest number of configurations in a single residue in the list 18 | system_protein_chains_validation_percent_outliers_chirality list[float] Percent outliers for chirality 19 | system_protein_chains_validation_percent_outliers_clashes list[float] Percent outliers for clashes 20 | system_protein_chains_validation_percent_outliers_density list[float] Percent outliers for density 21 | 
system_protein_chains_validation_percent_outliers_geometry list[float] Percent outliers for geometry 22 | -------------------------------------------------------------------------------- /src/plinder/data/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | -------------------------------------------------------------------------------- /src/plinder/data/common/_version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | -------------------------------------------------------------------------------- /src/plinder/data/common/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | from plinder.core.utils.log import setup_logger 4 | 5 | LOG = setup_logger(__name__) 6 | 7 | LOG.warning( 8 | "This module is deprecated. Please use plinder.core.utils.constants instead." 9 | ) 10 | -------------------------------------------------------------------------------- /src/plinder/data/common/log.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | from plinder.core.utils.log import setup_logger 4 | 5 | LOG = setup_logger(__name__) 6 | 7 | LOG.warning("This module is deprecated. 
Please use plinder.core.utils.log instead.") 8 | -------------------------------------------------------------------------------- /src/plinder/data/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | -------------------------------------------------------------------------------- /src/plinder/data/structure/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | -------------------------------------------------------------------------------- /src/plinder/data/structure/atoms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | from plinder.core.utils.log import setup_logger 4 | 5 | LOG = setup_logger(__name__) 6 | 7 | LOG.warning( 8 | "This module is deprecated. Please use plinder.core.structure.atoms instead." 9 | ) 10 | -------------------------------------------------------------------------------- /src/plinder/data/structure/contacts.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | from plinder.core.utils.log import setup_logger 4 | 5 | LOG = setup_logger(__name__) 6 | 7 | LOG.warning( 8 | "This module is deprecated. Please use plinder.core.structure.contacts instead." 
9 | ) 10 | -------------------------------------------------------------------------------- /src/plinder/data/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | -------------------------------------------------------------------------------- /src/plinder/data/utils/annotations/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | -------------------------------------------------------------------------------- /src/plinder/data/utils/annotations/static_files/artifacts_badlist.csv: -------------------------------------------------------------------------------- 1 | # ARTIFACTS CURATED IN APRIL 2024 2 | # using additional inspection on top of commonly appearing ligands 3 | # comparing to BIOLIP, AF2 and RFAA artifact list 4 | # cleaning them for drug-like molecules and cofactors 5 | # removing entries that are filtered by other default filters 6 | # update: May 31, 2024 7 | # whitelisted 7 entries for being valid amino acid analogs: 8 | # MSE, SEP, TPO, ABA, HCS, MEG, OXM 9 | 02U 10 | 12P 11 | 13P 12 | 144 13 | 15P 14 | 16P 15 | 1EM 16 | 1PE 17 | 1PG 18 | 1PS 19 | 2DP 20 | 2JC 21 | 2NV 22 | 2OP 23 | 2PE 24 | 32M 25 | 33O 26 | 3HR 27 | 3PG 28 | 3SY 29 | 3V3 30 | 543 31 | 6JZ 32 | 6PE 33 | 7E8 34 | 7E9 35 | 7I7 36 | 7N5 37 | 7PE 38 | 7PG 39 | 7PH 40 | 90A 41 | 9FO 42 | 9JE 43 | 9YU 44 | AAE 45 | AE3 46 | AE4 47 | AGA 48 | AKR 49 | AUC 50 | B3H 51 | B3P 52 | B4T 53 | B4X 54 | BAM 55 | BCN 56 | BDN 57 | BE7 58 | BEN 59 | BET 60 | BEZ 61 | BGL 62 | BHG 63 | BNG 64 | BNZ 65 | BOG 66 | BTB 67 | BU1 68 | BXC 69 | C10 70 | C14 71 | C8E 72 | CAC 73 | CAD 74 | CAQ 75 | CD4 76 | CE1 77 | CE9 78 | CHT 79 | CIT 80 | CN3 81 | CN6 82 | CPS 83 | CXE 84 | CXS 85 | D10
86 | D12 87 | D1D 88 | D22 89 | DAO 90 | DD9 91 | DDQ 92 | DDR 93 | DEP 94 | DET 95 | DHB 96 | DHJ 97 | DIO 98 | DKA 99 | DMF 100 | DMI 101 | DMR 102 | DOX 103 | DPG 104 | DR6 105 | DRE 106 | DTD 107 | DTT 108 | DTU 109 | DTV 110 | E4N 111 | EAP 112 | EEE 113 | EPE 114 | ETE 115 | ETF 116 | ETX 117 | F09 118 | F4R 119 | FJO 120 | FTT 121 | FW5 122 | GLV 123 | GOL 124 | GVT 125 | GYF 126 | HAE 127 | HAI 128 | HCA 129 | HED 130 | HEX 131 | HEZ 132 | HP6 133 | HSG 134 | HSH 135 | HT3 136 | HTG 137 | HTH 138 | HTO 139 | HZA 140 | I3C 141 | ICT 142 | IHP 143 | IHS 144 | IMD 145 | IPH 146 | JDJ 147 | K12 148 | KDO 149 | L1P 150 | L2C 151 | L2P 152 | L3P 153 | L4P 154 | LAC 155 | LDA 156 | LI1 157 | LMR 158 | LMT 159 | LMU 160 | LUT 161 | M2M 162 | MAC 163 | MAE 164 | MB3 165 | MBN 166 | MBO 167 | MC3 168 | ME2 169 | MES 170 | MLA 171 | MLI 172 | MLT 173 | MPD 174 | MPO 175 | MRD 176 | MYR 177 | N8E 178 | NBN 179 | NET 180 | NEX 181 | NHE 182 | O4B 183 | OCT 184 | OES 185 | OGA 186 | OP2 187 | OTE 188 | P03 189 | P15 190 | P1O 191 | P22 192 | P25 193 | P2K 194 | P33 195 | P3G 196 | P4C 197 | P4G 198 | P4K 199 | P6G 200 | PA8 201 | PC8 202 | PD7 203 | PE3 204 | PE4 205 | PE5 206 | PE6 207 | PE7 208 | PE8 209 | PEG 210 | PEP 211 | PEU 212 | PEX 213 | PG0 214 | PG4 215 | PG5 216 | PG6 217 | PG8 218 | PGE 219 | PGF 220 | PGO 221 | PGR 222 | PHB 223 | PHQ 224 | PL9 225 | PLC 226 | PMS 227 | PPI 228 | PQ9 229 | PQE 230 | PTD 231 | PUT 232 | PVO 233 | PX2 234 | PX4 235 | QGT 236 | QJE 237 | QLB 238 | RG1 239 | RWB 240 | SAR 241 | SGM 242 | SIN 243 | SOG 244 | SP5 245 | SPD 246 | SPJ 247 | SPM 248 | SPZ 249 | SQU 250 | SRT 251 | TAM 252 | TAR 253 | TAU 254 | TBU 255 | TCE 256 | TCN 257 | TEA 258 | TFA 259 | THE 260 | TLA 261 | TMA 262 | TOE 263 | TRD 264 | TRS 265 | UMQ 266 | UND 267 | V1J 268 | VX 269 | XAT 270 | XP4 271 | XPA 272 | XPE 273 | Y69 274 | -------------------------------------------------------------------------------- 
class DocBaseModel(BaseModel):
    """
    Pydantic base model that can describe its own fields and properties.

    The class methods below collect the description and type of every model
    field and of every ``property`` / ``cached_property`` declared directly on
    the class, and can dump them as a three-column TSV table
    (``Name``, ``Type``, ``Description``).
    """

    @classmethod
    def get_descriptions_and_types(cls) -> dict[str, tuple[str | None, str | None]]:
        """
        Returns a dictionary mapping attribute and property names to their descriptions and types.

        Returns:
        --------
        dict[str, tuple[str | None, str | None]]
            A dictionary mapping attribute and property names to a
            ``(description, type)`` tuple. Either element may be None when
            the field has no description or the property getter has no
            return annotation.
        """
        # ``cls.__annotations__`` only contains annotations declared on ``cls``
        # itself, while ``model_fields`` also lists inherited fields. Merge the
        # annotations along the MRO (base classes first, so that subclasses
        # override) to avoid a KeyError for inherited fields.
        annotations: dict[str, Any] = {}
        for klass in reversed(cls.__mro__):
            annotations.update(getattr(klass, "__annotations__", {}))

        descriptions = {}
        for name, value in cls.model_fields.items():
            # Fall back to the annotation stored on the field itself if the
            # name is not found in any class-level annotations.
            dtype = annotations.get(name, value.annotation)
            descriptions[name] = (value.description, dtype)

        # Only properties declared directly on ``cls`` are inspected here.
        for name, prop in cls.__dict__.items():
            if isinstance(prop, (cached_property, property)):
                # ``cached_property`` keeps its getter in ``func``, a plain
                # ``property`` keeps it in ``fget`` (which may be None).
                getter = prop.func if isinstance(prop, cached_property) else prop.fget
                dtype = None
                if getter is not None:
                    dtype = getattr(getter, "__annotations__", {}).get("return", None)
                descriptions[name] = (prop.__doc__, dtype)
        return descriptions

    @classmethod
    def document_properties(
        cls, prefix: str
    ) -> Generator[tuple[str, str | None, str], Any, Any]:
        """
        Yield one ``(name, type, description)`` row per documented attribute.

        Parameters
        ----------
        prefix : str
            Prefix for the reported names. Names that do not already start
            with it are reported as ``f"{prefix}_{name}"``.

        Yields
        ------
        tuple[str, str | None, str]
            The (possibly prefixed) name, the type, and the description
            collapsed onto a single line.
        """
        for field, field_info in cls.get_descriptions_and_types().items():
            description, dtype = field_info
            if field.startswith(prefix):
                name = field
            else:
                name = f"{prefix}_{field}"
            if description:
                descr = description.lstrip().replace("\n", " ")
                # Descriptions starting with a double underscore mark entries
                # that are left out of the generated documentation.
                if descr.startswith("__"):
                    continue
            else:
                descr = "[DESCRIPTION MISSING]"
            # Report "pass_criteria" names as "pass_validation_criteria",
            # unless the name already mentions "validation".
            if "pass_criteria" in name and "validation" not in name:
                name = name.replace("pass_criteria", "pass_validation_criteria")
            yield (name, dtype, descr.strip())

    @classmethod
    def document_properties_to_tsv(
        cls, prefix: str, filename: Path, nested: bool = False
    ) -> None:
        """
        Write the rows of :meth:`document_properties` to a TSV file.

        Parameters
        ----------
        prefix : str
            Passed through to :meth:`document_properties`.
        filename : Path
            Output file; it is overwritten if it exists.
        nested : bool
            If True, every type is written wrapped as ``list[<type>]``.
        """
        # Explicit encoding so the output does not depend on the platform's
        # default locale.
        with open(filename, "w", encoding="utf-8") as tsv:
            tsv.write("\t".join(["Name", "Type", "Description"]) + "\n")
            for name, dtype, descr in cls.document_properties(prefix):
                type_repr = f"list[{str(dtype)}]" if nested else str(dtype)
                tsv.write("\t".join([name, type_repr, descr]) + "\n")
# noqa 7 | except (ImportError, ModuleNotFoundError): 8 | raise ImportError( 9 | dedent( 10 | """\ 11 | plinder.eval requires the OpenStructureToolkit >= 2.8.0 (ost) to be installed. 12 | Please refer to the documentation for installation instructions and current limitations. 13 | See details here: 14 | 15 | https://plinder-org.github.io/plinder/contribution/development.html#creating-the-conda-environment 16 | """ 17 | ) 18 | ) 19 | -------------------------------------------------------------------------------- /src/plinder/eval/docking/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | -------------------------------------------------------------------------------- /src/plinder/methods/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | -------------------------------------------------------------------------------- /tests/core/test_core_system.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | from plinder.core import index 6 | 7 | 8 | @pytest.mark.parametrize( 9 | "system_id", 10 | [ 11 | "19hc__1__1.A_1.B__1.D_1.L_1.Q_1.S_1.U", 12 | "19hc__1__1.A_1.B__1.E_1.F_1.H_1.J_1.O", 13 | "19hc__1__1.A_1.B__1.G", 14 | "19hc__1__1.A_1.B__1.K_1.M_1.N", 15 | "19hc__1__1.A_1.B__1.R", 16 | "19hc__1__1.A_1.B__1.V_1.X_1.Y", 17 | "19hc__1__1.A_1.B__1.W", 18 | 
"19hc__1__1.A__1.I", 19 | "19hc__1__1.B__1.T", 20 | ], 21 | ) 22 | def test_plinder_system(system_id, read_plinder_mount): 23 | index.PlinderSystem(system_id=system_id).system 24 | 25 | 26 | @pytest.mark.parametrize( 27 | "system_id", 28 | [ 29 | "19hc__1__1.A__1.C", 30 | "19hc__1__1.B__1.P", 31 | ], 32 | ) 33 | def test_plinder_system_fails(system_id, read_plinder_mount): 34 | with pytest.raises(ValueError): 35 | index.PlinderSystem(system_id=system_id).system 36 | 37 | 38 | def test_plinder_system_system_files(read_plinder_mount): 39 | system_id = "19hc__1__1.A_1.B__1.V_1.X_1.Y" 40 | s = index.PlinderSystem(system_id=system_id) 41 | assert len(s.structures) == 9 42 | assert len(s.ligand_sdfs) == 3 43 | assert len(s.system_cif) 44 | assert len(s.receptor_cif) 45 | assert len(s.receptor_pdb) 46 | assert len(s.sequences) 47 | assert s.chain_mapping is not None and len(s.chain_mapping) 48 | assert s.water_mapping is not None and len(s.water_mapping) 49 | assert Path(s.system_cif).is_file() 50 | assert Path(s.receptor_cif).is_file() 51 | assert Path(s.receptor_pdb).is_file() 52 | assert Path(s.sequences_fasta).is_file() 53 | assert isinstance(s.chain_mapping, dict) 54 | assert isinstance(s.water_mapping, dict) 55 | 56 | 57 | def test_plinder_structure(read_plinder_mount): 58 | system_id = "1avd__1__1.A__1.C" 59 | s = index.PlinderSystem(system_id=system_id) 60 | holo_struc = s.holo_structure 61 | ligand_mols = holo_struc.ligand_mols 62 | # test the mask order for smiles 63 | assert np.all( 64 | ligand_mols["1.C"][3][0] 65 | == np.array([[13, 4, 5, 7, 9, 10, 1, 0, 3, 6, 8, 12, 11, 2]]) 66 | ) 67 | assert holo_struc.protein_sequence is not None 68 | assert len(holo_struc.protein_sequence) 69 | assert holo_struc.protein_atom_array is not None 70 | assert len(holo_struc.protein_atom_array) 71 | assert holo_struc.ligand_sdfs is not None 72 | assert len(holo_struc.ligand_sdfs) 73 | -------------------------------------------------------------------------------- 
/tests/core/test_data_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | def test_data_loader(read_plinder_mount): 4 | from plinder.core.loader import PlinderDataset 5 | 6 | ds = PlinderDataset(split="removed", use_alternate_structures=False) 7 | assert len(ds[0]) 8 | -------------------------------------------------------------------------------- /tests/core/test_dataclass.py: -------------------------------------------------------------------------------- 1 | from plinder.core.utils.dataclass import ( 2 | atom_array_summary_markdown_repr, 3 | stringify_dataclass, 4 | ) 5 | 6 | 7 | def test_stringify_dataclass(read_plinder_mount): 8 | from plinder.core import PlinderSystem 9 | 10 | system_id = "19hc__1__1.A_1.B__1.V_1.X_1.Y" 11 | system = PlinderSystem(system_id=system_id) 12 | struct = system.holo_structure 13 | assert isinstance(stringify_dataclass(struct), str) 14 | 15 | 16 | def test_markdown_repr(read_plinder_mount): 17 | from plinder.core import PlinderSystem 18 | 19 | system_id = "19hc__1__1.A_1.B__1.V_1.X_1.Y" 20 | system = PlinderSystem(system_id=system_id) 21 | struct = system.holo_structure 22 | markdown = atom_array_summary_markdown_repr(struct.protein_atom_array) 23 | assert isinstance(markdown, str) 24 | -------------------------------------------------------------------------------- /tests/core/test_gcs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | import os 4 | 5 | import pytest 6 | from omegaconf import DictConfig 7 | from plinder.core.utils import gcs 8 | 9 | 10 | class _Blob: 11 | def __init__(self, name): 12 | self.name = name 13 | 14 | def download_as_bytes(self): 15 | return b"test" 16 | 17 | def download_to_filename(self, local_path): 18 
def test_real_download():
    """
    Download the release README and check how its content starts.

    No ``mock_buckets`` fixture is requested here, so this presumably talks
    to the real bucket rather than the stubbed one.
    """
    readme = gcs.download_as_str(
        gcs_path="gs://plinder/2024-04/v1/README.md", cfg=CONF
    )
    # The result of ``startswith`` used to be discarded, so the test could
    # never fail on unexpected content; assert it explicitly.
    assert readme.startswith("plinder-data")
def mock_path(*, rel: str = "", download: bool = False, force_progress: bool = False):
    """
    Build a local path from the PLINDER_* environment variables.

    The root is ``<PLINDER_MOUNT>/<PLINDER_BUCKET>/<PLINDER_RELEASE>``; an
    unset variable contributes the literal text ``"None"``. ``download`` and
    ``force_progress`` are accepted but not used.

    Returns the root itself when ``rel`` is empty, otherwise ``root / rel``.
    """
    env_names = ("PLINDER_MOUNT", "PLINDER_BUCKET", "PLINDER_RELEASE")
    root = Path("/".join(str(os.getenv(env_name)) for env_name in env_names))
    if rel:
        return root / rel
    return root
@pytest.mark.parametrize(
    ["smiles", "num_charged_atoms"],
    [
        ["CC(=O)OCCN(C)(C)C", 0],
        ["[O-]C(=[O])CC[NH+](C)(C)", 0],
        ["[O-]C(=[OH+])CC[NH+](C)(C)", 0],
        ["OC(=[O])CC[N+](C)(C)(C)", 1],
        ["[O-]C(=[O])CC[N+](C)(C)(C)", 2],
        ["[O-]C(=[O])C.C[N+](C)(C)(C)", 2],
    ],
)
def test_uncharge_mol(smiles, num_charged_atoms):
    """
    Check how many atoms still carry a formal charge after ``uncharge_mol``.
    """
    from plinder.core.structure.smallmols_utils import uncharge_mol

    # Parse without sanitization, then run the molecule through uncharge_mol.
    parsed = Chem.MolFromSmiles(smiles, sanitize=False)
    neutralized = uncharge_mol(parsed)

    still_charged = 0
    for atom in neutralized.GetAtoms():
        if atom.GetFormalCharge() != 0:
            still_charged += 1
    assert still_charged == num_charged_atoms
def test_matched_templates():
    """
    Assign bond orders to a multi-fragment molecule from a matched template.

    The fragment SMILES is written with single bonds only; after the template
    is matched and its bond orders are applied, the canonical SMILES must
    contain at least two double bonds and equal the expected string.
    """
    from plinder.core.structure.smallmols_utils import (
        get_matched_template_v2,
        mol_assigned_bond_orders_by_template,
    )

    reference = Chem.MolFromSmiles("F[C@@](Br)(Cl)CCCNCc1cc(C(=O)N/C=C/C(OC)=O)ccc1")
    fragments = Chem.MolFromSmiles("FC(Cl)(Br)C.CNCC1CCCCC1.CCC(OC)O")

    matched = get_matched_template_v2(reference, fragments)
    repaired = mol_assigned_bond_orders_by_template(matched, fragments)

    canonical = Chem.CanonSmiles(Chem.MolToSmiles(repaired))
    assert canonical.count("=") >= 2
    assert canonical == "C=CC(=O)OC.CC(F)(Cl)Br.CNCc1ccccc1"
def test_superimpose_chain(read_plinder_mount):
    """
    Superimpose one 19hc holo structure onto another and check the outcome.

    ``superimpose`` is called on the first holo structure with the second as
    argument. The returned object must be a ``Structure`` whose chain ``1.A``
    protein atoms have the same array shape as before the superposition.
    The refined RMSD must be below 2.0 and must not exceed the raw RMSD by
    0.01 or more.

    ``read_plinder_mount`` is requested only as a fixture; it is not used
    directly in the body.
    """
    # TODO: review if this test is still relevant
    chain_id = "1.A"
    first = PlinderSystem(system_id="19hc__1__1.A_1.B__1.G").holo_structure
    second = PlinderSystem(system_id="19hc__1__1.A_1.B__1.V_1.X_1.Y").holo_structure

    chain_before = first.protein_atom_array[
        first.protein_atom_array.chain_id == chain_id
    ]

    superimposed, raw_rmsd, refined_rmsd = first.superimpose(second)
    assert isinstance(superimposed, Structure)

    # Superposition must not add or drop atoms of the selected chain.
    chain_after = superimposed.protein_atom_array[
        superimposed.protein_atom_array.chain_id == chain_id
    ]
    assert chain_after.shape == chain_before.shape

    # Alignment quality: refinement may leave the RMSD essentially unchanged
    # (within 0.01) or lower it, and the final value must stay small.
    assert abs(raw_rmsd - refined_rmsd) < 0.01 or raw_rmsd > refined_rmsd
    assert refined_rmsd < 2.0
@pytest.mark.parametrize(
    "value, raises",
    [
        (0, True),
        (1, False),
        (2, False),
    ],
)
def test_foldseek_config(value, raises):
    """
    Validate the ``alignment_type`` argument of ``FoldseekConfig``.

    Cases flagged with ``raises`` (``alignment_type=0``) must raise
    ``ValueError``; the others (``1`` and ``2``) must construct without error.
    """
    if not raises:
        config.FoldseekConfig(alignment_type=value)
        return
    with pytest.raises(ValueError):
        config.FoldseekConfig(alignment_type=value)
@pytest.mark.parametrize(
    "inputs, expected",
    [
        ({"batch_size": 4, "two_char_codes": []}, [4, 4]),
        ({"batch_size": 4, "two_char_codes": []}, [4, 3]),
        ({"two_char_codes": ["xx"], "batch_size": 4}, [1]),
    ],
)
def test_scatter_download_rcsb_files(inputs, expected, tmp_path):
    """
    Check the chunk sizes produced by ``tasks.scatter_download_rcsb_files``.

    ``io.rsync_rcsb`` and ``io.list_rcsb`` are replaced with in-memory fakes
    for the duration of the call. ``expected`` lists the length each returned
    chunk must have, compared pairwise in order.

    The original module attributes are restored in a ``finally`` block so a
    failing assertion (or an exception inside the task) cannot leak the fakes
    into other tests.
    """
    codes = ["aa", "bb", "cc", "dd", "ee", "ff", "gg", "hh"]
    _orig_rsync_rcsb = io.rsync_rcsb
    _orig_list_rcsb = io.list_rcsb
    io.rsync_rcsb = lambda **kws: codes
    # The fake listing returns all eight codes when the first two expected
    # sizes are equal and drops the last code otherwise. The comparison is
    # evaluated only if the fake is actually called.
    io.list_rcsb = lambda **kws: codes if expected[0] == expected[1] else codes[:-1]
    try:
        chunks = tasks.scatter_download_rcsb_files(data_dir=tmp_path, **inputs)
        # NOTE(review): zip() stops at the shorter sequence, so a wrong number
        # of chunks is not detected here - confirm whether a length check
        # should be added.
        for chunk, expect in zip(chunks, expected):
            assert len(chunk) == expect
    finally:
        io.rsync_rcsb = _orig_rsync_rcsb
        io.list_rcsb = _orig_list_rcsb
def test_make_components_and_communities(write_plinder_mount):
    """
    Running ``make_components_and_communities`` must add entries under
    ``<mount>/clusters``.

    Everything found recursively below the ``clusters`` directory is counted
    before and after the call (metric ``pocket_lddt``, threshold ``50``); the
    count must strictly increase.
    """
    from plinder.data.clusters import make_components_and_communities

    cluster_dir = write_plinder_mount / "clusters"
    entries_before = sum(1 for _ in cluster_dir.rglob("*"))

    make_components_and_communities(
        data_dir=write_plinder_mount, metric="pocket_lddt", threshold=50
    )

    entries_after = sum(1 for _ in cluster_dir.rglob("*"))
    assert entries_before < entries_after
def test_save_linked_structures(write_plinder_mount):
    """
    Build the holo links file, save the linked structures and read the links
    file back as a DataFrame.

    The pipeline calls remain best-effort: an exception raised while building,
    saving or reading is reported on stdout and does not fail the test. The
    assertion itself runs in the ``else`` branch, so an ``AssertionError`` can
    no longer be swallowed by the blanket handler.
    """
    from plinder.data import save_linked_structures

    links_file = write_plinder_mount / "linked_structures" / "holo_links.parquet"
    cfg = save_linked_structures.LinkedStructureConfig(
        filter_criteria={
            "pocket_lddt": 0,
            "protein_fident_qcov_weighted_sum": 0,
        }
    )

    try:
        save_linked_structures.make_linked_structures_data_file(
            data_dir=write_plinder_mount,
            search_db="holo",
            superposed_folder=write_plinder_mount / "linked_staging",
            output_file=links_file,
            cfg=cfg,
            num_processes=1,
        )

        save_linked_structures.save_linked_structures(
            links_file=links_file,
            data_dir=write_plinder_mount,
            search_db="holo",
            output_folder=write_plinder_mount / "linked_structures",
            num_threads=1,
        )

        df = pd.read_parquet(links_file)
    except Exception as e:
        # NOTE(review): the original silently ignored every failure here;
        # the tolerance is kept, but the cause is now printed. Confirm whether
        # this test should fail outright once the fixture data supports it.
        print(e)
    else:
        assert isinstance(df, pd.DataFrame)
https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/eval/index/annotation_table.parquet -------------------------------------------------------------------------------- /tests/test_data/eval/predicted_poses/1ai5__1__1.A_1.B__1.D/rank1.sdf: -------------------------------------------------------------------------------- 1 | 2 | RDKit 3D 3 | 4 | 13 13 0 0 0 0 0 0 0 0999 V2000 5 | 14.3900 41.2165 36.8972 O 0 0 0 0 0 0 0 0 0 0 0 0 6 | 14.7868 40.6349 37.9260 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 15.8499 41.1096 38.6874 O 0 0 0 0 0 0 0 0 0 0 0 0 8 | 14.1217 39.3999 38.3700 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 14.2344 38.3620 37.3250 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 14.9642 37.1961 37.5279 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 15.0642 36.2278 36.5413 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 14.4360 36.4044 35.3291 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | 13.7096 37.5577 35.1181 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 13.0579 37.7548 33.8781 N 0 0 0 0 0 4 0 0 0 0 0 0 15 | 12.6538 38.8966 33.6005 O 0 0 0 0 0 0 0 0 0 0 0 0 16 | 12.8940 36.6896 33.0293 O 0 0 0 0 0 1 0 0 0 0 0 0 17 | 13.6068 38.5386 36.1140 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 1 2 2 0 19 | 2 3 1 0 20 | 2 4 1 0 21 | 4 5 1 0 22 | 5 6 2 0 23 | 6 7 1 0 24 | 7 8 2 0 25 | 8 9 1 0 26 | 9 10 1 0 27 | 10 11 2 0 28 | 10 12 1 0 29 | 9 13 2 0 30 | 13 5 1 0 31 | M CHG 2 10 1 12 -1 32 | M END 33 | $$$$ 34 | -------------------------------------------------------------------------------- /tests/test_data/eval/predictions.csv: -------------------------------------------------------------------------------- 1 | id,reference_system_id,receptor_file,rank,confidence,ligand_file 2 | 1ai5__1__1.A_1.B__1.D,1ai5__1__1.A_1.B__1.D,,1,1.0,tests/test_data/eval/predicted_poses/1ai5__1__1.A_1.B__1.D/rank1.sdf 3 | 1a3b__1__1.B__1.D,1a3b__1__1.B__1.D,,1,1.0,tests/test_data/eval/predicted_poses/1a3b__1__1.B__1.D/rank1.sdf 4 | -------------------------------------------------------------------------------- /tests/test_data/eval/results.csv: 
-------------------------------------------------------------------------------- 1 | Subset,No. systems,Top n,Success Rate (%),Median RMSD,Stdev RMSD,Mean lDDT-PLI,Stdev lDDT-PLI 2 | all,2,1,50.0,2.6411634035685183,1.0239794879969462,0.6844228286926055,0.17372764152683012 3 | novel_pocket_pli,2,1,50.0,2.6411634035685183,1.0239794879969462,0.6844228286926055,0.17372764152683012 4 | novel_ligand,2,1,50.0,2.6411634035685183,1.0239794879969462,0.6844228286926055,0.17372764152683012 5 | novel_protein,2,1,50.0,2.6411634035685183,1.0239794879969462,0.6844228286926055,0.17372764152683012 6 | novel_all,2,1,50.0,2.6411634035685183,1.0239794879969462,0.6844228286926055,0.17372764152683012 7 | -------------------------------------------------------------------------------- /tests/test_data/eval/scores/search_db=holo/small_score.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/eval/scores/search_db=holo/small_score.parquet -------------------------------------------------------------------------------- /tests/test_data/eval/splits/split.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/eval/splits/split.parquet -------------------------------------------------------------------------------- /tests/test_data/eval/systems/a3.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/eval/systems/a3.zip -------------------------------------------------------------------------------- /tests/test_data/eval/systems/ai.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/eval/systems/ai.zip -------------------------------------------------------------------------------- /tests/test_data/kinase_ligand_ccd_codes.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/kinase_ligand_ccd_codes.parquet -------------------------------------------------------------------------------- /tests/test_data/kinase_uniprotac.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/kinase_uniprotac.parquet -------------------------------------------------------------------------------- /tests/test_data/mini_score_dataset.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/mini_score_dataset.parquet -------------------------------------------------------------------------------- /tests/test_data/mini_score_seq_dataset.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/mini_score_seq_dataset.parquet -------------------------------------------------------------------------------- /tests/test_data/mini_structure_checks_report.tsv: -------------------------------------------------------------------------------- 1 | system_id ligand_instance ligand_asym_id ligand_is_rdkit_loadable ligand_is_rdkit_loadable_with_fix ligand_is_obabel_loadable ligand_is_obabel_loadable_with_rdkit_fix ligand_matches_smiles_atom_num ligand_positions_correct ligand_loadbable_via_biotite 
ligand_molvs_validation ligand_rdkit_validation complex_loadbable_via_biotite ligand_protein_neighbor_still_preserved_complex all_ligand_chains_present all_protein_chains_present 2 | 1fbz__1__1.A__1.C 1 C True True True True True True True [] [] True True True True 3 | 3fbp__1__1.A_1.B__1.C 1 C True True True True True True True [] [] True True True True 4 | 1fbh__1__1.A_1.B__1.E_1.F 1 E True True True True True True True [] [] True True True True 5 | 1fbh__1__1.A_1.B__1.E_1.F 1 F True True True True True True True [] [] True True True True 6 | -------------------------------------------------------------------------------- /tests/test_data/mini_system_files_new/1fbh__1__1.A_1.B__1.E_1.F/chain_mapping.json: -------------------------------------------------------------------------------- 1 | {"1.A": "A", "1.B": "B", "1.E": "a", "1.F": "b"} -------------------------------------------------------------------------------- /tests/test_data/mini_system_files_new/1fbh__1__1.A_1.B__1.E_1.F/ligand_files/1.E.sdf: -------------------------------------------------------------------------------- 1 | 1.E 2 | 3 | 4 | 25 25 0 0 0 0 999 V2000 5 | 17.9190 82.4080 9.9820 P 0 0 0 0 0 0 6 | 18.5670 82.1420 8.6850 O 0 0 0 0 0 0 7 | 17.5240 83.8080 10.3210 O 0 0 0 0 0 0 8 | 16.6470 81.4410 10.0610 O 0 0 0 0 0 0 9 | 18.8580 81.9330 11.1840 O 0 0 0 0 0 0 10 | 18.8730 82.6060 12.4470 C 0 0 0 0 0 0 11 | 17.9540 81.8590 13.3990 C 0 0 0 0 0 0 12 | 17.0070 81.1890 12.6460 O 0 0 0 0 0 0 13 | 18.6440 80.8600 14.3450 C 0 0 0 0 0 0 14 | 19.7490 80.2560 13.6910 O 0 0 0 0 0 0 15 | 18.9850 81.7180 15.5620 C 0 0 0 0 0 0 16 | 19.1070 81.0110 16.7950 O 0 0 0 0 0 0 17 | 17.7700 82.6620 15.6370 C 0 0 0 0 0 0 18 | 17.3380 82.8370 14.2730 O 0 0 0 0 0 0 19 | 18.1140 83.9900 16.2700 C 0 0 0 0 0 0 20 | 17.1830 84.9760 15.8160 O 0 0 0 0 0 0 21 | 16.4600 85.9800 16.8590 P 0 0 0 0 0 0 22 | 16.7410 85.4650 18.2680 O 0 0 0 0 0 0 23 | 17.1190 87.2990 16.5300 O 0 0 0 0 0 0 24 | 14.9750 85.9890 16.5300 O 0 0 0 0 0 0 25 
| 16.8780 80.6580 10.6450 H 0 0 0 0 0 0 26 | 16.6150 81.9170 11.9830 H 0 0 0 0 0 0 27 | 19.4330 80.1170 12.8340 H 0 0 0 0 0 0 28 | 18.6300 81.5030 17.5940 H 0 0 0 0 0 0 29 | 17.3510 87.7580 17.2640 H 0 0 0 0 0 0 30 | 1 2 2 0 0 0 31 | 1 3 1 0 0 0 32 | 1 4 1 0 0 0 33 | 1 5 1 0 0 0 34 | 4 21 1 0 0 0 35 | 5 6 1 0 0 0 36 | 6 7 1 0 0 0 37 | 7 8 1 0 0 0 38 | 7 9 1 0 0 0 39 | 7 14 1 0 0 0 40 | 8 22 1 0 0 0 41 | 9 10 1 0 0 0 42 | 9 11 1 0 0 0 43 | 10 23 1 0 0 0 44 | 11 12 1 0 0 0 45 | 11 13 1 0 0 0 46 | 12 24 1 0 0 0 47 | 13 14 1 0 0 0 48 | 13 15 1 0 0 0 49 | 15 16 1 0 0 0 50 | 16 17 1 0 0 0 51 | 17 18 2 0 0 0 52 | 17 19 1 0 0 0 53 | 17 20 1 0 0 0 54 | 19 25 1 0 0 0 55 | M END 56 | > 57 | 2 58 | 59 | > 60 | 1.E 61 | 62 | $$$$ 63 | -------------------------------------------------------------------------------- /tests/test_data/mini_system_files_new/1fbh__1__1.A_1.B__1.E_1.F/ligand_files/1.F.sdf: -------------------------------------------------------------------------------- 1 | 1.F 2 | 3 | 4 | 25 25 0 0 0 0 999 V2000 5 | 16.7440 82.4510 9.6370 P 0 0 0 0 0 0 6 | 17.4150 83.3730 8.6860 O 0 0 0 0 0 0 7 | 15.4100 82.8250 10.1880 O 0 0 0 0 0 0 8 | 16.6610 81.0360 8.9130 O 0 0 0 0 0 0 9 | 17.6680 82.1830 10.9220 O 0 0 0 0 0 0 10 | 17.1250 81.6540 12.1350 C 0 0 0 0 0 0 11 | 18.0050 82.0820 13.3020 C 0 0 0 0 0 0 12 | 19.0390 82.8920 12.8150 O 0 0 0 0 0 0 13 | 18.6040 80.9380 14.1210 C 0 0 0 0 0 0 14 | 19.7600 80.4480 13.4780 O 0 0 0 0 0 0 15 | 18.8290 81.5990 15.4800 C 0 0 0 0 0 0 16 | 18.8560 80.7160 16.6020 O 0 0 0 0 0 0 17 | 17.6040 82.5450 15.5840 C 0 0 0 0 0 0 18 | 17.1840 82.7970 14.2320 O 0 0 0 0 0 0 19 | 17.9420 83.8370 16.2720 C 0 0 0 0 0 0 20 | 17.0200 84.8390 15.8290 O 0 0 0 0 0 0 21 | 16.4380 85.9510 16.8680 P 0 0 0 0 0 0 22 | 16.9420 87.3090 16.4070 O 0 0 0 0 0 0 23 | 14.9370 85.7590 16.7350 O 0 0 0 0 0 0 24 | 16.9330 85.5960 18.2620 O 0 0 0 0 0 0 25 | 16.1110 80.4990 9.5060 H 0 0 0 0 0 0 26 | 18.7050 83.7990 12.7170 H 0 0 0 0 0 0 27 | 20.0020 81.1160 12.8170 H 0 0 0 
0 0 0 28 | 18.3460 81.0940 17.3310 H 0 0 0 0 0 0 29 | 14.7360 84.8900 17.0870 H 0 0 0 0 0 0 30 | 1 2 2 0 0 0 31 | 1 3 1 0 0 0 32 | 1 4 1 0 0 0 33 | 1 5 1 0 0 0 34 | 4 21 1 0 0 0 35 | 5 6 1 0 0 0 36 | 6 7 1 0 0 0 37 | 7 8 1 0 0 0 38 | 7 9 1 0 0 0 39 | 7 14 1 0 0 0 40 | 8 22 1 0 0 0 41 | 9 10 1 0 0 0 42 | 9 11 1 0 0 0 43 | 10 23 1 0 0 0 44 | 11 12 1 0 0 0 45 | 11 13 1 0 0 0 46 | 12 24 1 0 0 0 47 | 13 14 1 0 0 0 48 | 13 15 1 0 0 0 49 | 15 16 1 0 0 0 50 | 16 17 1 0 0 0 51 | 17 18 2 0 0 0 52 | 17 19 1 0 0 0 53 | 17 20 1 0 0 0 54 | 19 25 1 0 0 0 55 | M END 56 | > 57 | 3 58 | 59 | > 60 | 1.F 61 | 62 | $$$$ 63 | -------------------------------------------------------------------------------- /tests/test_data/mini_system_files_new/1fbh__1__1.A_1.B__1.E_1.F/sequences.fasta: -------------------------------------------------------------------------------- 1 | >1.A 2 | TDQAAFDTNIVTLTRFVMEQGRKARGTGEMTQLLNSLCTAVKAISTAVRKAGIAHLYGIAGSTNVTGDQVKKLDVLSNDLVINVLKSSFATCVLVTEEDKNAIIVEPEKRGKYVVCFDPLDGSSNIDCLVSIGTIFGIYRKNSTDEPSEKDALQPGRNLVAAGYALYGSATMLVLAMVNGVNCFMLDPAIGEFILVDRNVKIKKKGSIYSINEGYAKEFDPAITEYIQRKKFPPDNSAPYGARYVGSMVADVHRTLVYGGIFMYPANKKSPKGKLRLLYECNPMAYVMEKAGGLATTGKEAVLDIVPTDIHQRAPIILGSPEDVTELLEIYQKHA 3 | >1.B 4 | TDQAAFDTNIVTLTRFVMEQGRKARGTGEMTQLLNSLCTAVKAISTAVRKAGIAHLYGIAGSTNVTGDQVKKLDVLSNDLVINVLKSSFATCVLVTEEDKNAIIVEPEKRGKYVVCFDPLDGSSNIDCLVSIGTIFGIYRKNSTDEPSEKDALQPGRNLVAAGYALYGSATMLVLAMVNGVNCFMLDPAIGEFILVDRNVKIKKKGSIYSINEGYAKEFDPAITEYIQRKKFPPDNSAPYGARYVGSMVADVHRTLVYGGIFMYPANKKSPKGKLRLLYECNPMAYVMEKAGGLATTGKEAVLDIVPTDIHQRAPIILGSPEDVTELLEIYQKHA 5 | -------------------------------------------------------------------------------- /tests/test_data/mini_system_files_new/1fbz__1__1.A__1.C/chain_mapping.json: -------------------------------------------------------------------------------- 1 | {"1.A": "A", "1.C": "a"} -------------------------------------------------------------------------------- /tests/test_data/mini_system_files_new/1fbz__1__1.A__1.C/sequences.fasta: 
-------------------------------------------------------------------------------- 1 | >1.A 2 | EPEPWFFKNLSRKDAERQLLAPGNTHGSFLIRESESTAGSFCLSVRDFDQNQGEVVKHYKIRNLDNGGFYISPRITFPGLHELVRHYTNASDGLCTRLSRPCQT 3 | -------------------------------------------------------------------------------- /tests/test_data/mini_system_files_new/3fbp__1__1.A_1.B__1.C/chain_mapping.json: -------------------------------------------------------------------------------- 1 | {"1.A": "A", "1.B": "B", "1.C": "a"} -------------------------------------------------------------------------------- /tests/test_data/mini_system_files_new/3fbp__1__1.A_1.B__1.C/ligand_files/1.C.sdf: -------------------------------------------------------------------------------- 1 | 1.C 2 | 3 | 4 | 22 22 0 0 0 0 999 V2000 5 | 39.1310 88.9630 27.4490 P 0 0 0 0 0 0 6 | 39.8890 90.1670 27.9770 O 0 0 0 0 0 0 7 | 40.1510 87.9600 26.9580 O 0 0 0 0 0 0 8 | 38.2890 88.3290 28.5390 O 0 0 0 0 0 0 9 | 37.5960 92.9050 26.7480 O 0 0 0 0 0 0 10 | 38.2490 91.8250 26.0350 C 0 0 0 0 0 0 11 | 37.3940 90.5430 26.0420 C 0 0 0 0 0 0 12 | 38.2080 89.3540 26.2070 O 0 0 0 0 0 0 13 | 36.6180 90.4870 24.7050 C 0 0 0 0 0 0 14 | 36.4540 89.1480 24.2860 O 0 0 0 0 0 0 15 | 35.3210 91.2440 24.9970 C 0 0 0 0 0 0 16 | 35.3160 92.5900 24.5790 O 0 0 0 0 0 0 17 | 35.1010 91.0630 26.5160 C 0 0 0 0 0 0 18 | 36.3580 90.5970 27.0520 O 0 0 0 0 0 0 19 | 34.0100 90.1150 26.9290 C 0 0 0 0 0 0 20 | 34.0340 90.1390 28.3910 O 0 0 0 0 0 0 21 | 33.2340 89.1330 29.3550 P 0 0 0 0 0 0 22 | 34.0380 88.8170 30.5910 O 0 0 0 0 0 0 23 | 32.9070 87.8240 28.6710 O 0 0 0 0 0 0 24 | 31.9740 89.7730 29.8740 O 0 0 0 0 0 0 25 | 37.0780 88.6410 24.8510 H 0 0 0 0 0 0 26 | 36.0210 92.9900 25.1360 H 0 0 0 0 0 0 27 | 1 2 2 0 0 0 28 | 1 3 1 0 0 0 29 | 1 4 1 0 0 0 30 | 1 8 1 0 0 0 31 | 5 6 1 0 0 0 32 | 6 7 1 0 0 0 33 | 7 8 1 0 0 0 34 | 7 9 1 0 0 0 35 | 7 14 1 0 0 0 36 | 9 10 1 0 0 0 37 | 9 11 1 0 0 0 38 | 10 21 1 0 0 0 39 | 11 12 1 0 0 0 40 | 11 13 1 0 0 0 41 | 12 22 1 0 0 0 42 | 13 14 1 0 0 
0 43 | 13 15 1 0 0 0 44 | 15 16 1 0 0 0 45 | 16 17 1 0 0 0 46 | 17 18 2 0 0 0 47 | 17 19 1 0 0 0 48 | 17 20 1 0 0 0 49 | M END 50 | > 51 | 2 52 | 53 | > 54 | 1.C 55 | 56 | $$$$ 57 | -------------------------------------------------------------------------------- /tests/test_data/mini_system_files_new/3fbp__1__1.A_1.B__1.C/sequences.fasta: -------------------------------------------------------------------------------- 1 | >1.A 2 | TDQAAFDTNIVTLTRFVMEQGRKARGTGEMTQLLNSLCTAVKAISTAVRKAGIAHLYGIAGSTNVTGDQVKKLDVLSNDLVINVLKSSFATCVLVTEEDKNAIIVEPEKRGKYVVCFDPLDGSSNIDCLVSIGTIFGIYRKNSTDEPSEKDALQPGRNLVAAGYALYGSATMLVLAMVNGVNCFMLDPAIGEFILVDRNVKIKKKGSIYSINEGYAKEFDPAITEYIQRKKFPPDNSAPYGARYVGSMVADVHRTLVYGGIFMYPANKKSPKGKLRLLYECNPMAYVMEKAGGLATTGKEAVLDIVPTDIHQRAPIILGSPEDVTELLEIYQKHA 3 | >1.B 4 | TDQAAFDTNIVTLTRFVMEQGRKARGTGEMTQLLNSLCTAVKAISTAVRKAGIAHLYGIAGSTNVTGDQVKKLDVLSNDLVINVLKSSFATCVLVTEEDKNAIIVEPEKRGKYVVCFDPLDGSSNIDCLVSIGTIFGIYRKNSTDEPSEKDALQPGRNLVAAGYALYGSATMLVLAMVNGVNCFMLDPAIGEFILVDRNVKIKKKGSIYSINEGYAKEFDPAITEYIQRKKFPPDNSAPYGARYVGSMVADVHRTLVYGGIFMYPANKKSPKGKLRLLYECNPMAYVMEKAGGLATTGKEAVLDIVPTDIHQRAPIILGSPEDVTELLEIYQKHA 5 | -------------------------------------------------------------------------------- /tests/test_data/mmp/mini_clusters/cluster=components/directed=False/metric=protein_fident_weighted_sum/threshold=95.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/mmp/mini_clusters/cluster=components/directed=False/metric=protein_fident_weighted_sum/threshold=95.parquet -------------------------------------------------------------------------------- /tests/test_data/mmp/mini_clusters/cluster=components/directed=True/metric=pocket_fident/threshold=100.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/mmp/mini_clusters/cluster=components/directed=True/metric=pocket_fident/threshold=100.parquet -------------------------------------------------------------------------------- /tests/test_data/mmp/tiny_mmp_index.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/mmp/tiny_mmp_index.csv.gz -------------------------------------------------------------------------------- /tests/test_data/panther_classifications_mini.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/panther_classifications_mini.tar.gz -------------------------------------------------------------------------------- /tests/test_data/panther_raw.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/panther_raw.tar.gz -------------------------------------------------------------------------------- /tests/test_data/pdb_seqres.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/pdb_seqres.txt.gz -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/clusters/subdir/clusters.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/clusters/subdir/clusters.parquet 
-------------------------------------------------------------------------------- /tests/test_data/plinder/mount/entries/9h.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/entries/9h.zip -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/entries/av.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/entries/av.zip -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/entries/ng.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/entries/ng.zip -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/entries/v2.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/entries/v2.zip -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/fingerprints/ligands_per_system.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/fingerprints/ligands_per_system.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/index/annotation_table.parquet: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/index/annotation_table.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/ligand_scores/ligand_scores.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/ligand_scores/ligand_scores.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/links/kind=apo/links.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/links/kind=apo/links.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/manifest/manifest.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/manifest/manifest.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/mmp/plinder_mmp_series.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/mmp/plinder_mmp_series.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/scores/search_db=apo/pocket_lddt.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/scores/search_db=apo/pocket_lddt.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/scores/search_db=apo/protein_fident_qcov_weighted_sum.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/scores/search_db=apo/protein_fident_qcov_weighted_sum.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/scores/search_db=holo/pocket_lddt.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/scores/search_db=holo/pocket_lddt.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/scores/search_db=holo/protein_fident_qcov_weighted_sum.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/scores/search_db=holo/protein_fident_qcov_weighted_sum.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/scores/search_db=pred/pocket_lddt.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/scores/search_db=pred/pocket_lddt.parquet -------------------------------------------------------------------------------- 
/tests/test_data/plinder/mount/scores/search_db=pred/protein_fident_qcov_weighted_sum.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/scores/search_db=pred/protein_fident_qcov_weighted_sum.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/splits/split.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/splits/split.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__pli_unique_qcov.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__pli_unique_qcov.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__pocket_fident_qcov.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__pocket_fident_qcov.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__pocket_lddt.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__pocket_lddt.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__pocket_lddt_qcov.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__pocket_lddt_qcov.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__pocket_qcov.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__pocket_qcov.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__protein_fident_weighted_sum.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__protein_fident_weighted_sum.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__protein_lddt_weighted_sum.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__protein_lddt_weighted_sum.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__protein_seqsim_weighted_sum.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__protein_seqsim_weighted_sum.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__tanimoto_similarity_max.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/strat/val_vs_test_data/max_similarities__test_vs_val__tanimoto_similarity_max.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/strat/val_vs_test_data/test_set.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/strat/val_vs_test_data/test_set.parquet -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/systems/9h.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/systems/9h.zip 
-------------------------------------------------------------------------------- /tests/test_data/plinder/mount/systems/av.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/systems/av.zip -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/systems/ng.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/systems/ng.zip -------------------------------------------------------------------------------- /tests/test_data/plinder/mount/systems/v2.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/plinder/mount/systems/v2.zip -------------------------------------------------------------------------------- /tests/test_data/split_challenge/eval_metrics_config.yaml: -------------------------------------------------------------------------------- 1 | overlap_cols_list: 2 | tanisim_0.26: 3 | pass_percentage: 0.1 4 | higher_than: false 5 | plip_0.7: 6 | pass_percentage: 0.2 7 | higher_than: false 8 | test_set_quantile_dicts: 9 | entry_rfree: 10 | cutoff: 0.45 11 | higher_than: false 12 | entry_r_minus_rfree: 13 | cutoff: 0.05 14 | higher_than: false 15 | -------------------------------------------------------------------------------- /tests/test_data/split_challenge/test_lig_profile.csv: -------------------------------------------------------------------------------- 1 | 
ligand_ccd_code,ligtype,ligand_type,ligand_smiles,rdkit_valid,smiles_nochiral,organic,nha,mw,n_ring,numhacceptors,numhdonors,n_ro_bonds,fcsp3,ecfp6,tanisim_0.26,tanisim_0.5,tanisim_0.7,tanisim_0.99,tanisim_0.95,tanisim_0.9,tanisim_0.85,kinase_inhibitor 2 | NHE,SMALLMOLECULE,drug-like,C1CCC(CC1)NCCS(=O)(=O)O,True,O=S(=O)(O)CCNC1CCCCC1,True,13,207.09,1,3,2,4,1.0,[0 0 1 ... 0 0 0],0,0,0,0,0,0,0,False 3 | 3VN,SMALLMOLECULE,other,C(CCN)C[C@@](CC[C@](CCCCN)(C(=O)O)N)(C(=O)O)N,True,NCCCCC(N)(CCC(N)(CCCCN)C(=O)O)C(=O)O,True,22,318.23,0,6,6,13,0.86,[0 0 0 ... 0 0 0],0,1,1,1,1,1,1,False 4 | DMU,SMALLMOLECULE,oligosaccharide,CCCCCCCCCCO[C@H]1[C@@H]([C@H]([C@@H]([C@H](O1)CO)O[C@@H]2[C@@H]([C@H]([C@@H]([C@H](O2)CO)O)O)O)O)O,True,CCCCCCCCCCOC1OC(CO)C(OC2OC(CO)C(O)C(O)C2O)C(O)C1O,True,33,482.27,2,11,7,14,1.0,[0 0 0 ... 1 0 0],0,2,2,2,2,2,2,False 5 | 78P,SMALLMOLECULE,fragment,C[C@@]1(CCCN1)c2[nH]c3c(cccc3n2)C(=O)N,True,CC1(c2nc3cccc(C(N)=O)c3[nH]2)CCCN1,True,18,244.13,3,3,3,2,0.38,[0 0 0 ... 0 0 0],0,3,3,3,3,3,3,False 6 | -------------------------------------------------------------------------------- /tests/test_data/split_challenge/test_submission.csv: -------------------------------------------------------------------------------- 1 | system_ID,split 2 | 7KKZ__1__1.A_1.B__1.C__NHE,train 3 | 7KKT__1__1.A_1.F__1.G__3VN,train 4 | 7KKR__1__1.A_1.B__1.E__DMU,test 5 | 7KKR__1__1.A_1.B_1.C__1.H_1.J__DMU_DMU,test 6 | 7KKR__1__1.A_1.B_1.C__1.H_1.J__DMU_DMU,val 7 | 7KKQ__1__1.A__1.E__78P,val 8 | -------------------------------------------------------------------------------- /tests/test_data/split_plot_split.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/split_plot_split.parquet -------------------------------------------------------------------------------- /tests/test_data/system_instance_dataframe/instance_dataframe7.csv: 
-------------------------------------------------------------------------------- 1 | id,primary_id,structure_file,chain_id,structure_type,fasta,molecule_type,smiles,uniprot,addons 2 | 1,1,holo/2Y4I__1__1.B__1.E_1.F__ATP_MG/system.cif,R,Holo,holo/2Y4I__1__1.B__1.E_1.F__ATP_MG/sequences.fasta,protein,UNDEFINED,UNDEFINED,"('MG',1,'1.F')" 3 | 2,2,holo/2Y4I__1__1.B__1.E_1.F__ATP_MG/ligand_files/1.E.sdf,S,Holo,UNDEFINED,small_molecule,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,UNDEFINED, 4 | 3,3,holo/8BCH__1__1.A__1.B__QA9/system.cif,R,Holo,holo/8BCH__1__1.A__1.B__QA9/sequences.fasta,protein,UNDEFINED,UNDEFINED, 5 | 4,4,holo/8BCH__1__1.A__1.B__QA9/ligand_files/1.B.sdf,S,Holo,UNDEFINED,small_molecule,N=C(N)NS(=O)(=O)c1ccc(N)cc1,UNDEFINED, 6 | 5,3,apo/7OS1__1__1.A.cif,A,Apo,UNDEFINED,protein,UNDEFINED,O75643, 7 | 6,3,apo/6S8O__1__1.A.cif,A,Apo,UNDEFINED,protein,UNDEFINED,O75643, 8 | 7,3,predicted/AF_O75643.cif,R,Predicted,UNDEFINED,protein,UNDEFINED,O75643, 9 | -------------------------------------------------------------------------------- /tests/test_data/system_instance_dataframe/plinder_final_dir_structure/holo/2Y4I__1__1.B__1.E_1.F__ATP_MG/chain_mapping.json: -------------------------------------------------------------------------------- 1 | {"1.B": "A", "1.E": "a", "1.F": "b"} 2 | -------------------------------------------------------------------------------- /tests/test_data/system_instance_dataframe/plinder_final_dir_structure/holo/2Y4I__1__1.B__1.E_1.F__ATP_MG/ligand_files/1.E.sdf: -------------------------------------------------------------------------------- 1 | 1.E 2 | 3 | 4 | 31 33 0 0 0 0 999 V2000 5 | -21.7750 39.8180 3.8230 P 0 0 0 0 0 0 6 | -20.3360 39.3640 3.7520 O 0 0 0 0 0 0 7 | -21.9640 41.3070 3.6590 O 0 0 0 0 0 0 8 | -22.5680 39.1990 4.9500 O 0 0 0 0 0 0 9 | -22.3390 37.6200 2.1950 P 0 0 0 0 0 0 10 | -22.1730 36.8770 3.5000 O 0 0 0 0 0 0 11 | -23.4640 37.2580 1.2530 O 0 0 0 0 0 0 12 | -22.4330 39.1990 2.4910 O 
0 0 0 0 0 0 13 | -19.7250 36.6180 1.6960 P 0 0 0 0 0 0 14 | -18.5170 37.1930 0.9980 O 0 0 0 0 0 0 15 | -19.6770 36.3970 3.1870 O 0 0 0 0 0 0 16 | -20.9680 37.5830 1.3490 O 0 0 0 0 0 0 17 | -20.1020 35.2300 0.9730 O 0 0 0 0 0 0 18 | -21.4070 34.6750 1.1010 C 0 0 0 0 0 0 19 | -21.4190 33.2980 0.4560 C 0 0 0 0 0 0 20 | -20.1630 32.6530 0.6770 O 0 0 0 0 0 0 21 | -22.5030 32.4240 1.0620 C 0 0 0 0 0 0 22 | -23.4360 32.0380 0.0470 O 0 0 0 0 0 0 23 | -21.7870 31.2120 1.6290 C 0 0 0 0 0 0 24 | -22.3620 29.9960 1.1470 O 0 0 0 0 0 0 25 | -20.3460 31.3260 1.1680 C 0 0 0 0 0 0 26 | -19.4650 31.1320 2.3380 N 0 0 0 0 0 0 27 | -18.9120 32.1260 3.0550 C 0 0 0 0 0 0 28 | -18.1550 31.6350 4.0670 N 0 0 0 0 0 0 29 | -18.2200 30.2930 4.0040 C 0 0 0 0 0 0 30 | -17.6520 29.1660 4.7780 C 0 0 0 0 0 0 31 | -16.8460 29.3980 5.8430 N 0 0 0 0 0 0 32 | -17.9670 27.9110 4.3830 N 0 0 0 0 0 0 33 | -18.7680 27.6820 3.3250 C 0 0 0 0 0 0 34 | -19.3180 28.6590 2.5780 N 0 0 0 0 0 0 35 | -19.0860 29.9630 2.8610 C 0 0 0 0 0 0 36 | 1 2 2 0 0 0 37 | 1 3 1 0 0 0 38 | 1 4 1 0 0 0 39 | 1 8 1 0 0 0 40 | 5 6 2 0 0 0 41 | 5 7 1 0 0 0 42 | 5 8 1 0 0 0 43 | 5 12 1 0 0 0 44 | 9 10 2 0 0 0 45 | 9 11 1 0 0 0 46 | 9 12 1 0 0 0 47 | 9 13 1 0 0 0 48 | 13 14 1 0 0 0 49 | 14 15 1 0 0 0 50 | 15 16 1 0 0 0 51 | 15 17 1 0 0 0 52 | 16 21 1 0 0 0 53 | 17 18 1 0 0 0 54 | 17 19 1 0 0 0 55 | 19 20 1 0 0 0 56 | 19 21 1 0 0 0 57 | 21 22 1 0 0 0 58 | 22 23 1 0 0 0 59 | 22 31 1 0 0 0 60 | 23 24 2 0 0 0 61 | 24 25 1 0 0 0 62 | 25 26 1 0 0 0 63 | 25 31 2 0 0 0 64 | 26 27 1 0 0 0 65 | 26 28 2 0 0 0 66 | 28 29 1 0 0 0 67 | 29 30 2 0 0 0 68 | 30 31 1 0 0 0 69 | M END 70 | > 71 | 2 72 | 73 | > 74 | 1.E 75 | 76 | $$$$ 77 | -------------------------------------------------------------------------------- /tests/test_data/system_instance_dataframe/plinder_final_dir_structure/holo/2Y4I__1__1.B__1.E_1.F__ATP_MG/ligand_files/1.F.sdf: -------------------------------------------------------------------------------- 1 | 1.F 2 | 3 | 4 | 1 0 0 0 0 0 
999 V2000 5 | -21.3530 34.4120 4.6050 Mg 0 0 0 0 0 0 6 | M END 7 | > 8 | 3 9 | 10 | > 11 | 1.F 12 | 13 | $$$$ 14 | -------------------------------------------------------------------------------- /tests/test_data/system_instance_dataframe/plinder_final_dir_structure/holo/2Y4I__1__1.B__1.E_1.F__ATP_MG/sequences.fasta: -------------------------------------------------------------------------------- 1 | >1.B 2 | GPMPKKKPTPIQLNPAPDGSAVNGTSSAETNLEALQKKLLELELDEQQRKRLEAFLTQKQKVGELKDDDFEKISELGAGNGGVVFKVSHKPSGLVMARKLIHLEIKPAIRNQIIRELQVLHECNSPYIVGFYGAFYSDGEISICMEHMDGGSLDQVLKKAGRIPEQILGKVSIAVIKGLTYLREKHKIMHRDVKPSNILVNSRGEIKLCDFGVSGQLIDSMANSFVGTRSYMSPERLQGTHYSVQSDIWSMGLSLVEMAVGRYPIPPPDAKELELMFGCQVEGDAAETPPRPRTPGRPLSSYGMDSRPPMAIFELLDYIVNEPPPKLPSAVFSLEFQDFVNKCLIKNPAERADLKQLMVHAFIKRSDAEEVDFAGWLCSTIGLNQPSTPTHAAGV 3 | -------------------------------------------------------------------------------- /tests/test_data/system_instance_dataframe/plinder_final_dir_structure/holo/8BCH__1__1.A__1.B__QA9/chain_mapping.json: -------------------------------------------------------------------------------- 1 | {"1.A": "A", "1.B": "a"} 2 | -------------------------------------------------------------------------------- /tests/test_data/system_instance_dataframe/plinder_final_dir_structure/holo/8BCH__1__1.A__1.B__QA9/ligand_files/1.B.sdf: -------------------------------------------------------------------------------- 1 | 1.B 2 | 3 | 4 | 14 14 0 0 0 0 999 V2000 5 | -3.8600 -7.2830 38.9910 C 0 0 0 0 0 0 6 | -1.4800 -7.7670 39.3050 N 0 0 0 0 0 0 7 | -3.1510 -9.5670 39.3500 C 0 0 0 0 0 0 8 | -7.1590 -12.3080 39.2180 C 0 0 0 0 0 0 9 | -5.4700 -9.0690 39.0400 C 0 0 0 0 0 0 10 | -5.1770 -7.7200 38.9050 C 0 0 0 0 0 0 11 | -2.8540 -8.2130 39.2140 C 0 0 0 0 0 0 12 | -4.4700 -9.9980 39.2620 C 0 0 0 0 0 0 13 | -7.4540 -13.5510 39.9210 N 0 0 0 0 0 0 14 | -6.5590 -12.3670 38.1230 N 0 0 0 0 0 0 15 | -7.5380 -11.0100 39.7710 N 0 0 0 0 0 0 16 | -7.5910 -9.7680 37.5330 O 0 0 0 0 0 0 17 | -8.0110 -8.4090 39.2290 O 
0 0 0 0 0 0 18 | -7.1920 -9.5750 38.9130 S 0 0 0 0 0 0 19 | 1 6 1 0 0 0 20 | 1 7 2 0 0 0 21 | 2 7 1 0 0 0 22 | 3 7 1 0 0 0 23 | 3 8 2 0 0 0 24 | 4 9 1 0 0 0 25 | 4 10 2 0 0 0 26 | 4 11 1 0 0 0 27 | 5 6 2 0 0 0 28 | 5 8 1 0 0 0 29 | 5 14 1 0 0 0 30 | 11 14 1 0 0 0 31 | 12 14 2 0 0 0 32 | 13 14 2 0 0 0 33 | M END 34 | > 35 | 2 36 | 37 | > 38 | 1.B 39 | 40 | $$$$ 41 | -------------------------------------------------------------------------------- /tests/test_data/system_instance_dataframe/plinder_final_dir_structure/holo/8BCH__1__1.A__1.B__QA9/sequences.fasta: -------------------------------------------------------------------------------- 1 | >1.A 2 | GAEFMDLDQGGEALAPRQVLDLEDLVFTQGSHFMANKRCQLPDGSFRRQRKGYEEVHVPALKPKPFGSEEQLLPVEKLPKYAQAGFEGFKTLNRIQSKLYRAALETDENLLLCAPTGAGKTNVALMCMLREIGKHINMDGTINVDDFKIIYIAPMRSLVQEMVGSFGKRLATYGITVAELTGDHQLCKEEISATQIIVCTPEKWDIITRKGGERTYTQLVRLIILDEIHLLHDDRGPVLEALVARAIRNIEMTQEDVRLIGLSATLPNYEDVATFLRVDPAKGLFYFDNSFRPVPLEQTYVGITEKKAIKRFQIMNEIVYEKIMEHAGKNQVLVFVHSRKETGKTARAIRDMCLEKDTLGLFLREGSASTEVLRTEAEQCKNLELKDLLPYGFAIHHAGMTRVDRTLVEDLFADKHIQVLVSTATLAWGVNLPAHTVIIKGTQVYSPEKGRWTELGALDILQMLGRAGRPQYDTKGEGILITSHGELQYYLSLLNQQLPIESQMVSKLPDMLNAEIVLGNVQNAKDAVNWLGYAYLYIRMLRSPTLYGISHDDLKGDPLLDQRRLDLVHTAALMLDKNNLVKYDKKTGNFQVTELGRIASHYYITNDTVQTYNQLLKPTLSEIELFRVFSLSSEFKNITVREEEKLELQKLLERVPIPVKESIEEPSAKINVLLQAFISQLKLEGFALMADMVYVTQSAGRLMRAIFEIVLNRGWAQLTDKTLNLCKMIDKRMWQSMCPLRQFRKLPEEVVKKIEKKNFPFERLYDLNHNEIGELIRMPKMGKTIHKYVHLFPKLELSVHLQPITRSTLKVELTITPDFQWDEKVHGSSEAFWILVEDVDSEVILHHEYFLLKAKYAQDEHLITFFVPVFEPLPPQYFIRVVSDRWLSCETQLPVSFRHLILPEKYPPPTELLDLQPLPVSALRNSAFESLYQDKFPFFNPIQTQVFNTVYNSDDNVFVGAPTGSGKTICAEFAILRMLLQSSEGRCVYITPMEALAEQVYMDWYEKFQDRLNKKVVLLTGETSTDLKLLGKGNIIISTPEKWDILSRRWKQRKNVQNINLFVVDEVHLIGGENGPVLEVICSRMRYISSQIERPIRIVALSSSLSNAKDVAHWLGCSATSTFNFHPNVRPVPLELHIQGFNISHTQTRLLSMAKPVYHAITKHSPKKPVIVFVPSRKQTRLTAIDILTTCAADIQRQRFLHCTEKDLIPYLEKLSDSTLKETLLNGVGYLHEGLSPMERRLVEQLFSSGAIQVVVASRSLCWGMNVAAHLVIIMDTQYYNGKIHAYVDYPIYDVLQMVGHANRPLQDDEGRCVIMCQGSKKDFFKKFLYEPLPVESHLDH
CMHDHFNAEIVTKTIENKQDAVDYLTWTFLYRRMTQNPNYYNLQGISHRHLSDHLSELVEQTLSDLEQSKCISIEDEMDVAPLNLGMIAAYYYINYTTIELFSMSLNAKTKVRGLIEIISNAAEYENIPIRHHEDNLLRQLAQKVPHKLNNPKFNDPHVKTNLLLQAHLSRMQLSAELQSDTEEILSKAIRLIQACVDVLSSNGWLSPALAAMELAQMVTQAMWSKDSYLKQLPHFTSEHIKRCTDKGVESVFDIMEMEDEERNALLQLTDSQIADVARFCNRYPNIELSYEVVDKDSIRSGGPVVVLVQLEREEEVTGPVIAPLFPQKREEGWWVVIGDAKSNSLISIKRLTLQQKAKVKLDFVAPATGAHNYTLYFMSDAYMGCDQEYKFSVDVKEAETDSDSD 3 | -------------------------------------------------------------------------------- /tests/test_data/system_instance_dataframe/system_dataframe7.csv: -------------------------------------------------------------------------------- 1 | system_id,systemcomponentinstance_ids,cluster_id,split,entry_resolution, 2 | 8BCH__1__1.A__1.B__QA9,"3,4",C0000,train,, 3 | 2Y4I__1__1.B__1.E_1.F__ATP_MG,"1,2",C265,train,, 4 | -------------------------------------------------------------------------------- /tests/test_data/test_kinase_klifs.csv: -------------------------------------------------------------------------------- 1 | kinase_ID,name,HGNC,family,group,kinase_class,species,full_name,uniprot,iuphar,pocket 2 | 1,AKT1,AKT1,Akt,AGC,,Human,v-akt murine thymoma viral oncogene homolog 1,P31749,1479,KLLGKGTFGKVILYAMKILHTLTENRVLQNSRPFLTALKYSCFVMEYANGGELFFHLSRLHSEKNVVYRDLKLENLMLITDFGLC 3 | 2,AKT2,AKT2,Akt,AGC,,Human,v-akt murine thymoma viral oncogene homolog 2,P31751,1480,KLLGKGTFGKVILYAMKILHTVTESRVLQNTRPFLTALKYACFVMEYANGGELFFHLSRYLHSRDVVYRDIKLENLMLITDFGLC 4 | -------------------------------------------------------------------------------- /tests/test_data/validation/1qz5_validation.xml.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/validation/1qz5_validation.xml.gz -------------------------------------------------------------------------------- /tests/test_data/validation/2dty_validation.xml.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/validation/2dty_validation.xml.gz -------------------------------------------------------------------------------- /tests/test_data/validation/2e84_validation.xml.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/validation/2e84_validation.xml.gz -------------------------------------------------------------------------------- /tests/test_data/validation/2ixb_validation.xml.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/validation/2ixb_validation.xml.gz -------------------------------------------------------------------------------- /tests/test_data/validation/2leb_validation.xml.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/validation/2leb_validation.xml.gz -------------------------------------------------------------------------------- /tests/test_data/validation/2y4i_validation.xml.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/validation/2y4i_validation.xml.gz -------------------------------------------------------------------------------- /tests/test_data/validation/4fxd_validation.xml.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/validation/4fxd_validation.xml.gz -------------------------------------------------------------------------------- /tests/test_data/validation/5lwx_validation.xml.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/validation/5lwx_validation.xml.gz -------------------------------------------------------------------------------- /tests/test_data/validation/6fx1_validation.xml.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/validation/6fx1_validation.xml.gz -------------------------------------------------------------------------------- /tests/test_data/validation/6lu7_validation.xml.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/validation/6lu7_validation.xml.gz -------------------------------------------------------------------------------- /tests/test_data/validation/6m92_validation.xml.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/validation/6m92_validation.xml.gz -------------------------------------------------------------------------------- /tests/test_data/validation/8a7u_validation.xml.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/validation/8a7u_validation.xml.gz 
-------------------------------------------------------------------------------- /tests/test_data/validation/8pn3_validation.xml.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/validation/8pn3_validation.xml.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00001ngx/pdb_00001ngx_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00001ngx/pdb_00001ngx_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00001ppc/pdb_00001ppc_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00001ppc/pdb_00001ppc_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00001qz5/pdb_00001qz5_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00001qz5/pdb_00001qz5_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00002dty/pdb_00002dty_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00002dty/pdb_00002dty_xyz-enrich.cif.gz -------------------------------------------------------------------------------- 
/tests/test_data/xx/pdb_00002e84/pdb_00002e84_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00002e84/pdb_00002e84_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00002gdo/pdb_00002gdo_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00002gdo/pdb_00002gdo_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00002hyy/pdb_00002hyy_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00002hyy/pdb_00002hyy_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00002ixb/pdb_00002ixb_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00002ixb/pdb_00002ixb_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00002leb/pdb_00002leb_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00002leb/pdb_00002leb_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00002p1q/pdb_00002p1q_xyz-enrich.cif.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00002p1q/pdb_00002p1q_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00002y4i/pdb_00002y4i_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00002y4i/pdb_00002y4i_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00003cyh/pdb_00003cyh_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00003cyh/pdb_00003cyh_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00003cz3/pdb_00003cz3_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00003cz3/pdb_00003cz3_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00003g32/pdb_00003g32_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00003g32/pdb_00003g32_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00003grt/pdb_00003grt_xyz-enrich.cif.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00003grt/pdb_00003grt_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00003ot7/pdb_00003ot7_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00003ot7/pdb_00003ot7_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00004ci1/pdb_00004ci1_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00004ci1/pdb_00004ci1_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00004fxd/pdb_00004fxd_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00004fxd/pdb_00004fxd_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00004jvn/pdb_00004jvn_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00004jvn/pdb_00004jvn_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00004nhc/pdb_00004nhc_xyz-enrich.cif.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00004nhc/pdb_00004nhc_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00004qyf/pdb_00004qyf_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00004qyf/pdb_00004qyf_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00004tz4/pdb_00004tz4_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00004tz4/pdb_00004tz4_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00005a7w/pdb_00005a7w_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00005a7w/pdb_00005a7w_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00005fkw/pdb_00005fkw_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00005fkw/pdb_00005fkw_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00005lwx/pdb_00005lwx_xyz-enrich.cif.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00005lwx/pdb_00005lwx_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00006f6r/pdb_00006f6r_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00006f6r/pdb_00006f6r_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00006fx1/pdb_00006fx1_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00006fx1/pdb_00006fx1_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00006i41/pdb_00006i41_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00006i41/pdb_00006i41_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00006lu7/pdb_00006lu7_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00006lu7/pdb_00006lu7_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00006m92/pdb_00006m92_xyz-enrich.cif.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00006m92/pdb_00006m92_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00006ntj/pdb_00006ntj_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00006ntj/pdb_00006ntj_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00006u6k/pdb_00006u6k_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00006u6k/pdb_00006u6k_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00007az3/pdb_00007az3_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00007az3/pdb_00007az3_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00007bqu/pdb_00007bqu_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00007bqu/pdb_00007bqu_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00007gj7/pdb_00007gj7_xyz-enrich.cif.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00007gj7/pdb_00007gj7_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00007gl9/pdb_00007gl9_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00007gl9/pdb_00007gl9_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_data/xx/pdb_00008pn3/pdb_00008pn3_xyz-enrich.cif.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plinder-org/plinder/6065c2f8dda5c412d7e5200fb288c1772b7334bf/tests/test_data/xx/pdb_00008pn3/pdb_00008pn3_xyz-enrich.cif.gz -------------------------------------------------------------------------------- /tests/test_final_structure_checks.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Plinder Development Team 2 | # Distributed under the terms of the Apache License 2.0 3 | import pandas as pd 4 | from plinder.data.final_structure_qc import run_all_checks 5 | 6 | 7 | def test_final_structure_checks( 8 | mini_system_dir, target_structure_validation_file, mini_all_json 9 | ): 10 | df = run_all_checks(mini_system_dir, mini_all_json) 11 | df["ligand_molvs_validation"] = df.ligand_molvs_validation.astype("str") 12 | df["ligand_rdkit_validation"] = df.ligand_rdkit_validation.astype("str") 13 | target_df = pd.read_csv(target_structure_validation_file, sep="\t") 14 | pd.testing.assert_frame_equal(df, target_df.fillna("")) 15 | -------------------------------------------------------------------------------- /tox.ini: 
-------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py310-{lint,type,test} 3 | isolated_build = true 4 | requires = 5 | tox >= 4 6 | tox-gh-actions 7 | tox-extras 8 | 9 | [gh-actions] 10 | python = 11 | 3.10: py310 12 | 13 | [testenv] 14 | skip_sdist = true 15 | skip_install = true 16 | 17 | 18 | [testenv:py310-lint] 19 | tox_extras=lint 20 | deps = 21 | ruff == 0.1.2 22 | pre-commit == 2.21.0 23 | commands = pre-commit run --all-files --show-diff-on-failure 24 | 25 | [testenv:py310-type] 26 | tox_extras=type 27 | deps = 28 | mypy == 1.2.0 29 | types-PyYAML 30 | types-requests 31 | pydantic 32 | commands = mypy src 33 | 34 | [testenv:py310-test] 35 | setenv = 36 | PLINDER_LOG_LEVEL=10 37 | PLINDER_OFFLINE=true 38 | passenv = 39 | PLINDER_REGISTRY 40 | extras = test 41 | commands = python flows/docker.py test --dirty {posargs} 42 | --------------------------------------------------------------------------------