├── .flake8 ├── .github ├── dependabot.yml └── workflows │ ├── common_ccdc_status_checks.yml │ ├── lint-dockerfiles.yml │ ├── lint-markdown.yml │ ├── lint-python.yml │ └── lint-yaml.yml ├── .gitignore ├── .markdownlint.yml ├── .yamllint.yml ├── LICENSE ├── README.md ├── api_paper_2024 ├── example_1 │ ├── ReadMe.md │ ├── data_files │ │ └── demo_subset_of_pubchem.sdf │ ├── isovflavone_search.py │ └── process_pubchem_structures.py ├── example_2 │ ├── ReadMe.md │ ├── about_entry.py │ ├── crossref.py │ ├── language_processing.py │ ├── openalex.py │ ├── opencitations.py │ ├── references.py │ └── url_requesting.py ├── example_3 │ ├── Example │ │ ├── 1FM9_protein.mol2 │ │ ├── a_ligand.mol2 │ │ ├── cavity.atoms │ │ └── unconstrained.conf │ ├── ReadMe.md │ └── similarity_docking.py ├── example_4 │ ├── ReadMe.md │ ├── atom_weighting.json │ ├── mercury_metal_voronoi.py │ ├── mercury_molecular_voronoi.py │ └── voronoi.py └── example_5 │ ├── ReadMe.md │ ├── particle_shape.py │ ├── shape_classification.py │ └── visualiser.py ├── assets ├── csd-python-api-logo.png ├── download_zip.gif ├── search.gif └── single_download.gif ├── notebooks ├── API_Training_Exercises │ └── 01_Simple_Report.ipynb ├── CoRE-MOF │ ├── README.md │ ├── download_unmodified_MOFs_from_CSD.ipynb │ ├── list_coremof_csd_unmodified_20250227.json │ └── structures │ │ ├── CR │ │ ├── ASR │ │ │ ├── 2004[Co][rtl]3[ASR]2.cif │ │ │ ├── ABAVOP.cif │ │ │ ├── ABUXUT.cif │ │ │ ├── ACENIF.cif │ │ │ ├── ACODAA.cif │ │ │ ├── ACODED.cif │ │ │ ├── ACODIH.cif │ │ │ ├── ACOLUB.cif │ │ │ ├── ADABAK.cif │ │ │ ├── AFADAN.cif │ │ │ └── AFITIT.cif │ │ ├── FSR │ │ │ ├── ABAVOP.cif │ │ │ ├── ABUXUT.cif │ │ │ ├── ACENIF.cif │ │ │ ├── ACODED.cif │ │ │ ├── ACODIH.cif │ │ │ ├── ACOLUB.cif │ │ │ ├── ADABAK.cif │ │ │ ├── ADAQAA.cif │ │ │ ├── ADOGUZ.cif │ │ │ └── AFADAN.cif │ │ └── Ion │ │ │ ├── ADOBEB.cif │ │ │ ├── ADOCAY.cif │ │ │ ├── ADOCIG.cif │ │ │ ├── ANAGOO.cif │ │ │ ├── AVAGIP.cif │ │ │ ├── AVIVIK.cif │ │ │ ├── AVIVUW.cif │ │ │ ├── 
BAMKIM.cif │ │ │ ├── BELVAR.cif │ │ │ └── BOPHIZ.cif │ │ └── NCR │ │ ├── ABECIX.cif │ │ ├── ABECOD.cif │ │ ├── ABECUJ.cif │ │ ├── ABEDEU.cif │ │ ├── ABEDIY.cif │ │ ├── ABEMIF.cif │ │ ├── ABETIN.cif │ │ ├── ABINIM.cif │ │ └── ACATAA.cif ├── Discovery │ ├── .gitignore │ ├── 00_Background │ │ └── 00_Background.ipynb │ ├── 01_CSD_Search │ │ ├── 01_Substructure_searching_the_CSD.ipynb │ │ ├── 02_Similarity_searching_the_CSD.ipynb │ │ ├── 03_MCS_searching_the_CSD.ipynb │ │ ├── Lapatinib.mol │ │ ├── aryl_sulphonamide.con │ │ └── aryl_sulphonamide.qry │ ├── 02_Protein_Ligand │ │ ├── 01_Protein_Ligand_Searching.ipynb │ │ └── 02_API_Protein-Ligand_search_for_FAD.ipynb │ ├── 03_Molecular_geometries │ │ ├── 1ett.mol2 │ │ └── Molecular_geometries.ipynb │ ├── 04_Conformer_generation │ │ ├── Conformer_generation.ipynb │ │ ├── input.sdf │ │ └── lapatinib.mol2 │ ├── 05_Molecular_interactions │ │ └── Molecular_interactions.ipynb │ ├── 06_Interaction_maps │ │ ├── Interaction_maps.ipynb │ │ └── pdb2uw7.ent │ ├── 07_Cavities │ │ ├── Cavities.ipynb │ │ ├── pdb1fax.ent │ │ ├── pdbe_get.ps1 │ │ └── proteins │ │ │ ├── pdb1ett.ent │ │ │ ├── pdb1fax.ent │ │ │ ├── pdb2amq.ent │ │ │ ├── pdb4px6.ent │ │ │ ├── pdb4xg8.ent │ │ │ ├── pdb4yjp.ent │ │ │ ├── pdb5af9.ent │ │ │ ├── pdb5lma.ent │ │ │ └── pdb6lu7.ent │ ├── 08_Docking │ │ ├── 00a_Input_for_GOLD.ipynb │ │ ├── 00b_Input_for_GOLD-RDKit.ipynb │ │ ├── 01_Docking_foreground.ipynb │ │ ├── 02_Docking_background_conf.ipynb │ │ ├── 03_Docking_interactive.ipynb │ │ ├── 04_Docking_interactive_conf.ipynb │ │ ├── 05_Parameter_tests.ipynb │ │ ├── Basic_CLI_useage │ │ │ ├── gold.conf │ │ │ └── run.ps1 │ │ ├── Protein-ligand_descriptors.txt │ │ ├── ReadMe.txt │ │ └── input_files │ │ │ ├── ReadMe.txt │ │ │ ├── gold.conf │ │ │ ├── input.csv │ │ │ └── target │ │ │ ├── ligand.mol2 │ │ │ └── protein.mol2 │ ├── 09_Covalent_Docking │ │ ├── 01_Ligand_Preparation_for_Covalent_Docking.ipynb │ │ ├── 02a_Covalent_Complexes-atom.ipynb │ │ ├── 
02b_Covalent_Complexes-substructure.ipynb │ │ ├── 10_Prochiral_Michael_Acceptors.ipynb │ │ ├── ReadMe.txt │ │ ├── cleanup.ps1 │ │ ├── gold_atom.conf │ │ ├── gold_substructure.conf │ │ ├── input.csv │ │ └── substructure.mol2 │ ├── 10_Editing_molecules │ │ └── Editing_molecules.ipynb │ ├── 11_Working_With_Proteins │ │ ├── 11_Working_With_Proteins.ipynb │ │ ├── 3kk6.cif │ │ ├── 3kk6.pdb │ │ └── tiny.pdb │ ├── 12_Ensemble_docking │ │ └── ensemble_docking.ipynb │ └── ReadMe.md ├── README.md └── ccdc_notebook_utilities │ ├── README.md │ ├── __init__.py │ ├── create_logger.py │ └── run_hermes.py └── scripts ├── ReadMe.md ├── concat_mol2 ├── ReadMe.md └── concat_mol2.py ├── conformer_demo ├── AZD9291.mol2 ├── conformer_demo.py └── description.md ├── create_castep_input ├── ReadMe.md ├── assets │ ├── add_script_location.png │ ├── file_output.png │ └── select_script.png └── create_castep_input.py ├── create_gaussian_input ├── ReadMe.md └── create_gaussian_input.py ├── find_binding_conformation ├── ReadMe.md ├── find_binding_conformation.py └── pdb_example.txt ├── gold_multi ├── .gitignore ├── ReadMe.md ├── gold.conf ├── gold_multi.py ├── input.sdf └── target │ ├── ligand.mol2 │ └── protein.mol2 ├── hydrogen_bond_propensity ├── ReadMe.md ├── assets │ ├── HXACAN_Report_Screenshot_1.png │ ├── HXACAN_Report_Screenshot_2.png │ ├── HXACAN_Report_Screenshot_3.png │ └── HXACAN_Report_Screenshot_4.png ├── hydrogen_bond_propensity_report.docx └── hydrogen_bond_propensity_report.py ├── mof_solvent_removal_2017_chem_mater_publication ├── Command_prompt_MOF_solvent_removal.py ├── Mercury_MOF_solvent_removal.py └── ReadMe.md ├── multi_component_hydrogen_bond_propensity ├── ReadMe.md ├── multi_component_hydrogen_bond_propensity_report.docx ├── multi_component_hydrogen_bond_propensity_report.py └── multi_component_pair_hbp_report.docx ├── new_script_readme_template ├── ReadMe.md └── script_example.py ├── november_2023_morphology_webinar ├── ReadMe.md ├── 
calculate_morphologies_tabulate_output.py ├── exploring_surface_properties.py └── morphology_plot.py ├── packing_similarity_dendrogram ├── ReadMe.md ├── assets │ ├── dendogram_figure_1.png │ ├── dendogram_figure_2.png │ └── dendogram_figure_3.png └── packing_similarity_dendogram.py ├── particle_rugosity ├── ReadMe.md └── particle_rugosity.py ├── refcodes_with_properties ├── ReadMe.md ├── entry_property_calculator.py ├── example_control_file.txt ├── more_elaborate_control.txt ├── refcodes_with_properties.py └── test_entry_property_calculator.py ├── show_semiconductor_properties ├── ReadMe.md ├── hist_data.json ├── semiconductor_template.html ├── semiconductors.gcd └── show_semiconductor_properties.py └── surface_charge ├── ReadMe.md ├── assets ├── adding_location.png ├── csd-python-api-logo.png ├── example_input.png ├── example_output_hxacan28.png └── selecting_script.png ├── surface_charge.py └── surface_charge_calculator.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 200 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Dependabot version updates 2 | # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuring-dependabot-version-updates 3 | 4 | version: 2 5 | registries: 6 | nuget-artifactory: 7 | type: nuget-feed 8 | url: https://artifactory.ccdc.cam.ac.uk/artifactory/api/npm/v3/ccdc-nuget 9 | username: ${{ secrets.ARTIFACTORY_GH_NUGET_READ_ONLY_USER }} 10 | password: ${{ secrets.ARTIFACTORY_GH_NUGET_READ_ONLY_API }} 11 | nuget-azure-devops: 12 | type: nuget-feed 13 | url: https://pkgs.dev.azure.com/ccdc/_packaging/ccdc/npm/v3/index.json 14 | username: ${{ secrets.AZURE_NUGET_ARTIFACTS_READ_ONLY_USER }} 15 | password: ${{ secrets.AZURE_NUGET_ARTIFACTS_READ_ONLY_TOKEN }} 16 | 17 | npm-artifactory: 18 | type: 
nuget-feed 19 | url: https://artifactory.ccdc.cam.ac.uk/artifactory/api/npm/ccdc-npm-mix/ 20 | username: ${{ secrets.ARTIFACTORY_GH_NPM_READ_ONLY_USER }} 21 | password: ${{ secrets.ARTIFACTORY_GH_NPM_READ_ONLY_API }} 22 | npm-azure-devops: 23 | type: nuget-feed 24 | url: https://pkgs.dev.azure.com/ccdc/_packaging/ccdc/npm/registry/ 25 | username: ${{ secrets.AZURE_NPM_ARTIFACTS_READ_ONLY_USER }} 26 | password: ${{ secrets.AZURE_NPM_ARTIFACTS_READ_ONLY_TOKEN }} 27 | 28 | updates: 29 | # Enable version updates for NuGet 30 | - package-ecosystem: "nuget" 31 | registries: "*" 32 | # Look for `*.csproj` or `*.sln` files in the `root` directory 33 | directory: "/" 34 | # Check the NuGet registry for updates every day (weekdays) 35 | schedule: 36 | interval: "daily" 37 | time: "15:30" 38 | timezone: "Europe/London" 39 | commit-message: 40 | # Prefix all commit messages with "NO_JIRA" 41 | prefix: "NO_JIRA" 42 | 43 | # Enable version updates for NPM 44 | - package-ecosystem: "npm" 45 | registries: "*" 46 | # Look for `package.json` or `package.lock` files in the `root` directory 47 | directory: "/" 48 | # Check the NPM registry for updates every day (weekdays) 49 | schedule: 50 | interval: "daily" 51 | time: "15:30" 52 | timezone: "Europe/London" 53 | commit-message: 54 | # Prefix all commit messages with "NO_JIRA" 55 | prefix: "NO_JIRA" 56 | 57 | # Enable version update for GitHub Actions 58 | - package-ecosystem: "github-actions" 59 | directory: "/" 60 | # Check GitHub Actions for updates every day (weekdays) 61 | schedule: 62 | interval: "daily" 63 | time: "15:30" 64 | timezone: "Europe/London" 65 | commit-message: 66 | # Prefix all commit messages with "NO_JIRA" 67 | prefix: "NO_JIRA" 68 | -------------------------------------------------------------------------------- /.github/workflows/common_ccdc_status_checks.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # This status check is updated on all repositories by the 
moan.py script 3 | # in the github-repository-management repository. 4 | # Changes in this file will be overwritten periodically. 5 | # To make changes persistent, please mark this repository with update_status_check: false 6 | # or make your changes in the github-repository-management/status_check.yml file 7 | name: Common CCDC PR Checks 8 | on: [pull_request] # yamllint disable-line rule:truthy 9 | jobs: 10 | ccdc-commit-hooks-on-pull-request-check: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | with: 15 | ref: ${{ github.head_ref }} 16 | fetch-depth: 0 17 | - uses: actions/setup-python@v5 18 | with: 19 | python-version: "3.11" 20 | - name: Get the commit message 21 | run: | 22 | echo 'commit_message<<EOF' >> $GITHUB_ENV 23 | git log --format=%B -n 1 ${{ github.event.after }} >> $GITHUB_ENV 24 | echo 'EOF' >> $GITHUB_ENV 25 | - name: Check pull request files with commit hook action 26 | uses: ccdc-opensource/commit-hooks@main 27 | with: 28 | commitMessage: ${{ env.commit_message }} 29 | -------------------------------------------------------------------------------- /.github/workflows/lint-dockerfiles.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Lint Dockerfiles 3 | on: # yamllint disable-line rule:truthy 4 | pull_request: 5 | paths: 6 | - "**/*Dockerfile*" 7 | 8 | jobs: 9 | hadolint: 10 | runs-on: ubuntu-latest 11 | outputs: 12 | changed-files: ${{ steps.file-changes.outputs.changed-files }} 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | with: 17 | fetch-depth: 0 18 | 19 | - name: Modified files 20 | id: file-changes 21 | run: | 22 | echo \ 23 | "changed-files=$(git diff --name-only -r origin/${GITHUB_BASE_REF} origin/${GITHUB_HEAD_REF} \ 24 | | grep 'Dockerfile' \ 25 | | tr '\n' ' ')" \ 26 | >> $GITHUB_OUTPUT 27 | 28 | - name: Check GitHub event type to determine reporter type 29 | run: | 30 | if [ "${{ github.event_name }}" == "pull_request" ]; then 31 | echo 
"REVIEWDOG_REPORTER=github-pr-review" >> $GITHUB_ENV 32 | else 33 | echo "REVIEWDOG_REPORTER=github-check" >> $GITHUB_ENV 34 | fi 35 | 36 | - name: Run hadolint 37 | uses: reviewdog/action-hadolint@v1 38 | with: 39 | fail_level: none 40 | filter_mode: diff_context 41 | hadolint_flags: ${{ steps.file-changes.outputs.changed-files }} 42 | level: error 43 | reporter: ${{ env.REVIEWDOG_REPORTER }} 44 | -------------------------------------------------------------------------------- /.github/workflows/lint-markdown.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Lint Markdown 3 | on: # yamllint disable-line rule:truthy 4 | pull_request: 5 | paths: 6 | - "**/*.md" 7 | 8 | jobs: 9 | markdownlint: 10 | runs-on: ubuntu-latest 11 | outputs: 12 | changed-files: ${{ steps.file-changes.outputs.changed-files }} 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | with: 17 | fetch-depth: 0 18 | 19 | - name: Modified files 20 | id: file-changes 21 | run: | 22 | echo \ 23 | "changed-files=$(git diff --name-only -r origin/${GITHUB_BASE_REF} origin/${GITHUB_HEAD_REF} \ 24 | | grep '\.md' \ 25 | | tr '\n' ' ')" \ 26 | >> $GITHUB_OUTPUT 27 | 28 | - name: Check GitHub event type to determine reporter type 29 | run: | 30 | if [ "${{ github.event_name }}" == "pull_request" ]; then 31 | echo "REVIEWDOG_REPORTER=github-pr-review" >> $GITHUB_ENV 32 | else 33 | echo "REVIEWDOG_REPORTER=github-check" >> $GITHUB_ENV 34 | fi 35 | 36 | - name: Run markdownlint 37 | uses: reviewdog/action-markdownlint@v0 38 | with: 39 | fail_level: none 40 | filter_mode: diff_context 41 | level: error 42 | markdownlint_flags: ${{ steps.file-changes.outputs.changed-files }} 43 | reporter: ${{ env.REVIEWDOG_REPORTER }} 44 | -------------------------------------------------------------------------------- /.github/workflows/lint-python.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Lint Python 3 | on: # 
yamllint disable-line rule:truthy 4 | pull_request: 5 | paths: 6 | - "**/*.py" 7 | 8 | jobs: 9 | flake8: 10 | runs-on: ubuntu-latest 11 | outputs: 12 | changed-files: ${{ steps.file-changes.outputs.changed-files }} 13 | strategy: 14 | matrix: 15 | python_version: ["3.11"] 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | fetch-depth: 0 21 | 22 | - name: Modified files 23 | id: file-changes 24 | run: | 25 | echo \ 26 | "changed-files=$(git diff --name-only -r origin/${GITHUB_BASE_REF} origin/${GITHUB_HEAD_REF} \ 27 | | grep '\.py' \ 28 | | tr '\n' ' ')" \ 29 | >> $GITHUB_OUTPUT 30 | 31 | - name: Check GitHub event type to determine reporter type 32 | run: | 33 | if [ "${{ github.event_name }}" == "pull_request" ]; then 34 | echo "REVIEWDOG_REPORTER=github-pr-review" >> $GITHUB_ENV 35 | else 36 | echo "REVIEWDOG_REPORTER=github-check" >> $GITHUB_ENV 37 | fi 38 | 39 | - name: Set up Python environment 40 | uses: actions/setup-python@v5 41 | with: 42 | python-version: ${{ matrix.python_version }} 43 | 44 | - name: Lint Python 45 | uses: reviewdog/action-flake8@v3 46 | with: 47 | fail_level: none 48 | filter_mode: added 49 | level: error 50 | flake8_args: ${{ steps.file-changes.outputs.changed-files }} 51 | reporter: ${{ env.REVIEWDOG_REPORTER }} 52 | -------------------------------------------------------------------------------- /.github/workflows/lint-yaml.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Lint YAML 3 | on: # yamllint disable-line rule:truthy 4 | pull_request: 5 | paths: 6 | - "**/*.yml" 7 | - "**/*.yaml" 8 | 9 | jobs: 10 | yamllint: 11 | runs-on: ubuntu-latest 12 | outputs: 13 | changed-files: ${{ steps.file-changes.outputs.changed-files }} 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | with: 18 | fetch-depth: 0 19 | 20 | - name: Modified files 21 | id: file-changes 22 | run: | 23 | echo \ 24 | "changed-files=$(git diff --name-only -r origin/${GITHUB_BASE_REF} 
origin/${GITHUB_HEAD_REF} \ 25 | | grep -E "\.(yml|yaml)" \ 26 | | tr '\n' ' ')" \ 27 | >> $GITHUB_OUTPUT 28 | 29 | - name: Check GitHub event type to determine reporter type 30 | run: | 31 | if [ "${{ github.event_name }}" == "pull_request" ]; then 32 | echo "REVIEWDOG_REPORTER=github-pr-review" >> $GITHUB_ENV 33 | else 34 | echo "REVIEWDOG_REPORTER=github-check" >> $GITHUB_ENV 35 | fi 36 | 37 | - name: Run yamllint 38 | uses: reviewdog/action-yamllint@v1 39 | with: 40 | fail_level: none 41 | filter_mode: diff_context 42 | level: error 43 | yamllint_flags: ${{ steps.file-changes.outputs.changed-files }} 44 | reporter: ${{ env.REVIEWDOG_REPORTER }} 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # PyCharm 132 | .idea/ 133 | -------------------------------------------------------------------------------- /.markdownlint.yml: -------------------------------------------------------------------------------- 1 | --- 2 | MD013: 3 | # Number of characters 4 | line_length: 200 5 | -------------------------------------------------------------------------------- /.yamllint.yml: -------------------------------------------------------------------------------- 1 | # -*- mode: yaml -*- 2 | # vim:ts=2:sw=2:ai:si:syntax=yaml 3 | # 4 | # yamllint configuration directives 5 | # Project Homepage: https://github.com/adrienverge/yamllint 6 | # 7 | # Overriding rules in files: 8 | # http://yamllint.readthedocs.io/en/latest/disable_with_comments.html 9 | --- 10 | extends: default 11 | 12 | # Rules documentation: http://yamllint.readthedocs.io/en/latest/rules.html 13 | rules: 14 | document-start: disable 15 | line-length: 16 | max: 200 17 | braces: 18 | max-spaces-inside: 1 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 The Cambridge Crystallographic Data Centre 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to 
deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # CSD Python API Scripts 4 | 5 | A repository containing scripts that have been created to leverage the toolkit found within 6 | the [CCDC portfolio](https://www.ccdc.cam.ac.uk/solutions/) that are accessible via 7 | the [CSD Python API](https://www.ccdc.cam.ac.uk/solutions/csd-core/components/csd-python-api/). 8 | 9 | The purpose of this platform is to distribute knowledge and allow for scientific collaborations. Scripts are provided on an as-is basis and while their use is not supported we do welcome feedback on potential improvements. All scripts are tested against the latest version of the CSD Python API as installed with the CSD Portfolio. 10 | 11 | > For feedback or to report any issues please contact [support@ccdc.cam.ac.uk](mailto:support@ccdc.cam.ac.uk) 12 | 13 | ## Content 14 | 15 | ```graphql 16 | . 
17 | ├─ assets # Images for documentation 18 | ├─ scripts # Python scripts 19 | ├─ notebooks # Jupyter notebooks 20 | ├─ LICENSE 21 | └─ README.md 22 | ``` 23 | 24 | ## Licensing Requirements 25 | 26 | A valid CSD Portfolio licence is required for all scripts; some will require additional licence levels for use, such as Discovery and Materials. Requirements are clearly displayed in the `ReadMe` that accompanies each script. 27 | 28 | Don’t have a licence? Contact us [here to request a quote or demonstration.](https://www.ccdc.cam.ac.uk/theccdcprofile/contactus/) 29 | 30 | ## Downloading Scripts 31 | 32 | We advise that users `fork` the repository to ensure they can keep up to date with any modification. 33 | If you don't want to worry about having a GitHub account or are not confident with Git, you can download the scripts straight from this webpage. 34 | 35 | The following section will outline how to download multiple or individual scripts: 36 | 37 | ### 1. Downloading All Scripts in a Zip file 38 | 39 | Whilst in the main repository window click `Code` and select `Download Zip`: 40 | 41 | 42 | 43 | ### 2. Downloading Individual Scripts 44 | 45 | Once you've found a script/file you wish to download, click on it. This will show you the content of the file. In the top right of the file click `Raw`. 46 | This will open the content in a tab where you can `Right-Click` in the text and select `"Save As..."`. 47 | 48 | You will be asked in your file explorer where you wish to save the file. Please specify the file extension at this point, by default `.txt` is used (For python scripts `.py` is required.) 49 | 50 | 51 | 52 | ## Running scripts through the CSD Python API Miniconda installation 53 | This requires you to already have the CSD Python API installed. 
Activate the environment as described in 54 | [this FAQ on activating the CSD Python API](https://www.ccdc.cam.ac.uk/support-and-resources/support/case/?caseid=6c3ec918-aede-ed11-96a2-00505695c114) then run 55 | 56 | ```cmd 57 | python script_example.py 58 | ``` 59 | 60 | in the same command prompt / terminal window. 61 | 62 | ## Submitting or Modifying Scripts 63 | 64 | 1. [Create a Fork of the repository](https://docs.github.com/en/get-started/quickstart/contributing-to-projects#forking-a-repository) (A fork is a copy of a repository that you manage. Forks let you make changes to a project without affecting the CSD GitHub repo. You can fetch updates from or submit changes to the CSD GitHub repo with pull requests.) 65 | 66 | 2. [Commit your changes to the forked repository](https://docs.github.com/en/get-started/quickstart/contributing-to-projects#making-and-pushing-changes) (on a branch). 67 | 68 | 3. [Create a Pull Request](https://docs.github.com/en/get-started/quickstart/contributing-to-projects#making-a-pull-request) to this repository. 69 | 70 | 4. Once the code has been reviewed it can be merged into the CSD GitHub repo by someone from the CCDC. 71 | -------------------------------------------------------------------------------- /api_paper_2024/example_1/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Extending Substructure Searching using the CSD Python API 2 | 3 | Search the CSD using a SMARTS query through the CSD Python API and further inspect the matched molecules as required. 4 | Using SMARTS, it is possible to express a complex Isoflavone query which is recursive in nature. 
class TrihydroxyIsoflavoneHitfinder(SubstructureSearch.HitProcessor):
    """Collect isoflavone substructure hits and keep the tri-hydroxylated ones.

    Every hit delivered by the search is recorded in
    ``_hits_without_filtering``. Hits whose matched atoms contain exactly
    three hydroxyl/hydroxylate oxygens are additionally stored in ``_hits``
    and can be summarised with :meth:`tabulate`.
    """

    def __init__(self, query_string, database='CSD'):
        """
        :param query_string: SMARTS string defining the labelled query
        :param database: ``'CSD'`` or the path of a structure file to search
        """
        self._query = SMARTSSubstructure(query_string)
        self._query_string = query_string
        self._hits = []
        self._extracted_data = []
        self._entry_reader = EntryReader(database)
        self._hits_without_filtering = []
        self._database = database

    @staticmethod
    def _hydroxyl_or_hydroxylate(atom):
        """Return True if *atom* is a hydroxyl or hydroxylate oxygen.

        An oxygen qualifies when it has a single neighbour and a formal charge
        of -1, or when it has two neighbours of which at least one is hydrogen.
        """
        # The element test must guard BOTH alternatives; the previous
        # ``A and B or C`` form let non-oxygen atoms through the second branch.
        if atom.atomic_number != 8:
            return False
        neighbours = atom.neighbours
        if len(neighbours) == 1:
            return atom.formal_charge == -1.0
        if len(neighbours) == 2:
            return neighbours[0].atomic_number == 1 or neighbours[1].atomic_number == 1
        return False

    def _find_hydroxyls(self, hit):
        """Return the matched atoms of *hit* that are hydroxyl(ate) oxygens."""
        return [atom for atom in hit.match_atoms() if self._hydroxyl_or_hydroxylate(atom)]

    def _count_bound_hydroxyls(self, hit):
        """Return how many matched atoms of *hit* are hydroxyl(ate) oxygens."""
        return len(self._find_hydroxyls(hit))

    def _get_entry_data_other(self, identifier):
        """Return the ``attributes`` of the entry called *identifier* (non-CSD sources)."""
        entry = self._entry_reader.entry(identifier)
        return entry.attributes

    def _get_entry_data_csd(self, identifier):
        """Return the chemical name and synonyms of the CSD entry *identifier*."""
        entry = self._entry_reader.entry(identifier)

        synonyms = entry.synonyms
        chemical_name = entry.chemical_name
        return {"Chemical Name": chemical_name, "Synonyms": synonyms}

    def _substitution_pattern(self, hit):
        """Return the comma-separated query labels matched by a hydroxyl(ate) oxygen."""
        match_atoms = hit.match_atoms()
        # NOTE(review): the labels are enumerated through the query's private
        # ``_matches`` mapping, exactly as before - confirm there is no public accessor.
        pattern = [
            str(label)
            for label in self._query._matches.keys()
            if self._hydroxyl_or_hydroxylate(match_atoms[self._query.label_to_atom_index(label)])
        ]
        # The debug print of the label lookup was removed: it polluted the
        # comma-separated table that the script writes to stdout.
        return ",".join(sorted(pattern))

    def _get_entry_data(self, hit):
        """Return the tabulated data for *hit*, including its substitution pattern."""
        # The attribute set in __init__ is ``_database``; reading ``self.database``
        # raised AttributeError on the first hit.
        if self._database == 'CSD':
            d = self._get_entry_data_csd(hit.identifier)
        else:
            d = self._get_entry_data_other(hit.identifier)
        return d | {"Substitution Pattern": self._substitution_pattern(hit)}

    def add_hit(self, hit):
        """
        This is the key method that gets called in the search
        """
        self._hits_without_filtering.append(hit)
        if self._count_bound_hydroxyls(hit) == 3:
            self._hits.append(hit)

    def tabulate(self):
        """
        Generate a dictionary of dictionaries of relevant data from the hits
        """
        return {hit.identifier: self._get_entry_data(hit) for hit in self._hits}

    def run(self):
        """Run the substructure search over the configured database, feeding :meth:`add_hit`."""
        searcher = SubstructureSearch()
        searcher.add_substructure(self._query)
        super().search(searcher, self._entry_reader)


if __name__ == "__main__":

    # Alternatives allowed at each labelled position of the query.
    sub = "$([#1]),$([OH1]),$([OX1H0]),$(O[CH3]),$(Oc1ccccc1)"
    query_string = (f"c(!@[{sub}:1])1c(!@[{sub}:2])c(!@[{sub}:3])c(!@[{sub}:4])c(OC(!@[{sub}:5])"
                    f"=C(c2c(!@[{sub}:6])c(!@[{sub}:7])c(!@[{sub}:8])c(!@[{sub}:9])c(!@[{sub}:10])2)C(=O)c1)")  # noqa

    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--database', default='CSD',
                        help='Path to the file to search or "CSD" to use the CSD')
    args = parser.parse_args()

    filtered_search = TrihydroxyIsoflavoneHitfinder(query_string, args.database)
    filtered_search.run()

    data = filtered_search.tabulate()
    sorted_ids = sorted(data.keys())
    # Column names are taken from the first hit; with no hits only the header is printed
    # instead of failing with an IndexError.
    info_keys = sorted(data[sorted_ids[0]].keys()) if sorted_ids else []

    print(f"Without filtering, we have: {len(filtered_search._hits_without_filtering)} hits")
    print(f"After filtering we have: {len(filtered_search._hits)} hits")

    # NOTE(review): values are joined with bare commas and are not quoted, so a
    # value that itself contains a comma will spill across columns.
    print(",".join(["Identifier"] + info_keys))
    for key in sorted_ids:
        datum = data[key]
        # .get() tolerates entries that lack a column present on the first hit.
        print(",".join([key] + [str(datum.get(x, "")) for x in info_keys]))
def process_structures(input_file, output_file):
    """Rewrite every entry of *input_file* into *output_file* with bond types reassigned.

    For each entry read, ``assign_bond_types('all')`` is applied to its
    molecule and a new entry built from that molecule, carrying the original
    entry's attributes, is written out.

    :param input_file: path handed to ``EntryReader``
    :param output_file: path handed to ``EntryWriter``
    """
    with EntryReader(input_file) as reader:
        with EntryWriter(output_file) as writer:
            for source_entry in reader:
                carried_attributes = source_entry.attributes
                mol = source_entry.molecule
                mol.assign_bond_types('all')
                writer.write(Entry.from_molecule(mol, attributes=carried_attributes))


if __name__ == "__main__":
    # Usage: process_pubchem_structures.py <input_file> <output_file>
    input_file, output_file = sys.argv[1], sys.argv[2]
    process_structures(input_file, output_file)
class CrossrefSearch:
    """
    Thin wrapper around habanero for looking up publication metadata in CrossRef.
    """

    def __init__(self, logger, email_address=None):
        """
        :param logger: a CCDC logger to log outcomes (None disables logging)
        :param email_address: An email address. If not None and a valid address, CrossRef will use the polite API
        """
        self._email_address = email_address
        self._logger = logger
        self._repeat_pause = 0.03  # seconds to wait before each registration-agency lookup
        self._not_available_message = "Not Available in CrossRef"

    def debug_log(self, msg):
        """Send ``msg`` to the logger at debug level, if a logger was supplied."""
        if self._logger is not None:
            self._logger.debug(msg)

    def is_crossref_doi(self, publication_doi):
        """
        Look up if a given doi is a CrossRef doi

        :param publication_doi: the DOI
        :return: True when CrossRef reports a registration agency for the DOI, otherwise False.
            None/empty DOIs return False without any network access.
        """
        if not publication_doi:
            return False

        time.sleep(self._repeat_pause)  # Be nice to crossref - dont hit it too hard.
        cr = Crossref(mailto=self._email_address)
        try:
            ra = cr.registration_agency(publication_doi)
        except (KeyError, HTTPError):
            return False
        # Always hand back a real bool; callers must not have to distinguish
        # None / '' / [] from False.
        return bool(ra)

    @staticmethod
    def _first_work_value(container, field):
        """Return ``field`` of the first work in a WorksContainer, or None if it is absent."""
        # NOTE(review): WorksContainer appears to create attributes only for keys
        # present in the first work, so a missing field shows up as a missing
        # attribute - confirm against the habanero version in use.
        values = getattr(container, field, None)
        return values[0] if values else None

    def crossref_record(self, publication_doi):
        """
        Generate a dictionary based on information in CrossRef for a publication DOI

        :param publication_doi: a publication DOI, possibly extracted from the CSD `ccdc.entry.Entry`
        :return: dictionary of display-ready strings. On a failed look-up it holds a
            single "Information" or "Result" message (plus anything gathered before the failure).
        """

        self.debug_log(f"Start CROSSRef tabulation {time.time() - TZERO}")
        data = {}
        if not self.is_crossref_doi(publication_doi):
            data["Information"] = "Paper DOI not available"
            return data

        result = None
        try:
            cr = Crossref(mailto=self._email_address)

            cr_data = WorksContainer(cr.works(publication_doi))
            citation_count = counts.citation_count(publication_doi)
            self.debug_log(f"Time after looking up publication doi in CrossRef: {time.time() - TZERO}")

            # The title of a work is itself a list of strings.
            title_list = self._first_work_value(cr_data, "title") or []
            if len(title_list) > 1:
                titles = "\n".join(title_list)
            elif len(title_list) == 1:
                titles = title_list[0]
            else:
                titles = "Not available"
            data["Titles"] = titles

            # A work without subject keywords must not abort the whole record.
            subject_list = self._first_work_value(cr_data, "subject")
            subjects = "; ".join(subject_list) if subject_list else self._not_available_message
            data["Scopus Subject Keywords"] = subjects

            try:
                first_author_and_institution = "None listed"
                if 'author' in cr_data.works[0]:
                    first_author_list = self.get_author_list(authors=cr_data.works[0]['author'])
                    first_author_and_institution = "; ".join(first_author_list)
            except AttributeError:
                first_author_and_institution = self._not_available_message

            data["First Author(s) & Affiliation(s)"] = first_author_and_institution

            data["Paper Citation Count"] = str(citation_count)

            abstract_text = self._first_work_value(cr_data, "abstract")
            if isinstance(abstract_text, str):
                # Drop the JATS namespace prefix from the markup tags.
                abstract = abstract_text.replace("jats:", "")
            else:
                abstract = self._not_available_message
            data["Abstract"] = abstract

            try:
                funder = "None listed"
                if 'funder' in cr_data.works[0]:
                    funder = "; ".join([x['name'] for x in cr_data.works[0]['funder']])
            except AttributeError:
                funder = self._not_available_message

            data["Funder(s)"] = funder

        except RequestError as err:
            result = f"Look up failed with an exception {err}"
            if err.status_code == 404:
                result = "DOI not found in CrossRef"
        except RequestException as e:
            result = f"Look up failed with an exception {e}"

        if result is not None:
            data["Result"] = result
        return data

    @staticmethod
    def get_author_list(authors):
        """
        Format the first author(s) of a CrossRef author list.

        :param authors: list of CrossRef author dictionaries
        :return: list of "Given Family (Affiliation, ...)" strings for every author whose
            sequence is 'first'. Authors that only carry a 'name' (e.g. consortia) or lack
            one of the name parts are formatted from whatever is present instead of raising KeyError.
        """
        first_author_list = []
        for author in authors:
            if author.get('sequence') != 'first':
                continue
            name_parts = [author[key] for key in ('given', 'family') if author.get(key)]
            v = " ".join(name_parts) if name_parts else author.get('name', 'Unknown author')
            affiliation_names = [a['name'] for a in author.get('affiliation') or [] if a.get('name')]
            if affiliation_names:
                affiliations = ", ".join(affiliation_names)
                v += f" ({affiliations})"
            first_author_list.append(v)
        return first_author_list
class FrequencyCalculator:
    """
    Word and bigram frequency statistics for a text corpus, built on nltk.
    """

    def __init__(self, text):
        """
        Tokenise, filter and lemmatise ``text`` and build the word frequency distribution.

        :param text: the raw corpus as a single string
        """
        self.stop = set(nltk.corpus.stopwords.words('english'))
        self.bigrams_to_ignore = set()
        self.bigram_words_to_ignore = set()

        def is_informative(token):
            # Drop short tokens (except "tin"), tokens starting with '/', and stop words.
            too_short = len(token) <= 3 and token.lower() != "tin"
            return not (too_short or token.startswith('/') or token in self.stop)

        lemmatizer = WordNetLemmatizer()
        collected = []
        for token in nltk.word_tokenize(text.lower()):
            if not is_informative(token):
                continue
            lemma = lemmatizer.lemmatize(token)
            # "bonding" is counted together with "bond".
            collected.append("bond" if lemma == "bonding" else lemma)
        self._words = collected

        self._fdist = nltk.FreqDist(self._words)

    def most_common_words(self, how_many=None):
        """
        :param how_many: number of entries wanted, or None for all of them
        :return: tuple (total number of retained words, list of (word, count) pairs)
        """
        ranked = self._fdist.most_common() if how_many is None else self._fdist.most_common(how_many)
        return len(self._words), ranked

    def write_word_frequency_table(self, how_many, filename='single_word_frequencies.csv'):
        """
        Write the ``how_many`` most common words to a CSV file with columns
        word, count and frequency (count as a percentage of all retained words).
        """
        total, ranked = self.most_common_words(how_many)

        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            table = writer(csvfile)
            table.writerow(['word', 'count', 'frequency'])
            for word, count in ranked:
                table.writerow([word, count, count * 100.0 / total])

    # The bigram filtering below is based on
    # https://medium.com/@nicharuch/collocations-identifying-phrases-that-act-like-individual-words-in-nlp-f58a93a2f84a

    def right_bigram_type(self, ngram):
        """
        Decide whether a bigram is worth keeping: no ignored/stop/blank words, the first
        word tagged as an adjective or noun, the second as a noun, and the pair not in
        ``bigrams_to_ignore``.

        :param ngram: the pair of words
        :return: True if the bigram should be kept
        """
        if '-pron-' in ngram or 't' in ngram:
            return False
        if any(w in self.stop or w.isspace() or w in self.bigram_words_to_ignore for w in ngram):
            return False

        noun_tags = ('NN', 'NNS', 'NNP', 'NNPS')
        first_word_tags = ('JJ', 'JJR', 'JJS') + noun_tags
        tagged = nltk.pos_tag(ngram)
        if tagged[0][1] not in first_word_tags or tagged[1][1] not in noun_tags:
            return False
        return ngram not in self.bigrams_to_ignore

    def calculate_bigram_frequencies(self):
        """
        :return: pandas DataFrame with columns 'bigram' and 'freq', sorted by descending
            frequency and restricted to bigrams accepted by :meth:`right_bigram_type`
        """
        finder = nltk.collocations.BigramCollocationFinder.from_words(self._words)
        counts_by_bigram = list(finder.ngram_fd.items())
        table = pd.DataFrame(counts_by_bigram, columns=['bigram', 'freq'])
        table = table.sort_values(by='freq', ascending=False)
        wanted = table.bigram.map(self.right_bigram_type)
        return table[wanted]


def make_word_cloud_from_text(text, fname):
    """Render a word cloud of ``text`` to the image file ``fname``."""
    # lower max_font_size
    cloud = WordCloud(max_font_size=40).generate(text)
    cloud.to_file(fname)


def make_word_cloud_from_frequencies(dataframe, min_count, fname):
    """
    Render a word cloud from a two-column (words, count) table, keeping only rows
    whose count is at least ``min_count``, and save it to ``fname``.
    """
    frequencies = {" ".join(words): count for words, count in dataframe.values if count >= min_count}

    cloud = WordCloud(width=800, height=600, background_color='white', prefer_horizontal=0.5)
    cloud.generate_from_frequencies(frequencies=frequencies)
    cloud.to_file(fname)


def word_frequency_analysis(min_bigrams, text_to_process, fname):
    """Build a bigram word cloud for ``text_to_process`` and write it to ``fname``."""
    calculator = FrequencyCalculator(text_to_process)
    make_word_cloud_from_frequencies(calculator.calculate_bigram_frequencies(), min_bigrams, fname)
class OpenAlexSearcher:
    """
    Builds OpenAlex API URLs and fetches them through ``url_requesting.URLRequest``.
    """

    def __init__(self, logger=None, email=None):
        """
        :param logger: optional logger passed on to the URL requester
        :param email: optional contact address appended to request URLs
        """
        self.logger = logger
        self.email = email

    def run(self, url):
        """Fetch ``url`` and return whatever ``URLRequest.run`` returns for it."""
        return url_requesting.URLRequest(self.logger).run(url)

    def add_parameters(self, url, select_parameters):
        """
        Append the contact email (when configured and not already present) and a
        ``select=`` field list to ``url``.

        The separator is chosen from the URL itself, so URLs that already carry a
        query string (for example a ``cited_by_api_url``) are extended with ``&``
        rather than a second ``?``.

        :param url: the URL to extend
        :param select_parameters: sequence of field names to request; may be empty
        :return: the extended URL (``url`` unchanged when there is nothing to add)
        """
        # NOTE(review): OpenAlex documents `mailto` as the polite-pool parameter;
        # confirm that `email` is accepted before relying on it.
        extra = []
        if self.email and "email=" not in url:
            extra.append(f"email={self.email}")
        if select_parameters:
            extra.append(f"select={','.join(select_parameters)}")
        if not extra:
            return url

        if url.endswith(('?', '&')):
            separator = ''
        elif '?' in url:
            separator = '&'
        else:
            separator = '?'
        return f"{url}{separator}{'&'.join(extra)}"

    def convert_to_api_url(self, raw_url):
        """Turn an ``https://openalex.org/<id>`` identifier into the matching API works URL."""
        return raw_url.replace("https://openalex.org/", "https://api.openalex.org/works/")

    def works(self, doi, select_parameters):
        """
        Fetch the OpenAlex work record for ``doi``.

        :param doi: the DOI (without the https://doi.org/ prefix)
        :param select_parameters: fields to restrict the response to
        :return: the decoded response from :meth:`run`
        """
        ref = f'https://api.openalex.org/works/https://doi.org/{doi}'
        # add_parameters returns a new string; its result used to be discarded, so
        # neither the select list nor the email ever reached the request.
        ref = self.add_parameters(ref, select_parameters)
        return self.run(ref)
class WorkCitationsAndReferences:
    """
    Collects (doi, title) pairs for the works citing, and referenced by, a root DOI.
    """

    def __init__(self, logger=None, email=None):
        """
        Get doi for all citations and references to a root doi
        """
        self._searcher = OpenAlexSearcher(logger, email)
        self.reference_info = []  # (doi or None, title) for each referenced work
        self.citation_info = []  # (doi, title or None) for each citing work that has a DOI
        self._logger = logger

    def run(self, doi):
        """
        Query OpenAlex for ``doi`` and fill ``citation_info`` and ``reference_info``.

        :param doi: the root DOI
        """
        raw_data = self._searcher.works(doi, ['cited_by_api_url', 'referenced_works'])
        citations_url = self._searcher.add_parameters(raw_data['cited_by_api_url'], ['doi', 'title'])
        citations = self._searcher.run(citations_url)

        # Citing works without a DOI are skipped. Titles are passed through as-is
        # (they may be None); the former encode/decode round trip did nothing for
        # valid text and raised AttributeError on a missing title.
        # NOTE(review): only the 'results' of this single response are read -
        # confirm whether paging through further results is needed.
        self.citation_info = [
            (work['doi'].replace("https://doi.org/", ""), work['title'])
            for work in citations['results'] if work['doi'] is not None
        ]

        # Alas have to do magic with the references: they arrive as OpenAlex ids,
        # so each one needs its own look-up to obtain a DOI and title.
        self.reference_info = []
        for raw_reference in raw_data['referenced_works']:
            reference_url = self._searcher.convert_to_api_url(raw_reference)
            reference_url = self._searcher.add_parameters(reference_url, ['title', 'doi'])
            ref_data = self._searcher.run(reference_url)

            # Use a separate name so the `doi` argument is not overwritten.
            reference_doi = None
            if ref_data['doi'] is not None:
                reference_doi = ref_data['doi'].replace("https://doi.org/", "")

            self.reference_info.append((reference_doi, ref_data['title']))


def test_work_citation():
    """Manual smoke test; needs network access to OpenAlex."""
    x = WorkCitationsAndReferences(email=None)
    x.run('10.1021/jp103212z')
    print(x.citation_info)
    print(x.reference_info)

    try:
        x.run('10.8989/junk')
        print("Broken - should throw with an unknown doi")
    except url_requesting.URLRequestError as e:
        print(f"Threw as expected {e}")


if __name__ == "__main__":
    test_work_citation()
class ReferencesAndCitationsError(RuntimeError):
    """Raised when citation/reference metadata cannot be retrieved from a data source."""

    def __init__(self, msg):
        super().__init__(msg)


class ReferencesAndCitations:
    """
    Wrapper class to allow different data sources for harvesting citation and reference information
    """

    def __init__(self, logger=None, email_address=None, datasource="openalex"):
        """
        :param logger: optional logger handed to the underlying searchers
        :param email_address: optional contact address handed to the underlying searchers
        :param datasource: "openalex", "opencitations"; any other value queries both
        """
        self._logger = logger
        self._email_address = email_address
        self._datasource = datasource
        self._citation_info = []  # accumulated (doi, title) pairs of citing works
        self._reference_info = []  # accumulated (doi, title) pairs of referenced works

    def _harvest(self, source_module, source_name, doi):
        """Run ``source_module.WorkCitationsAndReferences`` for ``doi`` and accumulate its results."""
        try:
            metadata_searcher = source_module.WorkCitationsAndReferences(self._logger, self._email_address)
            metadata_searcher.run(doi)
        except url_requesting.URLRequestError as e:
            # Chain the original error so the failing request stays visible in tracebacks.
            raise ReferencesAndCitationsError(
                f"Unable to access meta-data from {source_name} for related DOIs due to exception {e}") from e
        self._citation_info += metadata_searcher.citation_info
        self._reference_info += metadata_searcher.reference_info

    def _openalex(self, doi):
        self._harvest(openalex, "openalex", doi)

    def _opencitations(self, doi):
        self._harvest(opencitations, "opencitations", doi)

    def run(self, doi):
        """
        Harvest citations and references of ``doi`` from the configured data source(s).

        :raises ReferencesAndCitationsError: if a data source cannot be queried
        """
        if self._datasource == "openalex":
            self._openalex(doi)
        elif self._datasource == "opencitations":
            self._opencitations(doi)
        else:
            self._openalex(doi)
            self._opencitations(doi)

    @staticmethod
    def _unique(values):
        """Return the non-None values once each, in first-seen order."""
        return list(dict.fromkeys(v for v in values if v is not None))

    @property
    def reference_titles(self):
        """Unique titles of the referenced works."""
        return self._unique(t[1] for t in self._reference_info)

    @property
    def citation_titles(self):
        """Unique titles of the citing works."""
        return self._unique(t[1] for t in self._citation_info)

    @property
    def reference_dois(self):
        """Unique DOIs of the referenced works."""
        return self._unique(t[0] for t in self._reference_info)

    @property
    def citation_dois(self):
        """Unique DOIs of the citing works."""
        return self._unique(t[0] for t in self._citation_info)
class URLRequestError(RuntimeError):
    """Raised when a URL cannot be fetched or its body cannot be decoded as JSON."""

    def __init__(self, msg, status_code):
        """
        :param msg: human readable description
        :param status_code: HTTP status code of the failure, or -9999 when there is none
        """
        super().__init__(msg)
        self.status_code = status_code


class URLRequest:
    """GET a URL with a small number of retries and return the decoded JSON body."""

    def __init__(self, logger):
        """
        :param logger: logger for debug/warning messages, or None to stay silent
        """
        self._logger = logger

    def run(self, url, retries=3):
        """
        Fetch ``url`` and return its JSON payload.

        Responses with a status in ``retry_codes`` are retried up to ``retries``
        times, sleeping 0, 1, 2, ... seconds between attempts.

        :param url: the URL to GET
        :param retries: maximum number of attempts
        :return: the decoded JSON body
        :raises URLRequestError: on a non-retryable HTTP error, when every attempt
            failed with a retryable status, or when the body is not valid JSON
        """
        retry_codes = (
            HTTPStatus.TOO_MANY_REQUESTS,
            HTTPStatus.INTERNAL_SERVER_ERROR,
            HTTPStatus.BAD_GATEWAY,
            HTTPStatus.SERVICE_UNAVAILABLE,
            HTTPStatus.GATEWAY_TIMEOUT,
        )
        last_error = None
        last_code = -9999
        for n in range(retries):
            try:
                # NOTE(review): no timeout is passed, so a stalled server can block
                # indefinitely - consider whether callers want one.
                request = requests.get(url)
                request.raise_for_status()
            except HTTPError as exc:
                code = exc.response.status_code
                if code not in retry_codes:
                    if self._logger is not None:
                        self._logger.warning(f"Unhandleable exception raised {exc} for {url}")
                    raise URLRequestError(f"Query failed: {exc} {url}", code) from exc
                last_error, last_code = exc, code
                if self._logger is not None:
                    self._logger.debug(f"Exception raised {exc} for {url} - will retry")
                if n + 1 < retries:
                    # No point in waiting after the final attempt.
                    time.sleep(n)
                continue
            if self._logger is not None:
                self._logger.debug(f"{url} - success!")
            break
        else:
            # Every attempt failed (or none was made). Previously execution fell
            # through and tried to decode the error response as if it were data.
            if self._logger is not None:
                self._logger.warning(f"Giving up on {url} after {retries} attempt(s): {last_error}")
            raise URLRequestError(f"Query failed after {retries} attempt(s): {last_error} {url}",
                                  last_code) from last_error

        try:
            return request.json()
        except requests.exceptions.JSONDecodeError as exc:
            if self._logger is not None:
                self._logger.warning(f"Decode of request failed {request} {url}")
            raise URLRequestError(f"Decode of request failed {request} {url}", -9999) from exc
early_termination = 1 53 | n_top_solutions = 3 54 | rms_tolerance = 1.5 55 | 56 | COVALENT BONDING 57 | covalent = 0 58 | 59 | SAVE OPTIONS 60 | save_score_in_file = 1 61 | save_protein_torsions = 1 62 | 63 | FITNESS FUNCTION SETTINGS 64 | initial_virtual_pt_match_max = 3 65 | relative_ligand_energy = 1 66 | gold_fitfunc_path = plp 67 | score_param_file = DEFAULT 68 | 69 | PROTEIN DATA 70 | protein_datafile = 1FM9_protein.mol2 71 | 72 | 73 | -------------------------------------------------------------------------------- /api_paper_2024/example_3/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Similarity-driven docking 2 | 3 | A key feature used in this script is GOLD in interactive docking mode; 4 | this allows a user to set up a GOLD daemon object on a socket and send individual molecules to the socket for docking. 5 | This avoids repeated initialisation when used to receive ligands from external data sources. 6 | The ChEMBL Python API (Davies et al., 2015) is used to facilitate similarity searching in the ChEMBL database. 7 | 8 | ## Dependencies 9 | 10 | Optional: 11 | 12 | - chembl_webresource_client 13 | 14 | ## To Run 15 | 16 | ```bash 17 | python similarity_docking.py Example/unconstrained.conf 18 | ``` 19 | -------------------------------------------------------------------------------- /api_paper_2024/example_4/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Voronoi 2 | 3 | ## Visualising metal-metal interactions using Voronoi Tessellation 4 | 5 | CCDC interface to Voronoi polyhedra which encapsulate the atomic / metal domains 6 | 7 | ## Dependencies 8 | 9 | - `plotly` 10 | - `pyvoro` 11 | 12 | ## To Run 13 | 14 | For general use, it is best to run mercury_molecular_voronoi.py through Mercury - 15 | this script will output an HTML with the Voronoi graph. 
More complicated queries can 16 | be achieved with the notebook (see notebooks/voronoi) or command line as below. 17 | 18 | ```cmd 19 | usage: voronoi.py [-h] [-m MAXHITS] [-o OUTPUT] [-v] [-w WEIGHTING] [-c COLUMNS] 20 | [-r RADIUS] [-mo] 21 | idents [idents ...] 22 | 23 | positional arguments: 24 | idents the files/refcodes that will be analysed (or .gcd) 25 | 26 | optional arguments: 27 | -h, --help show this help message and exit 28 | -m MAXHITS, --maxhits MAXHITS 29 | number of hits 30 | -o OUTPUT, --output OUTPUT 31 | output location / filetype (.pkl only) 32 | -v, --verbose print as you go 33 | -w WEIGHTING, --weighting WEIGHTING 34 | weighting scheme (vdw, equal, empirical, calculated) 35 | -c COLUMNS, --columns COLUMNS 36 | output columns (short, all) 37 | -r RADIUS, --radius RADIUS 38 | maximum radius for interactions 39 | -mo, --metal_only metal-type interactions (for magnets etc.) 40 | ``` 41 | 42 | ## Author 43 | 44 | Chris Kingsbury (2024) 45 | -------------------------------------------------------------------------------- /api_paper_2024/example_4/mercury_metal_voronoi.py: -------------------------------------------------------------------------------- 1 | # 2 | # This script can be used for any purpose without limitation subject to the 3 | # conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx 4 | # 5 | # This permission notice and the following statement of attribution must be 6 | # included in all copies or substantial portions of this script. 
#
# 2023-07-05 Created by Chris Kingsbury, the Cambridge Crystallographic Data Centre
# ORCID 0000-0002-4694-5566
#
# Mercury interface to the metal Voronoi polyhedra which encapsulate atoms
# with weighting schemes available
#
# This script only overrides a few of the shared defaults (metal-only mode,
# no weighting scheme) and hands them to voronoi.run_crystal_voronoi.
#

import voronoi

# Work on a copy so the overrides below are applied to this script's settings
# rather than to voronoi.DEFAULT_SETTINGS itself (assumes copy() returns a new
# object - confirm in voronoi.py).
new_settings = voronoi.DEFAULT_SETTINGS.copy()
new_settings.update({
    "radius": 20.0,  # Angstroms; radius for the substructure contacts
    "opacity": 0.95,
    "metal_only": True,
    "weighting": None,
})

if __name__ == "__main__":
    voronoi.run_crystal_voronoi(settings=new_settings)
#
# 2023-07-05 Created by Chris Kingsbury, the Cambridge Crystallographic Data Centre
# ORCID 0000-0002-4694-5566
#
# Mercury interface to the molecular Voronoi polyhedra which encapsulate atoms
# with weighting schemes available
#
# This script only overrides a few of the shared defaults (all atoms rather than
# metals only, van der Waals weighting) and hands them to voronoi.run_crystal_voronoi.
#

import voronoi

# Work on a copy so the overrides below are applied to this script's settings
# rather than to voronoi.DEFAULT_SETTINGS itself (assumes copy() returns a new
# object - confirm in voronoi.py).
new_settings = voronoi.DEFAULT_SETTINGS.copy()
new_settings.update({
    "radius": 10.0,  # Angstroms; radius for the substructure contacts
    "opacity": 1.0,
    "metal_only": False,
    "weighting": voronoi.WEIGHTING['vdw'],
})

if __name__ == "__main__":
    voronoi.run_crystal_voronoi(new_settings)
def calculate_morphology(structure: Crystal) -> MorphologyBase:
    """Return the morphology of a crystal; the BFDH model is used as the example."""
    return BFDHMorphology(structure)


def load_crystal(name: str) -> Crystal:
    """Fetch a crystal from the CSD by refcode. Swap in any cif/mol2 loader here if needed."""
    csd_reader = CrystalReader('CSD')
    return csd_reader.crystal(name)


def combine_graphs(zingg: PlotlyZingg, particle: PlotlyParticle) -> go.Figure:
    """Place the Zingg diagram (left) and the 3D particle (right) in a single figure."""
    figure = make_subplots(rows=1, cols=2,
                           specs=[[{'type': 'scatter'}, {'type': 'scatter3d'}]],
                           column_widths=[0.6, 0.4])

    # Left panel: the Zingg traces plus the zone shapes and labels drawn on its layout.
    zingg_figure = zingg.fig
    figure.add_traces(zingg_figure.data, rows=1, cols=1)
    for zone_shape in zingg_figure.layout.shapes:
        figure.add_shape(zone_shape, row=1, col=1)
    for zone_label in zingg_figure.layout.annotations:
        figure.add_annotation(zone_label, row=1, col=1)

    # Right panel: the particle traces, re-using the scene settings of the particle figure.
    figure.add_traces(particle.fig.data, rows=1, cols=2)
    figure.update_scenes(particle.fig.layout.scene)

    legend_settings = {
        'orientation': "h",
        'yanchor': "bottom",
        'xanchor': "center",
        'x': 0.5,
        'y': 1,
    }
    figure.update_layout(yaxis={'title': 'M / L', 'range': [0, 1]},
                         xaxis={'title': 'S / M', 'range': [0, 1]},
                         template='simple_white',
                         font_family="Courier New",
                         font_size=20,
                         title="Shape Classification",
                         scene_aspectmode="data",
                         legend=legend_settings)
    return figure


def main(args: argparse.Namespace) -> None:
    """Classify the shape of the requested structure and write the combined plot to HTML."""
    refcode = args.refcode

    morphology = calculate_morphology(structure=load_crystal(name=refcode))

    particle_plot = PlotlyParticle(morphology=morphology)
    zingg_plot = PlotlyZingg(zone_opacity=0.1)
    bounding_box = morphology.oriented_bounding_box
    zingg_plot.plot_shape(major_length=bounding_box.major_length,
                          medium_length=bounding_box.median_length,
                          minor_length=bounding_box.minor_length, name=refcode)

    combined = combine_graphs(zingg=zingg_plot, particle=particle_plot)
    offline.plot(figure_or_data=combined, filename=f"{refcode}_shape_classification.html")


def arg_parser() -> argparse.Namespace:
    """Parse the command line: a single positional refcode."""
    cli = argparse.ArgumentParser(description="Particle Shape Classifier")
    cli.add_argument("refcode", help="Refcode referencing the CSD or in-house database")
    return cli.parse_args()


if __name__ == "__main__":
    args = arg_parser()
    main(args)
# Placeholder used for annotations at runtime; the real class is only imported
# for static type checking.
MorphologyBase = TypeVar('MorphologyBase')

if TYPE_CHECKING:
    from ccdc.morphology import MorphologyBase


class ShapeClassification:
    """
    Calculate parameters for shape classification as defined by Angelidakis et al. in
    Powder Technology, 396 (2022), 689-695
    """

    def __init__(self, major: float, medium: float, minor: float):
        """Store the three bounding-box lengths of the particle."""
        self.major_length = major
        self.medium_length = medium
        self.minor_length = minor

    @classmethod
    def from_morphology(cls, morphology: MorphologyBase):
        """Build the classifier from the oriented bounding box of a morphology"""
        box = morphology.oriented_bounding_box
        return cls(major=box.major_length, medium=box.median_length, minor=box.minor_length)

    def shape_classification_data(self):
        """ returns the shape label and the two axis ratios as a dictionary for a database """
        label = self.shape_description
        short_over_medium = self.minor_length / self.medium_length
        medium_over_long = self.medium_length / self.major_length
        return {"shape_classification": label, 'S/M': short_over_medium, 'M/L': medium_over_long}

    @property
    def elongation(self) -> float:
        # L*S / (L*S + M^2)  -  S / (L + S)
        long_short = self.major_length * self.minor_length
        first_term = long_short / (long_short + self.medium_length ** 2)
        return first_term - self.minor_length / (self.major_length + self.minor_length)

    @property
    def flatness(self) -> float:
        # M^2 / (L*S + M^2)  -  S / (L + S)
        medium_squared = self.medium_length ** 2
        first_term = medium_squared / (self.major_length * self.minor_length + medium_squared)
        return first_term - self.minor_length / (self.major_length + self.minor_length)

    @property
    def compactness(self) -> float:
        # 2S / (L + S)
        return 2 * self.minor_length / (self.major_length + self.minor_length)

    @property
    def shape_description(self) -> str:
        """Return the classification of the shape: Lath, Needle, Block or Plate"""
        elongated = self.elongation
        if elongated >= 0.2:
            return "Lath" if self.flatness >= 0.2 else "Needle"
        if elongated <= 0.2:
            return "Block" if self.flatness <= 0.2 else "Plate"
        # Reached only when neither comparison holds (e.g. NaN lengths).
        return None
https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/assets/csd-python-api-logo.png -------------------------------------------------------------------------------- /assets/download_zip.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/assets/download_zip.gif -------------------------------------------------------------------------------- /assets/search.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/assets/search.gif -------------------------------------------------------------------------------- /assets/single_download.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/assets/single_download.gif -------------------------------------------------------------------------------- /notebooks/CoRE-MOF/README.md: -------------------------------------------------------------------------------- 1 | # CoRE-MOF 2 | 3 | ## Introduction 4 | 5 | This package contains shared functionality that is used to export the entries used by CoRE-MOF. 6 | 7 | download_unmodified_MOFs_from_CSD.ipynb: a script for downloading the structures of the CSD-unmodified dataset. 8 | list_coremof_csd_unmodified_20250227.json: a list of the structures in the CSD-unmodified dataset. 9 | structures: example output from a successful run of the script. 
10 | 11 | ## Requirements 12 | 13 | [CoRE-MOF tools](https://coremof-tools.readthedocs.io/en/latest/index.html), [PACMAN Charge](https://pubs.acs.org/doi/10.1021/acs.jctc.4c00434), CSD Python API 14 | 15 | ## Licensing Requirements 16 | 17 | CSD-Core or better 18 | 19 | ## Authors 20 | 21 | [CoRE-MOF](https://coremof-tools.readthedocs.io/en/latest/index.html) 22 | -------------------------------------------------------------------------------- /notebooks/CoRE-MOF/structures/CR/ASR/ACODAA.cif: -------------------------------------------------------------------------------- 1 | 2 | ####################################################################### 3 | # 4 | # Cambridge Crystallographic Data Centre 5 | # CCDC 6 | # 7 | ####################################################################### 8 | # 9 | # If this CIF has been generated from an entry in the Cambridge 10 | # Structural Database, then it will include bibliographic, chemical, 11 | # crystal, experimental, refinement or atomic coordinate data resulting 12 | # from the CCDC's data processing and validation procedures. 
13 | # 14 | ####################################################################### 15 | 16 | data_ACODAA 17 | _symmetry_cell_setting tetragonal 18 | _symmetry_space_group_name_H-M 'P 42/m m c' 19 | _symmetry_Int_Tables_number 131 20 | _space_group_name_Hall '-P 4c 2' 21 | loop_ 22 | _symmetry_equiv_pos_site_id 23 | _symmetry_equiv_pos_as_xyz 24 | 1 x,y,z 25 | 2 -x,-y,z 26 | 3 -y,x,1/2+z 27 | 4 y,-x,1/2+z 28 | 5 -x,y,-z 29 | 6 x,-y,-z 30 | 7 y,x,1/2-z 31 | 8 -y,-x,1/2-z 32 | 9 -x,-y,-z 33 | 10 x,y,-z 34 | 11 y,-x,1/2-z 35 | 12 -y,x,1/2-z 36 | 13 x,-y,z 37 | 14 -x,y,z 38 | 15 -y,-x,1/2+z 39 | 16 y,x,1/2+z 40 | _cell_length_a 9.205(2) 41 | _cell_length_b 9.205(2) 42 | _cell_length_c 7.2374(19) 43 | _cell_angle_alpha 90 44 | _cell_angle_beta 90 45 | _cell_angle_gamma 90 46 | _cell_volume 613.24 47 | loop_ 48 | _atom_site_label 49 | _atom_site_type_symbol 50 | _atom_site_fract_x 51 | _atom_site_fract_y 52 | _atom_site_fract_z 53 | Fe1 Fe 0.5000 0.5000 0.2500 54 | N1 N 0.5000 0.6840(10) 0.4056(13) 55 | C1 C 0.5000 0.918(2) 0.5000 56 | C2 C 0.5000 0.8243(14) 0.352(2) 57 | H1 H 0.5000 0.8547 0.2262 58 | N1A N 0.5000 1.3160(10) 0.4056(13) 59 | C1A C 0.5000 1.082(2) 0.5000 60 | C2A C 0.5000 1.1757(14) 0.352(2) 61 | H1A H 0.5000 1.1453 0.2262 62 | N1D N 0.5000 0.6840(10) 0.5944(13) 63 | C2D C 0.5000 0.8243(14) 0.648(2) 64 | H1D H 0.5000 0.8547 0.7738 65 | N1E N 0.5000 1.3160(10) 0.5944(13) 66 | C2E C 0.5000 1.1757(14) 0.648(2) 67 | H1E H 0.5000 1.1453 0.7738 68 | Fe1_2 Fe 0.5000 1.5000 0.2500 69 | Fe1B Fe 0.5000 0.5000 0.7500 70 | Fe1B_2 Fe 0.5000 1.5000 0.7500 71 | N1A_2 N 0.5000 0.3160(10) 0.4056(13) 72 | N1F N 0.6840(10) 0.5000 0.0944(13) 73 | N1G N 0.3160(10) 0.5000 0.0944(13) 74 | loop_ 75 | _geom_bond_atom_site_label_1 76 | _geom_bond_atom_site_label_2 77 | _geom_bond_site_symmetry_1 78 | _geom_bond_site_symmetry_2 79 | Fe1 N1 1_555 1_555 80 | N1 C2 1_555 1_555 81 | C1 C2 1_555 1_555 82 | C2 H1 1_555 1_555 83 | N1A C2A 1_555 1_555 84 | C1A C1 1_555 1_555 85 | C2A C1A 
1_555 1_555 86 | H1A C2A 1_555 1_555 87 | N1D N1 1_555 1_555 88 | C2D C1 1_555 1_555 89 | H1D C2D 1_555 1_555 90 | N1E N1A 1_555 1_555 91 | C2E C1A 1_555 1_555 92 | H1E C2E 1_555 1_555 93 | Fe1_2 N1A 1_555 1_555 94 | Fe1B N1D 1_555 1_555 95 | Fe1B_2 N1E 1_555 1_555 96 | N1A_2 Fe1 1_555 1_555 97 | N1F Fe1 1_555 1_555 98 | N1G Fe1 1_555 1_555 99 | N1D C2D 1_555 1_555 100 | N1E C2E 1_555 1_555 101 | 102 | #END 103 | -------------------------------------------------------------------------------- /notebooks/CoRE-MOF/structures/CR/ASR/ADABAK.cif: -------------------------------------------------------------------------------- 1 | 2 | ####################################################################### 3 | # 4 | # Cambridge Crystallographic Data Centre 5 | # CCDC 6 | # 7 | ####################################################################### 8 | # 9 | # If this CIF has been generated from an entry in the Cambridge 10 | # Structural Database, then it will include bibliographic, chemical, 11 | # crystal, experimental, refinement or atomic coordinate data resulting 12 | # from the CCDC's data processing and validation procedures. 
13 | # 14 | ####################################################################### 15 | 16 | data_ADABAK 17 | _symmetry_cell_setting monoclinic 18 | _symmetry_space_group_name_H-M 'C 2/c' 19 | _symmetry_Int_Tables_number 15 20 | _space_group_name_Hall '-C 2yc' 21 | loop_ 22 | _symmetry_equiv_pos_site_id 23 | _symmetry_equiv_pos_as_xyz 24 | 1 x,y,z 25 | 2 -x,y,1/2-z 26 | 3 1/2+x,1/2+y,z 27 | 4 1/2-x,1/2+y,1/2-z 28 | 5 -x,-y,-z 29 | 6 x,-y,1/2+z 30 | 7 1/2-x,1/2-y,-z 31 | 8 1/2+x,1/2-y,1/2+z 32 | _cell_length_a 28.697(2) 33 | _cell_length_b 9.2637(5) 34 | _cell_length_c 9.3223(5) 35 | _cell_angle_alpha 90 36 | _cell_angle_beta 116.087(4) 37 | _cell_angle_gamma 90 38 | _cell_volume 2225.78 39 | loop_ 40 | _atom_site_label 41 | _atom_site_type_symbol 42 | _atom_site_fract_x 43 | _atom_site_fract_y 44 | _atom_site_fract_z 45 | _atom_site_U_iso_or_equiv 46 | _atom_site_thermal_displace_type 47 | Cu1 Cu 0.18682(19) 0.8289(6) 0.3113(6) 0.0360 Uiso 48 | O1 O 0.1250(6) 1.0750(18) 0.340(2) 0.0620 Uiso 49 | O2 O 0.1426(6) 1.177(2) 0.5772(18) 0.0620 Uiso 50 | N1 N 0.2178(5) 1.0042(18) 0.7372(14) 0.0620 Uiso 51 | N2 N 0.2550(4) 0.9029(19) 0.7914(13) 0.0620 Uiso 52 | N3 N 0.2522(4) 0.8284(14) 0.6666(17) 0.0620 Uiso 53 | N4 N 0.2139(5) 0.8802(17) 0.5343(14) 0.0620 Uiso 54 | N5 N 0.1415(3) 0.6590(9) 0.3297(12) 0.0620 Uiso 55 | C1 C 0.1493(3) 1.0864(14) 0.485(2) 0.0620 Uiso 56 | C2 C 0.1933(3) 0.9878(11) 0.5802(13) 0.0620 Uiso 57 | C3 C 0.1077(5) 0.7170(9) 0.3780(15) 0.0620 Uiso 58 | C4 C 0.0702(4) 0.6392(10) 0.4014(13) 0.0620 Uiso 59 | C5 C 0.06779(17) 0.4916(10) 0.3722(5) 0.0620 Uiso 60 | C6 C 0.1021(3) 0.4281(9) 0.3224(14) 0.0620 Uiso 61 | C7 C 0.1382(3) 0.5156(9) 0.3026(16) 0.0620 Uiso 62 | C8 C 0.02799(14) 0.4000(12) 0.39533(13) 0.0620 Uiso 63 | C9 C 0.00000 0.3057(13) 0.25000 0.0620 Uiso 64 | H1 H 0.109707 0.819031 0.397655 0.0747 Uiso 65 | H2 H 0.046969 0.685455 0.436102 0.0747 Uiso 66 | H3 H 0.100856 0.326244 0.302152 0.0747 Uiso 67 | H4 H 0.161855 0.471912 0.268001 0.0747 
Uiso 68 | H5 H 0.003934 0.46015 0.409283 0.0747 Uiso 69 | H6 H 0.044724 0.341391 0.486102 0.0747 Uiso 70 | H7 H 0.02337 0.24148 0.23773 0.0747 Uiso 71 | Cu1A Cu -0.18682(19) 0.8289(6) 0.1887(6) 0.0360 Uiso 72 | Cu1D Cu -0.18682(19) 1.1711(6) -0.3113(6) 0.0360 Uiso 73 | Cu1E Cu 0.18682(19) 1.1711(6) 0.8113(6) 0.0360 Uiso 74 | Cu1F Cu 0.31318(19) 0.6711(6) 0.6887(6) 0.0360 Uiso 75 | Cu1G Cu -0.31318(19) 0.6711(6) -0.1887(6) 0.0360 Uiso 76 | O1A O -0.1250(6) 1.0750(18) 0.160(2) 0.0620 Uiso 77 | O2A O -0.1426(6) 1.177(2) -0.0772(18) 0.0620 Uiso 78 | O2D O -0.1426(6) 0.823(2) 0.4228(18) 0.0620 Uiso 79 | O2E O 0.1426(6) 0.823(2) 0.0772(18) 0.0620 Uiso 80 | N1A N -0.2178(5) 1.0042(18) -0.2372(14) 0.0620 Uiso 81 | N1D N -0.2178(5) 0.9958(18) 0.2628(14) 0.0620 Uiso 82 | N1E N 0.2178(5) 0.9958(18) 0.2372(14) 0.0620 Uiso 83 | N2A N -0.2550(4) 0.9029(19) -0.2914(13) 0.0620 Uiso 84 | N3A N -0.2522(4) 0.8284(14) -0.1666(17) 0.0620 Uiso 85 | N3F N 0.2478(4) 0.6716(14) 0.3334(17) 0.0620 Uiso 86 | N3G N -0.2478(4) 0.6716(14) 0.1666(17) 0.0620 Uiso 87 | N4A N -0.2139(5) 0.8802(17) -0.0343(14) 0.0620 Uiso 88 | N5A N -0.1415(3) 0.6590(9) 0.1703(12) 0.0620 Uiso 89 | C1A C -0.1493(3) 1.0864(14) 0.015(2) 0.0620 Uiso 90 | C2A C -0.1933(3) 0.9878(11) -0.0802(13) 0.0620 Uiso 91 | C3A C -0.1077(5) 0.7170(9) 0.1220(15) 0.0620 Uiso 92 | C4A C -0.0702(4) 0.6392(10) 0.0986(13) 0.0620 Uiso 93 | C5A C -0.06779(17) 0.4916(10) 0.1278(5) 0.0620 Uiso 94 | C6A C -0.1021(3) 0.4281(9) 0.1776(14) 0.0620 Uiso 95 | C7A C -0.1382(3) 0.5156(9) 0.1974(16) 0.0620 Uiso 96 | C8A C -0.02799(14) 0.4000(12) 0.10467(13) 0.0620 Uiso 97 | H1A H -0.109707 0.819031 0.102345 0.0747 Uiso 98 | H2A H -0.046969 0.685455 0.063898 0.0747 Uiso 99 | H3A H -0.100856 0.326244 0.197848 0.0747 Uiso 100 | H4A H -0.161855 0.471912 0.231999 0.0747 Uiso 101 | H5A H -0.003934 0.46015 0.090717 0.0747 Uiso 102 | H6A H -0.044724 0.341391 0.013898 0.0747 Uiso 103 | H7A H -0.02337 0.24148 0.26227 0.0747 Uiso 104 | loop_ 105 | 
_geom_bond_atom_site_label_1 106 | _geom_bond_atom_site_label_2 107 | _geom_bond_site_symmetry_1 108 | _geom_bond_site_symmetry_2 109 | Cu1 N4 1_555 1_555 110 | O1 C1 1_555 1_555 111 | O2 C1 1_555 1_555 112 | N1 N2 1_555 1_555 113 | N2 N3 1_555 1_555 114 | N3 N4 1_555 1_555 115 | N4 C2 1_555 1_555 116 | N5 Cu1 1_555 1_555 117 | C1 C2 1_555 1_555 118 | C2 N1 1_555 1_555 119 | C3 N5 1_555 1_555 120 | C4 C3 1_555 1_555 121 | C5 C4 1_555 1_555 122 | C6 C5 1_555 1_555 123 | C7 N5 1_555 1_555 124 | C8 C5 1_555 1_555 125 | C9 C8 1_555 1_555 126 | H1 C3 1_555 1_555 127 | H2 C4 1_555 1_555 128 | H3 C6 1_555 1_555 129 | H4 C7 1_555 1_555 130 | H5 C8 1_555 1_555 131 | H6 C8 1_555 1_555 132 | H7 C9 1_555 1_555 133 | Cu1A O2D 1_555 1_555 134 | Cu1D O2A 1_555 1_555 135 | Cu1E O2 1_555 1_555 136 | Cu1F N3 1_555 1_555 137 | Cu1G N3A 1_555 1_555 138 | O1A C1A 1_555 1_555 139 | O2A C1A 1_555 1_555 140 | O2E Cu1 1_555 1_555 141 | N1A Cu1D 1_555 1_555 142 | N1D Cu1A 1_555 1_555 143 | N1E Cu1 1_555 1_555 144 | N2A N1A 1_555 1_555 145 | N3A N2A 1_555 1_555 146 | N3F Cu1 1_555 1_555 147 | N3G Cu1A 1_555 1_555 148 | N4A Cu1A 1_555 1_555 149 | N5A Cu1A 1_555 1_555 150 | C1A C2A 1_555 1_555 151 | C2A N1A 1_555 1_555 152 | C3A N5A 1_555 1_555 153 | C4A C3A 1_555 1_555 154 | C5A C4A 1_555 1_555 155 | C6A C5A 1_555 1_555 156 | C7A N5A 1_555 1_555 157 | C8A C9 1_555 1_555 158 | H1A C3A 1_555 1_555 159 | H2A C4A 1_555 1_555 160 | H3A C6A 1_555 1_555 161 | H4A C7A 1_555 1_555 162 | H5A C8A 1_555 1_555 163 | H6A C8A 1_555 1_555 164 | H7A C9 1_555 1_555 165 | N1 Cu1E 1_555 1_555 166 | C6 C7 1_555 1_555 167 | N3A N4A 1_555 1_555 168 | N4A C2A 1_555 1_555 169 | C5A C8A 1_555 1_555 170 | C6A C7A 1_555 1_555 171 | 172 | #END 173 | -------------------------------------------------------------------------------- /notebooks/CoRE-MOF/structures/CR/FSR/ABAVOP.cif: -------------------------------------------------------------------------------- 1 | 2 | 
####################################################################### 3 | # 4 | # Cambridge Crystallographic Data Centre 5 | # CCDC 6 | # 7 | ####################################################################### 8 | # 9 | # If this CIF has been generated from an entry in the Cambridge 10 | # Structural Database, then it will include bibliographic, chemical, 11 | # crystal, experimental, refinement or atomic coordinate data resulting 12 | # from the CCDC's data processing and validation procedures. 13 | # 14 | ####################################################################### 15 | 16 | data_ABAVOP 17 | _symmetry_cell_setting monoclinic 18 | _symmetry_space_group_name_H-M 'P 21/n' 19 | _symmetry_Int_Tables_number 14 20 | _space_group_name_Hall '-P 2yn' 21 | loop_ 22 | _symmetry_equiv_pos_site_id 23 | _symmetry_equiv_pos_as_xyz 24 | 1 x,y,z 25 | 2 1/2-x,1/2+y,1/2-z 26 | 3 -x,-y,-z 27 | 4 1/2+x,1/2-y,1/2+z 28 | _cell_length_a 9.778(3) 29 | _cell_length_b 13.212(4) 30 | _cell_length_c 10.266(3) 31 | _cell_angle_alpha 90 32 | _cell_angle_beta 104.183(5) 33 | _cell_angle_gamma 90 34 | _cell_volume 1285.81 35 | loop_ 36 | _atom_site_label 37 | _atom_site_type_symbol 38 | _atom_site_fract_x 39 | _atom_site_fract_y 40 | _atom_site_fract_z 41 | Co1 Co 1.0000 0.5000 1.0000 42 | Co2 Co 1.0000 1.0000 1.5000 43 | N1 N 0.9317(3) 0.8645(2) 1.3813(3) 44 | C1 C 0.8687(4) 0.8670(3) 1.2499(4) 45 | H1 H 0.8541 0.9305 1.2075 46 | C2 C 0.8242(4) 0.7826(3) 1.1734(4) 47 | H2 H 0.7792 0.7891 1.0817 48 | C3 C 0.8460(4) 0.6877(3) 1.2321(4) 49 | C4 C 0.9136(4) 0.6825(3) 1.3666(4) 50 | H3 H 0.9332 0.6196 1.4098 51 | C5 C 0.9521(4) 0.7717(3) 1.4369(4) 52 | H4 H 0.9951 0.7671 1.5292 53 | C6 C 0.7926(4) 0.5928(3) 1.1533(3) 54 | O1 O 0.6602(2) 0.58965(18) 1.1134(2) 55 | O2 O 0.8775(3) 0.52627(19) 1.1382(3) 56 | N2 N 0.9321(3) 0.6373(2) 0.8946(3) 57 | C7 C 0.7945(4) 0.6556(3) 0.8422(4) 58 | H5 H 0.7285 0.6078 0.8564 59 | C8 C 0.7451(4) 0.7418(3) 0.7680(4) 60 | H6 H 0.6479 0.7519 0.7330 61 | 
C9 C 0.8416(4) 0.8122(3) 0.7466(4) 62 | C10 C 0.9838(4) 0.7939(3) 0.8037(4) 63 | H7 H 1.0524 0.8412 0.7936 64 | C11 C 1.0233(4) 0.7064(3) 0.8748(4) 65 | H8 H 1.1200 0.6949 0.9114 66 | C12 C 0.7907(4) 0.9051(2) 0.6614(3) 67 | O3 O 0.6613(3) 0.91749(18) 0.6253(2) 68 | O4 O 0.8856(3) 0.96139(19) 0.6361(2) 69 | N1B N 1.0683(3) 0.1355(2) 0.6187(3) 70 | C1B C 1.1313(4) 0.1330(3) 0.7501(4) 71 | H1B H 1.1459 0.0695 0.7925 72 | C2B C 1.1758(4) 0.2174(3) 0.8266(4) 73 | H2B H 1.2208 0.2109 0.9183 74 | C3B C 1.1540(4) 0.3123(3) 0.7679(4) 75 | C4B C 1.0864(4) 0.3175(3) 0.6334(4) 76 | H3B H 1.0668 0.3804 0.5902 77 | C5B C 1.0479(4) 0.2283(3) 0.5631(4) 78 | H4B H 1.0049 0.2329 0.4708 79 | C6B C 1.2074(4) 0.4072(3) 0.8467(3) 80 | O1B O 1.3398(2) 0.41035(18) 0.8866(2) 81 | O2B O 1.1225(3) 0.47373(19) 0.8618(3) 82 | N2B N 1.0679(3) 0.3627(2) 1.1054(3) 83 | C7B C 1.2055(4) 0.3444(3) 1.1578(4) 84 | H5B H 1.2715 0.3922 1.1436 85 | C8B C 1.2549(4) 0.2582(3) 1.2320(4) 86 | H6B H 1.3521 0.2481 1.2670 87 | C9B C 1.1584(4) 0.1878(3) 1.2534(4) 88 | C10B C 1.0162(4) 0.2061(3) 1.1963(4) 89 | H7B H 0.9476 0.1588 1.2064 90 | C11B C 0.9767(4) 0.2936(3) 1.1252(4) 91 | H8B H 0.8800 0.3051 1.0886 92 | C12B C 1.2093(4) 0.0949(2) 1.3386(3) 93 | O3B O 1.3387(3) 0.08251(18) 1.3747(2) 94 | O4B O 1.1144(3) 0.03861(19) 1.3639(2) 95 | O3C O 1.1613(3) 0.58251(18) 1.1253(2) 96 | O3A O 0.8387(3) 0.41749(18) 0.8747(2) 97 | N1B_2 N 1.0683(3) 1.1355(2) 1.6187(3) 98 | O1C O 1.1602(2) 0.91035(18) 1.6134(2) 99 | O1A O 0.8398(2) 1.08965(18) 1.3866(2) 100 | O4_2 O 0.8856(3) 0.96139(19) 1.6361(2) 101 | O4B_2 O 1.1144(3) 1.03861(19) 1.3639(2) 102 | Co2C Co 0.5000 0.5000 1.0000 103 | Co1C Co 0.5000 1.0000 0.5000 104 | Co2_2 Co 1.0000 1.0000 0.5000 105 | Co2_3 Co 1.0000 0.0000 0.5000 106 | Co2C_2 Co 1.5000 0.5000 1.0000 107 | Co1C_2 Co 1.5000 0.0000 1.5000 108 | Co2_4 Co 1.0000 0.0000 1.5000 109 | loop_ 110 | _geom_bond_atom_site_label_1 111 | _geom_bond_atom_site_label_2 112 | _geom_bond_site_symmetry_1 113 | 
_geom_bond_site_symmetry_2 114 | Co1 O2 1_555 1_555 115 | Co2 N1 1_555 1_555 116 | N1 C1 1_555 1_555 117 | C1 H1 1_555 1_555 118 | C2 C1 1_555 1_555 119 | H2 C2 1_555 1_555 120 | C3 C2 1_555 1_555 121 | C4 C3 1_555 1_555 122 | H3 C4 1_555 1_555 123 | C5 N1 1_555 1_555 124 | H4 C5 1_555 1_555 125 | C6 C3 1_555 1_555 126 | O1 C6 1_555 1_555 127 | O2 C6 1_555 1_555 128 | N2 Co1 1_555 1_555 129 | C7 N2 1_555 1_555 130 | H5 C7 1_555 1_555 131 | C8 C7 1_555 1_555 132 | H6 C8 1_555 1_555 133 | C9 C8 1_555 1_555 134 | C10 C9 1_555 1_555 135 | H7 C10 1_555 1_555 136 | C11 N2 1_555 1_555 137 | H8 C11 1_555 1_555 138 | C12 C9 1_555 1_555 139 | O3 C12 1_555 1_555 140 | O4 C12 1_555 1_555 141 | N1B C1B 1_555 1_555 142 | C1B H1B 1_555 1_555 143 | C2B C1B 1_555 1_555 144 | H2B C2B 1_555 1_555 145 | C3B C2B 1_555 1_555 146 | C4B C3B 1_555 1_555 147 | H3B C4B 1_555 1_555 148 | C5B N1B 1_555 1_555 149 | H4B C5B 1_555 1_555 150 | C6B C3B 1_555 1_555 151 | O1B C6B 1_555 1_555 152 | O2B Co1 1_555 1_555 153 | N2B Co1 1_555 1_555 154 | C7B N2B 1_555 1_555 155 | H5B C7B 1_555 1_555 156 | C8B C7B 1_555 1_555 157 | H6B C8B 1_555 1_555 158 | C9B C8B 1_555 1_555 159 | C10B C9B 1_555 1_555 160 | H7B C10B 1_555 1_555 161 | C11B N2B 1_555 1_555 162 | H8B C11B 1_555 1_555 163 | C12B C9B 1_555 1_555 164 | O3B C12B 1_555 1_555 165 | O4B C12B 1_555 1_555 166 | O3C Co1 1_555 1_555 167 | O3A Co1 1_555 1_555 168 | N1B_2 Co2 1_555 1_555 169 | O1C Co2 1_555 1_555 170 | O1A Co2 1_555 1_555 171 | O4_2 Co2 1_555 1_555 172 | O4B_2 Co2 1_555 1_555 173 | Co2C O1 1_555 1_555 174 | Co1C O3 1_555 1_555 175 | Co2_2 O4 1_555 1_555 176 | Co2_3 N1B 1_555 1_555 177 | Co2C_2 O1B 1_555 1_555 178 | Co1C_2 O3B 1_555 1_555 179 | Co2_4 O4B 1_555 1_555 180 | C4 C5 1_555 1_555 181 | C10 C11 1_555 1_555 182 | C4B C5B 1_555 1_555 183 | C6B O2B 1_555 1_555 184 | C10B C11B 1_555 1_555 185 | 186 | #END 187 | -------------------------------------------------------------------------------- 
/notebooks/CoRE-MOF/structures/CR/Ion/BAMKIM.cif: -------------------------------------------------------------------------------- 1 | 2 | ####################################################################### 3 | # 4 | # Cambridge Crystallographic Data Centre 5 | # CCDC 6 | # 7 | ####################################################################### 8 | # 9 | # If this CIF has been generated from an entry in the Cambridge 10 | # Structural Database, then it will include bibliographic, chemical, 11 | # crystal, experimental, refinement or atomic coordinate data resulting 12 | # from the CCDC's data processing and validation procedures. 13 | # 14 | ####################################################################### 15 | 16 | data_BAMKIM 17 | _symmetry_cell_setting monoclinic 18 | _symmetry_space_group_name_H-M 'P 21/c' 19 | _symmetry_Int_Tables_number 14 20 | _space_group_name_Hall '-P 2ybc' 21 | loop_ 22 | _symmetry_equiv_pos_site_id 23 | _symmetry_equiv_pos_as_xyz 24 | 1 x,y,z 25 | 2 -x,1/2+y,1/2-z 26 | 3 -x,-y,-z 27 | 4 x,1/2-y,1/2+z 28 | _cell_length_a 9.6552(2) 29 | _cell_length_b 17.1958(5) 30 | _cell_length_c 20.8804(6) 31 | _cell_angle_alpha 90 32 | _cell_angle_beta 94.231(2) 33 | _cell_angle_gamma 90 34 | _cell_volume 3457.3 35 | loop_ 36 | _atom_site_label 37 | _atom_site_type_symbol 38 | _atom_site_fract_x 39 | _atom_site_fract_y 40 | _atom_site_fract_z 41 | Br1 Br 0.4043(3) 0.19710(13) 0.46669(11) 42 | Cu1 Cu 0.29918(6) 0.49341(4) 0.52393(3) 43 | Cu2 Cu 0.00104(6) 0.52158(4) 0.42825(3) 44 | O1 O 0.3127(4) 0.4043(2) 0.5816(2) 45 | O2 O 0.1072(4) 0.4027(3) 0.62553(19) 46 | O3 O 0.0498(5) 0.0626(3) 0.8334(2) 47 | O4 O -0.1190(4) 0.0676(3) 0.8986(2) 48 | O5 O 0.1230(4) 0.4598(2) 0.48512(18) 49 | H1 H 0.132(7) 0.4111(13) 0.484(3) 50 | N1 N 0.2778(5) 0.5902(3) 0.4681(2) 51 | N2 N 0.1499(5) 0.6031(3) 0.4365(2) 52 | N3 N 0.2845(5) 0.7015(3) 0.4188(2) 53 | N4 N 0.5799(5) 0.9327(3) 0.0566(2) 54 | N5 N 0.4557(5) 0.9601(3) 0.0778(2) 55 | N6 N 0.5776(5) 
0.9125(3) 0.1592(2) 56 | C1 C 0.3559(6) 0.6505(4) 0.4569(3) 57 | H2 H 0.4475 0.6569 0.4731 58 | C2 C 0.1566(6) 0.6708(4) 0.4081(3) 59 | H3 H 0.0835 0.6943 0.3839 60 | C3 C 0.3321(7) 0.7751(4) 0.3925(3) 61 | H4 H 0.4060 0.7961 0.4215 62 | H5 H 0.2559 0.8120 0.3903 63 | C4 C 0.3847(6) 0.7665(4) 0.3263(3) 64 | C5 C 0.3518(7) 0.7044(4) 0.2857(3) 65 | H6 H 0.2947 0.6649 0.2990 66 | C6 C 0.4029(8) 0.7006(5) 0.2257(4) 67 | H7 H 0.3800 0.6588 0.1987 68 | C7 C 0.4885(7) 0.7593(4) 0.2057(3) 69 | H8 H 0.5235 0.7564 0.1655 70 | C8 C 0.5219(6) 0.8219(4) 0.2454(3) 71 | C9 C 0.4686(6) 0.8263(4) 0.3054(3) 72 | H9 H 0.4889 0.8690 0.3317 73 | C10 C 0.6187(7) 0.8857(5) 0.2264(3) 74 | H10 H 0.6147 0.9291 0.2559 75 | H11 H 0.7134 0.8665 0.2288 76 | C11 C 0.6492(6) 0.9053(4) 0.1070(3) 77 | H12 H 0.7373 0.8833 0.1069 78 | C12 C 0.4577(6) 0.9469(4) 0.1393(3) 79 | H13 H 0.3868 0.9595 0.1653 80 | C13 C 0.2262(6) 0.3767(4) 0.6178(3) 81 | C14 C 0.2767(8) 0.3068(5) 0.6566(4) 82 | H14 H 0.3565 0.3223 0.6846 83 | H15 H 0.3079 0.2678 0.6273 84 | C15 C 0.1713(7) 0.2698(5) 0.6971(4) 85 | C16 C 0.1549(8) 0.2976(5) 0.7588(5) 86 | H16 H 0.2075 0.3396 0.7749 87 | C17 C 0.0598(9) 0.2625(5) 0.7960(4) 88 | H17 H 0.0517 0.2808 0.8374 89 | C18 C -0.0208(7) 0.2033(5) 0.7748(4) 90 | C19 C -0.0048(8) 0.1755(5) 0.7143(5) 91 | H18 H -0.0590 0.1338 0.6989 92 | C20 C 0.0913(8) 0.2084(5) 0.6751(4) 93 | H19 H 0.1006 0.1885 0.6342 94 | C21 C -0.1205(8) 0.1643(5) 0.8171(5) 95 | H20 H -0.2033 0.1490 0.7911 96 | H21 H -0.1478 0.2012 0.8490 97 | C22 C -0.0574(7) 0.0929(4) 0.8511(3) 98 | Cu1A Cu -0.29918(6) -0.00659(4) 0.97607(3) 99 | Cu1A_2 Cu 0.70082(6) 0.99341(4) -0.02393(3) 100 | Cu1C Cu 0.29918(6) 1.00659(4) 0.02393(3) 101 | Cu2B Cu -0.00104(6) 0.47842(4) 0.57175(3) 102 | Cu2C Cu 0.00104(6) -0.02158(4) 0.92825(3) 103 | O2B O -0.1072(4) 0.5973(3) 0.37447(19) 104 | O3C O 0.0498(5) 0.4374(3) 0.3334(2) 105 | O4A O 0.1190(4) 0.5676(3) 0.6014(2) 106 | O4C O -0.1190(4) 0.4324(3) 0.3986(2) 107 | O5B O -0.1230(4) 0.5402(2) 
0.51488(18) 108 | N4A N 0.4201(5) 0.4327(3) 0.4434(2) 109 | N5C N 0.4557(5) 0.5399(3) 0.5778(2) 110 | loop_ 111 | _geom_bond_atom_site_label_1 112 | _geom_bond_atom_site_label_2 113 | _geom_bond_site_symmetry_1 114 | _geom_bond_site_symmetry_2 115 | Cu1 O1 1_555 1_555 116 | Cu2 O5 1_555 1_555 117 | O1 C13 1_555 1_555 118 | O2 C13 1_555 1_555 119 | O3 C22 1_555 1_555 120 | O4 C22 1_555 1_555 121 | O5 Cu1 1_555 1_555 122 | H1 O5 1_555 1_555 123 | N1 Cu1 1_555 1_555 124 | N2 Cu2 1_555 1_555 125 | N3 C1 1_555 1_555 126 | N4 N5 1_555 1_555 127 | N5 C12 1_555 1_555 128 | N6 C10 1_555 1_555 129 | C1 N1 1_555 1_555 130 | H2 C1 1_555 1_555 131 | C2 N2 1_555 1_555 132 | H3 C2 1_555 1_555 133 | C3 N3 1_555 1_555 134 | H4 C3 1_555 1_555 135 | H5 C3 1_555 1_555 136 | C4 C3 1_555 1_555 137 | C5 C4 1_555 1_555 138 | H6 C5 1_555 1_555 139 | C6 C5 1_555 1_555 140 | H7 C6 1_555 1_555 141 | C7 C6 1_555 1_555 142 | H8 C7 1_555 1_555 143 | C8 C7 1_555 1_555 144 | C9 C4 1_555 1_555 145 | H9 C9 1_555 1_555 146 | C10 C8 1_555 1_555 147 | H10 C10 1_555 1_555 148 | H11 C10 1_555 1_555 149 | C11 N4 1_555 1_555 150 | H12 C11 1_555 1_555 151 | C12 N6 1_555 1_555 152 | H13 C12 1_555 1_555 153 | C13 C14 1_555 1_555 154 | C14 H14 1_555 1_555 155 | H15 C14 1_555 1_555 156 | C15 C14 1_555 1_555 157 | C16 C15 1_555 1_555 158 | H16 C16 1_555 1_555 159 | C17 C16 1_555 1_555 160 | H17 C17 1_555 1_555 161 | C18 C17 1_555 1_555 162 | C19 C18 1_555 1_555 163 | H18 C19 1_555 1_555 164 | C20 C15 1_555 1_555 165 | H19 C20 1_555 1_555 166 | C21 C18 1_555 1_555 167 | H20 C21 1_555 1_555 168 | H21 C21 1_555 1_555 169 | C22 C21 1_555 1_555 170 | Cu1A O4 1_555 1_555 171 | Cu1A_2 N4 1_555 1_555 172 | Cu1C N5 1_555 1_555 173 | Cu2B O2 1_555 1_555 174 | Cu2C O3 1_555 1_555 175 | O2B Cu2 1_555 1_555 176 | O3C Cu2 1_555 1_555 177 | O4A Cu1 1_555 1_555 178 | O4C Cu2 1_555 1_555 179 | O5B Cu2 1_555 1_555 180 | N4A Cu1 1_555 1_555 181 | N5C Cu1 1_555 1_555 182 | O4 Cu2C 1_555 1_555 183 | O5 Cu2B 1_555 1_555 184 | N1 N2 
1_555 1_555 185 | N3 C2 1_555 1_555 186 | N6 C11 1_555 1_555 187 | C8 C9 1_555 1_555 188 | C19 C20 1_555 1_555 189 | Cu2B O4A 1_555 1_555 190 | Cu2B O5B 1_555 1_555 191 | 192 | #END 193 | -------------------------------------------------------------------------------- /notebooks/Discovery/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | .virtual_documents/ 3 | *.zip 4 | *.rlbcoor 5 | *.pkl 6 | conformer_generator.* 7 | 8 | 9 | 01_CSD_Search/aryl_sulphonamide.csv 10 | 11 | 04_Conformer_generation/ci500358p.pdf 12 | 04_Conformer_generation/superimposed/ 13 | 04_Conformer_generation/conformers.sdf 14 | 04_Conformer_generation/minimised.mol2 15 | 16 | 06_Interaction_maps/2UW7/ 17 | 06_Interaction_maps/A_GVP1351.mol 18 | 06_Interaction_maps/A_GVP1351/ 19 | 20 | 07_Cavities/cavities.db 21 | 07_Cavities/cavity.py 22 | 07_Cavities/cavity.rlbcoor 23 | 07_Cavities/overlaid_via_cavities.pdb 24 | 07_Cavities/overlaid_via_sequence.pdb 25 | 26 | # N.B. 
The 'target' directory is excluded by a rule in the global .gitignore to do with builds 27 | !08_Docking/input_files/target 28 | 08_Docking/input_files/input.* 29 | 08_Docking/output*/ 30 | 08_Docking/parameter_tests/ 31 | 32 | 09_Covalent_Docking/complexes/ 33 | 09_Covalent_Docking/test.mol 34 | 09_Covalent_Docking/input.mol2 35 | 09_Covalent_Docking/input.sdf 36 | 09_Covalent_Docking/output_substructure/ 37 | 09_Covalent_Docking/output_atom/ 38 | 09_Covalent_Docking/complexed_atom.mol2 39 | 09_Covalent_Docking/complexed_substructure.mol2 40 | 09_Covalent_Docking/complexes_atom/ 41 | 09_Covalent_Docking/complexes_substructure/ -------------------------------------------------------------------------------- /notebooks/Discovery/01_CSD_Search/Lapatinib.mol: -------------------------------------------------------------------------------- 1 | Lapatinib_from_MarvinSketch 2 | Mrv1921 08112016152D 3 | 4 | 40 44 0 0 0 0 999 V2000 5 | -4.9998 0.8210 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | -4.1793 0.9073 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0 7 | -4.0931 0.0868 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 8 | -4.2656 1.7277 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 9 | -3.3589 0.9935 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | -2.8739 0.3261 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | -2.0534 0.4123 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 12 | -1.5685 -0.2551 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | -0.7480 -0.1689 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | -0.3355 0.5456 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | 0.4714 0.3740 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 0.5577 -0.4464 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | -0.1960 -0.7820 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 18 | 1.2722 -0.8589 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 1.2722 -1.6839 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | 1.9866 -2.0964 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 2.7011 -1.6839 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | 2.7011 -0.8589 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | 1.9866 -0.4464 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | 3.4156 -0.4464 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 
4.1300 -0.8589 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 26 | 4.1300 -1.6839 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 3.4156 -2.0964 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 28 | 3.4156 0.3786 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 29 | 4.1300 0.7911 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 30 | 4.1300 1.6161 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | 4.8445 2.0286 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | 5.5589 1.6161 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 33 | 5.5589 0.7911 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | 4.8445 0.3786 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 35 | 6.2734 0.3786 0.0000 Cl 0 0 0 0 0 0 0 0 0 0 0 0 36 | 6.2734 2.0286 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 37 | 6.2734 2.8536 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 38 | 6.9879 3.2661 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 39 | 6.9879 4.0911 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 40 | 7.7024 4.5036 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 41 | 8.4168 4.0911 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 42 | 8.4168 3.2661 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 43 | 7.7024 2.8536 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 44 | 9.1313 2.8536 0.0000 F 0 0 0 0 0 0 0 0 0 0 0 0 45 | 1 2 1 0 0 0 0 46 | 2 3 2 0 0 0 0 47 | 2 4 2 0 0 0 0 48 | 2 5 1 0 0 0 0 49 | 5 6 1 0 0 0 0 50 | 6 7 1 0 0 0 0 51 | 7 8 1 0 0 0 0 52 | 8 9 1 0 0 0 0 53 | 9 10 4 0 0 0 0 54 | 10 11 4 0 0 0 0 55 | 11 12 4 0 0 0 0 56 | 12 13 4 0 0 0 0 57 | 9 13 4 0 0 0 0 58 | 12 14 1 0 0 0 0 59 | 14 15 4 0 0 0 0 60 | 15 16 4 0 0 0 0 61 | 16 17 4 0 0 0 0 62 | 17 18 4 0 0 0 0 63 | 18 19 4 0 0 0 0 64 | 14 19 4 0 0 0 0 65 | 18 20 4 0 0 0 0 66 | 20 21 4 0 0 0 0 67 | 21 22 4 0 0 0 0 68 | 22 23 4 0 0 0 0 69 | 17 23 4 0 0 0 0 70 | 20 24 1 0 0 0 0 71 | 24 25 1 0 0 0 0 72 | 25 26 4 0 0 0 0 73 | 26 27 4 0 0 0 0 74 | 27 28 4 0 0 0 0 75 | 28 29 4 0 0 0 0 76 | 29 30 4 0 0 0 0 77 | 25 30 4 0 0 0 0 78 | 29 31 1 0 0 0 0 79 | 28 32 1 0 0 0 0 80 | 32 33 1 0 0 0 0 81 | 33 34 1 0 0 0 0 82 | 34 35 4 0 0 0 0 83 | 35 36 4 0 0 0 0 84 | 36 37 4 0 0 0 0 85 | 37 38 4 0 0 0 0 86 | 38 39 4 0 0 0 0 87 | 34 39 4 0 0 0 0 88 | 38 40 1 0 0 0 0 89 | M END 90 | 
-------------------------------------------------------------------------------- /notebooks/Discovery/01_CSD_Search/aryl_sulphonamide.con: -------------------------------------------------------------------------------- 1 | T1 *CONN 2 | NFRAG -99 3 | ELDEF BB = AA H 4 | ELDEF QA = C N 5 | AT1 C 3 :XY 250 167 6 | AT2 C 2 1 :XY 198 197 7 | AT3 C 2 1 :XY 198 257 8 | AT4 C 2 1 :XY 249 287 9 | AT5 C 2 1 :XY 301 257 10 | AT6 C 3 :XY 301 197 11 | AT7 S 4 A :XY 354 167 12 | AT8 O 1 :XY 324 115 13 | AT9 O 1 :XY 384 115 14 | AT10 QA 1 :XY 406 197 15 | AT11 BB 1 :XY 250 107 16 | BO 6 7 1 17 | BO 1 2 5 18 | BO 2 3 5 19 | BO 1 11 1 20 | BO 7 8 2 21 | BO 7 9 2 22 | BO 3 4 5 23 | BO 6 1 5 24 | BO 5 6 5 25 | BO 4 5 5 26 | BO 7 10 1 27 | GEOM 28 | DEFINE ANO1 11 29 | DEFINE ?TR1 1 6 7 10 30 | TRAN TOR1 = ABS ?TR1 31 | SYMCHK ON 32 | ENANT NORMAL 33 | END 34 | -------------------------------------------------------------------------------- /notebooks/Discovery/01_CSD_Search/aryl_sulphonamide.qry: -------------------------------------------------------------------------------- 1 | [{'subtype': None, 'type': '2D', 'query_data': "{'info': {'origin': '', 'format': ''}, 'match_count': 17, 'eldef': {'QA': {'plus': ['C', 'N'], 'minus': []}}, 'bonds': {'48': {'uid': (5, 6), 'atoms': (13, 16), 'cyc': 0, 'vpa': None, 'vbt': [], 'type': -5}, '56': {'uid': (4, 5), 'atoms': (10, 13), 'cyc': 0, 'vpa': None, 'vbt': [], 'type': -5}, '102': {'uid': (1, 11), 'atoms': (1, 33), 'cyc': 0, 'vpa': None, 'vbt': [], 'type': 1}, '77': {'uid': (7, 8), 'atoms': (19, 24), 'cyc': 0, 'vpa': None, 'vbt': [], 'type': 2}, '62': {'uid': (1, 2), 'atoms': (1, 4), 'cyc': 0, 'vpa': None, 'vbt': [], 'type': -5}, '71': {'uid': (2, 3), 'atoms': (4, 7), 'cyc': 0, 'vpa': None, 'vbt': [], 'type': -5}, '68': {'uid': (7, 9), 'atoms': (19, 27), 'cyc': 0, 'vpa': None, 'vbt': [], 'type': 2}, '90': {'uid': (3, 4), 'atoms': (7, 10), 'cyc': 0, 'vpa': None, 'vbt': [], 'type': -5}, '80': {'uid': (6, 1), 'atoms': (16, 1), 'cyc': 0, 
'vpa': None, 'vbt': [], 'type': -5}, '86': {'uid': (6, 7), 'atoms': (16, 19), 'cyc': 0, 'vpa': None, 'vbt': [], 'type': 1}, '96': {'uid': (7, 10), 'atoms': (19, 30), 'cyc': 0, 'vpa': None, 'vbt': [], 'type': 1}}, 'joined': {'atoms': {'10-13': 56, '4-7': 71, '1-16': 80, '19-30': 96, '1-33': 102, '1-4': 62, '19-24': 77, '13-16': 48, '19-27': 68, '16-19': 86, '7-10': 90}, 'bond': {'48': '13-16', '56': '10-13', '102': '1-33', '77': '19-24', '62': '1-4', '71': '4-7', '68': '19-27', '90': '7-10', '80': '1-16', '86': '16-19', '96': '19-30'}}, 'geom': {'combinations': {}, 'all': {}, 'objects': {'TOR1': {'inter': 0, 'dist': 0.0, 'func': 'ABS', 'limits': ('none', 'none'), 'bonds': ('none', 'none'), 'combination': None, 'radii': {}, 'tol': 'none', 'tab': 1, 'cview': 0, 'group': None, 'num_list': {'1': {'var': None, 'type': 'atom', 'id': 1}, '3': {'var': None, 'type': 'atom', 'id': 19}, '2': {'var': None, 'type': 'atom', 'id': 16}, '4': {'var': None, 'type': 'atom', 'id': 30}}, 'type': 'torsion', 'id': 2, 'view': 'range'}, 'ANO1': {'inter': 0, 'dist': 0.0, 'func': 'ano', 'limits': ('none', 'none'), 'bonds': ('none', 'none'), 'combination': None, 'radii': {}, 'tol': 'none', 'tab': 1, 'cview': 0, 'group': None, 'num_list': {'1': {'var': None, 'type': 'atom', 'id': 33}}, 'type': 'label', 'id': 1, 'view': None}}}, 'atoms': {'24': {'nh': 99, 'ch': 99, 'group': None, 'uid': 8, 'bonds': [77], 'vch': [], 'autoh': 0, 'coord': (324.0, 115.0), 'vnh': [], 'cyc': 0, 'vpa': [], 'link': 1, 'close': 1, 'imph': 0, 'type': 'O', 'tc': 99}, '10': {'nh': 1, 'ch': 99, 'group': None, 'uid': 4, 'bonds': [56, 90], 'vch': [], 'autoh': 0, 'coord': (249.99999999999997, 287.0), 'vnh': [], 'cyc': 0, 'vpa': [], 'link': 1, 'close': 1, 'imph': 1, 'type': 'C', 'tc': 99}, '13': {'nh': 1, 'ch': 99, 'group': None, 'uid': 5, 'bonds': [48, 56], 'vch': [], 'autoh': 0, 'coord': (301.9615242270663, 257.00000000000006), 'vnh': [], 'cyc': 0, 'vpa': [], 'link': 1, 'close': 1, 'imph': 1, 'type': 'C', 'tc': 99}, '27': 
{'nh': 99, 'ch': 99, 'group': None, 'uid': 9, 'bonds': [68], 'vch': [], 'autoh': 0, 'coord': (384.0, 115.0), 'vnh': [], 'cyc': 0, 'vpa': [], 'link': 1, 'close': 1, 'imph': 0, 'type': 'O', 'tc': 99}, '16': {'nh': 99, 'ch': 99, 'group': None, 'uid': 6, 'bonds': [48, 80, 86], 'vch': [], 'autoh': 0, 'coord': (301.9615242270663, 197.00000000000003), 'vnh': [], 'cyc': 0, 'vpa': [], 'link': 1, 'close': 1, 'imph': 0, 'type': 'C', 'tc': 99}, '19': {'nh': 99, 'ch': 99, 'group': None, 'uid': 7, 'bonds': [68, 77, 86, 96], 'vch': [], 'autoh': 0, 'coord': (354.0, 167.0), 'vnh': [], 'cyc': 1, 'vpa': [], 'link': 1, 'close': 1, 'imph': 0, 'type': 'S', 'tc': 99}, '30': {'nh': 99, 'ch': 99, 'group': None, 'uid': 10, 'bonds': [96], 'vch': [], 'autoh': 0, 'coord': (406.0, 197.0), 'vnh': [], 'cyc': 0, 'vpa': [], 'link': 1, 'close': 1, 'imph': 0, 'type': 'QA', 'tc': 99}, '1': {'nh': 99, 'ch': 99, 'group': None, 'uid': 1, 'bonds': [62, 80, 102], 'vch': [], 'autoh': 0, 'coord': (250.0, 167.0), 'vnh': [], 'cyc': 0, 'vpa': [], 'link': 1, 'close': 1, 'imph': 0, 'type': 'C', 'tc': 99}, '33': {'nh': 99, 'ch': 99, 'group': None, 'uid': 11, 'bonds': [102], 'vch': [], 'autoh': 0, 'coord': (250.0, 107.0), 'vnh': [], 'cyc': 0, 'vpa': [], 'link': 1, 'close': 1, 'imph': 0, 'type': 'X', 'tc': 99}, '4': {'nh': 1, 'ch': 99, 'group': None, 'uid': 2, 'bonds': [62, 71], 'vch': [], 'autoh': 0, 'coord': (198.03847577293368, 197.0), 'vnh': [], 'cyc': 0, 'vpa': [], 'link': 1, 'close': 1, 'imph': 1, 'type': 'C', 'tc': 99}, '7': {'nh': 1, 'ch': 99, 'group': None, 'uid': 3, 'bonds': [71, 90], 'vch': [], 'autoh': 0, 'coord': (198.03847577293365, 257.0), 'vnh': [], 'cyc': 0, 'vpa': [], 'link': 1, 'close': 1, 'imph': 1, 'type': 'C', 'tc': 99}}, 'version': '2020.1', 'atom_groups': [], 'c2d': 5, 'vpa': {}, 'format_id': 2, 'options': {'enant': 0, 'exhaustive': 1, 'permute': 0, 'symmchk': 1, 'sameres': 0}, 'match': {'11': 33, '10': 30, '13': 39, '12': 36, '15': 42, '17': 45, '1': 1, '3': 7, '2': 4, '5': 13, '4': 10, '7': 
19, '6': 16, '9': 27, '8': 24}}", 'text_rep': "{'group_distances': [], 'contacts': {}, 'bonds': [(6, 7), (0, 10), (0, 1), (1, 2), (6, 8), (2, 3), (5, 0), (5, 6), (6, 9), (4, 5), (3, 4)], 'order': {1: 'ANO1', 2: 'TOR1'}, 'params': {'TOR1': ('torsion', [(0, 'atom'), (5, 'atom'), (6, 'atom'), (9, 'atom')]), 'ANO1': ('label', [(10, 'atom')])}, 'groups': {}, 'setups': {}, 'intras': {}}"}] -------------------------------------------------------------------------------- /notebooks/Discovery/07_Cavities/pdbe_get.ps1: -------------------------------------------------------------------------------- 1 | function pdbe_get() { 2 | 3 | Param([string]$pdb_code) 4 | 5 | $pdb_code = $pdb_code.ToLower() 6 | 7 | $uri = [System.Uri]"http://www.ebi.ac.uk/pdbe/entry-files/download/pdb${pdb_code}.ent" 8 | 9 | $pdb_file = Join-Path -Path $(Convert-Path '.') -ChildPath $uri.Segments[-1] # Absolute path for output file 10 | 11 | $wc = New-Object System.Net.WebClient 12 | 13 | $wc.DownloadFile($uri, $pdb_file) 14 | } -------------------------------------------------------------------------------- /notebooks/Discovery/08_Docking/Basic_CLI_useage/gold.conf: -------------------------------------------------------------------------------- 1 | GOLD CONFIGURATION FILE 2 | 3 | AUTOMATIC SETTINGS 4 | autoscale = 0.3 5 | 6 | POPULATION 7 | popsiz = auto 8 | select_pressure = auto 9 | n_islands = auto 10 | maxops = auto 11 | niche_siz = auto 12 | 13 | GENETIC OPERATORS 14 | pt_crosswt = auto 15 | allele_mutatewt = auto 16 | migratewt = auto 17 | 18 | FLOOD FILL 19 | radius = 6 20 | origin = 0 0 0 21 | do_cavity = 1 22 | floodfill_atom_no = 0 23 | cavity_file = ../input_files/target/ligand.mol2 24 | floodfill_center = cavity_from_ligand 25 | 26 | DATA FILES 27 | ligand_data_file ../input_files/input.sdf 5 28 | param_file = DEFAULT 29 | set_ligand_atom_types = 1 30 | set_protein_atom_types = 0 31 | directory = output 32 | tordist_file = DEFAULT 33 | make_subdirs = 0 34 | save_lone_pairs = 0 35 | 
fit_points_file = fit_pts.mol2 36 | read_fitpts = 0 37 | 38 | FLAGS 39 | internal_ligand_h_bonds = 0 40 | flip_free_corners = 0 41 | match_ring_templates = 0 42 | flip_amide_bonds = 0 43 | flip_planar_n = 1 flip_ring_NRR flip_ring_NHR 44 | flip_pyramidal_n = 0 45 | rotate_carboxylic_oh = flip 46 | use_tordist = 1 47 | postprocess_bonds = 1 48 | rotatable_bond_override_file = DEFAULT 49 | solvate_all = 1 50 | 51 | TERMINATION 52 | early_termination = 1 53 | n_top_solutions = 3 54 | rms_tolerance = 1.5 55 | 56 | CONSTRAINTS 57 | force_constraints = 0 58 | 59 | COVALENT BONDING 60 | covalent = 0 61 | 62 | SAVE OPTIONS 63 | save_score_in_file = 1 comments 64 | save_protein_torsions = 1 65 | output_file_format = MACCS 66 | clean_up_option save_top_n_solutions 1 67 | clean_up_option delete_empty_directories 68 | clean_up_option delete_redundant_log_files 69 | 70 | WRITE OPTIONS 71 | write_options = NO_LINK_FILES NO_RNK_FILES NO_GOLD_LIGAND_MOL2_FILE 72 | 73 | FITNESS FUNCTION SETTINGS 74 | initial_virtual_pt_match_max = 3 75 | relative_ligand_energy = 1 76 | gold_fitfunc_path = plp 77 | score_param_file = DEFAULT 78 | 79 | PROTEIN DATA 80 | protein_datafile = ../input_files/target/protein.mol2 81 | 82 | CONSTRAINTS 83 | constraint protein_h_bond 10.0000 0.005000 3696 84 | 85 | 86 | -------------------------------------------------------------------------------- /notebooks/Discovery/08_Docking/Basic_CLI_useage/run.ps1: -------------------------------------------------------------------------------- 1 | # Run a docking via the CLI for comparison with those run via the API. 2 | # gold.conf here should be the same as e.g. ..\output_foreground\api_gold.conf except for file paths. 3 | 4 | Remove-Item -Recurse -Force -ErrorAction SilentlyContinue .\output 5 | 6 | & 'C:\Program Files\CCDC\Discovery_2021\GOLD\gold\d_win32\bin\gold_win32.exe' .\gold.conf 7 | 8 | # To start Hermes and view results... 
9 | 10 | # & 'C:\Program Files\CCDC\Discovery_2021\Hermes\hermes.exe' .\gold.conf -------------------------------------------------------------------------------- /notebooks/Discovery/08_Docking/Protein-ligand_descriptors.txt: -------------------------------------------------------------------------------- 1 | The calculation of descriptors for protein-ligand complexes, such as are produced by GOLD docking, 2 | is described in the Hermes User Guide. In particular, see the following sections: 3 | 4 | 16 Defining Descriptors 5 | 17 Calculating and Using Descriptors 6 | 23.9 Calculating Descriptors for a Set of Docked Ligands 7 | 23.9.4 Running Descriptor Calculations via the Command Line 8 | 24 Appendix C: Calculating Descriptors: Computational Details 9 | 25 Appendix D: Using Descriptors Tutorial 10 | 11 | An example of CLI usage on Windows would be: 12 | 13 | & 'C:\Program Files\CCDC\Discovery_2021\Hermes\descriptor_calculator.exe' ` 14 | -xml .\descriptors.xml ` 15 | -ligands .\gold_solns.mol2 ` 16 | -csvout .\descriptors.csv ` 17 | -sdout .\descriptors.sdf 18 | 19 | Note that there is a Descriptors module in the API; however, its emphasis is rather different. 20 | It is described here: https://downloads.ccdc.cam.ac.uk/documentation/API/descriptive_docs/descriptors.html 21 | -------------------------------------------------------------------------------- /notebooks/Discovery/08_Docking/ReadMe.txt: -------------------------------------------------------------------------------- 1 | The notebooks in this directory illustrate the use of the CSD Docking API. 2 | 3 | The material in gold_multi.zip illustrates the use of the standard Python multiprocessing module 4 | and the Docking API to parallelize GOLD docking at a multicore workstation level. 5 | 6 | For very large-scale docking, please enquire about our GOLD Cloud or GOLD Cluster tools. 7 | 8 | Material about covalent docking with GOLD is also available on request. 
9 | 10 | Note on the input files provided 11 | -------------------------------- 12 | 13 | The example target provided here is SYK tyrosine kinase (5LMA). 14 | 15 | A small number of example ligands are provided as SMILES in input_files/input.csv. If the name is a PDB code, it means 16 | the SMILES corresponds to the crystallographic ligand from that structure (with conventional ionization states assigned). 17 | If the name has a suffix, the SMILES is a manually-generated analogue. Note that not all ligands can be cross-docked into 18 | 5LMA, as there are induced-fit effects in SYK that GOLD cannot reproduce. -------------------------------------------------------------------------------- /notebooks/Discovery/08_Docking/input_files/ReadMe.txt: -------------------------------------------------------------------------------- 1 | The target here is SYK tyrosine kinase (5LMA). 2 | 3 | The ligands in 'input.mol2' are built from the SMILES in 'input.csv'. If the name is a PDBe code, it means the SMILES 4 | corresponds to the crystallographic ligand from that structure (with conventional ionisation states assigned). 5 | 6 | Note that not all ligands can be cross-docked, as there is an induced-fit effect in SYK that GOLD cannot reproduce. 7 | If the name has a suffix, the SMILES is a manually-generated analogue. The suffix '_bad' means the structure is designed 8 | to fail, even in the parent crystal structure (this is to e.g. illustrate constraint-violation penalties). 
-------------------------------------------------------------------------------- /notebooks/Discovery/08_Docking/input_files/gold.conf: -------------------------------------------------------------------------------- 1 | GOLD CONFIGURATION FILE 2 | 3 | AUTOMATIC SETTINGS 4 | autoscale = 0.3 5 | 6 | POPULATION 7 | popsiz = auto 8 | select_pressure = auto 9 | n_islands = auto 10 | maxops = auto 11 | niche_siz = auto 12 | 13 | GENETIC OPERATORS 14 | pt_crosswt = auto 15 | allele_mutatewt = auto 16 | migratewt = auto 17 | 18 | FLOOD FILL 19 | radius = 6 20 | origin = 0 0 0 21 | do_cavity = 1 22 | floodfill_atom_no = 0 23 | cavity_file = target/ligand.mol2 24 | floodfill_center = cavity_from_ligand 25 | 26 | DATA FILES 27 | ligand_data_file input.sdf 5 28 | param_file = DEFAULT 29 | set_ligand_atom_types = 1 30 | set_protein_atom_types = 0 31 | directory = output 32 | tordist_file = DEFAULT 33 | make_subdirs = 0 34 | save_lone_pairs = 0 35 | fit_points_file = fit_pts.mol2 36 | read_fitpts = 0 37 | 38 | FLAGS 39 | internal_ligand_h_bonds = 0 40 | flip_free_corners = 0 41 | match_ring_templates = 0 42 | flip_amide_bonds = 0 43 | flip_planar_n = 1 flip_ring_NRR flip_ring_NHR 44 | flip_pyramidal_n = 0 45 | rotate_carboxylic_oh = flip 46 | use_tordist = 1 47 | postprocess_bonds = 1 48 | rotatable_bond_override_file = DEFAULT 49 | solvate_all = 1 50 | 51 | TERMINATION 52 | early_termination = 1 53 | n_top_solutions = 3 54 | rms_tolerance = 1.5 55 | 56 | CONSTRAINTS 57 | force_constraints = 0 58 | 59 | COVALENT BONDING 60 | covalent = 0 61 | 62 | SAVE OPTIONS 63 | save_score_in_file = 1 comments 64 | save_protein_torsions = 1 65 | output_file_format = MACCS 66 | clean_up_option save_top_n_solutions 1 67 | clean_up_option delete_empty_directories 68 | clean_up_option delete_redundant_log_files 69 | 70 | WRITE OPTIONS 71 | write_options = NO_LINK_FILES NO_RNK_FILES NO_GOLD_LIGAND_MOL2_FILE 72 | 73 | FITNESS FUNCTION SETTINGS 74 | initial_virtual_pt_match_max = 3 75 | 
relative_ligand_energy = 1 76 | gold_fitfunc_path = plp 77 | score_param_file = DEFAULT 78 | 79 | PROTEIN DATA 80 | protein_datafile = target/protein.mol2 81 | 82 | CONSTRAINTS 83 | constraint protein_h_bond 10.0000 0.005000 3696 84 | 85 | 86 | -------------------------------------------------------------------------------- /notebooks/Discovery/08_Docking/input_files/input.csv: -------------------------------------------------------------------------------- 1 | smiles,name,data_1,data_2,data_3 2 | n1ccnc2c(NCCCC[NH3+])nc(c3ccc(C)cc3)cc12,5LMA,10,aaa,1.1 3 | n1ccnc2c(NC(CC3)CCC3[NH3+])nc(c3ccc(C)cc3)cc12,5LMA_1,20,bbb,2.2 4 | n1ccnc2c(NCCC(OCC3)C[NH2+]3)nc(c3ccc(C)cc3)cc12,5LMA_2,30,ccc,3.3 5 | O=c1[nH]cnc2cc(N[C@H]3[C@@H]([NH3+])CCCC3)nc(Nc4cccc5cc[nH]c45)c12,4PX6,40,ddd,4.4 6 | O=c1[nH]cnc2cc(N[C@H]3[C@@H]([NH3+])CCCC3)nc(Nc4ccc(C)cc4)c12,4PX6_1,50,eee,5.5 7 | n1ccc(N(CCO)c2cccc3[nH]ncc23)nc1N(c4cc5S(=O)(=O)[NH]Cc5cc4),4YJP,60,fff,6.6 8 | # zzz,rubbish,-1,bad,66.6 9 | n(cc1)c(Nc2cc3c(Cl)nn(C)c3cc2)nc1n(c4)nc(C)c4CN5CC(O)C5,4XG8,70,ggg,7.7 -------------------------------------------------------------------------------- /notebooks/Discovery/08_Docking/input_files/target/ligand.mol2: -------------------------------------------------------------------------------- 1 | @MOLECULE 2 | A:5LMA 3 | 45 47 1 0 1 4 | SMALL 5 | USER_CHARGES 6 | **** 7 | Generated from the CSD 8 | 9 | @ATOM 10 | 1 C10 26.9763 -43.1756 34.9942 C.ar 1 6ZG701 0.0000 11 | 2 C13 26.7604 -42.0345 32.5741 C.ar 1 6ZG701 0.0000 12 | 3 C15 25.9034 -42.3185 34.6662 C.ar 1 6ZG701 0.0000 13 | 4 C20 23.6793 -38.4994 36.8322 C.3 1 6ZG701 0.0000 14 | 5 C21 24.4623 -37.5544 37.7082 C.3 1 6ZG701 0.0000 15 | 6 C01 26.2021 -45.3807 42.6822 C.3 1 6ZG701 0.0000 16 | 7 C02 26.1662 -44.8936 41.2542 C.ar 1 6ZG701 0.0000 17 | 8 C03 27.0482 -45.3887 40.3082 C.ar 1 6ZG701 0.0000 18 | 9 C04 27.0222 -44.9446 38.9972 C.ar 1 6ZG701 0.0000 19 | 10 C05 26.0942 -43.9906 38.5862 C.ar 1 6ZG701 0.0000 20 | 11 C06 25.2162 -43.4896 
39.5442 C.ar 1 6ZG701 0.0000 21 | 12 C07 25.2562 -43.9336 40.8502 C.ar 1 6ZG701 0.0000 22 | 13 C08 26.0733 -43.4716 37.1962 C.ar 1 6ZG701 0.0000 23 | 14 C09 27.0603 -43.7396 36.2842 C.ar 1 6ZG701 0.0000 24 | 15 N11 27.9544 -43.4406 34.0772 N.ar 1 6ZG701 0.0000 25 | 16 C12 27.8234 -42.8636 32.8931 C.ar 1 6ZG701 0.0000 26 | 17 N14 25.7954 -41.7515 33.4342 N.ar 1 6ZG701 0.0000 27 | 18 C16 24.9274 -42.0785 35.6912 C.ar 1 6ZG701 0.0000 28 | 19 N17 23.8754 -41.2885 35.4632 N.pl3 1 6ZG701 0.0000 29 | 20 C18 22.9324 -40.8905 36.4942 C.3 1 6ZG701 0.0000 30 | 21 C19 23.5163 -39.8745 37.4532 C.3 1 6ZG701 0.0000 31 | 22 N22 24.5153 -36.2063 37.1372 N.4 1 6ZG701 1.0000 32 | 23 N23 25.0403 -42.6426 36.9082 N.ar 1 6ZG701 0.0000 33 | 24 H911 24.2002 -38.6059 35.8818 H 1 6ZG701 0.0000 34 | 25 H912 22.6910 -38.0760 36.6591 H 1 6ZG701 0.0000 35 | 26 H913 24.4856 -42.7346 39.2578 H 1 6ZG701 0.0000 36 | 27 H914 22.6381 -41.7743 37.0583 H 1 6ZG701 0.0000 37 | 28 H915 22.0555 -40.4561 36.0165 H 1 6ZG701 0.0000 38 | 29 H916 22.8553 -39.7903 38.3146 H 1 6ZG701 0.0000 39 | 30 H917 24.4940 -40.2264 37.7789 H 1 6ZG701 0.0000 40 | 31 H918 24.5580 -43.5202 41.5765 H 1 6ZG701 0.0000 41 | 32 H919 23.5725 -35.8436 37.0381 H 1 6ZG701 0.0000 42 | 33 H920 25.0485 -35.5993 37.7516 H 1 6ZG701 0.0000 43 | 34 H921 24.9616 -36.2420 36.2263 H 1 6ZG701 0.0000 44 | 35 H922 27.8992 -44.3802 36.5522 H 1 6ZG701 0.0000 45 | 36 H927 25.4779 -37.9324 37.8161 H 1 6ZG701 0.0000 46 | 37 H928 23.9875 -37.5049 38.6870 H 1 6ZG701 0.0000 47 | 38 H930 23.7265 -40.9477 34.5188 H 1 6ZG701 0.0000 48 | 39 H933 28.5846 -43.0488 32.1366 H 1 6ZG701 0.0000 49 | 40 H934 27.7356 -45.3458 38.2789 H 1 6ZG701 0.0000 50 | 41 H935 26.7173 -41.5988 31.5770 H 1 6ZG701 0.0000 51 | 42 H936 27.1796 -45.1695 43.1133 H 1 6ZG701 0.0000 52 | 43 H937 25.4328 -44.8699 43.2594 H 1 6ZG701 0.0000 53 | 44 H938 26.0198 -46.4541 42.7043 H 1 6ZG701 0.0000 54 | 45 H939 27.7771 -46.1427 40.6016 H 1 6ZG701 0.0000 55 | @BOND 56 | 1 1 3 ar 57 | 2 4 5 1 58 | 3 
6 7 1 59 | 4 7 8 ar 60 | 5 8 9 ar 61 | 6 9 10 ar 62 | 7 10 11 ar 63 | 8 7 12 ar 64 | 9 11 12 ar 65 | 10 10 13 1 66 | 11 1 14 ar 67 | 12 13 14 ar 68 | 13 1 15 ar 69 | 14 2 16 ar 70 | 15 15 16 ar 71 | 16 2 17 ar 72 | 17 3 17 ar 73 | 18 3 18 ar 74 | 19 18 19 1 75 | 20 19 20 1 76 | 21 4 21 1 77 | 22 20 21 1 78 | 23 5 22 1 79 | 24 13 23 ar 80 | 25 18 23 ar 81 | 26 4 24 1 82 | 27 4 25 1 83 | 28 11 26 1 84 | 29 20 27 1 85 | 30 20 28 1 86 | 31 21 29 1 87 | 32 21 30 1 88 | 33 12 31 1 89 | 34 22 32 1 90 | 35 22 33 1 91 | 36 22 34 1 92 | 37 14 35 1 93 | 38 5 36 1 94 | 39 5 37 1 95 | 40 19 38 1 96 | 41 16 39 1 97 | 42 9 40 1 98 | 43 2 41 1 99 | 44 6 42 1 100 | 45 6 43 1 101 | 46 6 44 1 102 | 47 8 45 1 103 | @SUBSTRUCTURE 104 | 1 6ZG701 1 GROUP 0 A 6ZG 0 105 | @SET 106 | CCDC_LIGAND STATIC ATOMS 107 | 45 1 2 3 4 5 6 7 8 9 \ 108 | 10 11 12 13 14 15 16 17 18 19 \ 109 | 20 21 22 23 24 25 26 27 28 29 \ 110 | 30 31 32 33 34 35 36 37 38 39 \ 111 | 40 41 42 43 44 45 112 | -------------------------------------------------------------------------------- /notebooks/Discovery/09_Covalent_Docking/ReadMe.txt: -------------------------------------------------------------------------------- 1 | The notebook 'Ligand_Preparation_for_Covalent_Docking' illustrates the process of ligand-preparation for covalent docking with GOLD. 2 | 3 | Also included here is a demonstration docking system based on Covid-19 MPro that can be used with the example ligands. 4 | 5 | The 'Covalent_Complexes' notebooks illustrate post-processing of the docking to produce usable covalent complexes. 
6 | -------------------------------------------------------------------------------- /notebooks/Discovery/09_Covalent_Docking/cleanup.ps1: -------------------------------------------------------------------------------- 1 | Remove-Item -Recurse .\test.mol, .\conformer_generator.*, input.sdf, .\output_*,.\complexed_*, complexes_* -------------------------------------------------------------------------------- /notebooks/Discovery/09_Covalent_Docking/gold_atom.conf: -------------------------------------------------------------------------------- 1 | GOLD CONFIGURATION FILE 2 | 3 | AUTOMATIC SETTINGS 4 | autoscale = 0.1 5 | 6 | POPULATION 7 | popsiz = auto 8 | select_pressure = auto 9 | n_islands = auto 10 | maxops = auto 11 | niche_siz = auto 12 | 13 | GENETIC OPERATORS 14 | pt_crosswt = auto 15 | allele_mutatewt = auto 16 | migratewt = auto 17 | 18 | FLOOD FILL 19 | radius = 10 20 | origin = 0 0 0 21 | do_cavity = 1 22 | floodfill_atom_no = 0 23 | cavity_file = target/ligand.mol2 24 | floodfill_center = cavity_from_ligand 25 | 26 | DATA FILES 27 | ligand_data_file input.sdf 5 28 | param_file = DEFAULT 29 | set_ligand_atom_types = 1 30 | set_protein_atom_types = 0 31 | directory = output_atom 32 | tordist_file = DEFAULT 33 | make_subdirs = 0 34 | save_lone_pairs = 0 35 | fit_points_file = fit_pts.mol2 36 | read_fitpts = 0 37 | 38 | FLAGS 39 | internal_ligand_h_bonds = 0 40 | flip_free_corners = 0 41 | match_ring_templates = 0 42 | flip_amide_bonds = 0 43 | flip_planar_n = 1 flip_ring_NRR flip_ring_NHR 44 | flip_pyramidal_n = 0 45 | rotate_carboxylic_oh = flip 46 | use_tordist = 1 47 | postprocess_bonds = 1 48 | rotatable_bond_override_file = DEFAULT 49 | solvate_all = 1 50 | 51 | TERMINATION 52 | early_termination = 1 53 | n_top_solutions = 3 54 | rms_tolerance = 1.5 55 | 56 | CONSTRAINTS 57 | force_constraints = 0 58 | 59 | COVALENT BONDING 60 | covalent = 1 61 | covalent_protein_atom_no = 1127 62 | covalent_ligand_atom_no = 5 63 | 64 | SAVE OPTIONS 65 | 
save_score_in_file = 1 comments 66 | save_protein_torsions = 1 67 | clean_up_option save_top_n_solutions 1 68 | 69 | WRITE OPTIONS 70 | write_options = NO_LINK_FILES NO_RNK_FILES NO_GOLD_LIGAND_MOL2_FILE 71 | 72 | FITNESS FUNCTION SETTINGS 73 | initial_virtual_pt_match_max = 3 74 | relative_ligand_energy = 1 75 | gold_fitfunc_path = plp 76 | score_param_file = DEFAULT 77 | 78 | PROTEIN DATA 79 | protein_datafile = target/protein.mol2 80 | 81 | 82 | -------------------------------------------------------------------------------- /notebooks/Discovery/09_Covalent_Docking/gold_substructure.conf: -------------------------------------------------------------------------------- 1 | GOLD CONFIGURATION FILE 2 | 3 | AUTOMATIC SETTINGS 4 | autoscale = 0.1 5 | 6 | POPULATION 7 | popsiz = auto 8 | select_pressure = auto 9 | n_islands = auto 10 | maxops = auto 11 | niche_siz = auto 12 | 13 | GENETIC OPERATORS 14 | pt_crosswt = auto 15 | allele_mutatewt = auto 16 | migratewt = auto 17 | 18 | FLOOD FILL 19 | radius = 10 20 | origin = 0 0 0 21 | do_cavity = 1 22 | floodfill_atom_no = 0 23 | cavity_file = target/ligand.mol2 24 | floodfill_center = cavity_from_ligand 25 | 26 | DATA FILES 27 | ligand_data_file input.sdf 5 28 | param_file = DEFAULT 29 | set_ligand_atom_types = 1 30 | set_protein_atom_types = 0 31 | directory = output_substructure 32 | tordist_file = DEFAULT 33 | make_subdirs = 0 34 | save_lone_pairs = 0 35 | fit_points_file = fit_pts.mol2 36 | read_fitpts = 0 37 | 38 | FLAGS 39 | internal_ligand_h_bonds = 0 40 | flip_free_corners = 0 41 | match_ring_templates = 0 42 | flip_amide_bonds = 0 43 | flip_planar_n = 1 flip_ring_NRR flip_ring_NHR 44 | flip_pyramidal_n = 0 45 | rotate_carboxylic_oh = flip 46 | use_tordist = 1 47 | postprocess_bonds = 1 48 | rotatable_bond_override_file = DEFAULT 49 | solvate_all = 1 50 | 51 | TERMINATION 52 | early_termination = 1 53 | n_top_solutions = 3 54 | rms_tolerance = 1.5 55 | 56 | CONSTRAINTS 57 | force_constraints = 0 58 | 59 | 
COVALENT BONDING 60 | covalent = 1 61 | covalent_protein_atom_no = 1127 62 | covalent_substructure = 1 63 | covalent_substructure_filename = substructure.mol2 64 | covalent_substructure_atom_no = 5 65 | covalent_topology = 1 66 | 67 | SAVE OPTIONS 68 | save_score_in_file = 1 comments 69 | save_protein_torsions = 1 70 | clean_up_option save_top_n_solutions 1 71 | 72 | WRITE OPTIONS 73 | write_options = NO_LINK_FILES NO_RNK_FILES NO_GOLD_LIGAND_MOL2_FILE 74 | 75 | FITNESS FUNCTION SETTINGS 76 | initial_virtual_pt_match_max = 3 77 | relative_ligand_energy = 1 78 | gold_fitfunc_path = plp 79 | score_param_file = DEFAULT 80 | 81 | PROTEIN DATA 82 | protein_datafile = target/protein.mol2 83 | 84 | 85 | -------------------------------------------------------------------------------- /notebooks/Discovery/09_Covalent_Docking/input.csv: -------------------------------------------------------------------------------- 1 | name,smiles 2 | CVD-0001891,C=CC(=O)N(c1ccc(C(F)(F)F)cc1)[C@H](C(=O)Nc1ccc(OC)cc1)c1cccnc1 3 | CVD-0001880,C=CC(=O)N(c1ccc([C@@H](C)OC)cc1)[C@@H](C(=O)Nc1cccc(CC)c1)c1cccnc1 4 | CVD-0001901,C=CC(=O)N(c1ccc2ncsc2c1)[C@H](C(=O)Nc1cccc(CC)c1)c1cccnc1 5 | CVD-0001902,C=CC(=O)N(c1ccc(C(F)(F)F)cc1)[C@H](C(=O)Nc1c(C)cccc1CC)c1cccnc1 6 | CVD-0001916,C=CC(=O)N(c1ccc2ncsc2c1)[C@@H](C(=O)Nc1ccc(Br)cc1)c1cccnc1 7 | CVD-0001888,C=CC(=O)N(c1ccc(SC)cc1)[C@H](C(=O)Nc1ccc(Cl)cc1)c1cccnc1 8 | CVD-0001897,C=CC(=O)N(c1ccc(C(C)(C)CC)cc1)[C@@H](C(=O)Nc1ccc(Cl)cc1)c1cccnc1 9 | CVD-0001899,C=CC(=O)N(c1ccc(C(C)(C)CC)cc1)[C@H](C(=O)Nc1cccc(F)c1C)c1cccnc1 10 | CVD-0001875,C=CC(=O)N(c1ccc(S(F)(F)(F)(F)F)cc1)[C@H](C(=O)Nc1ccc(OC)cc1)c1cccnc1 11 | CVD-0001904,C=CC(=O)N(c1ccc(C(F)(F)F)cc1)[C@H](C(=O)Nc1ccccc1Br)c1cccnc1 12 | CVD-0001878,C=CC(=O)N(c1cc(C(C)(C)C)on1)[C@@H](C(=O)Nc1cccc(CC)c1)c1cccnc1 13 | CVD-0001910,C=CC(=O)N(c1ccc(SC)cc1)[C@@H](C(=O)Nc1ccc(OC)cc1)c1cccnc1 14 | CVD-0001906,C=CC(=O)N(c1cc(C(C)(C)C)on1)[C@@H](C(=O)Nc1ccc(OC)cc1C)c1cccnc1 15 | 
CVD-0001886,C=CC(=O)N(c1ccc([C@H](C)OC)cc1)[C@H](C(=O)Nc1ccc(Cl)cc1)c1cccnc1 16 | CVD-0001896,C=CC(=O)N(c1ccc(C(C)C)nc1)[C@@H](C(=O)NC(C)(C)C)c1cccnc1 17 | CVD-0001914,C=CC(=O)N(c1ccc(S(F)(F)(F)(F)F)cc1)[C@H](C(=O)NC(C)(C)C)c1cccnc1 18 | CVD-0001883,C=CC(=O)N(c1ccc(SC)cc1)[C@H](C(=O)Nc1c(C)cccc1CC)c1cccnc1 19 | CVD-0001895,C=CC(=O)N(c1ccc2ncsc2c1)[C@@H](C(=O)NC(C)(C)C)c1cccnc1 20 | CVD-0001907,C=CC(=O)N(c1ccc2ncsc2c1)[C@H](C(=O)Nc1ccc(Cl)cc1)c1cccnc1 21 | CVD-0001898,C=CC(=O)N(c1ccc(C(C)(C)CC)cc1)[C@@H](C(=O)Nc1ccc(Br)cc1)c1cccnc1 22 | CVD-0001903,C=CC(=O)N(c1ccc(SC)cc1)[C@H](C(=O)Nc1ccc(OC)cc1C)c1cccnc1 23 | CVD-0001887,C=CC(=O)N(c1cc(C(C)(C)C)on1)[C@H](C(=O)NC(C)(C)C)c1cccnc1 24 | CVD-0001911,C=CC(=O)N(c1ccc2ncsc2c1)[C@@H](C(=O)Nc1c(C)cccc1CC)c1cccnc1 25 | CVD-0001909,C=CC(=O)N(c1cc(C(C)(C)C)on1)[C@@H](C(=O)Nc1c(C)cccc1CC)c1cccnc1 26 | CVD-0001873,C=CC(=O)N(c1ccc([C@H](C)OC)cc1)[C@H](C(=O)NC(C)(C)C)c1cccnc1 27 | CVD-0001900,C=CC(=O)N(c1ccc(C(F)(F)F)cc1)[C@@H](C(=O)Nc1ccc(OC)cc1C)c1cccnc1 28 | CVD-0001879,C=CC(=O)N(c1ccc(N(C)C)c(C)c1)[C@@H](C(=O)Nc1cccc(CC)c1)c1cccnc1 29 | CVD-0001890,C=CC(=O)N(c1ccc(C(C)C)nc1)[C@H](C(=O)Nc1ccc(OC)cc1C)c1cccnc1 30 | CVD-0001905,C=CC(=O)N(c1ccc(N(C)C)c(C)c1)[C@H](C(=O)Nc1c(C)cccc1CC)c1cccnc1 31 | CVD-0001885,C=CC(=O)N(c1cc(C(C)(C)C)n[nH]1)[C@@H](C(=O)Nc1ccc(Cl)cc1)c1cccnc1 32 | CVD-0001908,C=CC(=O)N(c1ccc([C@H](C)OC)cc1)[C@@H](C(=O)Nc1c(C)cccc1CC)c1cccnc1 33 | CVD-0001881,C=CC(=O)N(c1ccc(C(C)C)nc1)[C@H](C(=O)Nc1c(C)cccc1CC)c1cccnc1 34 | CVD-0001892,C=CC(=O)N(c1ccc(S(F)(F)(F)(F)F)cc1)[C@H](C(=O)Nc1ccc(OC)cc1C)c1cccnc1 35 | CVD-0001874,C=CC(=O)N(c1ccc(N(C)C)c(C)c1)[C@@H](C(=O)Nc1ccc(Cl)cc1)c1cccnc1 36 | CVD-0001884,C=CC(=O)N(c1cc(C(C)(C)C)n[nH]1)[C@@H](C(=O)Nc1c(C)cccc1CC)c1cccnc1 37 | CVD-0001889,C=CC(=O)N(c1ccc2ncsc2c1)[C@@H](C(=O)Nc1ccc(OC)cc1)c1cccnc1 38 | CVD-0001913,C=CC(=O)N(c1ccc(SC)cc1)[C@H](C(=O)NC(C)(C)C)c1cccnc1 39 | CVD-0001877,C=CC(=O)N(c1ccc(N(C)C)c(C)c1)[C@H](C(=O)Nc1ccc(Br)cc1)c1cccnc1 40 | 
CVD-0001872,C=CC(=O)N(c1ccc2ncsc2c1)[C@H](C(=O)Nc1ccc(OC)cc1C)c1cccnc1 41 | CVD-0001893,C=CC(=O)N(c1ccc(S(F)(F)(F)(F)F)cc1)[C@@H](C(=O)Nc1c(C)cccc1CC)c1cccnc1 42 | CVD-0001882,C=CC(=O)N(c1ccc([C@H](C)OC)cc1)[C@@H](C(=O)Nc1ccccc1Br)c1cccnc1 43 | CVD-0001915,C=CC(=O)N(c1ccc([C@H](C)OC)cc1)[C@H](C(=O)Nc1ccc(OC)cc1C)c1cccnc1 44 | CVD-0001876,C=CC(=O)N(c1ccc(N(C)C)c(C)c1)[C@@H](C(=O)Nc1ccccc1Br)c1cccnc1 45 | CVD-0001894,C=CC(=O)N(c1ccc(N(C)C)c(C)c1)[C@@H](C(=O)Nc1ccc(OC)cc1C)c1cccnc1 46 | -------------------------------------------------------------------------------- /notebooks/Discovery/09_Covalent_Docking/substructure.mol2: -------------------------------------------------------------------------------- 1 | # MOL_START 2 | # Creating user name: Hermes 3 | # Creation time: Wed Feb 3 09:38:09 2021 4 | 5 | @MOLECULE 6 | --CCDC--020321 3D 7 | 10 9 1 0 1 8 | SMALL 9 | NO_CHARGES 10 | **** 11 | Generated from the CSD 12 | 13 | @ATOM 14 | 1 O1 0.9915 -3.2165 1.4711 O.2 1 0 0.0000 15 | 2 C2 1.9092 -3.5761 0.7457 C.2 1 0 0.0000 16 | 3 C3 2.7664 -2.5622 0.0228 C.3 1 0 0.0000 17 | 4 C4 2.3351 -1.1405 0.3215 C.3 1 0 0.0000 18 | 5 S5 3.3096 0.0744 -0.5025 S.3 1 0 0.0000 19 | 6 N6 2.1947 -4.8788 0.5791 N.am 1 0 0.0000 20 | 7 H15 2.7359 -2.7344 -1.0601 H 1 0 0.0000 21 | 8 H16 3.8054 -2.6932 0.3493 H 1 0 0.0000 22 | 9 H17 2.4270 -0.9710 1.4014 H 1 0 0.0000 23 | 10 H18 1.2778 -1.0146 0.0573 H 1 0 0.0000 24 | @BOND 25 | 1 1 2 2 26 | 2 2 3 1 27 | 3 3 4 1 28 | 4 4 5 1 29 | 5 6 2 am 30 | 6 3 7 1 31 | 7 3 8 1 32 | 8 4 9 1 33 | 9 4 10 1 34 | @SUBSTRUCTURE 35 | 1 0 1 GROUP 0 L_1 **** 0 36 | @SET 37 | CCDC_LIGAND STATIC ATOMS 38 | 10 1 2 3 4 5 6 7 8 9 \ 39 | 10 40 | # MOL_END 41 | 42 | -------------------------------------------------------------------------------- /notebooks/Discovery/10_Editing_molecules/Editing_molecules.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | 
"source": [ 7 | "```\n", 8 | "This script can be used for any purpose without limitation subject to the\n", 9 | "conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx\n", 10 | "\n", 11 | "This permission notice and the following statement of attribution must be\n", 12 | "included in all copies or substantial portions of this script.\n", 13 | "\n", 14 | "2022-06-01: Made available by the Cambridge Crystallographic Data Centre.\n", 15 | "\n", 16 | "```" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "# Editing molecules\n", 24 | "\n", 25 | "This notebook illustrates the use of the [Molecule API](https://downloads.ccdc.cam.ac.uk/documentation/API/descriptive_docs/molecule_editing.html) to create and modify molecules.\n", 26 | "\n", 27 | "This one is definately Under Construction!" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import sys\n", 37 | "sys.path.append('../..')\n", 38 | "from ccdc_notebook_utilities import create_logger\n", 39 | "import os\n", 40 | "from pathlib import Path\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "from IPython.display import HTML" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "import ccdc" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "from ccdc.io import EntryReader\n", 68 | "from ccdc.diagram import DiagramGenerator\n", 69 | "from ccdc.molecule import Molecule, Atom, Bond" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "### Initialization" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "Set up a 
logger object, with timestamp _etc._..." 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "logger = create_logger()" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "### Creating a molecule from scratch\n", 100 | "\n", 101 | "https://downloads.ccdc.cam.ac.uk/documentation/API/descriptive_docs/molecule_editing.html#building-molecules-from-scratch" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "mol = Molecule(identifier='my molecule')" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "#### Add atoms" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "a1 = Atom('N', coordinates=(-0.301, -0.968, 4.080), formal_charge=1)\n", 127 | "a2 = Atom('C', coordinates=(-1.590, -1.256, 4.148))\n", 128 | "a3 = Atom('C', coordinates=(-2.144, -2.420, 3.669))\n", 129 | "a4 = Atom('C', coordinates=(-1.327, -3.345, 3.075))\n", 130 | "a5 = Atom('C', coordinates=( 0.200, -3.055, 2.977))\n", 131 | "a6 = Atom('C', coordinates=( 0.447, -1.874, 3.505))" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "a1_id = mol.add_atom(a1)\n", 141 | "a2_id = mol.add_atom(a2)\n", 142 | "a3_id = mol.add_atom(a3)\n", 143 | "a4_id = mol.add_atom(a4)\n", 144 | "a5_id = mol.add_atom(a5)\n", 145 | "a6_id = mol.add_atom(a6)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "#### Add bonds" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "aromatic_bond_type = 
Bond.BondType(5)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "b1_id = mol.add_bond(aromatic_bond_type, a1_id, a2_id)\n", 171 | "b2_id = mol.add_bond(aromatic_bond_type, a2_id, a3_id)\n", 172 | "b3_id = mol.add_bond(aromatic_bond_type, a3_id, a4_id)\n", 173 | "b4_id = mol.add_bond(aromatic_bond_type, a4_id, a5_id)\n", 174 | "b5_id = mol.add_bond(aromatic_bond_type, a5_id, a6_id)\n", 175 | "b6_id = mol.add_bond(aromatic_bond_type, a6_id, a1_id)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "mol.add_hydrogens()" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "mol.smiles" 194 | ] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "Python 3 (ipykernel)", 200 | "language": "python", 201 | "name": "python3" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.9.16" 214 | } 215 | }, 216 | "nbformat": 4, 217 | "nbformat_minor": 4 218 | } 219 | -------------------------------------------------------------------------------- /notebooks/Discovery/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Discovery Notebooks 2 | 3 | The notebooks in this directory will be of interest primarily to scientists working in Drug Discovery. They show how the API may be used to complement the applications in the [CSD-Discovery](https://www.ccdc.cam.ac.uk/Solutions/csd-discovery/) suite. 
4 | 5 | ## Contents 6 | 7 | Directory | Contents 8 | ----- | ----- 9 | [00_Background](00_Background) | Basic introduction to API concepts 10 | [01_CSD_Search](01_CSD_Search) | Searching the CSD and extracting geometrical parameters (as in ConQuest) 11 | [02_Protein_Ligand](02_Protein_Ligand) | Searching the CrossMiner database of protein binding sites and extracting geometrical parameters 12 | [03_Molecular_geometries](03_Molecular_geometries) | Identify unusual intramolecular geometries using CSD data (as in Mogul) 13 | [04_Conformer_generation](04_Conformer_generation) | Generating conformers using parameter distributions derived from the CSD 14 | [05_Molecular_interactions](05_Molecular_interactions) | Investigate intermolecular interactions (as in IsoStar) 15 | [06_Interaction_maps](06_Interaction_maps) | Investigate intermolecular interactions (as in SuperStar) 16 | [07_Cavities](07_Cavities) | Search and compare protein cavities 17 | [08_Docking](08_Docking) | Programmatic GOLD docking 18 | [09_Covalent_Docking](09_Covalent_Docking) | Covalent docking with GOLD 19 | [10_Editing_molecules](10_Editing_molecules) | Creating molecules from scratch 20 | [11_Working_With_Proteins](11_Working_With_Proteins) | Basics of working with proteins 21 | [12_Ensemble_docking](12_Ensemble_docking) | Ensemble docking with GOLD using functional waters 22 | 23 | ## Requirements 24 | 25 | Beyond the API, the only Python modules required are [Pandas](https://pandas.pydata.org/) and [Plotly](https://plotly.com/); for the 26 | Covalent Docking notebooks, [RDKit](https://rdkit.org/) is also required.
These may be installed from [conda-forge](https://conda-forge.org/): 27 | 28 | ``` 29 | conda install --yes --channel=conda-forge pandas plotly rdkit 30 | ``` -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- 1 | # Jupyter Notebooks 2 | 3 | This directory contains collections of [Jupyter Notebooks](https://jupyter.org/) intended to illustrate various aspects of the CSD Python API. 4 | 5 | ## Discovery/ 6 | The notebooks in the directory [Discovery/](./Discovery/) will be of interest primarily to scientists working in Drug Discovery. They show how the API may be used to complement the applications in the [CSD-Discovery](https://www.ccdc.cam.ac.uk/Solutions/csd-discovery/) suite. -------------------------------------------------------------------------------- /notebooks/ccdc_notebook_utilities/README.md: -------------------------------------------------------------------------------- 1 | # Shared utilities 2 | 3 | This package contains shared functionality that is used in multiple notebooks 4 | -------------------------------------------------------------------------------- /notebooks/ccdc_notebook_utilities/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # This script can be used for any purpose without limitation subject to the 3 | # conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx 4 | # 5 | # This permission notice and the following statement of attribution must be 6 | # included in all copies or substantial portions of this script. 
def create_logger(verbose=True):
    """
    From inside a notebook, create a logger and log starting information.

    The logger is looked up by this module's name, so every call returns the
    same logger object. The stream handler is attached only when the logger has
    no handler yet; otherwise re-running the notebook cell would stack handlers
    and print every message several times.

    :param verbose: when true, log the platform, Python, CSD and API details.
    :returns: the configured ``logging.Logger``.
    """

    logger = logging.getLogger(__name__)
    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter('[%(asctime)s %(levelname)-7s] %(message)s', datefmt='%y-%m-%d %H:%M:%S'))
        logger.addHandler(handler)
    logger.setLevel(logging.INFO)

    if verbose:
        logger.info(f"""
Platform: {platform()}

Python exe: {sys.executable}
Python version: {'.'.join(str(x) for x in sys.version_info[:3])}

CSD version: {ccdc.io.csd_version()}
CSD directory: {ccdc.io.csd_directory()}
API version: {ccdc.__version__}

CSDHOME: {os.environ.get('CSDHOME', 'Not set')}
CCDC_LICENSING_CONFIGURATION: {os.environ.get('CCDC_LICENSING_CONFIGURATION', 'Not set')}
""")
    return logger
def run_hermes(*filenames):
    """
    From inside a notebook, run the CCDC Hermes utility, or report the failure if it
    can't be started. Assumes the software is installed alongside the data folder currently.

    :param filenames: file paths passed to Hermes as command-line arguments.
    """

    try:
        on_windows = platform().startswith('Windows')
        hermes_dir = Path(csd_directory()) / '..' / '..' / 'ccdc-software' / 'hermes'
        hermes_exe = (hermes_dir / ('hermes.exe' if on_windows else 'hermes')).as_posix()
        # 0x00000008 is the Windows DETACHED_PROCESS creation flag. subprocess raises
        # ValueError for a non-zero creationflags on other platforms, so it is only
        # passed on Windows; elsewhere Popen is called with its defaults.
        popen_kwargs = {'creationflags': 0x00000008} if on_windows else {}
        _ = Popen([hermes_exe, *filenames], **popen_kwargs)
    except Exception as e:
        # Best effort: a notebook should keep running even if the viewer cannot be launched.
        print(f"Couldnt run Hermes {e}")
17 | 18 | ## Find Binding Conformation 19 | 20 | - Generates idealized conformers for ligands and evaluates their RMSD to the conformation in the PDB. 21 | 22 | ## GOLD-multi 23 | 24 | - Use the CSD Docking API and the multiprocessing module to parallelize GOLD docking. 25 | 26 | ## Hydrogen bond propensity 27 | 28 | - Writes a `.docx report` of a hydrogen bond propensity calculation for any given `.mol2`/refcode. 29 | 30 | ## MOF subset 2017 Chem Mater publication 31 | 32 | - Two scripts that were supplementary information in the publication "Development of a Cambridge Structural Database Subset: 33 | A Collection of Metal–Organic Frameworks for Past, Present, and Future" DOI: 34 | 35 | ## Multi-component hydrogen bond propensity 36 | 37 | - Performs a multi-component HBP calculation for a given library of co-formers. 38 | 39 | ## Packing similarity dendrogram 40 | 41 | - Construct a dendrogram for an input set of structures based on packing-similarity analysis. 42 | 43 | ## Particle Rugosity 44 | 45 | - Calculates the simulated BFDH particle rugosity weighted by facet area. 46 | 47 | ## Surface Charge 48 | 49 | - Calculates the surface charge for a given structure and surface terminations. Runs both from CMD and Mercury. 50 | 51 | ## Refcodes With Properties 52 | 53 | - A script for generating refcode lists with specific properties from an easy-to-read control file. 54 | 55 | ## Tips 56 | 57 | A section for top tips in using the repository and GitHub. 58 | 59 | ### Searching tips 60 | 61 | The search bar in GitHub allows you to search for keywords mentioned in any file throughout the repository (in the main branch). 62 | 63 | It is also possible to filter which file type you are interested in. 
64 | 65 | For example: 66 | "hydrogen bond" 67 | 68 | 69 | -------------------------------------------------------------------------------- /scripts/concat_mol2/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Concat Mol2 2 | 3 | ## Summary 4 | 5 | Opens a set of mol2 files in the working directory and creates one concatenated multi-mol2 file. 6 | Optionally delete the individual mol2 files other than the new concat.mol2 7 | 8 | ## Requirements 9 | 10 | - CSD Python API not required. 11 | - Mol2 files must be in the same directory. 12 | 13 | ## Licensing Requirements 14 | 15 | No license required. 16 | 17 | ## Instructions on running 18 | 19 | ```cmd 20 | > python concat_mol2.py 21 | ``` 22 | 23 | Help output: 24 | ```cmd 25 | python concat_mol2.py -h 26 | usage: concat_mol2.py [-h] [-d] 27 | 28 | optional arguments: 29 | -h, --help show this help message and exit 30 | -d, --delete_contributors 31 | Remove contributing individual mol2 files after 32 | concatenation 33 | ``` 34 | 35 | ## Author 36 | 37 | _Peter Galek_ (2014) 38 | 39 | > For feedback or to report any issues please contact [support@ccdc.cam.ac.uk](mailto:support@ccdc.cam.ac.uk) -------------------------------------------------------------------------------- /scripts/concat_mol2/concat_mol2.py: -------------------------------------------------------------------------------- 1 | # 2 | # This script can be used for any purpose without limitation subject to the 3 | # conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx 4 | # 5 | # This permission notice and the following statement of attribution must be 6 | # included in all copies or substantial portions of this script. 
def main(delete_separate_files):
    """
    Concatenate every ``.mol2`` file in the working directory into ``concat.mol2``.

    :param delete_separate_files: when truthy, remove the contributing files after
        they have been concatenated.
    """
    # List the inputs once, leaving out a concat.mol2 from an earlier run.
    # glob returns names in arbitrary order, so sort to make the output reproducible.
    mol2_files = sorted(f for f in glob.glob('*.mol2') if f != 'concat.mol2')

    with open('concat.mol2', 'w') as outfile:
        for f in mol2_files:
            with open(f, 'r') as infile:
                outfile.write(infile.read())
    print(f"{len(mol2_files)} files concatenated.")

    if delete_separate_files:
        for f in mol2_files:
            os.remove(f)
        print(f"{len(mol2_files)} files removed.")
def generate_conformers(molecule: molecule.Molecule, max_conformers: int = 50) -> conformer.ConformerHitList:
    """
    Generate conformers for a molecule.

    :param molecule: The Molecule (ccdc Molecule object) to generate conformers for.
    :param max_conformers: The maximum number of conformers to generate.

    :returns: ccdc.conformer.ConformerHitList
    """

    # Set up the ConformerGenerator
    confgen = conformer.ConformerGenerator()
    confgen.settings.max_conformers = max_conformers
    # confgen.settings.superimpose_conformers_onto_reference = True

    # Announce the work before starting it, and flush because the message has no
    # newline yet; otherwise it only appears once generation has already finished.
    print(f'Generating conformers, maximum of {max_conformers} ... ', end='', flush=True)
    conformers = confgen.generate(molecule)

    # Give each conformer a distinct identifier: the existing one plus a 1-based, zero-padded index.
    for i, conf in enumerate(conformers, start=1):
        conf.molecule.identifier = '{}_{:04}'.format(conf.molecule.identifier, i)
    print(f'done, generated {len(conformers)} conformers.')

    return conformers
def overlay(conformers, query: str, output_filename: str) -> None:
    """
    Overlay conformers on a shared SMARTS substructure and write them to a file.

    Each conformer is searched for the pattern (at most one match per structure),
    and every hit is overlaid on the first hit's molecule using the matched atoms.

    :param conformers: Conformers generated from ConfGen
    :param query: SMARTS pattern which the conformers will overlay on top of.
                  Should be consistent across all conformers, e.g. benzene ring.
    :param output_filename: The name of the file the overlaid molecules are written to.
    """
    print('Overlaying conformers ... ', end='')
    searcher = SubstructureSearch()
    searcher.add_substructure(SMARTSSubstructure(query))
    hits = searcher.search([conf.molecule for conf in conformers], max_hits_per_structure=1)
    # The first hit is the reference that all hits are overlaid on.
    reference_atoms = hits[0].match_atoms()
    print('done.')

    print('Writing file superimposed ... ', end='')
    with io.MoleculeWriter(output_filename) as writer:
        for hit in hits:
            atom_pairs = zip(reference_atoms, hit.match_atoms())
            overlaid = descriptors.MolecularDescriptors.Overlay(hits[0].molecule, hit.molecule, atom_pairs)
            writer.write(overlaid.molecule)
    print('done.')
14 | Most probable conformer RMSD wrt input: 3.276; wrt minimised: 3.202. 15 | Scores of top 10 conformers: 0.000, 0.000, 0.000, 0.027, 0.027, 0.027, 0.027, 0.027, 0.029, 0.029. 16 | Overlaying conformers ... done. 17 | Writing file superimposed ... done. 18 | ``` 19 | 20 | CCDC Python API Licence required, minimum version: 3.0.15 21 | 22 | There is an accompanying mol2 file with this script, but users may use any small molecule provided in a file format readable by our API (e.g. mol, mol2, sdf, etc) 23 | 24 | Author: Chris Ringrose - 22/11/24 25 | 26 | For feedback or to report any issues please contact support@ccdc.cam.ac.uk 27 | -------------------------------------------------------------------------------- /scripts/create_castep_input/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Create CASTEP Input 2 | 3 | ## Summary 4 | 5 | Allows the user to generate a set of CASTEP input files (.cell and .param) for a structure viewed in Mercury. 6 | 7 | ![img.png](assets/file_output.png) 8 | 9 | ## Requirements 10 | 11 | Tested with CSD Python API 3.0.9 12 | 13 | Requires user to add script to Mercury interface. 14 | 15 | ## Licensing Requirements 16 | 17 | - CSD-Core 18 | 19 | If you wish to run CASTEP, you will need to acquire a license for CASTEP; this is not supplied by the CCDC. 20 | 21 | ## Instructions on running 22 | 23 | Add script with Folder to Mercury interface (Mercury -> CSD Python API-> Options -> Add Location) 24 | 25 | ![img.png](assets/add_script_location.png) 26 | 27 | Select refcode of interest or load structure into Mercury. 28 | 29 | Select script from CSD Python API dropdown.
30 | 31 | ![img.png](assets/select_script.png) 32 | 33 | ## Author 34 | 35 | _Anthony Reilly_ (2016) 36 | 37 | > For feedback or to report any issues please contact [support@ccdc.cam.ac.uk](mailto:support@ccdc.cam.ac.uk) -------------------------------------------------------------------------------- /scripts/create_castep_input/assets/add_script_location.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/create_castep_input/assets/add_script_location.png -------------------------------------------------------------------------------- /scripts/create_castep_input/assets/file_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/create_castep_input/assets/file_output.png -------------------------------------------------------------------------------- /scripts/create_castep_input/assets/select_script.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/create_castep_input/assets/select_script.png -------------------------------------------------------------------------------- /scripts/create_castep_input/create_castep_input.py: -------------------------------------------------------------------------------- 1 | # 2 | # This script can be used for any purpose without limitation subject to the 3 | # conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx 4 | # 5 | # This permission notice and the following statement of attribution must be 6 | # included in all copies or substantial portions of this script. 
def make_castep_input(crystal, kp_spacing=0.06, cut_off=750.000, task='SinglePoint', di_opt=False):
    """
    Write CASTEP ``.cell`` and ``.param`` input files for a crystal.

    Both files are named after ``crystal.identifier`` and are opened with a
    relative path, so they are created in the current working directory.

    :param crystal: the crystal to write; the script entry point passes the crystal
        of the entry currently selected in the calling application.
    :param kp_spacing: value written after ``KPOINT_MP_SPACING`` in the cell file.
    :param cut_off: value written as ``cut_off_energy`` in the param file.
    :param task: value written as ``task`` in the param file; any value other than
        ``'SinglePoint'`` also adds the geometry-optimisation keywords.
    :param di_opt: when true (and task is not ``'SinglePoint'``), also write
        ``geom_method : delocalised``.
    :returns: always ``True``.
    """
    # cell file
    with open('%s.cell' % crystal.identifier, 'w') as cell_file:
        # Lattice block: one line of cell lengths followed by one line of cell angles.
        cell_file.write('%BLOCK LATTICE_ABC\n')
        cell_file.write(' %11.7f %11.7f %11.7f\n' % crystal.cell_lengths)
        cell_file.write(' %11.7f %11.7f %11.7f\n' % crystal.cell_angles)
        cell_file.write('%ENDBLOCK LATTICE_ABC\n\n')
        # Fractional positions: one line per atom of the molecule generated by each
        # symmetry operator of the crystal.
        cell_file.write('%BLOCK POSITIONS_FRAC\n')
        for op in crystal.symmetry_operators:
            # NOTE(review): [0, 0, 0] is presumably a zero cell translation - confirm against the API.
            mol = crystal.symmetric_molecule(op, [0, 0, 0], force=False)
            for atom in mol.atoms:
                cell_file.write(' %s %19.16f %19.16f %19.16f\n' % (atom.atomic_symbol,
                                                                   atom.fractional_coordinates.x,
                                                                   atom.fractional_coordinates.y,
                                                                   atom.fractional_coordinates.z))
        cell_file.write('%ENDBLOCK POSITIONS_FRAC\n\n')

        cell_file.write('KPOINT_MP_SPACING ' + str(kp_spacing) + '\n\n')

        # Symmetry block: for each operator, three lines holding the nine rotation
        # values, one line holding the translation, then a '###' separator line.
        cell_file.write('%BLOCK SYMMETRY_OPS\n')
        for op in crystal.symmetry_operators:
            rotation = crystal.symmetry_rotation(op)
            trans = crystal.symmetry_translation(op)
            cell_file.write(' %18.15f %18.15f %18.15f\n' % (rotation[0], rotation[1], rotation[2]))
            cell_file.write(' %18.15f %18.15f %18.15f\n' % (rotation[3], rotation[4], rotation[5]))
            cell_file.write(' %18.15f %18.15f %18.15f\n' % (rotation[6], rotation[7], rotation[8]))
            cell_file.write(' %18.15f %18.15f %18.15f\n' % (trans[0], trans[1], trans[2]))
            cell_file.write('###\n')
        cell_file.write('%ENDBLOCK SYMMETRY_OPS\n\n')

    # param file
    with open('%s.param' % crystal.identifier, 'w') as param_file:
        param_file.write('task : ' + task + '\n')
        param_file.write('comment : CASTEP calculation for %s\n' % crystal.identifier)

        # Fixed calculation settings; only the cut-off energy comes from an argument.
        param_file.write('xc_functional : PBE\n')
        param_file.write('sedc_scheme : TS\n')

        param_file.write('metals_method : dm\n')
        param_file.write('mixing_scheme : Pulay\n')
        param_file.write('spin_polarized : false\n')
        param_file.write('opt_strategy : speed\n')
        param_file.write('cut_off_energy : ' + str(cut_off) + '\n')
        param_file.write('grid_scale : 2.0\n')
        param_file.write('fine_grid_scale : 3.0\n')
        param_file.write('elec_energy_tol : 1.000e-008\n')
        param_file.write('fix_occupancy : true\n')
        # NOTE(review): the geometry keywords below, including the di_opt check, are
        # taken to belong to this branch - confirm the intended nesting.
        if task != 'SinglePoint':
            param_file.write('finite_basis_corr : 2\n')

            param_file.write('geom_modulus_est : 50 GPa\n')
            param_file.write('geom_max_iter : 200\n')
            param_file.write('num_backup_iter : 1\n')

            param_file.write('geom_energy_tol : 5E-06\n')
            param_file.write('geom_stress_tol : 0.02\n')
            param_file.write('geom_disp_tol : 1E-03\n')
            param_file.write('geom_force_tol : 5E-03\n')
            if di_opt:
                param_file.write('geom_method : delocalised \n')

        param_file.write('write_cif_structure : true\n')
        param_file.write('write_cell_structure : true\n')
        # Written with a leading '#', so the continuation keyword is emitted as a comment line.
        param_file.write('#continuation : default\n')

    return True
6 | 7 | 8 | ## Requirements 9 | 10 | Tested with CSD Python API 3.0.9 11 | 12 | 13 | ## Licensing Requirements 14 | 15 | CSD-Core 16 | 17 | If you wish to run Gaussian, you will need to acquire a licence for Gaussian; this is not supplied by the CCDC. 18 | 19 | ## Instructions on running 20 | 21 | To create an input file for a CSD refcode, or for a structure in a local `.mol2` file: 22 | ```cmd 23 | >python create_gaussian_input.py HXACAN 24 | ``` 25 | 26 | ```cmd 27 | >python create_gaussian_input.py HXACAN.mol2 28 | ``` 29 | 30 | ## Author 31 | 32 | _Andrew Maloney_ (2015) 33 | 34 | > For feedback or to report any issues please contact [support@ccdc.cam.ac.uk](mailto:support@ccdc.cam.ac.uk) -------------------------------------------------------------------------------- /scripts/create_gaussian_input/create_gaussian_input.py: -------------------------------------------------------------------------------- 1 | # 2 | # This script can be used for any purpose without limitation subject to the 3 | # conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx 4 | # 5 | # This permission notice and the following statement of attribution must be 6 | # included in all copies or substantial portions of this script.
def file_writer(molecule, name):
    """Writes a standard Gaussian input file for each component of the given molecule.

    One file named ``<name>_molecule<i>.gjf`` is written to the current working
    directory per component, where ``i`` is the zero-based component index.
    Hydrogen positions are normalised on the molecule before writing.

    :param molecule: the molecule whose components are written.
    :param name: label used in the file names, the title line and error messages.
    """
    # Work on the molecule that was passed in. Reading the caller's global `mol` and
    # `entry_id` only worked when those globals happened to hold the same structure.
    if not molecule.all_atoms_have_sites:
        fatal(name, 'has some atoms without coordinates')
    molecule.normalise_hydrogens()

    for i, component in enumerate(molecule.components):

        file_name = '%s_molecule%d.gjf' % (name, i)
        # `with` closes the file even if writing one of the lines fails.
        with open(file_name, 'w') as f:

            f.write('#B3LYP/6-31G** opt\n')
            f.write('\n')
            f.write('Standard Gaussian Input File for %s, molecule %d\n' % (name, i))
            f.write('\n')
            # Charge and spin multiplicity line; fixed at "0 1" for every component.
            f.write('0 1\n')

            for atom in component.atoms:
                f.write('%2s %9.6f %9.6f %9.6f\n' % (atom.atomic_symbol,
                                                     atom.coordinates.x,
                                                     atom.coordinates.y,
                                                     atom.coordinates.z))

            f.write('\n')
            f.write('--Link1--')
            f.write('\n')
            f.write('\n')
            f.write('\n')
reader = ccdc.io.MoleculeReader('CSD') 79 | identifier = entry_id 80 | try: 81 | mol = reader.molecule(entry_id) 82 | file_writer(mol, identifier) 83 | except RuntimeError: 84 | fatal(entry_id, '- structure not found') 85 | -------------------------------------------------------------------------------- /scripts/find_binding_conformation/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Find Binding Conformation 2 | 3 | ## Summary 4 | 5 | We know that most pharmaceutically relevant compounds bind to their targets in a relaxed conformation. The challenge in discovery is to figure out rapidly which conformations are readily accessible for the molecules we are considering. There is now a new solution to address this based on statistical, rather than just energetic approaches. 6 | 7 | Driven by the wealth and diversity of bond, angle and torsion information in the Cambridge Structural Database (CSD), the CSD Conformer Generator produces realistic ensembles of low energy ligand structures. These are ready to be exploited for drug design in the presence and also in the absence of detailed knowledge about the three-dimensional structure of the protein active site. 8 | 9 | Starting from a list of PDB-codes, this script generates idealized conformers 10 | for ligands and evaluates their RMSD to the conformation in the PDB. 11 | 12 | The output are subdirectories for each PDB entry with the conformers generated for each ligand, and a spreadsheet (.csv) with the results of the comparison. 13 | ## Requirements 14 | - Tested with CSD Python API 3.0.9 15 | - This script uses PDBe's and RCSB's API to obtain PDB related information. 
16 | 17 | ## Licensing Requirements 18 | - CSD-Core 19 | 20 | ## Instructions on Running 21 | Using the [activated CSD Python API environment](../../README.md#running-scripts-through-the-csd-python-api-miniconda-installed) 22 | 23 | ```cmd 24 | python find_binding_conformation.py pdb_example.txt 25 | ``` 26 | ## Author 27 | _'Brandl, Giangreco, Higueruelo, Schaerfer and Sykes'_ 28 | 29 | 30 | > For feedback or to report any issues please contact [support@ccdc.cam.ac.uk](mailto:support@ccdc.cam.ac.uk) -------------------------------------------------------------------------------- /scripts/find_binding_conformation/pdb_example.txt: -------------------------------------------------------------------------------- 1 | 3zds 2 | 3pka 3 | 3i4b 4 | 2viq 5 | -------------------------------------------------------------------------------- /scripts/gold_multi/.gitignore: -------------------------------------------------------------------------------- 1 | !target 2 | output -------------------------------------------------------------------------------- /scripts/gold_multi/ReadMe.md: -------------------------------------------------------------------------------- 1 | # GOLD and Multiprocessing 2 | 3 | ## Introduction 4 | 5 | This repo contains a script, `gold_multi.py`, which is designed to illustrate how to use the [CSD Docking API](https://downloads.ccdc.cam.ac.uk/documentation/API/descriptive_docs/docking.html) and the standard Python [multiprocessing](https://docs.python.org/3.7/library/multiprocessing.html) module to parallelize GOLD docking. Also included is a simple example system to demonstrate the operation of the script. 6 | 7 | On a multi-core workstation, this approach should be suitable for docking some hundreds or thousands of ligands depending on the rigour of the docking protocol used; please consult the GOLD User Guide for information about speed/accuracy tradeoffs in GOLD.
Note that the script is not useful for running GOLD on an HPC compute cluster or on the Cloud: the CCDC provides the GOLD Cluster and GOLD Cloud tools for those use-cases. For further details, please contact [support@ccdc.cam.ac.uk](mailto:support@ccdc.cam.ac.uk). 8 | 9 | As ever when using multiprocessing techniques, increasing the number of processes will at some point begin to degrade performance as available cores are saturated. At what point this happens will depend on the machine and the workload and thus can only really be determined by experimentation. A default of six was selected as the script was developed on an eight-core workstation and this seemed to give decent performance while leaving cores for other processes. 10 | 11 | The script is designed to be as simple as possible in order to not obscure the mechanisms of parallelization. Thus, for example, configuration of the docking is taken entirely from the GOLD conf file. There is also the limitation that only a single input file of ligands is accepted. In addition, the implementation of error-handling and logging is rather lightweight. If a proper application was required then these matters could be addressed. 12 | 13 | The script writes output to the directory specified in the GOLD configuration file, and the results can be inspected by loading the GOLD conf file in Hermes as normal (see the Hermes User Guide for details). A `bestranking.lst` file is also written, which records the best-scoring pose for each molecule. Other output normally written by GOLD is not created, although this could be implemented if necessary. 14 | 15 | The script partitions the input ligand file into chunks and uses the Docking API and multiprocessing to dock these chunks in parallel using named subdirectories for their output. The solution files for the chunks are then copied to the main output directory and the full `bestranking.lst` file compiled from the partial chunk versions.
The intermediate subdirectories are currently kept, but the script could easily be modified to delete them or use anonymous temporary directories if disk usage was to be an issue. 16 | 17 | --- 18 | ## Requirements 19 | 20 | - [GOLD](https://www.ccdc.cam.ac.uk/solutions/csd-discovery/components/gold/) and the [CSD Python API](https://downloads.ccdc.cam.ac.uk/documentation/API/) installed. 21 | - Configuration File: `gold.conf` 22 | 23 | ## Licensing Requirements 24 | 25 | CSD-Discovery, CSD-Enterprise and Research Partner suites would all be sufficient. 26 | 27 | ## Instructions on Running 28 | 29 | To run the script, an environment with the CCDC Python API installed must be active. Further information is available in 30 | the [API installation notes](https://downloads.ccdc.cam.ac.uk/documentation/API/installation_notes.html). 31 | 32 | The script is designed to be run from the command line only (and not, for example, from within Hermes). The path to a GOLD configuration file may be provided as a command argument; if no argument is provided, it is assumed there will be a file `gold.conf` in the current working directory. 33 | 34 | On Windows, the command would be (in the folder where this archive was unzipped)... 35 | 36 | ``` 37 | > python.exe .\gold_multi.py 38 | ``` 39 | 40 | On Linux or MacOS, an equivalent would be (first making the script executable)... 41 | 42 | ``` 43 | $ chmod u+x ./gold_multi.py 44 | 45 | $ ./gold_multi.py 46 | ``` 47 | 48 | In either case, add the option `--help` to show more information. 49 | 50 | ```cmd 51 | usage: gold_multi.py [-h] [--n_processes N_PROCESSES] [conf_file] 52 | 53 | positional arguments: 54 | conf_file GOLD configuration file (default='gold.conf') 55 | 56 | optional arguments: 57 | -h, --help show this help message and exit 58 | --n_processes N_PROCESSES 59 | No. 
of processes (default=6) 60 | ``` 61 | 62 | --- 63 | ## Note on the input files provided 64 | 65 | The example target provided (see the directory `target/`) is SYK tyrosine kinase ([5LMA](https://www.ebi.ac.uk/pdbe/entry/pdb/5lma)). 66 | 67 | The ligands in `input.sdf` were built from SMILES. If the name is a PDB code, it means the SMILES corresponded to the crystallographic ligand from that structure (with conventional ionization states assigned). If the name has a suffix, the SMILES is a manually-generated analogue. Note that not all these ligands can be correctly cross-docked into 5LMA, as there are induced-fit effects in SYK that GOLD cannot reproduce. 68 | 69 | > For feedback or to report any issues please contact [support@ccdc.cam.ac.uk](mailto:support@ccdc.cam.ac.uk) -------------------------------------------------------------------------------- /scripts/gold_multi/gold.conf: -------------------------------------------------------------------------------- 1 | GOLD CONFIGURATION FILE 2 | 3 | AUTOMATIC SETTINGS 4 | autoscale = 0.3 5 | 6 | POPULATION 7 | popsiz = auto 8 | select_pressure = auto 9 | n_islands = auto 10 | maxops = auto 11 | niche_siz = auto 12 | 13 | GENETIC OPERATORS 14 | pt_crosswt = auto 15 | allele_mutatewt = auto 16 | migratewt = auto 17 | 18 | FLOOD FILL 19 | radius = 6 20 | origin = 0 0 0 21 | do_cavity = 1 22 | floodfill_atom_no = 0 23 | cavity_file = target/ligand.mol2 24 | floodfill_center = cavity_from_ligand 25 | 26 | DATA FILES 27 | ligand_data_file input.sdf 5 28 | param_file = DEFAULT 29 | set_ligand_atom_types = 1 30 | set_protein_atom_types = 0 31 | directory = output 32 | tordist_file = DEFAULT 33 | make_subdirs = 0 34 | save_lone_pairs = 0 35 | fit_points_file = fit_pts.mol2 36 | read_fitpts = 0 37 | 38 | FLAGS 39 | internal_ligand_h_bonds = 0 40 | flip_free_corners = 0 41 | match_ring_templates = 0 42 | flip_amide_bonds = 0 43 | flip_planar_n = 1 flip_ring_NRR flip_ring_NHR 44 | flip_pyramidal_n = 0 45 | rotate_carboxylic_oh 
= flip 46 | use_tordist = 1 47 | postprocess_bonds = 1 48 | rotatable_bond_override_file = DEFAULT 49 | solvate_all = 1 50 | 51 | TERMINATION 52 | early_termination = 1 53 | n_top_solutions = 3 54 | rms_tolerance = 1.5 55 | 56 | CONSTRAINTS 57 | force_constraints = 0 58 | 59 | COVALENT BONDING 60 | covalent = 0 61 | 62 | SAVE OPTIONS 63 | save_score_in_file = 1 comments 64 | save_protein_torsions = 1 65 | output_file_format = MACCS 66 | 67 | WRITE OPTIONS 68 | write_options = NO_LINK_FILES NO_RNK_FILES NO_GOLD_LIGAND_MOL2_FILE 69 | 70 | FITNESS FUNCTION SETTINGS 71 | initial_virtual_pt_match_max = 3 72 | relative_ligand_energy = 1 73 | gold_fitfunc_path = plp 74 | score_param_file = DEFAULT 75 | 76 | PROTEIN DATA 77 | protein_datafile = target/protein.mol2 78 | 79 | CONSTRAINTS 80 | constraint protein_h_bond 10.0000 0.005000 3696 81 | 82 | 83 | -------------------------------------------------------------------------------- /scripts/gold_multi/target/ligand.mol2: -------------------------------------------------------------------------------- 1 | @MOLECULE 2 | A:5LMA 3 | 45 47 1 0 1 4 | SMALL 5 | USER_CHARGES 6 | **** 7 | Generated from the CSD 8 | 9 | @ATOM 10 | 1 C10 26.9763 -43.1756 34.9942 C.ar 1 6ZG701 0.0000 11 | 2 C13 26.7604 -42.0345 32.5741 C.ar 1 6ZG701 0.0000 12 | 3 C15 25.9034 -42.3185 34.6662 C.ar 1 6ZG701 0.0000 13 | 4 C20 23.6793 -38.4994 36.8322 C.3 1 6ZG701 0.0000 14 | 5 C21 24.4623 -37.5544 37.7082 C.3 1 6ZG701 0.0000 15 | 6 C01 26.2021 -45.3807 42.6822 C.3 1 6ZG701 0.0000 16 | 7 C02 26.1662 -44.8936 41.2542 C.ar 1 6ZG701 0.0000 17 | 8 C03 27.0482 -45.3887 40.3082 C.ar 1 6ZG701 0.0000 18 | 9 C04 27.0222 -44.9446 38.9972 C.ar 1 6ZG701 0.0000 19 | 10 C05 26.0942 -43.9906 38.5862 C.ar 1 6ZG701 0.0000 20 | 11 C06 25.2162 -43.4896 39.5442 C.ar 1 6ZG701 0.0000 21 | 12 C07 25.2562 -43.9336 40.8502 C.ar 1 6ZG701 0.0000 22 | 13 C08 26.0733 -43.4716 37.1962 C.ar 1 6ZG701 0.0000 23 | 14 C09 27.0603 -43.7396 36.2842 C.ar 1 6ZG701 0.0000 24 | 15 N11 27.9544 
-43.4406 34.0772 N.ar 1 6ZG701 0.0000 25 | 16 C12 27.8234 -42.8636 32.8931 C.ar 1 6ZG701 0.0000 26 | 17 N14 25.7954 -41.7515 33.4342 N.ar 1 6ZG701 0.0000 27 | 18 C16 24.9274 -42.0785 35.6912 C.ar 1 6ZG701 0.0000 28 | 19 N17 23.8754 -41.2885 35.4632 N.pl3 1 6ZG701 0.0000 29 | 20 C18 22.9324 -40.8905 36.4942 C.3 1 6ZG701 0.0000 30 | 21 C19 23.5163 -39.8745 37.4532 C.3 1 6ZG701 0.0000 31 | 22 N22 24.5153 -36.2063 37.1372 N.4 1 6ZG701 1.0000 32 | 23 N23 25.0403 -42.6426 36.9082 N.ar 1 6ZG701 0.0000 33 | 24 H911 24.2002 -38.6059 35.8818 H 1 6ZG701 0.0000 34 | 25 H912 22.6910 -38.0760 36.6591 H 1 6ZG701 0.0000 35 | 26 H913 24.4856 -42.7346 39.2578 H 1 6ZG701 0.0000 36 | 27 H914 22.6381 -41.7743 37.0583 H 1 6ZG701 0.0000 37 | 28 H915 22.0555 -40.4561 36.0165 H 1 6ZG701 0.0000 38 | 29 H916 22.8553 -39.7903 38.3146 H 1 6ZG701 0.0000 39 | 30 H917 24.4940 -40.2264 37.7789 H 1 6ZG701 0.0000 40 | 31 H918 24.5580 -43.5202 41.5765 H 1 6ZG701 0.0000 41 | 32 H919 23.5725 -35.8436 37.0381 H 1 6ZG701 0.0000 42 | 33 H920 25.0485 -35.5993 37.7516 H 1 6ZG701 0.0000 43 | 34 H921 24.9616 -36.2420 36.2263 H 1 6ZG701 0.0000 44 | 35 H922 27.8992 -44.3802 36.5522 H 1 6ZG701 0.0000 45 | 36 H927 25.4779 -37.9324 37.8161 H 1 6ZG701 0.0000 46 | 37 H928 23.9875 -37.5049 38.6870 H 1 6ZG701 0.0000 47 | 38 H930 23.7265 -40.9477 34.5188 H 1 6ZG701 0.0000 48 | 39 H933 28.5846 -43.0488 32.1366 H 1 6ZG701 0.0000 49 | 40 H934 27.7356 -45.3458 38.2789 H 1 6ZG701 0.0000 50 | 41 H935 26.7173 -41.5988 31.5770 H 1 6ZG701 0.0000 51 | 42 H936 27.1796 -45.1695 43.1133 H 1 6ZG701 0.0000 52 | 43 H937 25.4328 -44.8699 43.2594 H 1 6ZG701 0.0000 53 | 44 H938 26.0198 -46.4541 42.7043 H 1 6ZG701 0.0000 54 | 45 H939 27.7771 -46.1427 40.6016 H 1 6ZG701 0.0000 55 | @BOND 56 | 1 1 3 ar 57 | 2 4 5 1 58 | 3 6 7 1 59 | 4 7 8 ar 60 | 5 8 9 ar 61 | 6 9 10 ar 62 | 7 10 11 ar 63 | 8 7 12 ar 64 | 9 11 12 ar 65 | 10 10 13 1 66 | 11 1 14 ar 67 | 12 13 14 ar 68 | 13 1 15 ar 69 | 14 2 16 ar 70 | 15 15 16 ar 71 | 16 2 17 ar 72 | 17 3 17 
ar 73 | 18 3 18 ar 74 | 19 18 19 1 75 | 20 19 20 1 76 | 21 4 21 1 77 | 22 20 21 1 78 | 23 5 22 1 79 | 24 13 23 ar 80 | 25 18 23 ar 81 | 26 4 24 1 82 | 27 4 25 1 83 | 28 11 26 1 84 | 29 20 27 1 85 | 30 20 28 1 86 | 31 21 29 1 87 | 32 21 30 1 88 | 33 12 31 1 89 | 34 22 32 1 90 | 35 22 33 1 91 | 36 22 34 1 92 | 37 14 35 1 93 | 38 5 36 1 94 | 39 5 37 1 95 | 40 19 38 1 96 | 41 16 39 1 97 | 42 9 40 1 98 | 43 2 41 1 99 | 44 6 42 1 100 | 45 6 43 1 101 | 46 6 44 1 102 | 47 8 45 1 103 | @SUBSTRUCTURE 104 | 1 6ZG701 1 GROUP 0 A 6ZG 0 105 | @SET 106 | CCDC_LIGAND STATIC ATOMS 107 | 45 1 2 3 4 5 6 7 8 9 \ 108 | 10 11 12 13 14 15 16 17 18 19 \ 109 | 20 21 22 23 24 25 26 27 28 29 \ 110 | 30 31 32 33 34 35 36 37 38 39 \ 111 | 40 41 42 43 44 45 112 | -------------------------------------------------------------------------------- /scripts/hydrogen_bond_propensity/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Hydrogen Bond Propensity 2 | 3 | Writes a .docx report of a hydrogen bond propensity calculation 4 | 5 | ---- 6 | Included in the report: 7 | - Hydrogen bond propensity chart. 8 | - Table of Predicted intermolecular hydrogen bond propensities. 9 | - Table of Hydrogen bond coordination likelihood. 
10 | - Table of Hydrogen Bond Donor/Acceptor Definitions 11 | - Functional Group Definitions for CSD Substructure Searches 12 | - Search Results 13 | - Number of hits selected per functional group 14 | - Hydrogen Bond Analysis of Training Dataset 15 | - Hydrogen bond donor and acceptor outcomes 16 | 17 | ## Example 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | ## Requirements 26 | 27 | - ```matplotlib, docxtpl``` 28 | 29 | - hydrogen_bond_propensity_report.docx 30 | ## Licensing Requirements 31 | - CSD-Core 32 | 33 | ## Instructions on running 34 | 35 | Using mol2 file 36 | 37 | ```cmd 38 | "C:\Program Files\CCDC\Python_API_2022\miniconda\Scripts\activate.bat" 39 | python hydrogen_bond_propensity_report.py structure.mol2 40 | ``` 41 | 42 | Options: 43 | ``` 44 | - d | --directory - sets the working directory 45 | - n | --noopen - Do not automatically open the generated output file. 46 | ``` 47 | ## Author 48 | 49 | _Andrew Maloney_(2017) 50 | 51 | > For feedback or to report any issues please contact [support@ccdc.cam.ac.uk](mailto:support@ccdc.cam.ac.uk) -------------------------------------------------------------------------------- /scripts/hydrogen_bond_propensity/assets/HXACAN_Report_Screenshot_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/hydrogen_bond_propensity/assets/HXACAN_Report_Screenshot_1.png -------------------------------------------------------------------------------- /scripts/hydrogen_bond_propensity/assets/HXACAN_Report_Screenshot_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/hydrogen_bond_propensity/assets/HXACAN_Report_Screenshot_2.png -------------------------------------------------------------------------------- 
/scripts/hydrogen_bond_propensity/assets/HXACAN_Report_Screenshot_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/hydrogen_bond_propensity/assets/HXACAN_Report_Screenshot_3.png -------------------------------------------------------------------------------- /scripts/hydrogen_bond_propensity/assets/HXACAN_Report_Screenshot_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/hydrogen_bond_propensity/assets/HXACAN_Report_Screenshot_4.png -------------------------------------------------------------------------------- /scripts/hydrogen_bond_propensity/hydrogen_bond_propensity_report.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/hydrogen_bond_propensity/hydrogen_bond_propensity_report.docx -------------------------------------------------------------------------------- /scripts/mof_solvent_removal_2017_chem_mater_publication/Command_prompt_MOF_solvent_removal.py: -------------------------------------------------------------------------------- 1 | # 2 | # This script can be used for any purpose without limitation subject to the 3 | # conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx 4 | # 5 | # This permission notice and the following statement of attribution must be 6 | # included in all copies or substantial portions of this script. 7 | # 8 | # 2016-12-15: created by S. B. 
# Wiggin, the Cambridge Crystallographic Data Centre
# 2024-07-02: minor update to include using ccdc utilities to find the solvent file

"""
Script to identify and remove bound solvent molecules from a MOF structure.

Solvents are identified using a defined list.
Output in CIF format includes only framework component with all monodentate solvent removed.
"""
#######################################################################

import os
import glob
import argparse

from ccdc import io
from ccdc import utilities

#######################################################################

# Command-line interface: one required input file plus optional output
# directory, monodentate-stripping switch and solvent definition location.
arg_handler = argparse.ArgumentParser(description=__doc__)
arg_handler.add_argument(
    'input_file',
    help='CSD .gcd file from which to read MOF structures'
)
arg_handler.add_argument(
    '-o', '--output-directory',
    help='Directory into which to write stripped structures'
)
arg_handler.add_argument(
    '-m', '--monodentate', default=False, action='store_true',
    help='Whether or not to strip all unidenate (or monodentate) ligands from the structure'
)
arg_handler.add_argument(
    '-s', '--solvent-file',
    help='Location of solvent file'
)

args = arg_handler.parse_args()
if not args.output_directory:
    # Default: write the stripped CIFs next to the input file.
    args.output_directory = os.path.abspath(os.path.dirname(args.input_file))

# exist_ok replaces the separate exists() check, which could race with another
# process creating the directory between the check and the makedirs call.
os.makedirs(args.output_directory, exist_ok=True)

# Define the solvent smiles patterns
if not args.solvent_file:
    # No solvent location given: fall back to the solvent directory that the
    # ccdc utilities report.
    args.solvent_file = utilities.Resources().get_ccdc_solvents_dir()


def _first_smiles(path):
    """Return the SMILES of the first molecule in *path*, closing the reader afterwards."""
    with io.MoleculeReader(path) as reader:
        return reader[0].smiles


# A set is used because the only consumer is a membership test that runs once
# per component of every entry; duplicates carry no information.
if os.path.isdir(args.solvent_file):
    # Directory: one solvent per .mol2 file, take the first molecule of each.
    solvent_smiles = {
        _first_smiles(f)
        for f in glob.glob(os.path.join(args.solvent_file, '*.mol2'))
    }
else:
    # Single file: every molecule in it is a solvent definition.
    with io.MoleculeReader(args.solvent_file) as reader:
        solvent_smiles = {m.smiles for m in reader}


#######################################################################


def is_multidentate(c, mol):
    """
    Check for components bonded to metals more than once.

    If monodentate is not specified in the arguments, skip this test.

    :param c: a component of the metal-bond-free copy of the structure.
    :param mol: the original molecule, still carrying its metal bonds; atoms
        are matched between the two by label.
    :returns: True when the test is skipped, or when at least two atoms of
        ``c`` are bonded to a metal in ``mol``; False otherwise.
    """
    if not args.monodentate:
        return True
    metal_bound_atoms = 0
    for a in c.atoms:
        # Look the atom up in the original molecule, where metal bonds still exist.
        orig_a = mol.atom(a.label)
        if any(x.is_metal for b in orig_a.bonds for x in b.atoms):
            metal_bound_atoms += 1
            if metal_bound_atoms > 1:
                return True
    return False


def is_solvent(c):
    """Check if this component is a solvent (water, or a SMILES in the solvent list)."""
    return c.smiles == 'O' or c.smiles in solvent_smiles


def has_metal(c):
    """Check if this component has any metals."""
    return any(a.is_metal for a in c.atoms)


# Iterate over entries
try:
    # The context manager closes the input reader even if processing fails.
    with io.EntryReader(args.input_file) as entry_reader:
        for entry in entry_reader:
            if not entry.has_3d_structure:
                # Nothing to strip or write without coordinates.
                continue
            # Ensure labels are unique
            mol = entry.molecule
            mol.normalise_labels()
            # Use a copy
            clone = mol.copy()
            # Remove all bonds containing a metal atom
            clone.remove_bonds(b for b in clone.bonds if any(a.is_metal for a in b.atoms))
            # Work out which components to remove
            to_remove = [
                c
                for c in clone.components
                if not has_metal(c) and (not is_multidentate(c, mol) or is_solvent(c))
            ]
            # Remove the atoms of selected components
            mol.remove_atoms(
                mol.atom(a.label) for c in to_remove for a in c.atoms
            )
            # Write the CIF. The crystal is fetched once so that the object
            # receiving the stripped molecule is the same one that is written.
            crystal = entry.crystal
            crystal.molecule = mol
            cif_path = os.path.join(args.output_directory, '%s_stripped.cif' % entry.identifier)
            with io.CrystalWriter(cif_path) as writer:
                writer.write(crystal)
except RuntimeError as err:
    # Keep the original diagnosis but include the underlying error, so that a
    # failure unrelated to the input format is not hidden.
    print('File format not recognised: %s' % err)
# --------------------------------------------------------------------------------
# /scripts/mof_solvent_removal_2017_chem_mater_publication/Mercury_MOF_solvent_removal.py:
-------------------------------------------------------------------------------- 1 | # 2 | # This script can be used for any purpose without limitation subject to the 3 | # conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx 4 | # 5 | # This permission notice and the following statement of attribution must be 6 | # included in all copies or substantial portions of this script. 7 | # 8 | # 2016-12-15: created by S. B. Wiggin, the Cambridge Crystallographic Data Centre 9 | # 2024-07-02: minor update to include using ccdc utilities to find the solvent file 10 | 11 | """ 12 | Script to identify and remove bound solvent molecules from a MOF structure. 13 | 14 | Solvents are identified using a defined list. 15 | Output in CIF format includes only framework component with all monodentate solvent removed. 16 | """ 17 | ####################################################################### 18 | 19 | import os 20 | import glob 21 | 22 | from ccdc import io 23 | from ccdc import utilities 24 | from mercury_interface import MercuryInterface 25 | 26 | ####################################################################### 27 | 28 | helper = MercuryInterface() 29 | solvent_smiles = [] 30 | 31 | # Define the solvent smiles patterns 32 | solvent_file = utilities.Resources().get_ccdc_solvents_dir() 33 | 34 | if os.path.isdir(solvent_file): 35 | solvent_smiles = [ 36 | io.MoleculeReader(f)[0].smiles 37 | for f in glob.glob(os.path.join(solvent_file, '*.mol2')) 38 | ] 39 | 40 | else: 41 | html_file = helper.output_html_file 42 | f = open(html_file, "w") 43 | f.write('
') 44 | f.write('Sorry, unable to locate solvent files in the CCDC directory') 45 | f.write('
') 46 | f.close() 47 | # a user-defined solvent directory could be added here instead 48 | 49 | ####################################################################### 50 | 51 | 52 | def is_solvent(c): 53 | """Check if this component is a solvent.""" 54 | return c.smiles == 'O' or c.smiles in solvent_smiles 55 | 56 | 57 | def has_metal(c): 58 | """Check if this component has any metals.""" 59 | return any(a.is_metal for a in c.atoms) 60 | 61 | 62 | entry = helper.current_entry 63 | if entry.has_3d_structure: 64 | # Ensure labels are unique 65 | mol = entry.molecule 66 | mol.normalise_labels() 67 | # Use a copy 68 | clone = mol.copy() 69 | # Remove all bonds containing a metal atom 70 | clone.remove_bonds(b for b in clone.bonds if any(a.is_metal for a in b.atoms)) 71 | # Work out which components to remove 72 | to_remove = [ 73 | c 74 | for c in clone.components 75 | if not has_metal(c) and is_solvent(c) 76 | ] 77 | # Remove the atoms of selected components 78 | mol.remove_atoms( 79 | mol.atom(a.label) for c in to_remove for a in c.atoms 80 | ) 81 | # Write the CIF 82 | entry.crystal.molecule = mol 83 | with (io.CrystalWriter('%s/%s_stripped.cif' % (helper.options['working_directory_path'], entry.identifier)) as 84 | writer): 85 | writer.write(entry.crystal) 86 | html_file = helper.output_html_file 87 | f = open(html_file, "w") 88 | f.write('
') 89 | f.write('Cif file containing MOF framework without monodentate solvent written to your output directory') 90 | f.write('
') 91 | f.close() 92 | else: 93 | html_file = helper.output_html_file 94 | f = open(html_file, "w") 95 | f.write('
') 96 | f.write('Sorry, this script will only work for CSD entries containing atomic coordinates') 97 | f.write('
') 98 | f.close() 99 | -------------------------------------------------------------------------------- /scripts/mof_solvent_removal_2017_chem_mater_publication/ReadMe.md: -------------------------------------------------------------------------------- 1 | # MOF solvent removal 2 | 3 | ## Summary 4 | 5 | Scripts included in the supporting information of the article "Development of a Cambridge Structural Database Subset: 6 | A Collection of Metal–Organic Frameworks for Past, Present, and Future", Peyman Z. Moghadam, Aurelia Li, 7 | Seth B. Wiggin, Andi Tao, Andrew G. P. Maloney, Peter A. Wood, Suzanna C. Ward, and David Fairen-Jimenez 8 | *Chem. Mater.* **2017**, 29, 7, 2618–2625, DOI: 9 | 10 | Scripts are essentially equivalent: one is designed to be run through the Mercury CSD Python API menu to 11 | remove solvent from a single structure present in the visualiser, the second runs from the command line 12 | and takes a list of CSD entries (a .gcd file) to run through the solvent removal process in bulk. 13 | 14 | ## Requirements 15 | 16 | Tested with CSD Python API 3.9.18 17 | 18 | ## Licensing Requirements 19 | 20 | CSD-Core 21 | 22 | ## Instructions on running 23 | 24 | For the script Mercury_MOF_solvent_removal.py: 25 | 26 | - In Mercury, pick **CSD Python API** in the top-level menu, then **Options…** in the resulting pull-down menu. 
27 | - The Mercury Scripting Configuration control window will be displayed; from the *Additional Mercury Script Locations* 28 | section, use the **Add Location** button to navigate to a folder location containing the script 29 | - It will then be possible to run the script directly from the CSD Python API menu, with the script running on the structure 30 | shown in the visualiser 31 | 32 | For the script Command_prompt_MOF_solvent_removal.py 33 | 34 | ```cmd 35 | python Command_prompt_MOF_solvent_removal.py .gcd 36 | ``` 37 | 38 | ```cmd 39 | positional arguments: 40 | input_file CSD .gcd file from which to read MOF structures 41 | 42 | optional arguments: 43 | -h, --help show this help message and exit 44 | -o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY 45 | Directory into which to write stripped structures 46 | -m, --monodentate 47 | Whether or not to strip all unidenate (or monodentate) ligands from the structure 48 | -s SOLVENT_FILE, --solvent-file SOLVENT_FILE 49 | The location of a solvent file 50 | ``` 51 | 52 | ## Author 53 | 54 | *S.B.Wiggin* (2016) 55 | 56 | > For feedback or to report any issues please contact [support@ccdc.cam.ac.uk](mailto:support@ccdc.cam.ac.uk) 57 | -------------------------------------------------------------------------------- /scripts/multi_component_hydrogen_bond_propensity/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Multi-Component Hydrogen Bond Propensity 2 | 3 | ## Summary 4 | 5 | Performs a multi-component Hydrogen bond propensity calculation for a given library of co-formers 6 | 7 | ## Example 8 | 9 | Individual reports are generated for each coformer stored in individual folders. A summary multicomponent report with rankings is also generated.
10 | 11 | Individual reports include: 12 | 13 | - Predicted intermolecular hydrogen bond propensities 14 | - Hydrogen bond coordination likelihood 15 | 16 | Summary report includes: 17 | 18 | - Chemical Diagram 19 | - Table of ranked components 20 | - Multi-component hydrogen-bond propensity chart 21 | 22 | ## Requirements 23 | 24 | - docxtpl 25 | - multi_component_hydrogen_bond_propensity_report.docx 26 | - multi_component_pair_hbp_report.docx 27 | 28 | ## Licensing Requirements 29 | 30 | - CSD-Materials 31 | 32 | ## Instructions on running 33 | 34 | ```cmd 35 | python multi_component_hydrogen_bond_propensity_report.py HXACAN28 36 | ``` 37 | 38 | ```cmd 39 | positional arguments: 40 | input_structure Refcode or mol2 file of the component to be screened 41 | 42 | optional arguments: 43 | -h, --help show this help message and exit 44 | -d DIRECTORY, --directory DIRECTORY 45 | the working directory for the calculation 46 | -c COFORMER_LIBRARY, --coformer_library COFORMER_LIBRARY 47 | the directory of the desired coformer library 48 | -f FAILURE_DIRECTORY, --failure_directory FAILURE_DIRECTORY 49 | The location where the failures file should be generated 50 | ``` 51 | 52 | The default coformer library is the one supplied with your Mercury install 53 | 54 | - for 2023.1 or later, in ```\ccdc-software\mercury\molecular_libraries\ccdc_coformers``` 55 | - for 2022.3 or earlier, in ```\Mercury\molecular_libraries\ccdc_coformers``` 56 | 57 | Ensure the input structure and coformers have the correct bond typing and any charges before running the script 58 | 59 | If the calculation fails for any API-coformer combinations, they will be recorded with N/A in the summary report table 60 | 61 | ## Author 62 | 63 | _Andrew Maloney_ (CCDC) 2017 64 | 65 | > For feedback or to report any issues please contact [support@ccdc.cam.ac.uk](mailto:support@ccdc.cam.ac.uk) 66 | -------------------------------------------------------------------------------- 
/scripts/multi_component_hydrogen_bond_propensity/multi_component_hydrogen_bond_propensity_report.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/multi_component_hydrogen_bond_propensity/multi_component_hydrogen_bond_propensity_report.docx -------------------------------------------------------------------------------- /scripts/multi_component_hydrogen_bond_propensity/multi_component_pair_hbp_report.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/multi_component_hydrogen_bond_propensity/multi_component_pair_hbp_report.docx -------------------------------------------------------------------------------- /scripts/new_script_readme_template/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Script Name 2 | 3 | ## Summary 4 | 5 | \#Description of what the script does 6 | 7 | \#Example output showing what the user can expect to see 8 | 9 | ## Requirements 10 | 11 | \#Minimum CSD Python API version 12 | 13 | \#List of additional required CCDC modules, e.g. ccdc_rp 14 | 15 | \#List of additional non-CCDC python modules required, e.g. docxtpl 16 | 17 | \#Any additional files/resources required, e.g. additional files from this repository script folder 18 | 19 | ## Licensing Requirements 20 | 21 | \#CCDC Licence requirements, e.g. CSD-Core, CSD-Materials, CSD-Enterprise, RP 22 | 23 | ## Instructions on Running 24 | 25 | \#Command line usage 26 | 27 | \#Arguments available 28 | 29 | \#Additional resources required, e.g. file from the script repository and how that is used (e.g. in same folder as python script, used in an argument etc.) 
30 | 31 | ## Author 32 | 33 | \#Author name and date created 34 | 35 | > For feedback or to report any issues please contact [support@ccdc.cam.ac.uk](mailto:support@ccdc.cam.ac.uk) -------------------------------------------------------------------------------- /scripts/new_script_readme_template/script_example.py: -------------------------------------------------------------------------------- 1 | # 2 | # This script can be used for any purpose without limitation subject to the 3 | # conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx 4 | # 5 | # This permission notice and the following statement of attribution must be 6 | # included in all copies or substantial portions of this script. 7 | # 8 | # 0000-00-00: created by Name, Institute 9 | # -------------------------------------------------------------------------------- /scripts/november_2023_morphology_webinar/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Working with morphologies and CSD-Particle 2 | 3 | This is a collection of scripts from the November 2023 CCDC Webinar on crystal morphologies, presented by Dr Andrew G. 4 | P. Maloney. 5 | 6 | ## Contents 7 | 8 | This folder contains the following scripts: 9 | 10 | - `calculate_morphologies_tabulate_output.py` 11 | Runs a calculation of BFDH morphology and VisualHabit morphology for `IBPRAC18` (ibuprofen), printing a table of 12 | facet properties. 13 | - `exploring_surface_properties.py` 14 | Calculates the VisualHabit morphology of `IBPRAC18` (ibuprofen) and descriptors for the (100) surface. Also calculates 15 | particle rugosity. 16 | - `morphology_plot.py` 17 | Contains a function to generate a 3D plot of a crystal morphology using Matplotlib. Running the script will plot the 18 | BFDH morphology of `IBPRAC18` (ibuprofen).
19 | 20 | ## Requirements 21 | 22 | All of the modules required to run these scripts are available in the miniconda environment installed with the CSD 23 | Python API. 24 | 25 | ## Licensing requirements 26 | 27 | - CSD-Core, CSD-Particle 28 | 29 | ## Usage 30 | 31 | Each script can be run independently from the command line: 32 | 33 | `python script_name.py` 34 | 35 | ### Author 36 | 37 | Pietro Sacchi 38 | 39 | -------------------------------------------------------------------------------- /scripts/november_2023_morphology_webinar/calculate_morphologies_tabulate_output.py: -------------------------------------------------------------------------------- 1 | # 2 | # This script can be used for any purpose without limitation subject to the 3 | # conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx 4 | # 5 | # This permission notice and the following statement of attribution must be 6 | # included in all copies or substantial portions of this script. 7 | # 8 | 9 | """ 10 | This script runs a morphology calculation and outputs the facet properties. 
11 | """ 12 | import pandas as pd 13 | from ccdc.io import EntryReader 14 | from ccdc.morphology import BFDHMorphology 15 | 16 | csd = EntryReader('CSD') 17 | ibuprofen = csd.crystal("IBPRAC18") 18 | bfdh_morphology = BFDHMorphology(ibuprofen) 19 | 20 | bfdh_facet_data = [[f.miller_indices.hkl, round(f.area, 3)] for f in bfdh_morphology.facets] 21 | 22 | print() 23 | print("BFDH Data:") 24 | print(pd.DataFrame(bfdh_facet_data, columns=["Miller Index", "Facet Area"])) 25 | 26 | from ccdc.morphology import VisualHabit 27 | 28 | visualhabit_settings = VisualHabit.Settings() 29 | visualhabit_settings.potential = 'dreidingII' 30 | vh_results = VisualHabit().calculate(ibuprofen) 31 | vh_morphology = vh_results.morphology 32 | 33 | vh_facet_data = [[f.miller_indices.hkl, f.area, f.attachment_energy] for f in vh_morphology.facets] 34 | 35 | print() 36 | print("VisualHabit Data:") 37 | print(pd.DataFrame(vh_facet_data, columns=["Miller Index", "Facet Area", "Attachment Energy"])) 38 | -------------------------------------------------------------------------------- /scripts/november_2023_morphology_webinar/exploring_surface_properties.py: -------------------------------------------------------------------------------- 1 | # 2 | # This script can be used for any purpose without limitation subject to the 3 | # conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx 4 | # 5 | # This permission notice and the following statement of attribution must be 6 | # included in all copies or substantial portions of this script. 7 | # 8 | 9 | """ 10 | This script shows a few uses of the CSD Particle tools available. 
11 | """ 12 | import pandas as pd 13 | from ccdc.io import EntryReader 14 | from ccdc.particle import Surface 15 | from ccdc.morphology import VisualHabit 16 | 17 | csd = EntryReader('CSD') 18 | ibuprofen = csd.crystal("IBPRAC18") 19 | 20 | visualhabit_settings = VisualHabit.Settings() 21 | visualhabit_settings.potential = 'dreidingII' 22 | vh_results = VisualHabit().calculate(ibuprofen) 23 | vh_morphology = vh_results.morphology 24 | 25 | # Surface topology 26 | ibuprofen_100 = Surface(ibuprofen, (1, 0, 0)) 27 | print() 28 | print("Ibuprofen (100) topology descriptors (no offset):") 29 | print(ibuprofen_100.descriptors.rugosity) 30 | 31 | ibuprofen_100 = Surface(ibuprofen, (1, 0, 0), offset=4.308) 32 | surface_topology = [[(1, 0, 0), 33 | ibuprofen_100.descriptors.rugosity, 34 | ibuprofen_100.descriptors.rmsd, 35 | ibuprofen_100.descriptors.skewness, 36 | ibuprofen_100.descriptors.kurtosis]] 37 | 38 | print() 39 | print("Ibuprofen (100) topology descriptors:") 40 | print(pd.DataFrame(surface_topology, columns=["Facet", "Rugosity", "RMSD", "Skewness", "Kurtosis"])) 41 | 42 | # Surface chemistry 43 | surface_chemistry = [[(1, 0, 0), 44 | ibuprofen_100.descriptors.hb_donors, 45 | ibuprofen_100.descriptors.hb_acceptors, 46 | ibuprofen_100.descriptors.aromatic_bonds]] 47 | 48 | print() 49 | print("Ibuprofen (100) surface chemistry descriptors:") 50 | print(pd.DataFrame(surface_chemistry, 51 | columns=["Facet", "Density, HB Donors", "Density, HB Acceptors", "Density, Aromatic Bonds"])) 52 | 53 | # Average surface properties 54 | facet_rugosity_data = [] 55 | for facet in vh_morphology.facets: 56 | hkl = facet.miller_indices.hkl 57 | relative_area = round(vh_morphology.relative_area(facet.miller_indices), 3) 58 | rugosity = Surface(ibuprofen, facet.miller_indices.hkl).descriptors.rugosity 59 | weighted_rugosity = relative_area * rugosity 60 | facet_rugosity_data.append([hkl, relative_area, rugosity, weighted_rugosity]) 61 | 62 | facet_rugosities = 
pd.DataFrame(facet_rugosity_data, 63 | columns=["Facet", "Relative Area", "Rugosity", "Weighted Rugosity"]) 64 | print() 65 | print("Facet rugosities:") 66 | print(facet_rugosities) 67 | 68 | # Particle rugosity 69 | import morphology_plot as plotter 70 | 71 | print() 72 | print("Weighted rugosity:") 73 | print(round(facet_rugosities["Weighted Rugosity"].sum(), 3)) 74 | plotter.generate_morphology_plot(vh_morphology, labels=list(facet_rugosities["Rugosity"])) 75 | -------------------------------------------------------------------------------- /scripts/november_2023_morphology_webinar/morphology_plot.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | # matplotlib.use('Agg') 3 | import matplotlib.pyplot as plt 4 | from mpl_toolkits.mplot3d import Axes3D 5 | from mpl_toolkits.mplot3d.art3d import Poly3DCollection 6 | 7 | """ 8 | Script to create a 3D plot of a crystal morphology. 9 | """ 10 | 11 | 12 | def generate_morphology_plot(morphology, labels=None): 13 | fig = plt.figure(figsize=(10., 10.)) # 3D graph instance 14 | ax = fig.add_subplot(111, projection='3d') # 3D Axes 15 | _ = ax.set_xlim3d(-15, 15) # Set the axes limits 16 | _ = ax.set_ylim3d(-15, 15) 17 | _ = ax.set_zlim3d(-15, 15) 18 | _ = ax.grid(False) # To hide the gridlines 19 | _ = plt.axis('off') # To hide the axes 20 | for i, facet in enumerate(morphology.facets): 21 | for edge in facet.edges: 22 | Axes3D.plot(ax, 23 | [coord[0] for coord in edge], # The x coordinates 24 | [coord[1] for coord in edge], # The y coordinates 25 | [coord[2] for coord in edge], # The z coordinates 26 | c='black', 27 | linewidth=1.5) 28 | for edge in facet.edges: 29 | vertices = [(edge[0], edge[1], facet.centre_of_geometry)] 30 | Axes3D.add_collection3d(ax, Poly3DCollection(vertices, color='blue', linewidth=0, alpha=0.3)) 31 | if labels: 32 | ax.text(facet.centre_of_geometry[0], facet.centre_of_geometry[1], facet.centre_of_geometry[2], 33 | ''' {} 34 | 
{}'''.format(facet.miller_indices.hkl, labels[i]), 35 | color='black') 36 | else: 37 | ax.text(facet.centre_of_geometry[0], facet.centre_of_geometry[1], facet.centre_of_geometry[2], 38 | ''' {} 39 | {}'''.format(facet.miller_indices.hkl, round(morphology.relative_area(facet.miller_indices), 3)), 40 | color='black') 41 | 42 | ax.scatter(facet.centre_of_geometry[0], 43 | facet.centre_of_geometry[1], 44 | facet.centre_of_geometry[2], 45 | s=10, 46 | color='black') 47 | plt.show() 48 | 49 | 50 | if __name__ == "__main__": 51 | from ccdc.io import EntryReader 52 | from ccdc.morphology import BFDHMorphology 53 | 54 | csd = EntryReader('CSD') 55 | ibuprofen = csd.crystal("IBPRAC18") 56 | bfdh_morphology = BFDHMorphology(ibuprofen) 57 | generate_morphology_plot(bfdh_morphology) 58 | -------------------------------------------------------------------------------- /scripts/packing_similarity_dendrogram/assets/dendogram_figure_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/packing_similarity_dendrogram/assets/dendogram_figure_1.png -------------------------------------------------------------------------------- /scripts/packing_similarity_dendrogram/assets/dendogram_figure_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/packing_similarity_dendrogram/assets/dendogram_figure_2.png -------------------------------------------------------------------------------- /scripts/packing_similarity_dendrogram/assets/dendogram_figure_3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/packing_similarity_dendrogram/assets/dendogram_figure_3.png -------------------------------------------------------------------------------- /scripts/particle_rugosity/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Particle Rugosity 2 | 3 | ## Summary 4 | Simulates the particle shape of the given crystal structure using BFDH morphology 5 | prediction, then calculates the particle rugosity by determining the rugosity 6 | of the facets and calculating the weighted average (by facet area). 7 | 8 | ### Relevance 9 | Rugosity is considered to be a factor in nucleation, with rougher crystal surfaces 10 | having higher energies and being generally less stable. This property has been used 11 | in rationalizing the relative stability or probability of forming one polymorph 12 | (or putative crystal structure from Crystal Structure Prediction, CSP) over another. 13 | 14 | Using a protocol similar to that reported by [Cruz-Cabeza and colleagues](https://doi.org/10.1002/anie.202006939), this script 15 | takes a crystal structure or refcode as an argument and will use the CCDC BFDH 16 | morphology module to simulate the predicted particle shape. The facet hkl planes are 17 | then obtained and the rugosity of these planes is calculated using the [CCDC rugosity function](https://downloads.ccdc.cam.ac.uk/documentation/API/descriptive_docs/particle.html) (including a scan of 18 | different offsets from the origin to identify the minimum rugosity of that crystal 19 | plane). Finally, the total particle rugosity is calculated by weighted average 20 | according to the facet surface areas.
21 | 22 | ## Requirements 23 | 24 | - Tested with CSD Python API version 3.7.9 on Linux and Windows 25 | - ccdc.particle 26 | - ccdc.morphology 27 | 28 | ## Licensing Requirements 29 | 30 | - CSD-Core, CSD-Particle 31 | 32 | ## Instructions on Running 33 | ### Linux command line 34 | - load the CSD Python API Miniconda environment 35 | - call Python to read the script, add the crystal structure refcode or filename (local file) 36 | ~~~ 37 | $ python particle_rugosity.py AABHTZ 38 | The crystal structure AABHTZ has been loaded 39 | Calculating particle rugosity ... 40 | AABHTZ particle rugosity: 1.507 41 | ~~~ 42 | OR 43 | ~~~ 44 | $ python particle_rugosity.py AABHTZ.cif 45 | ~~~ 46 | - results are printed and can be redirected to be saved in an output file 47 | ~~~ 48 | $ python particle_rugosity.py AABHTZ > AABHTZ_rugosity.out 49 | ~~~ 50 | 51 | ### Windows CSD Python API 52 | - move the particle\_rugosity.py file to the CCDC\Python\_API\_(year)\ folder 53 | - to determine where this is on your computer, start the CSD Python API and enter the following commands 54 | ~~~ 55 | >>> import os 56 | >>> wd = os.getcwd() 57 | >>> print(wd) 58 | C:\Program Files\CCDC\Python_API_2022\ 59 | ~~~ 60 | the last line is the folder location where you need to move the particle\_rugosity.py file 61 | - now the script can be run in the CSD Python API to calculate the rugosity of any CSD entry using: 62 | ~~~ 63 | >>> from particle_rugosity import particle_rugosity 64 | >>> particle_rugosity('AABHTZ') 65 | The crystal structure AABHTZ has been loaded 66 | Calculating particle rugosity ... 67 | AABHTZ particle rugosity: 1.507 68 | ~~~ 69 | OR 70 | - if you want to run your own crystal structures, make a folder (eg.
rugosity_calc) in Python_API_(year), and add your 71 | crystal structure file to that folder 72 | - modify the argument to include the file path 73 | ~~~ 74 | >>> particle_rugosity('rugosity_calc/AABHTZ.cif') 75 | The crystal structure rugosity_calc/AABHTZ.cif has been loaded 76 | Calculating particle rugosity ... 77 | rugosity_calc/AABHTZ.cif particle rugosity: 1.507 78 | ~~~ 79 | 80 | ## Author 81 | 82 | _R. Alex Mayo_ 2023 83 | 84 | > For feedback or to report any issues please contact [support@ccdc.cam.ac.uk](mailto:support@ccdc.cam.ac.uk) 85 | -------------------------------------------------------------------------------- /scripts/particle_rugosity/particle_rugosity.py: -------------------------------------------------------------------------------- 1 | # script for calculating the particle rugosity for a given crystal structure (.cif in local working directory, or refcode of a CSD entry) 2 | # requires a CCDC license, and loading of the CSD-python miniconda environment if run from Linux command line 3 | 4 | # import ccdc modules 5 | import os as opsys 6 | import argparse 7 | from ccdc.io import CrystalReader 8 | from ccdc.particle import Surface 9 | from ccdc.morphology import BFDHMorphology 10 | 11 | def particle_rugosity(xname): 12 | # xname is a string, either refcode (eg: 'AABHTZ') or filename (eg: 'AABHTZ.cif') 13 | # load structure, either (1) crystal structure file in working directory, or 14 | # (2) CSD refcode 15 | if opsys.path.isfile(xname): 16 | try: 17 | # agrument is crystal structure file 18 | xtal1 = CrystalReader(xname) 19 | xtal = xtal1[0] 20 | xtal1.close 21 | except: 22 | print("Error in reading crystal structure input.\nPlease enter a CSD refcode or provide the path to your crystal structure file") 23 | input('Press ENTER to exit') 24 | quit() 25 | else: 26 | try: 27 | # agrument is a CSD refcode 28 | xtal = CrystalReader('CSD').crystal(xname) 29 | except: 30 | print("Error in reading crystal structure input.\nPlease enter a CSD refcode 
or provide the path to your crystal structure file") 31 | input('Press ENTER to exit') 32 | quit() 33 | 34 | print(f"The crystal structure {xname} has been loaded") 35 | print("Calculating particle rugosity ...") 36 | 37 | # run BFDH morphology to determine predicted particle surface area = hkl planes, and d-spacing of the planes 38 | morphology = BFDHMorphology(xtal) 39 | facets = morphology.facets 40 | f = facets[0] 41 | all_hkl = [f.miller_indices.hkl for f in facets] 42 | all_mi = [f.miller_indices for f in facets] 43 | facets_relA = [morphology.relative_area(mi) for mi in all_mi] 44 | all_d_hkl = [f.miller_indices.d_spacing for f in facets] 45 | num_face = len(all_d_hkl) 46 | 47 | # generate surfaces and record rugosity, weigh by particle surface area 48 | w_part_rug = [] 49 | for i in range(num_face): 50 | hkl = all_hkl[i] 51 | rug_list = [] 52 | 53 | # check rugosity of crystal plane at 0, 0.25, 0.5, and 0.75 offset from origin 54 | for split in range(1,5): 55 | os = all_d_hkl[i] / split 56 | surface = Surface(xtal, hkl, offset=os) 57 | rug_list.append(surface.descriptors.rugosity) 58 | 59 | # only take the minimum rugosity value (from all offset calculations) for each hkl 60 | rug = min(rug_list) 61 | w_surf_rug = rug * facets_relA[i] 62 | w_part_rug.append(w_surf_rug) 63 | 64 | # sum weighted rugosity values and print total particle rugosity 65 | tot_rug = sum(w_part_rug) 66 | print(f'{xname} particle rugosity: ',round(tot_rug, 3)) 67 | 68 | # ================================ 69 | if __name__=='__main__': 70 | parser = argparse.ArgumentParser() 71 | parser.add_argument("input_x") 72 | args = parser.parse_args() 73 | crystal = args.input_x 74 | 75 | # run code 76 | particle_rugosity(crystal) 77 | 78 | -------------------------------------------------------------------------------- /scripts/refcodes_with_properties/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Refcode List Generator 2 | 3 | ## Summary 4 | 5 
| A script that allows you to create refcode lists (or CSV files of properties for a refcode list) for simple properties. 6 | The advantage of the script is that the control is via an easy-to-read file so you can keep an interpretable record of 7 | how a test set was generated in research. You can also then reproduce the list, or indeed run it on a new database and 8 | update it with the same conditions. 9 | 10 | ### Relevance 11 | 12 | We want research to be FAIR (Findable, Accessible, Interoperable and Reusable) - this script means we can create a 13 | simple description of the test set used that any researcher could then reproduce from the script and the description. 14 | 15 | ## Requirements 16 | 17 | - Tested with CSD Python API version 3.9 on Linux and Windows 18 | - ccdc.io 19 | - ccdc.search 20 | 21 | ## Licensing Requirements 22 | 23 | - CSD-Core 24 | 25 | ## Instructions on Running 26 | 27 | ### Linux command line 28 | 29 | - load the CSD Python API Miniconda environment 30 | - create a text control file with the various control lines specified 31 | - call Python to read the script and specify necessary arguments 32 | 33 | ~~~bash 34 | python refcodes_with_properties.py --help 35 | ~~~ 36 | 37 | The above will print an extended help message that describes the registered properties. 38 | 39 | You can run the script with an example control file. Results are printed by default and can be redirected to be saved in an 40 | output file, e.g. 41 | 42 | ~~~ 43 | python refcodes_with_properties.py -c example_control_file.txt -o mylist.gcd 44 | ~~~ 45 | 46 | This will generate a GCD file that can be used in other work.
47 | 48 | ### Windows CSD Python API 49 | 50 | - launch a CMD window 51 | - Use the installed version of the CSD Python API, for example C:\Users\ 52 | \CCDC\ccdc-software\csd-python-api assuming the CCDC tools are installed in the usual place do this 53 | 54 | ~~~bat 55 | C:\Users\\CCDC\ccdc-software\csd-python-api\run_python_api.bat refcodes_with_properties.py --help 56 | ~~~ 57 | 58 | ## Author 59 | 60 | _Jason C. Cole_ 2025 61 | 62 | > For feedback or to report any issues please contact [support@ccdc.cam.ac.uk](mailto:support@ccdc.cam.ac.uk) 63 | -------------------------------------------------------------------------------- /scripts/refcodes_with_properties/example_control_file.txt: -------------------------------------------------------------------------------- 1 | # An example control file - this will find all organic structures 2 | # with up to 100 atoms, Z' = 1, only 1 component that isnt disordered and 3 | # has a low R-Factor 4 | # 5 | # only include organic structures as output 6 | organic : 1 7 | # number of atoms to allow through 8 | atom count : 0 100 9 | # Ensure Z-prime is one 10 | zprime range : 0.99 1.01 11 | # Ensure only one component in the structure 12 | component range : 0 1 13 | # Dont include disordered structures 14 | disordered : 0 15 | # Specify an R-factor range 16 | rfactor range : 0.1 5 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /scripts/refcodes_with_properties/more_elaborate_control.txt: -------------------------------------------------------------------------------- 1 | # An example control file 2 | # 3 | # 4 | # only include organic structures as output 5 | organic : 1 6 | # specify a range of donors 7 | donor count : 0 10 8 | # specify a range of acceptors 9 | acceptor count : 5 5 10 | # rotatable bond count range 11 | rotatable bond count : 3 7 12 | # number of atoms to allow through 13 | atom count : 0 100 14 | # only include structures containing Hydrogen, Carbon,
Nitrogen or Oxygen and nothing else 15 | allowed atomic numbers : 1 6 7 8 16 | # only include structures containing all of these elements (i.e.) Hydrogen, Carbon, Nitrogen or Oxygen 17 | must have atomic numbers : 1 6 7 8 18 | # Ensure Z-prime is one 19 | zprime range : 0.99 1.01 20 | # Ensure only one component in the structure 21 | component range : 0 1 22 | # Dont include disordered structures 23 | disordered : 0 24 | # Specify an R-factor range 25 | rfactor range : 0.1 5 26 | # atomic weight 27 | atomic weight : 0.0 1000.0 28 | 29 | 30 | -------------------------------------------------------------------------------- /scripts/refcodes_with_properties/refcodes_with_properties.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # This script can be used for any purpose without limitation subject to the 4 | # conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx 5 | # 6 | # This permission notice and the following statement of attribution must be 7 | # included in all copies or substantial portions of this script. 8 | # 9 | # 2025-03-14: created by Jason C. 
Cole, The Cambridge Crystallographic Data Centre 10 | 11 | ''' 12 | Filter a refcode list to the subset that have the desired properties 13 | ''' 14 | 15 | ######################################################################### 16 | 17 | import argparse 18 | import csv 19 | import sys 20 | 21 | from ccdc import io 22 | 23 | import entry_property_calculator 24 | 25 | if __name__ == '__main__': 26 | 27 | parser = argparse.ArgumentParser(description=__doc__, 28 | formatter_class=argparse.RawDescriptionHelpFormatter) 29 | 30 | parser.add_argument('-r', '--refcode_file', help='input file containing the list of refcodes', default=None) 31 | parser.add_argument('-d', '--database_file', help='input file containing the list of refcodes', default=None) 32 | parser.add_argument('-c', '--control_file', help='configuration file containing the desired properties\n\n %s' % ( 33 | entry_property_calculator.helptext())) 34 | parser.add_argument('-v', '--get_values', action="store_true", 35 | help='calculate and print descriptor values where possible rather than filter\n\n %s' % ( 36 | entry_property_calculator.helptext())) 37 | parser.add_argument('-o', '--output_file', default=None, 38 | help='output CSV file for results\n\n %s' % (entry_property_calculator.helptext())) 39 | 40 | args = parser.parse_args() 41 | 42 | refcode_file = args.refcode_file 43 | database_file = args.database_file 44 | control_file = args.control_file 45 | print_values = args.get_values 46 | 47 | outfile = sys.stdout 48 | if args.output_file is not None: 49 | outfile = open(args.output_file, 'w', encoding='utf-8') 50 | 51 | filterer = entry_property_calculator.parse_control_file(open(control_file, "r").readlines()) 52 | 53 | reader = None 54 | if refcode_file: 55 | reader = io.EntryReader(refcode_file, format='identifiers') 56 | elif database_file: 57 | reader = io.EntryReader(database_file) 58 | else: 59 | reader = io.EntryReader('CSD') 60 | 61 | if args.get_values: 62 | 63 | csvwriter = None 64 | for 
entry in reader: 65 | values = filterer.values(entry) 66 | if csvwriter == None: 67 | fieldnames = ["identifier"] + values.keys() 68 | csvwriter = csv.DictWriter(outfile, fieldnames=fieldnames) 69 | csvwriter.writeheader() 70 | values["identifier"] = entry.identifier 71 | csvwriter.writerow(values) 72 | 73 | else: 74 | for entry in reader: 75 | if filterer.evaluate(entry): 76 | outfile.write(entry.identifier + "\n") 77 | -------------------------------------------------------------------------------- /scripts/refcodes_with_properties/test_entry_property_calculator.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from ccdc.io import EntryReader 4 | 5 | from entry_property_calculator import parse_control_file 6 | 7 | 8 | class TestFiltering(unittest.TestCase): 9 | 10 | def setUp(self): 11 | 12 | self.reader = EntryReader('CSD') 13 | self.aabhtz = self.reader.entry("AABHTZ") 14 | self.aacani_ten = self.reader.entry("AACANI10") 15 | self.aadamc = self.reader.entry("AADAMC") 16 | self.aadrib = self.reader.entry("AADRIB") 17 | self.abadis = self.reader.entry("ABADIS") 18 | 19 | def test_organic_filter(self): 20 | 21 | test_file = """ 22 | organic : 1 23 | """ 24 | lines = test_file.split('\n') 25 | evaluator = parse_control_file(lines) 26 | 27 | self.assertTrue(evaluator.evaluate(self.aabhtz)) 28 | 29 | self.assertFalse(evaluator.evaluate(self.aacani_ten)) 30 | 31 | def test_component_filter(self): 32 | test_file = """ 33 | component range : 0 1 34 | """ 35 | lines = test_file.split('\n') 36 | evaluator = parse_control_file(lines) 37 | 38 | self.assertTrue(evaluator.evaluate(self.aabhtz)) 39 | 40 | self.assertFalse(evaluator.evaluate(self.aacani_ten)) 41 | 42 | def test_donor_count_filter(self): 43 | test_file = """ 44 | donor count : 2 2 45 | """ 46 | lines = test_file.split('\n') 47 | evaluator = parse_control_file(lines) 48 | 49 | self.assertFalse(evaluator.evaluate(self.aabhtz)) 50 | 51 | 
self.assertTrue(evaluator.evaluate(self.aadamc)) 52 | 53 | test_file = """ 54 | donor count : 0 3 55 | """ 56 | lines = test_file.split('\n') 57 | evaluator = parse_control_file(lines) 58 | 59 | self.assertTrue(evaluator.evaluate(self.aabhtz)) 60 | self.assertTrue(evaluator.evaluate(self.aadamc)) 61 | 62 | def test_acceptor_count_filter(self): 63 | test_file = """ 64 | acceptor count : 7 7 65 | """ 66 | lines = test_file.split('\n') 67 | evaluator = parse_control_file(lines) 68 | 69 | # regards Cl as an acceptor ... 70 | self.assertTrue(evaluator.evaluate(self.aabhtz)) 71 | 72 | self.assertTrue(evaluator.evaluate(self.aacani_ten)) 73 | 74 | def test_zprime(self): 75 | test_file = """ 76 | zprime range : 0.99 1.01 77 | """ 78 | lines = test_file.split('\n') 79 | evaluator = parse_control_file(lines) 80 | self.assertTrue(evaluator.evaluate(self.aabhtz)) 81 | self.assertFalse(evaluator.evaluate(self.aadrib)) 82 | 83 | def test_atomic_numbers(self): 84 | test_file = """ 85 | allowed atomic numbers : 1 6 7 8 86 | must have atomic numbers : 1 6 7 8 87 | """ 88 | lines = test_file.split('\n') 89 | evaluator = parse_control_file(lines) 90 | self.assertFalse(evaluator.evaluate(self.aabhtz)) 91 | self.assertFalse(evaluator.evaluate(self.aadrib)) 92 | 93 | test_file = """ 94 | must have atomic numbers : 1 6 7 8 95 | """ 96 | lines = test_file.split('\n') 97 | evaluator = parse_control_file(lines) 98 | self.assertTrue(evaluator.evaluate(self.aabhtz)) 99 | self.assertFalse(evaluator.evaluate(self.aadrib)) 100 | 101 | def test_rotatable_bond_count(self): 102 | test_file = """ 103 | rotatable bond count : 0 4 104 | """ 105 | lines = test_file.split('\n') 106 | evaluator = parse_control_file(lines) 107 | self.assertTrue(evaluator.evaluate(self.abadis)) 108 | 109 | def test_multiple(self): 110 | test_file = """ 111 | 112 | # An example control file 113 | # 114 | # 115 | # only include organic structures as output 116 | organic : 1 117 | # specify a range of donors 118 | donor count 
: 0 10 119 | # specify a range of acceptors 120 | acceptor count : 5 5 121 | # rotatable bond count range 122 | rotatable bond count : 3 7 123 | # number of atoms to allow through 124 | atom count : 0 100 125 | # only include structures containing Hydrogen, Carbon, Nitrogen or Oxygen and nothing else 126 | allowed atomic numbers : 1 6 7 8 127 | # only include structures containing all of these elements (i.e.) Hydrogen, Carbon, Nitrogen or Oxygen 128 | must have atomic numbers : 1 6 7 8 129 | # Ensure Z-prime is one 130 | zprime range : 0.99 1.01 131 | # Ensure only one component in the structure 132 | component range : 2 2 133 | # Dont include disordered structures 134 | disordered : 0 135 | # Specify an R-factor range 136 | rfactor range : 0.1 5 137 | # 138 | 139 | 140 | """ 141 | 142 | lines = test_file.split('\n') 143 | evaluator = parse_control_file(lines) 144 | hits = [] 145 | 146 | test_entries = ['AABHTZ', 'ABAQEB', 'ABELEY', 'ADAQOM', 'ADARAA', 'ADARAZ', 'ADUWIG', 'AFEREK'] 147 | for id in test_entries: 148 | e = self.reader.entry(id) 149 | 150 | if evaluator.evaluate(e): 151 | hits.append(e.identifier) 152 | 153 | self.assertEqual(['ABAQEB', 'ABELEY', 'ADAQOM', 'ADUWIG', 'AFEREK'], hits) 154 | -------------------------------------------------------------------------------- /scripts/show_semiconductor_properties/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Descs Data Plot Readme 2 | 3 | Only works for 2024.3 onwards, please see 4 | for more details on the properties. 
5 | Download the whole directory and place it in the ccdc-software/mercury/scripts/ folder to make use of it. It 6 | will display the properties for the structure currently loaded in the Mercury window when launched from the csd-python-api dropdown in 7 | Mercury. 8 | -------------------------------------------------------------------------------- /scripts/show_semiconductor_properties/show_semiconductor_properties.py: -------------------------------------------------------------------------------- 1 | # 2 | # This script can be used for any purpose without limitation subject to the 3 | # conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx 4 | # 5 | # This permission notice and the following statement of attribution must be 6 | # included in all copies or substantial portions of this script. 7 | # 8 | # 2023-08-18 Created by Chris Kingsbury, the Cambridge Crystallographic Data Centre 9 | # ORCID 0000-0002-4694-5566 10 | # 11 | # 12 | from ccdc.utilities import ApplicationInterface 13 | from jinja2 import Template 14 | from pathlib import Path 15 | from io import BytesIO 16 | import base64 17 | import json 18 | import numpy as np 19 | import matplotlib.pyplot as plt 20 | from matplotlib.patches import Rectangle 21 | 22 | README_LINK = "https://downloads.ccdc.cam.ac.uk/documentation/API/descriptive_docs/predicted_properties.html" 23 | 24 | default_settings = {} 25 | 26 | 27 | def plot_hist(descs_data, astype="fig"): 28 | data_hlt = { 29 | "singlet_state_1_energy": descs_data.singlet_state_1_energy, 30 | "singlet_state_1_oscillator_strength": descs_data.singlet_state_1_oscillator_strength, 31 | "singlet_state_2_energy": descs_data.singlet_state_2_energy, 32 | "singlet_state_2_oscillator_strength": descs_data.singlet_state_2_oscillator_strength, 33 | "triplet_state_1_energy": descs_data.triplet_state_1_energy, 34 | "triplet_state_2_energy": descs_data.triplet_state_2_energy, 35 | "homo_lumo_gap": descs_data.homo_lumo_gap, 36 | "transfer_integral":
descs_data.transfer_integral, 37 | "hole_reorganization_energy": descs_data.hole_reorganization_energy, 38 | "dynamic_disorder": descs_data.dynamic_disorder, 39 | } 40 | hist_data = json.load(open(Path(__file__).parent / "hist_data.json", "r")) 41 | fig, axs = plt.subplots(nrows=2, ncols=5, figsize=(15, 15)) 42 | 43 | for ix, (key, data_red) in enumerate(hist_data.items()): 44 | row, col = int(np.floor(ix / 5)), ix % 5 45 | ax = axs[row][col] 46 | xs = data_red["x"] 47 | ys = data_red["y"] 48 | 49 | ax.stairs(xs, ys, orientation="horizontal", fill=True) 50 | ax.title.set_text(data_red["axis_label"]) 51 | 52 | arrow_place = data_hlt.get(data_red["name"], np.nan) 53 | if arrow_place is None: 54 | arrow_place = np.nan 55 | try: 56 | arrow_anchor = (0, [x for x in ys if (x < arrow_place)][-1]) 57 | height = ys[1] - ys[0] 58 | width = [x for x, y in zip(xs, ys) if (y < arrow_place)][-1] 59 | axs[row][col].add_patch( 60 | Rectangle( 61 | xy=arrow_anchor, 62 | width=width, 63 | height=height, 64 | facecolor="red", 65 | fill=True, 66 | ) 67 | ) 68 | except IndexError: 69 | pass 70 | 71 | if astype == "buf": 72 | buf = BytesIO() 73 | fig.savefig(buf, format="png", dpi=600, backend="Agg") 74 | image_base64 = ( 75 | base64.b64encode(buf.getvalue()).decode("utf-8").replace("\n", "") 76 | ) 77 | buf.close() 78 | return image_base64 79 | 80 | else: 81 | return fig 82 | 83 | 84 | def write_descs_report(settings=default_settings): 85 | interface = ApplicationInterface(parse_commandline=False) 86 | interface.parse_commandline() 87 | entry = interface.current_entry 88 | if (entry.predicted_properties is None): 89 | interface.write_report(title="Data not found", content="No Predicted Property Data Found For " + entry.identifier) 90 | return None 91 | else: 92 | properties = entry.predicted_properties 93 | if (properties.semiconductor_properties is None): 94 | interface.write_report(title="Data not found", content="No Semiconductor Data Found For " + entry.identifier) 95 | return None 
96 | 97 | descs_data = properties.semiconductor_properties 98 | with open(interface.output_html_file, "w") as report: 99 | 100 | tl = Template( 101 | open( 102 | Path(__file__).parent / "semiconductor_template.html", 103 | "r", 104 | ).read() 105 | ) 106 | report.write( 107 | tl.render( 108 | ident=interface.identifier, data=descs_data, readme_link=README_LINK, image=plot_hist(descs_data, "buf") 109 | ) 110 | ) 111 | 112 | 113 | if __name__ == "__main__": 114 | write_descs_report() 115 | -------------------------------------------------------------------------------- /scripts/surface_charge/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Surface Charge Calculator 2 | 3 | ## Summary 4 | 5 | This tool returns the total surface charges for a given structure and list of supplied hkl indices and offsets. 6 | The script provides a GUI that can be used from Mercury or from the command line. 7 | 8 | The output is an HTML file with a table for all the selected surfaces and their associated charges, projected surface areas, and normalised surface charges (surface charge per projected area). 9 | 10 | Charges are currently calculated using the Gasteiger charge model. Further development could be made to use user derived charges. Please let us know if that is of interest: [support@ccdc.cam.ac.uk](support@ccdc.cam.ac.uk). 11 | 12 | Example Output: 13 | 14 | ![Example Output](assets/example_output_hxacan28.png) 15 | 16 | > **Note** - When comparing charges for non-CSD structures and structures from mol2 files the values might be different as the bonding might not be the same. When importing a mol2 file the bonding and charges may have to be calculated on the fly, whereas this information is assigned for CSD entries. 
17 | 18 | ## Requirements 19 | 20 | - Requires a minimum of CSD 2022.2 21 | 22 | ## Licensing Requirements 23 | 24 | - CSD-Particle Licence 25 | 26 | ## Instructions for use 27 | 28 | - To run from the command line: 29 | 30 | ```commandline 31 | # With an activated environment 32 | > python surface_charge.py 33 | ``` 34 | 35 | - To run from Mercury: 36 | Add the folder containing the script to your Python API menu. Mercury -> CSD Python API -> Options -> Add Location. Then select the `surface_charge.py` script from the drop-down menu. 37 | ![Adding_Locations](assets/adding_location.png) 38 | ![Selecting Scripts](assets/selecting_script.png) 39 | 40 | Running from either the command line or Mercury will show the same interface allowing you to select a refcode from the CSD or input a mol2 file directly. 41 | 42 | Example Input: 43 | 44 | ![Example Input](assets/example_input.png) 45 | 46 | ## Author 47 | 48 | Alex Moldovan (2024) 49 | 50 | > For feedback or to report any issues please contact [support@ccdc.cam.ac.uk](mailto:support@ccdc.cam.ac.uk) -------------------------------------------------------------------------------- /scripts/surface_charge/assets/adding_location.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/surface_charge/assets/adding_location.png -------------------------------------------------------------------------------- /scripts/surface_charge/assets/csd-python-api-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/surface_charge/assets/csd-python-api-logo.png -------------------------------------------------------------------------------- /scripts/surface_charge/assets/example_input.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/surface_charge/assets/example_input.png -------------------------------------------------------------------------------- /scripts/surface_charge/assets/example_output_hxacan28.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/surface_charge/assets/example_output_hxacan28.png -------------------------------------------------------------------------------- /scripts/surface_charge/assets/selecting_script.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccdc-opensource/csd-python-api-scripts/542bd4409f6ccc465d8fc70ff97ce56657f22e2d/scripts/surface_charge/assets/selecting_script.png --------------------------------------------------------------------------------