├── .dockerignore ├── .github └── workflows │ ├── build-latest.yaml │ ├── build-stable.yaml │ ├── build-tag.yaml │ └── build.yaml ├── .gitignore ├── Dockerfile-obabel ├── Dockerfile-pli ├── Dockerfile-rdkit ├── Dockerfile-rdkit-centos ├── Dockerfile-sdposter ├── Dockerfile-smog ├── Jenkinsfile ├── LICENSE ├── README.md ├── build.gradle ├── data ├── DCP2_1.pdb.gz ├── DCP2_1_confs.sdf.gz ├── Kinase_inhibs.sdf.gz ├── XChemReactionMaker1.sdf.gz ├── conformers_to_align.data.gz ├── dhfr_3d.data.gz ├── dhfr_3d.metadata ├── dhfr_3d.sdf ├── dhfr_3d.sdf.gz ├── hivpr.config.zip ├── hivpr_ligprep.sdf.gz ├── hivpr_ligprep_100.sdf.gz ├── hivpr_rdock.as ├── hivpr_rdock.mol2 ├── hivpr_rdock.prm ├── mpro │ ├── Mpro-x0387_0.mol │ ├── Mpro-x0387_0.mol2 │ ├── Mpro-x0387_0.pdb │ ├── Mpro-x0387_0.smi │ ├── Mpro-x0678_0_016.mol │ ├── Mpro-x0678_0_016.smi │ ├── docking-tethered.as │ ├── docking-tethered.prm │ ├── expanded-17.json │ ├── featurestein.p │ ├── hits-17.sdf.gz │ ├── hits-23.sdf.gz │ ├── hits-5.sdf.gz │ ├── poses.sdf │ └── poses.sdf.gz ├── nci10.smiles ├── nci100.data.gz ├── nudt7 │ ├── ligands.data.gz │ ├── ligands.sdf.gz │ ├── receptor.mol2 │ └── refmol.mol ├── pyrimethamine.mol ├── ref_mol.sdf.gz ├── sdf-aliphatic-primary-amines-175.data.gz ├── sdf-aliphatic-primary-amines-175.metadata ├── sdf-aliphatic-primary-amines-175.sdf.gz ├── smog │ ├── DCP2_1.pdb │ ├── NUDT22_holo.pdb │ ├── confs.data.gz │ ├── confs.metadata │ └── confs.sdf ├── sucos │ ├── 4e3g_lig.mol │ ├── benzene.sdf │ ├── hits.sdf │ ├── mols.sdf │ └── poses.sdf └── sulfonyl_chloride.sdf ├── environment-rdkit-utils.yml ├── execute ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── post-service-descriptors.sh ├── requirements-obabel.txt ├── requirements-rdkit.txt ├── src ├── nextflow │ ├── README.md │ ├── docking │ │ ├── .gitignore │ │ ├── plip.config │ │ ├── plip.nf │ │ ├── plip.nsd.config │ │ ├── plip.nsd.nf │ │ ├── plip.nsd.yml │ │ ├── rdock-filter.nsd.config │ │ ├── rdock-filter.nsd.nf │ │ ├── rdock-filter.nsd.yml │ │ ├── rdock.config │ │ ├── rdock.nf │ │ ├── rdock.nsd.config │ │ ├── rdock.nsd.nf │ │ ├── rdock.nsd.yml │ │ ├── smog.config │ │ ├── smog.nf │ │ ├── smog.nsd.config │ │ ├── smog.nsd.nf │ │ └── smog.nsd.yml │ ├── nextflow-docker.config │ ├── rdkit │ │ ├── screen+conformers.nf │ │ ├── screen-dataset.nsd.config │ │ ├── screen-dataset.nsd.nf │ │ ├── screen-dataset.nsd.yml │ │ ├── screen-multi-dataset.nsd.config │ │ ├── screen-multi-dataset.nsd.nf │ │ ├── screen-multi-dataset.nsd.yml │ │ ├── screen.config │ │ └── screen.nf │ └── xchem │ │ ├── dock-score.nf │ │ ├── expand.nf │ │ ├── featurestein.nf │ │ ├── prepare-tether-featurestein.nf │ │ ├── prepare-tether.nf │ │ ├── tether-dock-score.nf │ │ └── xcos.nf └── python │ ├── NNScore_pdbbind2016.pickle │ ├── README.rst │ ├── RFScore_v1_pdbbind2016.pickle │ ├── RFScore_v2_pdbbind2016.pickle │ ├── RFScore_v3_pdbbind2016.pickle │ ├── __init__.py │ ├── notebooks │ └── default.ipynb │ ├── pipelines │ ├── __init__.py │ ├── dimorphite │ │ ├── LICENSE.txt │ │ ├── README.txt │ │ ├── __init__.py │ │ ├── dimorphite_dl.py │ │ ├── enumerate_charges.dsd.yml │ │ ├── enumerate_charges.py │ │ ├── enumerate_charges.test │ │ ├── run.py │ │ └── site_substructures.smarts │ ├── dmpk │ │ ├── __init__.py │ │ ├── pk_tmax_cmax_sim.dsd.yml │ │ ├── pk_tmax_cmax_sim.py │ │ └── pk_tmax_cmax_sim.test │ ├── docking │ │ ├── __init__.py │ │ ├── obabel_prepare_pdb.dsd.yml │ │ ├── obabel_prepare_pdb.py │ │ ├── obabel_prepare_pdb.test │ │ ├── plip.py │ │ ├── plip.test │ │ ├── smog2016.py │ │ └── smog2016.test │ ├── rdkit │ │ ├── README.md │ │ ├── __init__.py │ │ ├── cluster_3d.py │ │ ├── cluster_butina.dsd.yml │ │ ├── cluster_butina.py │ │ ├── cluster_butina.test │ │ ├── cluster_butina_diverse_subset_picker.dsd.yml │ │ ├── cluster_butina_matrix.dsd.yml │ │ ├── cluster_butina_matrix.py │ │ ├── cluster_butina_matrix.test │ │ ├── conformers.dsd.yml │ │ ├── conformers.py │ │ ├── conformers.test │ │ ├── constrained_conf_gen.dsd.yml │ │ ├── constrained_conf_gen.py │ │ ├── constrained_conf_gen.test │ │ ├── enumerate_candidates.py │ │ ├── max_min_picker.py │ │ ├── max_min_picker.test │ │ ├── max_min_picker_enrich.dsd.yml │ │ ├── max_min_picker_simple.dsd.yml │ │ ├── o3dAlign.dsd.yml │ │ ├── o3dAlign.py │ │ ├── o3dAlign.test │ │ ├── pbf_ev.dsd.yml │ │ ├── pbf_ev.py │ │ ├── pbf_ev.test │ │ ├── poised_filter.py │ │ ├── prepare_3d.py │ │ ├── rxn_maker.dsd.yml │ │ ├── rxn_maker.py │ │ ├── rxn_maker.test │ │ ├── rxn_selector.py │ │ ├── rxn_selector.test │ │ ├── rxn_smarts_filter.dsd.yml │ │ ├── rxn_smarts_filter.py │ │ ├── rxn_smarts_filter.test │ │ ├── sanifier.py │ │ ├── sanifier.test │ │ ├── sanifier_enumerator.dsd.yml │ │ ├── sanifier_standardiser_flatkinson.dsd.yml │ │ ├── sanifier_standardiser_molvs.dsd.yml │ │ ├── sanify_utils.py │ │ ├── screen.py │ │ ├── screen.test │ │ ├── screen_multi.py │ │ ├── screen_multi.test │ │ ├── show_feats.py │ │ ├── standardize.dsd.yml │ │ ├── standardize.py │ │ ├── standardize.test │ │ ├── sucos-max.dsd.yml │ │ ├── sucos-max.test │ │ ├── sucos.dsd.yml │ │ ├── sucos.py │ │ ├── sucos.test │ │ └── sucos_max.py │ └── xchem │ │ ├── __init__.py │ │ ├── build_oddt_models.py │ │ ├── calc_interactions.py │ │ ├── featurestein_generate.py │ │ ├── featurestein_generate.test │ │ ├── featurestein_generate_and_score.py │ │ ├── featurestein_generate_and_score.test │ │ ├── featurestein_score.py │ │ ├── featurestein_score.test │ │ ├── filter_interactions.py │ │ ├── fragnet_expand.py │ │ ├── interactions.py │ │ ├── prepare_tether.py │ │ ├── rdkit_align.py │ │ ├── rmsd_filter.py │ │ ├── split_fragnet_candidates.py │ │ ├── xcos.py │ │ └── xcos.test │ ├── setup.py │ └── simple_inters.py ├── test-nextflow.sh └── test-rdkit.sh /.dockerignore: -------------------------------------------------------------------------------- 1 | data 2 | gradle 3 | .gradle 4 | openshift 5 | work 6 | .nextflow 7 | trace.txt* 8 | report.html* 9 | wip -------------------------------------------------------------------------------- /.github/workflows/build-latest.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: build latest 3 | 4 | # Actions on the main/master branch for every change. 5 | 6 | # ----------------- 7 | # Control variables (GitHub Secrets) 8 | # ----------------- 9 | # 10 | # At the GitHub 'organisation' or 'project' level you must have the following 11 | # GitHub 'Repository Secrets' defined (i.e. via 'Settings -> Secrets'): - 12 | # 13 | # DOCKERHUB_USERNAME 14 | # DOCKERHUB_TOKEN 15 | # 16 | # ----------- 17 | # Environment (GitHub Environments) 18 | # ----------- 19 | # 20 | # Environment (n/a) 21 | 22 | on: 23 | push: 24 | branches: 25 | - 'master' 26 | repository_dispatch: 27 | types: 28 | - pipelines-utils-rdkit 29 | - pipelines-utils 30 | 31 | jobs: 32 | build: 33 | runs-on: ubuntu-latest 34 | steps: 35 | - uses: actions/checkout@v2 36 | # Build the images 37 | - name: Login to DockerHub 38 | uses: docker/login-action@v1 39 | with: 40 | username: ${{ secrets.DOCKERHUB_USERNAME }} 41 | password: ${{ secrets.DOCKERHUB_TOKEN }} 42 | - name: Build pipelines 43 | uses: docker/build-push-action@v2 44 | with: 45 | push: false 46 | file: Dockerfile-rdkit 47 | tags: informaticsmatters/rdkit_pipelines:latest 48 | # Test the images 49 | # We need Java and Groovy 50 | - name: Set up Java 11 51 | uses: actions/setup-java@v1 52 | with: 53 | java-version: 11 54 | - name: Run pipeline tester 55 | uses: informaticsmatters/pipeline-tester-action@v1 56 | # Publish the images 57 | - name: Push 58 | run: docker push informaticsmatters/rdkit_pipelines:latest 59 | - name: Build and push pipelines sdposter 60 | uses: docker/build-push-action@v2 61 | with: 62 | push: true 63 | file: Dockerfile-sdposter 64 | tags: squonk/rdkit-pipelines-sdposter:latest 65 | -------------------------------------------------------------------------------- /.github/workflows/build-stable.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: build stable 3 | 4 | # Actions for an 'official' tag. 5 | 6 | # An official tag is a 2 or 3-digit value (i.e. 'N.N[.N]'). 7 | # We publish images using the tag as a tag and one using 'stable' as s tag. 8 | 9 | # ----------------- 10 | # Control variables (GitHub Secrets) 11 | # ----------------- 12 | # 13 | # At the GitHub 'organisation' or 'project' level you must have the following 14 | # GitHub 'Repository Secrets' defined (i.e. via 'Settings -> Secrets'): - 15 | # 16 | # DOCKERHUB_USERNAME 17 | # DOCKERHUB_TOKEN 18 | # 19 | # ----------- 20 | # Environment (GitHub Environments) 21 | # ----------- 22 | # 23 | # Environment (n/a) 24 | 25 | on: 26 | push: 27 | tags: 28 | - '[0-9]+.[0-9]+.[0-9]+' 29 | - '[0-9]+.[0-9]+' 30 | 31 | jobs: 32 | publish-stable: 33 | runs-on: ubuntu-latest 34 | steps: 35 | - uses: actions/checkout@v2 36 | - name: Inject slug/short variables 37 | uses: rlespinasse/github-slug-action@v3.x 38 | - name: Login to DockerHub 39 | uses: docker/login-action@v1 40 | with: 41 | username: ${{ secrets.DOCKERHUB_USERNAME }} 42 | password: ${{ secrets.DOCKERHUB_TOKEN }} 43 | - name: Build and push pipelines 44 | uses: docker/build-push-action@v2 45 | with: 46 | push: true 47 | file: Dockerfile-rdkit 48 | tags: | 49 | informaticsmatters/rdkit_pipelines:${{ env.GITHUB_REF_SLUG }} 50 | informaticsmatters/rdkit_pipelines:stable 51 | - name: Build and push pipelines sdposter 52 | uses: docker/build-push-action@v2 53 | with: 54 | push: true 55 | file: Dockerfile-sdposter 56 | tags: | 57 | squonk/rdkit-pipelines-sdposter:${{ env.GITHUB_REF_SLUG }} 58 | squonk/rdkit-pipelines-sdposter:stable 59 | -------------------------------------------------------------------------------- /.github/workflows/build-tag.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: build tag 3 | 4 | # Actions for any 'unofficial' tag. 5 | 6 | # It's not an official tag if it's not formed from 2 or 3 digits 7 | # (i.e. is not 'N.N.N'). We publish images using the tag as a tag. 8 | 9 | # ----------------- 10 | # Control variables (GitHub Secrets) 11 | # ----------------- 12 | # 13 | # At the GitHub 'organisation' or 'project' level you must have the following 14 | # GitHub 'Repository Secrets' defined (i.e. via 'Settings -> Secrets'): - 15 | # 16 | # DOCKERHUB_USERNAME 17 | # DOCKERHUB_TOKEN 18 | # 19 | # ----------- 20 | # Environment (GitHub Environments) 21 | # ----------- 22 | # 23 | # Environment (n/a) 24 | 25 | on: 26 | push: 27 | tags: 28 | - '**' 29 | - '![0-9]+.[0-9]+.[0-9]+' 30 | - '![0-9]+.[0-9]+' 31 | 32 | jobs: 33 | publish-tag: 34 | runs-on: ubuntu-latest 35 | steps: 36 | - uses: actions/checkout@v2 37 | # Build and push the images with a tag. 38 | # Testing will have been done using the 'build-latest' workflow. 39 | - name: Inject slug/short variables 40 | uses: rlespinasse/github-slug-action@v3.x 41 | - name: Login to DockerHub 42 | uses: docker/login-action@v1 43 | with: 44 | username: ${{ secrets.DOCKERHUB_USERNAME }} 45 | password: ${{ secrets.DOCKERHUB_TOKEN }} 46 | - name: Build and push pipelines 47 | uses: docker/build-push-action@v2 48 | with: 49 | push: true 50 | file: Dockerfile-rdkit 51 | tags: informaticsmatters/rdkit_pipelines:${{ env.GITHUB_REF_SLUG }} 52 | - name: Build and push pipelines sdposter 53 | uses: docker/build-push-action@v2 54 | with: 55 | push: true 56 | file: Dockerfile-sdposter 57 | tags: squonk/rdkit-pipelines-sdposter:${{ env.GITHUB_REF_SLUG }} 58 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: build 3 | 4 | # Actions that take place on branches 5 | # or are driven by pull-requests to the main/master branch. 6 | # Here we build container images but don't push them 7 | # and therefore do not require docker credentials. 8 | 9 | # ----------------- 10 | # Control variables (GitHub Secrets) 11 | # ----------------- 12 | # 13 | # (n/a) 14 | # 15 | # ----------- 16 | # Environment (GitHub Environments) 17 | # ----------- 18 | # 19 | # Environment (n/a) 20 | 21 | on: 22 | push: 23 | branches-ignore: 24 | - 'master' 25 | pull_request: 26 | branches: 27 | - 'master' 28 | 29 | jobs: 30 | build: 31 | runs-on: ubuntu-latest 32 | steps: 33 | - uses: actions/checkout@v2 34 | # Build the images 35 | - name: Build pipelines 36 | uses: docker/build-push-action@v2 37 | with: 38 | file: Dockerfile-rdkit 39 | tags: informaticsmatters/rdkit_pipelines:latest 40 | - name: Build pipelines sdposter 41 | uses: docker/build-push-action@v2 42 | with: 43 | file: Dockerfile-sdposter 44 | tags: squonk/rdkit-pipelines-sdposter:latest 45 | # Test the images 46 | # We need Java and Groovy 47 | - name: Set up Java 11 48 | uses: actions/setup-java@v1 49 | with: 50 | java-version: 11 51 | - name: Run pipeline tester 52 | uses: informaticsmatters/pipeline-tester-action@v1 53 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | .gradle 3 | .idea 4 | *.ipr 5 | *.iws 6 | build 7 | dist 8 | *.iml 9 | *.pyc 10 | work 11 | .nextflow 12 | .nextflow.log* 13 | /tmp 14 | **/*.egg-info 15 | **/.DS_Store 16 | *.retry -------------------------------------------------------------------------------- /Dockerfile-obabel: -------------------------------------------------------------------------------- 1 | # We should create an Open Babel implementation of pipeline_utils that handles the basic I/O for 2 | # structure files so that the dependency on RDKit can be removed. 3 | # See https://github.com/InformaticsMatters/pipelines-obabel/issues/1 4 | 5 | FROM informaticsmatters/obabel:latest 6 | LABEL maintainer="Tim Dudgeon" 7 | 8 | USER root 9 | 10 | # Copy the obabel pipeline implementation into the image 11 | COPY src/python /opt/python-obabel 12 | RUN apt-get update && \ 13 | apt-get install -y --no-install-recommends \ 14 | python-setuptools \ 15 | gzip \ 16 | python-pip && \ 17 | pip install -e /opt/python-obabel 18 | # And the project pip requirements 19 | COPY requirements-obabel.txt /root/ 20 | RUN pip install -r /root/requirements-obabel.txt 21 | 22 | # The CMD is simply to run 'execute' in the WORKDIR. 23 | # The user would normally mount a volume with their own execute 24 | # script in it and then set the WORKDIR to the directory it's in. 25 | # In its absence we just run the built-in 'execute', 26 | # which is expected to echo some descriptive/helpful text. 27 | # 28 | # The default 'execute' relies on an ENV to name the pipeline it's in, 29 | # which can be defined with the docker 'pipeline' build argument. 30 | ARG pipeline=informaticsmatters/pipelines-obabel:latest 31 | ENV PIPELINE=$pipeline 32 | WORKDIR /home/obabel 33 | COPY execute ./ 34 | RUN chown obabel:0 ./execute && \ 35 | chmod +x ./execute 36 | CMD ["./execute"] 37 | 38 | USER obabel 39 | -------------------------------------------------------------------------------- /Dockerfile-pli: -------------------------------------------------------------------------------- 1 | FROM informaticsmatters/rdkit_pipelines:latest 2 | LABEL maintainer="Tim Dudgeon" 3 | 4 | 5 | USER root 6 | RUN apt-get update -y && apt-get install zlib1g-dev make gcc git -y 7 | RUN mkdir -p /usr/local/ 8 | WORKDIR /usr/local/ 9 | RUN git clone https://bitbucket.org/AstexUK/pli.git 10 | WORKDIR /usr/local/pli 11 | RUN make 12 | 13 | RUN useradd -u 1001 -g 0 -m pli 14 | 15 | WORKDIR /home/pli 16 | ENV PLI_DIR /usr/local/pli 17 | 18 | # The CMD is simply to run 'execute' in the WORKDIR. 19 | # The user would normally mount a volume with their own execute 20 | # script in it and then set the WORKDIR to the directory it's in. 21 | # In its absence we just run the built-in 'execute', 22 | # which is expected to echo some descriptive/helpful text. 23 | # 24 | # The default 'execute' relies on an ENV to name the pipeline it's in, 25 | # which can be defined with the docker 'pipeline' build argument. 26 | ARG pipeline=informaticsmatters/pli:latest 27 | ENV PIPELINE=$pipeline 28 | COPY execute ./ 29 | RUN chown 1001:0 ./execute && \ 30 | chmod +x ./execute 31 | CMD ["./execute"] 32 | 33 | USER 1001 34 | -------------------------------------------------------------------------------- /Dockerfile-rdkit: -------------------------------------------------------------------------------- 1 | FROM informaticsmatters/rdkit-python3-debian:Release_2020_09_1 2 | LABEL maintainer="Tim Dudgeon" 3 | 4 | USER root 5 | 6 | # install required packages 7 | RUN apt-get -y update && apt-get -y install zip unzip procps 8 | 9 | # Copy the pipeline implementation into the image 10 | COPY src/python /opt/python 11 | COPY requirements-rdkit.txt /root/ 12 | RUN pip install -e /opt/python 13 | # And the pip packages from the project requirements 14 | # NOTE: matplotlib is missing 15 | RUN pip install -r /root/requirements-rdkit.txt 16 | 17 | # The CMD is simply to run 'execute' in the WORKDIR. 18 | # The user would normally mount a volume with their own execute 19 | # script in it and then set the WORKDIR to the directory it's in. 20 | # In its absence we just run the built-in 'execute', 21 | # which is expected to echo some descriptive/helpful text. 22 | # 23 | # The default 'execute' relies on an ENV to name the pipeline it's in, 24 | # which can be defined with the docker 'pipeline' build argument. 25 | ARG pipeline=informaticsmatters/rdkit_pipelines:latest 26 | ENV PIPELINE=$pipeline 27 | WORKDIR /home/rdkit 28 | COPY execute ./ 29 | RUN chmod +x ./execute 30 | CMD ["./execute"] 31 | -------------------------------------------------------------------------------- /Dockerfile-rdkit-centos: -------------------------------------------------------------------------------- 1 | FROM informaticsmatters/rdkit-python-centos:latest 2 | LABEL maintainer="Tim Dudgeon" 3 | 4 | USER root 5 | 6 | # install required packages 7 | RUN yum -y update && yum -y install zip unzip python-devel python2-pip python-setuptools python2-matplotlib 8 | 9 | # Copy the pipeline implementation into the image 10 | COPY src/python /opt/python 11 | COPY requirements-rdkit.txt /root/ 12 | RUN pip install -e /opt/python 13 | # And the pip packages from the project requirements 14 | # NOTE: matplotlib is missing 15 | RUN pip install -r /root/requirements-rdkit.txt 16 | 17 | # The CMD is simply to run 'execute' in the WORKDIR. 18 | # The user would normally mount a volume with their own execute 19 | # script in it and then set the WORKDIR to the directory it's in. 20 | # In its absence we just run the built-in 'execute', 21 | # which is expected to echo some descriptive/helpful text. 22 | # 23 | # The default 'execute' relies on an ENV to name the pipeline it's in, 24 | # which can be defined with the docker 'pipeline' build argument. 25 | ARG pipeline=informaticsmatters/rdkit_pipelines:latest 26 | ENV PIPELINE=$pipeline 27 | WORKDIR /home/rdkit 28 | COPY execute ./ 29 | RUN chmod +x ./execute 30 | CMD ["./execute"] 31 | -------------------------------------------------------------------------------- /Dockerfile-sdposter: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | MAINTAINER Tim Dudgeon 3 | 4 | # The image tag for the pipelines we're expected to post. 5 | # By default this is 'latest' but the build environment can 6 | # use a built-arg to over-ride this. 7 | # So, a poster container image built for Git tag '1.0.0' would be expected 8 | # to have its IMAGE_TAG environment variable set to '1.0.0' and therefore 9 | # running poster:1.0.0 would inject pipelines for container image '1.0.0' 10 | ARG image_tag=latest 11 | ENV IMAGE_TAG=$image_tag 12 | 13 | # An image to populate the Core with the contents of the 14 | # Service Descriptors located in SD_SRC. 15 | 16 | ENV SD_SRC /sd-src 17 | WORKDIR ${SD_SRC} 18 | 19 | # Copy all potential Service Descriptors into the image... 20 | COPY src/python/ ${SD_SRC}/src/python/ 21 | COPY src/nextflow/ ${SD_SRC}/src/nextflow/ 22 | COPY post-service-descriptors.sh ${SD_SRC}/ 23 | RUN chmod 755 post-service-descriptors.sh 24 | 25 | # On execution copy files from source to destination... 26 | CMD ./post-service-descriptors.sh 27 | -------------------------------------------------------------------------------- /Dockerfile-smog: -------------------------------------------------------------------------------- 1 | FROM informaticsmatters/rdkit_pipelines:latest 2 | LABEL maintainer="Tim Dudgeon" 3 | 4 | 5 | USER root 6 | 7 | RUN echo 'deb http://deb.debian.org/debian experimental main' >> /etc/apt/sources.list 8 | 9 | RUN apt-get update -y && apt-get install -t experimental libopenbabel-dev g++ -y 10 | 11 | WORKDIR /usr/local 12 | RUN wget https://sourceforge.net/projects/opengrowth/files/SMoG2016.tar.gz/download -O smog.tar.gz &&\ 13 | tar xfz smog.tar.gz &&\ 14 | rm smog.tar.gz &&\ 15 | g++ -O3 -Wall -std=c++11 -Wno-uninitialized -I/usr/include/openbabel-2.0 -lm -c /usr/local/SMoG2016/SMoG2016.cpp -o /usr/local/SMoG2016/SMoG2016.o &&\ 16 | g++ /usr/local/SMoG2016/SMoG2016.o -o /usr/local/SMoG2016/SMoG2016.exe -rdynamic /usr/lib/libopenbabel.so -Wl,-rpath,/usr/lib 17 | 18 | ARG USERID=1001 19 | 20 | RUN useradd -u $USERID -g 0 -m smog 21 | 22 | # The CMD is simply to run 'execute' in the WORKDIR. 23 | # The user would normally mount a volume with their own execute 24 | # script in it and then set the WORKDIR to the directory it's in. 25 | # In its absence we just run the built-in 'execute', 26 | # which is expected to echo some descriptive/helpful text. 27 | # 28 | # The default 'execute' relies on an ENV to name the pipeline it's in, 29 | # which can be defined with the docker 'pipeline' build argument. 30 | ARG pipeline=informaticsmatters/smog:latest 31 | ENV PIPELINE=$pipeline 32 | WORKDIR /home/smog 33 | COPY execute ./ 34 | RUN chown $USERID:0 ./execute && \ 35 | chmod +x ./execute 36 | CMD ["./execute"] 37 | 38 | USER $USERID 39 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | #!groovy​ 2 | 3 | // Part of the Squonk/OepnShift CI/CD Jenkins Pipeline. 4 | // 5 | // This is the primary CI/CD pipeline, which provides basic assembly, 6 | // unit testing and Docker image construction. Other pipelines may offer 7 | // static analysis and code coverage for example. 8 | 9 | pipeline { 10 | 11 | // As we may need different flavours of agent, 12 | // the agent definition is deferred to each stage. 13 | agent none 14 | 15 | // Some environment variables for every stage... 16 | environment { 17 | 18 | USER = 'jenkins' 19 | REGISTRY = 'docker-registry.default:5000' 20 | NAMESPACE = 'squonk-cicd' 21 | 22 | PIPELINES_IMAGE = 'rdkit_pipelines' 23 | LOADER_IMAGE = "${PIPELINES_IMAGE}_loader" 24 | TAG = 'latest' 25 | 26 | P_IMAGE = "${NAMESPACE}/${PIPELINES_IMAGE}:${TAG}" 27 | L_IMAGE = "${NAMESPACE}/${LOADER_IMAGE}:${TAG}" 28 | 29 | } 30 | 31 | stages { 32 | 33 | // -------------------------------------------------------------------- 34 | // Deploy 35 | // -------------------------------------------------------------------- 36 | 37 | stage ('Deploy') { 38 | 39 | // Here we build and Deploy the docker images. 40 | // We need a custom agent that's capable of building images. 41 | agent { 42 | label 'buildah-slave' 43 | } 44 | 45 | steps { 46 | 47 | // Registry.. 48 | echo "Expecting registry at ${env.REGISTRY}" 49 | echo "Expecting registry user ${env.USER}" 50 | echo "Expecting registry project ${env.PUSH_NAMESPACE}" 51 | 52 | // Expose tool versions... 53 | sh 'buildah -v' 54 | sh 'podman -v' 55 | sh 'skopeo -v' 56 | 57 | // Build... 58 | // (Small image first) 59 | sh "buildah bud --format docker -f Dockerfile-sdposter -t ${env.P_IMAGE} ." 60 | sh "buildah bud --format docker -f Dockerfile-rdkit -t ${env.L_IMAGE} ." 61 | 62 | // Deploy... 63 | // Get user login token 64 | script { 65 | TOKEN = sh(script: 'oc whoami -t', returnStdout: true).trim() 66 | } 67 | // Login to the target registry, push images and logout 68 | sh "podman login --tls-verify=false --username ${env.USER} --password ${TOKEN} ${env.REGISTRY}" 69 | // sh "buildah push --tls-verify=false ${env.P_IMAGE} docker://${env.REGISTRY}/${env.P_IMAGE}" 70 | // sh "buildah push --tls-verify=false ${env.L_IMAGE} docker://${env.REGISTRY}/${env.L_IMAGE}" 71 | sh "podman logout ${env.REGISTRY}" 72 | 73 | } 74 | 75 | } 76 | 77 | } 78 | 79 | // End-of-pipeline post-processing actions... 80 | post { 81 | 82 | failure { 83 | mail to: 'achristie@informaticsmatters.com tdudgeon@informaticsmatters.com', 84 | subject: 'Failed Pipelines Job', 85 | body: "Something is wrong with the Squonk CI/CD PIPELINES build ${env.BUILD_URL}" 86 | } 87 | 88 | } 89 | 90 | } 91 | -------------------------------------------------------------------------------- /data/DCP2_1.pdb.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/DCP2_1.pdb.gz -------------------------------------------------------------------------------- /data/DCP2_1_confs.sdf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/DCP2_1_confs.sdf.gz -------------------------------------------------------------------------------- /data/Kinase_inhibs.sdf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/Kinase_inhibs.sdf.gz -------------------------------------------------------------------------------- /data/XChemReactionMaker1.sdf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/XChemReactionMaker1.sdf.gz -------------------------------------------------------------------------------- /data/conformers_to_align.data.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/conformers_to_align.data.gz -------------------------------------------------------------------------------- /data/dhfr_3d.data.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/dhfr_3d.data.gz -------------------------------------------------------------------------------- /data/dhfr_3d.metadata: -------------------------------------------------------------------------------- 1 | {"type": "org.squonk.types.MoleculeObject"} -------------------------------------------------------------------------------- /data/dhfr_3d.sdf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/dhfr_3d.sdf.gz -------------------------------------------------------------------------------- /data/hivpr.config.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/hivpr.config.zip -------------------------------------------------------------------------------- /data/hivpr_ligprep.sdf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/hivpr_ligprep.sdf.gz -------------------------------------------------------------------------------- /data/hivpr_ligprep_100.sdf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/hivpr_ligprep_100.sdf.gz -------------------------------------------------------------------------------- /data/hivpr_rdock.as: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/hivpr_rdock.as -------------------------------------------------------------------------------- /data/hivpr_rdock.prm: -------------------------------------------------------------------------------- 1 | RBT_PARAMETER_FILE_V1.00 2 | TITLE hivpr_DUD 3 | 4 | RECEPTOR_FILE hivpr_rdock.mol2 5 | RECEPTOR_FLEX 3.0 6 | 7 | ################################################################## 8 | ### CAVITY DEFINITION: REFERENCE LIGAND METHOD 9 | ################################################################## 10 | SECTION MAPPER 11 | SITE_MAPPER RbtLigandSiteMapper 12 | REF_MOL xtal-lig.sd 13 | RADIUS 6.0 14 | SMALL_SPHERE 1.0 15 | MIN_VOLUME 100 16 | MAX_CAVITIES 1 17 | VOL_INCR 0.0 18 | GRIDSTEP 0.5 19 | END_SECTION 20 | 21 | ################################# 22 | #CAVITY RESTRAINT PENALTY 23 | ################################# 24 | SECTION CAVITY 25 | SCORING_FUNCTION RbtCavityGridSF 26 | WEIGHT 1.0 27 | END_SECTION 28 | 29 | -------------------------------------------------------------------------------- /data/mpro/Mpro-x0387_0.mol: -------------------------------------------------------------------------------- 1 | Mpro-x0387_0 2 | RDKit 3D 3 | 4 | 13 14 0 0 0 0 0 0 0 0999 V2000 5 | 9.0650 -4.7370 27.6980 O 0 0 0 0 0 0 0 0 0 0 0 0 6 | 9.2630 -5.0400 26.3380 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 10.5520 -4.5380 25.9490 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 10.4810 -3.0380 25.6850 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 9.7300 -2.7840 24.4990 N 0 0 0 0 0 0 0 0 0 0 0 0 10 | 9.6860 -1.6170 24.1270 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 11.0770 -1.2320 23.6120 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 11.9690 -0.1320 24.1710 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | 13.2030 -0.0890 23.4410 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 13.2070 -1.2250 22.2850 S 0 0 0 0 0 0 0 0 0 0 0 0 15 | 11.7550 -1.8900 22.5360 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 8.5600 -3.6210 24.3460 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | 8.1750 -4.4830 25.5640 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 2 1 1 0 19 | 2 3 1 0 20 | 2 13 1 0 21 | 3 4 1 0 22 | 13 12 1 0 23 | 4 5 1 0 24 | 5 6 1 0 25 | 5 12 1 0 26 | 6 7 1 0 27 | 7 8 1 0 28 | 7 11 2 0 29 | 8 9 2 0 30 | 11 10 1 0 31 | 9 10 1 0 32 | M END 33 | -------------------------------------------------------------------------------- /data/mpro/Mpro-x0387_0.smi: -------------------------------------------------------------------------------- 1 | c1cc(CN2CCCCC2)cs1 2 | CN1CCC(O)CC1 3 | c1cc(OC2CCN(Cc3ccsc3)CC2)ccn1 4 | OCC1CN(Cc2ccsc2)CCC1O 5 | OCC1(O)CCN(Cc2ccsc2)CC1 6 | OC1CCN(Cc2cscc2Cl)CC1 7 | OC1CCN(Cc2cscc2C(F)(F)F)CC1 8 | OC1CCN(Cc2cscc2Br)CC1 9 | OC1CCN(Cc2csc(Cl)c2)CC1 10 | OC1CCN(Cc2csc(C(F)(F)F)c2)CC1 11 | OC1CCN(Cc2csc(Br)c2)CC1 12 | OC1CCN(Cc2ccsc2Cl)CC1 13 | OC1CCN(Cc2ccsc2C(F)(F)F)CC1 14 | OC1CCN(Cc2ccsc2Br)CC1 15 | OC1CCN(Cc2ccsc2)CC1F 16 | OC1CCN(Cc2ccsc2)C(c2ccccc2)C1 17 | OC1(c2ccccn2)CCN(Cc2ccsc2)CC1 18 | OC1(C(F)(F)F)CCN(Cc2ccsc2)CC1 19 | O=Cc1cc(CN2CCC(O)CC2)cs1 20 | NCC1(O)CCN(Cc2ccsc2)CC1 21 | NC(=O)C1(O)CCN(Cc2ccsc2)CC1 22 | N#Cc1cc(CN2CCC(O)CC2)cs1 23 | N#CC1(O)CCN(Cc2ccsc2)CC1 24 | Cc1sccc1CN1CCC(O)CC1 25 | Cc1cscc1CN1CCC(O)CC1 26 | Cc1cc(CN2CCC(O)CC2)cs1 27 | COC1CN(Cc2ccsc2)CCC1O 28 | COC(=O)c1sccc1CN1CCC(O)CC1 29 | COC(=O)c1cc(CN2CCC(O)CC2)cs1 30 | COC(=O)C1(O)CCN(Cc2ccsc2)CC1 31 | CN(C)CC1(O)CCN(Cc2ccsc2)CC1 32 | CCC1CN(Cc2ccsc2)CCC1O 33 | CCC1(O)CCN(Cc2ccsc2)CC1 34 | CC1CN(Cc2ccsc2)CCC1O 35 | CC1CC(O)CCN1Cc1ccsc1 36 | C[C@@H]1C[C@H](O)CCN1Cc1ccsc1 37 | CC1(O)CCN(Cc2ccsc2)CC1 38 | C#CC1(O)CCN(Cc2ccsc2)CC1 39 | -------------------------------------------------------------------------------- /data/mpro/Mpro-x0678_0_016.mol: -------------------------------------------------------------------------------- 1 | Mpro-x2193_0 2 | RDKit 2D 3 | 4 | 12 13 0 0 0 0 0 0 0 0999 V2000 5 | 5.0456 -2.1321 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 6 | 5.0456 0.3429 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 5.0456 -3.7821 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 8 | 5.7600 -0.0696 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 5.7600 -0.8946 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 5.0456 -1.3071 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 4.3311 -0.8946 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 4.3311 -0.0696 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | 5.7600 -2.5446 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 5.7600 -3.3696 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | 4.3311 -3.3696 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 4.3311 -2.5446 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | 1 6 1 0 18 | 1 9 1 0 19 | 1 12 1 0 20 | 6 5 1 0 21 | 6 7 1 0 22 | 9 10 1 0 23 | 12 11 1 0 24 | 2 4 1 0 25 | 2 8 1 0 26 | 4 5 1 0 27 | 8 7 1 0 28 | 3 10 1 0 29 | 3 11 1 0 30 | M END -------------------------------------------------------------------------------- /data/mpro/docking-tethered.as: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/mpro/docking-tethered.as -------------------------------------------------------------------------------- /data/mpro/docking-tethered.prm: -------------------------------------------------------------------------------- 1 | RBT_PARAMETER_FILE_V1.00 2 | TITLE Mpro virtual screening 3 | 4 | RECEPTOR_FILE receptor.mol2 5 | RECEPTOR_FLEX 3.0 6 | 7 | ################################################################## 8 | ### CAVITY DEFINITION: REFERENCE LIGAND METHOD 9 | ################################################################## 10 | SECTION MAPPER 11 | SITE_MAPPER RbtLigandSiteMapper 12 | REF_MOL hits_frankenstein.sdf 13 | RADIUS 3.0 14 | SMALL_SPHERE 1.0 15 | MIN_VOLUME 100 16 | MAX_CAVITIES 1 17 | VOL_INCR 0.0 18 | GRIDSTEP 0.5 19 | END_SECTION 20 | 21 | 22 | ################################# 23 | #CAVITY RESTRAINT PENALTY 24 | ################################# 25 | SECTION CAVITY 26 | SCORING_FUNCTION RbtCavityGridSF 27 | WEIGHT 1.0 28 | END_SECTION 29 | 30 | SECTION LIGAND 31 | TRANS_MODE TETHERED 32 | ROT_MODE TETHERED 33 | DIHEDRAL_MODE TETHERED 34 | MAX_TRANS 0.1 35 | MAX_ROT 1 36 | MAX_DIHEDRAL 1 37 | END_SECTION 38 | 39 | -------------------------------------------------------------------------------- /data/mpro/featurestein.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/mpro/featurestein.p -------------------------------------------------------------------------------- /data/mpro/hits-17.sdf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/mpro/hits-17.sdf.gz -------------------------------------------------------------------------------- /data/mpro/hits-23.sdf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/mpro/hits-23.sdf.gz -------------------------------------------------------------------------------- /data/mpro/hits-5.sdf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/mpro/hits-5.sdf.gz -------------------------------------------------------------------------------- /data/mpro/poses.sdf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/mpro/poses.sdf.gz -------------------------------------------------------------------------------- /data/nci10.smiles: -------------------------------------------------------------------------------- 1 | CC1=CC(=O)C=CC1=O 1 2 | S(SC1=NC2=CC=CC=C2S1)C3=NC4=C(S3)C=CC=C4 2 3 | OC1=C(Cl)C=C(C=C1[N+]([O-])=O)[N+]([O-])=O 3 4 | [O-][N+](=O)C1=CNC(=N)S1 4 5 | NC1=CC2=C(C=C1)C(=O)C3=C(C=CC=C3)C2=O 5 6 | OC(=O)C1=C(C=CC=C1)C2=C3C=CC(=O)C(=C3OC4=C2C=CC(=C4Br)O)Br 6 7 | CN(C)C1=C(Cl)C(=O)C2=C(C=CC=C2)C1=O 7 8 | CC1=C(C2=C(C=C1)C(=O)C3=CC=CC=C3C2=O)[N+]([O-])=O 8 9 | CC(=NO)C(C)=NO 9 10 | C1=CC=C(C=C1)P(C2=CC=CC=C2)C3=CC=CC=C3 10 11 | -------------------------------------------------------------------------------- /data/nci100.data.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/nci100.data.gz -------------------------------------------------------------------------------- /data/nudt7/ligands.data.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/nudt7/ligands.data.gz -------------------------------------------------------------------------------- /data/nudt7/ligands.sdf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/nudt7/ligands.sdf.gz -------------------------------------------------------------------------------- /data/nudt7/refmol.mol: -------------------------------------------------------------------------------- 1 | 2 | OpenBabel06051719483D 3 | 4 | 18 19 0 0 0 0 2 V2000 5 | 29.0700 -43.2240 73.7660 C 0 0 0 0 0 6 | 36.2650 -44.8070 74.9140 C 0 0 0 0 0 7 | 37.1260 -44.6280 73.8270 C 0 0 0 0 0 8 | 38.5050 -44.6030 73.9960 C 0 0 0 0 0 9 | 39.0530 -44.7650 75.2580 C 0 0 0 0 0 10 | 38.2200 -44.9420 76.3450 C 0 0 0 0 0 11 | 36.8400 -44.9680 76.1790 C 0 0 0 0 0 12 | 30.0630 -44.1750 73.1160 C 0 0 0 0 0 13 | 29.4310 -45.1480 72.1250 C 0 0 0 0 0 14 | 32.0050 -44.9160 74.3360 C 0 0 0 0 0 15 | 32.8370 -45.6680 73.5100 C 0 0 0 0 0 16 | 34.2080 -45.6230 73.7110 C 0 0 0 0 0 17 | 34.7860 -44.8390 74.7200 C 0 0 0 0 0 18 | 33.9230 -44.0930 75.5330 C 0 0 0 0 0 19 | 32.5490 -44.1280 75.3490 C 0 0 0 0 0 20 | 30.2050 -45.7640 71.3670 O 0 0 0 0 0 21 | 28.1910 -45.2760 72.1490 O 0 0 0 0 0 22 | 30.6380 -44.9430 74.1670 O 0 0 0 0 0 23 | 1 8 1 0 0 0 24 | 2 3 2 0 0 0 25 | 2 7 1 0 0 0 26 | 2 13 1 0 0 0 27 | 3 4 1 0 0 0 28 | 4 5 2 0 0 0 29 | 5 6 1 0 0 0 30 | 6 7 2 0 0 0 31 | 8 9 1 0 0 0 32 | 8 18 1 0 0 0 33 | 9 16 2 0 0 0 34 | 9 17 1 0 0 0 35 | 10 11 2 0 0 0 36 | 10 15 1 0 0 0 37 | 10 18 1 0 0 0 38 | 11 12 1 0 0 0 39 | 12 13 2 0 0 0 40 | 13 14 1 0 0 0 41 | 14 15 2 0 0 0 42 | M CHG 1 17 -1 43 | M END 44 | -------------------------------------------------------------------------------- /data/pyrimethamine.mol: -------------------------------------------------------------------------------- 1 | 1-pyrimethamine 2 | Cerius2 12180216023D 1 1.00000 3 | Structure written by MMmdl. 4 | 30 31 0 0 0 0 0 0 0 0999 V2000 5 | -2.8357 0.2028 0.4209 N 0 0 0 0 0 0 6 | -2.8255 -1.1104 0.1969 C 0 0 0 0 0 0 7 | -1.7271 -1.8218 -0.0537 N 0 0 0 0 0 0 8 | -0.5417 -1.1654 -0.0884 C 0 0 0 0 0 0 9 | -0.4439 0.2086 0.1358 C 0 0 0 0 0 0 10 | -1.6550 0.8467 0.4052 C 0 0 0 0 0 0 11 | 0.8362 0.9302 0.0951 C 0 0 0 0 0 0 12 | 1.6327 1.0444 1.2466 C 0 0 0 0 0 0 13 | 2.8536 1.7244 1.2069 C 0 0 0 0 0 0 14 | 3.2885 2.2979 0.0146 C 0 0 0 0 0 0 15 | 2.5126 2.1980 -1.1375 C 0 0 0 0 0 0 16 | 1.2918 1.5178 -1.0966 C 0 0 0 0 0 0 17 | -4.0141 -1.7715 0.2232 N 0 0 0 0 0 0 18 | -1.7348 2.2137 0.6034 N 0 0 0 0 0 0 19 | 4.7918 3.1344 -0.0351 Cl 0 0 0 0 0 0 20 | 0.4261 -3.4744 -0.6318 C 0 0 0 0 0 0 21 | 0.6932 -1.9936 -0.3864 C 0 0 0 0 0 0 22 | 1.3065 0.6014 2.1872 H 0 0 0 0 0 0 23 | 3.4548 1.8000 2.1100 H 0 0 0 0 0 0 24 | 2.8462 2.6451 -2.0710 H 0 0 0 0 0 0 25 | 0.6948 1.4500 -2.0056 H 0 0 0 0 0 0 26 | -4.0348 -2.7663 0.0556 H 0 0 0 0 0 0 27 | -4.8657 -1.2631 0.4089 H 0 0 0 0 0 0 28 | -0.9674 2.6820 1.0738 H 0 0 0 0 0 0 29 | -2.6605 2.5780 0.8038 H 0 0 0 0 0 0 30 | 1.3655 -3.9964 -0.8406 H 0 0 0 0 0 0 31 | -0.2391 -3.6219 -1.4893 H 0 0 0 0 0 0 32 | -0.0337 -3.9462 0.2432 H 0 0 0 0 0 0 33 | 1.3880 -1.9081 0.4572 H 0 0 0 0 0 0 34 | 1.1868 -1.5858 -1.2763 H 0 0 0 0 0 0 35 | 1 2 2 0 0 0 36 | 1 6 1 0 0 0 37 | 2 3 1 0 0 0 38 | 2 13 1 0 0 0 39 | 3 4 2 0 0 0 40 | 4 5 1 0 0 0 41 | 4 17 1 0 0 0 42 | 5 6 2 0 0 0 43 | 5 7 1 0 0 0 44 | 6 14 1 0 0 0 45 | 7 8 2 0 0 0 46 | 7 12 1 0 0 0 47 | 8 9 1 0 0 0 48 | 8 18 1 0 0 0 49 | 9 10 2 0 0 0 50 | 9 19 1 0 0 0 51 | 10 11 1 0 0 0 52 | 10 15 1 0 0 0 53 | 11 12 2 0 0 0 54 | 11 20 1 0 0 0 55 | 12 21 1 0 0 0 56 | 13 22 1 0 0 0 57 | 13 23 1 0 0 0 58 | 14 24 1 0 0 0 59 | 14 25 1 0 0 0 60 | 16 17 1 0 0 0 61 | 16 26 1 0 0 0 62 | 16 27 1 0 0 0 63 | 16 28 1 0 0 0 64 | 17 29 1 0 0 0 65 | 17 30 1 0 0 0 66 | M END 67 | -------------------------------------------------------------------------------- /data/ref_mol.sdf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/ref_mol.sdf.gz -------------------------------------------------------------------------------- /data/sdf-aliphatic-primary-amines-175.data.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/sdf-aliphatic-primary-amines-175.data.gz -------------------------------------------------------------------------------- /data/sdf-aliphatic-primary-amines-175.metadata: -------------------------------------------------------------------------------- 1 | {"type": "org.squonk.types.MoleculeObject"} -------------------------------------------------------------------------------- /data/sdf-aliphatic-primary-amines-175.sdf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/sdf-aliphatic-primary-amines-175.sdf.gz -------------------------------------------------------------------------------- /data/smog/confs.data.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/smog/confs.data.gz -------------------------------------------------------------------------------- /data/smog/confs.metadata: -------------------------------------------------------------------------------- 1 | {"type": "org.squonk.types.MoleculeObject"} -------------------------------------------------------------------------------- /data/sucos/4e3g_lig.mol: -------------------------------------------------------------------------------- 1 | 2 | RDKit 3D 3 | 4 | 10 10 0 0 0 0 0 0 0 0999 V2000 5 | 17.3410 1.4040 15.6300 O 0 0 0 0 0 0 0 0 0 0 0 0 6 | 16.4400 2.1870 15.2350 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 15.2530 1.8470 15.5410 O 0 0 0 0 0 0 0 0 0 0 0 0 8 | 16.7060 3.4620 14.6760 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 18.0480 3.9210 14.6600 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 18.4420 5.2100 14.1920 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 17.3440 5.9500 13.6530 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 17.5460 7.2150 13.0840 O 0 0 0 0 0 0 0 0 0 0 0 0 13 | 16.0150 5.4960 13.6580 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 15.6810 4.2780 14.2290 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | 2 1 2 0 16 | 3 2 1 0 17 | 4 2 1 0 18 | 5 4 2 0 19 | 6 5 1 0 20 | 7 6 2 0 21 | 8 7 1 0 22 | 9 7 1 0 23 | 10 9 2 0 24 | 10 4 1 0 25 | M END 26 | -------------------------------------------------------------------------------- /data/sucos/benzene.sdf: -------------------------------------------------------------------------------- 1 | 2 | PyMOL2.1 3D 0 3 | 4 | 6 6 0 0 0 0 0 0 0 0999 V2000 5 | 16.7060 3.4620 14.6760 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 18.0480 3.9210 14.6600 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 18.4420 5.2100 14.1920 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 17.3440 5.9500 13.6530 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 16.0150 5.4960 13.6580 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 15.6810 4.2780 14.2290 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 1 2 2 0 0 0 0 12 | 1 6 1 0 0 0 0 13 | 2 3 1 0 0 0 0 14 | 3 4 2 0 0 0 0 15 | 4 5 1 0 0 0 0 16 | 5 6 2 0 0 0 0 17 | M END 18 | $$$$ 19 | -------------------------------------------------------------------------------- /data/sulfonyl_chloride.sdf: -------------------------------------------------------------------------------- 1 | 2 | RDKit 3 | 4 | 12 12 0 0 0 0 0 0 0 0999 V2000 5 | 0.0000 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 6 | 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 0.0000 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 9 | 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | 0.0000 0.0000 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0 14 | 0.0000 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 15 | 0.0000 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 16 | 0.0000 0.0000 0.0000 Cl 0 0 0 0 0 0 0 0 0 0 0 0 17 | 1 2 3 0 18 | 2 3 1 0 19 | 3 4 2 0 20 | 4 5 1 0 21 | 5 6 2 0 22 | 6 7 1 0 23 | 7 8 2 0 24 | 8 9 1 0 25 | 9 10 2 0 26 | 9 11 2 0 27 | 9 12 1 0 28 | 8 3 1 0 29 | M END 30 | $$$$ 31 | -------------------------------------------------------------------------------- /environment-rdkit-utils.yml: -------------------------------------------------------------------------------- 1 | name: pipelines-utils 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.7.3 6 | - pip=20.0.2 7 | - rdkit=2020.09.1 8 | - pandas=1.0.1 9 | - scikit-learn=0.22.1 10 | - pip: 11 | - requests==2.24.0 12 | - matplotlib==2.2.* 13 | - molvs==0.1.1 14 | - standardiser==0.1.9 15 | - oddt==0.7 16 | - im-pipelines-utils 17 | - im-pipelines-utils-rdkit 18 | -------------------------------------------------------------------------------- /execute: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | echo 4 | echo "PIPELINES" 5 | echo "---------" 6 | echo "You have just run the built-in 'execute' command. Congratulations!" 7 | echo "" 8 | echo "In order to use this container productively you need to provide your own" 9 | echo "'execute' script or command and any corresponding data files." 10 | echo "" 11 | echo "You could mount a volume into this container that contains an executable" 12 | echo "file named 'execute', which contains the command you wish to run," 13 | echo "and also include any additional data files. When you run the container," 14 | echo "adjust the default container WORKDIR by using the docker '-w' option," 15 | echo "specifying the mounted path of the volume as its value. As an example," 16 | echo "if you've placed an 'execute' and its data in the current working" 17 | echo "directory you could run the container like this: -" 18 | echo "" 19 | echo " $ docker run --rm \\" 20 | echo " -v \$PWD:/squonk/work/docker \\" 21 | echo " -w=\"/squonk/work/docker\" \\" 22 | echo " $PIPELINE" 23 | echo "" 24 | echo "Alternatively you can run interactive commands by over-riding the" 25 | echo "built-in container 'CMD' (which is './execute'). To enter the" 26 | echo "container's shell (bash) using the default 'WORKDIR' you can run: -" 27 | echo "" 28 | echo " $ docker run --rm \\" 29 | echo " -it \\" 30 | echo " $PIPELINE \\" 31 | echo " bash" 32 | echo 33 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | zipStoreBase=GRADLE_USER_HOME 4 | zipStorePath=wrapper/dists 5 | distributionUrl=https\://services.gradle.org/distributions/gradle-4.4.1-bin.zip 6 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | set DIRNAME=%~dp0 12 | if "%DIRNAME%" == "" set DIRNAME=. 13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 17 | set DEFAULT_JVM_OPTS= 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | 53 | :win9xME_args 54 | @rem Slurp the command line arguments. 55 | set CMD_LINE_ARGS= 56 | set _SKIP=2 57 | 58 | :win9xME_args_slurp 59 | if "x%~1" == "x" goto execute 60 | 61 | set CMD_LINE_ARGS=%* 62 | 63 | :execute 64 | @rem Setup the command line 65 | 66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 67 | 68 | @rem Execute Gradle 69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 70 | 71 | :end 72 | @rem End local scope for the variables with windows NT shell 73 | if "%ERRORLEVEL%"=="0" goto mainEnd 74 | 75 | :fail 76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 77 | rem the _cmd.exe /c_ return code! 78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 79 | exit /b 1 80 | 81 | :mainEnd 82 | if "%OS%"=="Windows_NT" endlocal 83 | 84 | :omega 85 | -------------------------------------------------------------------------------- /post-service-descriptors.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # run locally with something like this: 3 | # ./post-service-descriptors.sh http://localhost:8091/coreservices/rest/v1/services 4 | # or 5 | # docker run -it --rm -v $PWD:$PWD:Z -w $PWD --network deploy_squonk_back centos:7 ./post-service-descriptors.sh 6 | 7 | set -e 8 | 9 | POST=${1:-http://coreservices:8080/coreservices/rest/v1/services} 10 | BASE_D=docker://github.com/InformaticsMatters/pipelines 11 | BASE_N=nextflow://github.com/InformaticsMatters/pipelines 12 | IMAGE_TAG=${IMAGE_TAG:-} 13 | CT_DJ="application/x-squonk-service-descriptor-docker+json" 14 | CT_DY="application/x-squonk-service-descriptor-docker+yaml" 15 | CT_MM="multipart/mixed" 16 | 17 | echo BASE_D="${BASE_D}" 18 | echo BASE_N="${BASE_N}" 19 | echo IMAGE_TAG="${IMAGE_TAG}" 20 | 21 | for d in 'src/python/pipelines/dmpk' \ 22 | 'src/python/pipelines/docking' \ 23 | 'src/python/pipelines/rdkit' \ 24 | 'src/python/pipelines/dimorphite' 25 | do 26 | for file in $d/*.dsd.yml 27 | do 28 | echo $file 29 | curl -X POST \ 30 | -T $file\ 31 | -H "Content-Type: $CT_DY"\ 32 | -H "Base-URL: $BASE_D"\ 33 | -H "Image-Tag: $IMAGE_TAG"\ 34 | $POST 35 | echo "" 36 | done 37 | done 38 | 39 | for d in 'src/nextflow/docking' \ 40 | 'src/nextflow/rdkit' 41 | do 42 | for file in $d/*.nsd.yml 43 | do 44 | basename=${file::-4} 45 | echo $basename 46 | curl -X POST \ 47 | -F "nextflow.nsd.yml=@${basename}.yml;type=application/x-squonk-service-descriptor-nextflow+yaml;filename=nextflow.nsd.yml"\ 48 | -F "nextflow.nf=@${basename}.nf;type=text/plain;filename=nextflow.nf"\ 49 | -F "nextflow.config=@${basename}.config;type=text/plain;filename=nextflow.config"\ 50 | -H "Content-Type: $CT_MM"\ 51 | -H "Base-URL: $BASE_N"\ 52 | $POST 53 | echo "" 54 | done 55 | done 56 | -------------------------------------------------------------------------------- /requirements-obabel.txt: -------------------------------------------------------------------------------- 1 | im-pipelines-utils==2.4.* 2 | im-pipelines-utils-rdkit==1.5.* 3 | -------------------------------------------------------------------------------- /requirements-rdkit.txt: -------------------------------------------------------------------------------- 1 | im-pipelines-utils==2.4.* 2 | im-pipelines-utils-rdkit==1.5.* 3 | matplotlib==2.2.* 4 | molvs==0.1.1 5 | standardiser==0.1.9 6 | numpy==1.19.1 7 | pandas==1.0.1 8 | scikit-learn==0.22.1 9 | requests==2.24.0 10 | oddt==0.7 11 | -------------------------------------------------------------------------------- /src/nextflow/docking/.gitignore: -------------------------------------------------------------------------------- 1 | results -------------------------------------------------------------------------------- /src/nextflow/docking/plip.config: -------------------------------------------------------------------------------- 1 | // Intentionally Empty -------------------------------------------------------------------------------- /src/nextflow/docking/plip.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* Example Nextflow pipline that runs PLI scoring 4 | */ 5 | 6 | 7 | params.ligands = 'ligands.sdf.gz' 8 | params.protein = 'protein.pdb' 9 | params.chunk = 25 10 | params.score = null 11 | 12 | 13 | ligands = file(params.ligands) 14 | protein = file(params.protein) 15 | 16 | /* Splits the input SD file into multiple files of ${params.chunk} records. 17 | * Each file is sent individually to the ligand_parts channel. 18 | * Parts are renamed so as to be in correct sorted area. 19 | */ 20 | process sdsplit { 21 | 22 | input: 23 | file ligands 24 | 25 | output: 26 | file 'ligands_part*' into ligand_parts mode flatten 27 | 28 | 29 | """ 30 | python -m pipelines_utils_rdkit.filter -i $ligands -c $params.chunk -d 5 -o ligands_part -of sdf 31 | """ 32 | } 33 | 34 | /* Scores each file from the ligand_parts channel sending each resulting SD file to the results channel 35 | */ 36 | process pli_scoring { 37 | 38 | input: 39 | file part from ligand_parts 40 | file protein 41 | 42 | output: 43 | file 'scored_part*.sdf' into scored_parts 44 | 45 | """ 46 | python -m pipelines.docking.plip -i $part -pdb $protein -o ${part.name.replace('ligands', 'scored')[0..-8]} -of sdf --no-gzip ${params.score ? ' -t ' + params.score : ''} --threads 1 &> scored_out.log 47 | """ 48 | } 49 | 50 | /* Recombine and publish the results 51 | */ 52 | process results { 53 | 54 | 55 | input: 56 | file ligands 57 | file part from scored_parts.collect() 58 | 59 | output: 60 | file 'output.sdf.gz' into results 61 | 62 | 63 | """ 64 | cat scored_part*.sdf | gzip > output.sdf.gz 65 | """ 66 | } 67 | 68 | results.println { "Results: $it" } 69 | 70 | -------------------------------------------------------------------------------- /src/nextflow/docking/plip.nsd.config: -------------------------------------------------------------------------------- 1 | // Intentionally Empty -------------------------------------------------------------------------------- /src/nextflow/docking/plip.nsd.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | params.ligands = 'ligands.data.gz' 4 | params.protein = 'protein.pdb.gz' 5 | params.chunk = 25 6 | params.score = null 7 | params.limit = 0 8 | params.digits = 4 9 | 10 | ligands = file(params.ligands) 11 | protein = file(params.protein) 12 | 13 | process splitter { 14 | 15 | container 'informaticsmatters/pli:latest' 16 | beforeScript 'chmod g+w .' 17 | 18 | input: 19 | file ligands 20 | 21 | output: 22 | file 'ligand_part*.sdf.gz' into ligand_parts mode flatten 23 | file 'ligand_part_metrics.txt' into splitter_metrics 24 | 25 | """ 26 | python -m pipelines_utils_rdkit.filter -i $ligands -c $params.chunk -l $params.limit -d $params.digits -o ligand_part -of sdf --meta 27 | """ 28 | } 29 | 30 | 31 | /* Scores each file from the ligand_parts channel sending each resulting SD file to the results channel 32 | */ 33 | process pli_scoring { 34 | 35 | container 'informaticsmatters/pli:latest' 36 | beforeScript 'chmod g+w .' 37 | 38 | input: 39 | file part from ligand_parts 40 | file protein 41 | 42 | output: 43 | file 'scored_part*.sdf' into scored_parts 44 | 45 | """ 46 | python -m pipelines.docking.plip -i $part -pdb $protein -o ${part.name.replace('ligand', 'scored')[0..-8]} -of sdf --no-gzip ${params.score ? ' -t ' + params.score : ''} --threads 1 &> scored_out.log 47 | """ 48 | } 49 | 50 | process joiner { 51 | 52 | container 'informaticsmatters/pli:latest' 53 | beforeScript 'chmod g+w .' 54 | publishDir "$baseDir/results", mode: 'move' 55 | 56 | input: 57 | file parts from scored_parts.collect() 58 | file 'splitter_metrics.txt' from splitter_metrics 59 | 60 | output: 61 | file 'output.data.gz' 62 | file 'output_metrics.txt' 63 | file 'output.metadata' 64 | 65 | """ 66 | cat scored_part*.sdf | python -m pipelines_utils_rdkit.filter -if sdf -of json -o output --meta --thin 67 | mv output_metrics.txt joiner_metrics.txt 68 | grep '__InputCount__' splitter_metrics.txt | sed s/__InputCount__/PLI/ > output_metrics.txt 69 | grep '__InputCount__' splitter_metrics.txt >> output_metrics.txt 70 | grep '__OutputCount__' joiner_metrics.txt >> output_metrics.txt 71 | echo '{"type":"org.squonk.types.BasicObject","valueClassMappings":{"pliff_cscore":"java.lang.Float","pliff_iscore":"java.lang.Float","pliff_tscore":"java.lang.Float","pliff_gscore":"java.lang.Float","pliff_score":"java.lang.Float","pliff_nb_score":"java.lang.Float"}}' > output.metadata 72 | """ 73 | } 74 | -------------------------------------------------------------------------------- /src/nextflow/docking/plip.nsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ! 3 | serviceConfig: 4 | id: "pipelines.pli.v1" 5 | "name": "PLI docking scoring" 6 | "description": "Score docked ligands with PLI" 7 | tags: 8 | - "pli" 9 | - "docking" 10 | - "scoring" 11 | - "3d" 12 | - "docker" 13 | - "nextflow" 14 | resourceUrl: null 15 | icon: "icons/filter_molecules.png" 16 | inputDescriptors: 17 | - name: "ligands" 18 | mediaType: "application/x-squonk-dataset-molecule+json" 19 | primaryType: "org.squonk.dataset.Dataset" 20 | secondaryType: "org.squonk.types.MoleculeObject" 21 | - name: "protein" 22 | mediaType: "chemical/x-pdb" 23 | primaryType: "org.squonk.types.PDBFile" 24 | outputDescriptors: 25 | - name: "output" 26 | mediaType: "application/x-squonk-dataset-molecule+json" 27 | primaryType: "org.squonk.dataset.Dataset" 28 | secondaryType: "org.squonk.types.MoleculeObject" 29 | optionDescriptors: 30 | - ! 31 | typeDescriptor: ! 32 | type: "java.lang.Float" 33 | key: "arg.score" 34 | label: "Score threshold" 35 | description: "Keep only molecules with scores better than this value" 36 | minValues: 0 37 | maxValues: 1 38 | visible: true 39 | editable: true 40 | modes: 41 | - "User" 42 | 43 | executorClassName: "org.squonk.execution.steps.impl.ThinDatasetNextflowInDockerExecutorStep" 44 | thinDescriptors: 45 | - input: ligands 46 | output: output 47 | inputRoutes: 48 | - route: "FILE" 49 | - route: "FILE" 50 | outputRoutes: 51 | - route: "FILE" 52 | nextflowParams: | 53 | ${binding.variables.containsKey('score') ? 'params.score = ' + score : ''} 54 | -------------------------------------------------------------------------------- /src/nextflow/docking/rdock-filter.nsd.config: -------------------------------------------------------------------------------- 1 | // Intentionally Empty -------------------------------------------------------------------------------- /src/nextflow/docking/rdock-filter.nsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ! 3 | serviceConfig: 4 | id: pipelines.docking.rdock.filter.v1 5 | "name": rDock filtering 6 | "description": Dock ligands with rDock filtering poses relative to reference ligand 7 | tags: 8 | - rdock 9 | - docking 10 | - scoring 11 | - 3d 12 | - docker 13 | - nextflow 14 | resourceUrl: null 15 | icon: icons/filter_molecules.png 16 | inputDescriptors: 17 | - name: receptor 18 | mediaType: chemical/x-mol2 19 | primaryType: org.squonk.types.Mol2File 20 | - name: refmol 21 | mediaType: chemical/x-mdl-molfile 22 | primaryType: org.squonk.types.MolFile 23 | - name: ligands 24 | mediaType: application/x-squonk-dataset-molecule+json 25 | primaryType: org.squonk.dataset.Dataset 26 | secondaryType: org.squonk.types.MoleculeObject 27 | outputDescriptors: 28 | - name: output 29 | mediaType: application/x-squonk-dataset-molecule+json 30 | primaryType: org.squonk.dataset.Dataset 31 | secondaryType: org.squonk.types.MoleculeObject 32 | optionDescriptors: 33 | - ! 34 | typeDescriptor: ! 35 | type: java.lang.Integer 36 | key: arg.num_dockings 37 | label: Number of dockings 38 | description: Execute this many docking runs 39 | defaultValue: 10 40 | minValues: 1 41 | maxValues: 1 42 | visible: true 43 | editable: true 44 | modes: 45 | - User 46 | - ! 47 | typeDescriptor: ! 48 | type: java.lang.Integer 49 | key: arg.top 50 | label: Top scoring poses 51 | description: Keep this many top scoring poses 52 | defaultValue: 1 53 | minValues: 1 54 | maxValues: 1 55 | visible: true 56 | editable: true 57 | modes: 58 | - User 59 | - ! 60 | typeDescriptor: ! 61 | type: java.lang.Float 62 | key: arg.threshold 63 | label: Score threshold 64 | description: Filter out poses with relative scores above this value compared to reference 65 | defaultValue: 0.0 66 | minValues: 1 67 | maxValues: 1 68 | visible: true 69 | editable: true 70 | modes: 71 | - User 72 | executorClassName: org.squonk.execution.steps.impl.ThinDatasetNextflowInDockerExecutorStep 73 | thinDescriptors: 74 | - input: ligands 75 | inputRoutes: 76 | - route: FILE 77 | - route: FILE 78 | - route: FILE 79 | outputRoutes: 80 | - route: FILE 81 | nextflowParams: | 82 | params.receptor = 'receptor.mol2.gz' 83 | params.refmol = 'refmol.mol' 84 | params.ligands = 'ligands.data.gz' 85 | ${binding.variables.containsKey('num_dockings') ? 'params.num_dockings = ' + num_dockings : ''} 86 | ${binding.variables.containsKey('top') ? 'params.top = ' + top : ''} 87 | ${binding.variables.containsKey('threshold') ? 'params.threshold = ' + threshold : ''} 88 | -------------------------------------------------------------------------------- /src/nextflow/docking/rdock.config: -------------------------------------------------------------------------------- 1 | // Intentionally Empty 2 | -------------------------------------------------------------------------------- /src/nextflow/docking/rdock.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* Example Nextflow pipeline that runs rDock docking 4 | */ 5 | 6 | 7 | params.ligands = 'ligands.sdf.gz' 8 | params.prmfile = 'receptor.prm' 9 | params.protein = 'receptor.mol2' 10 | params.asfile = 'receptor.as' 11 | params.chunk = 25 12 | 13 | params.num_dockings = 100 14 | params.top = 1 15 | params.score = null 16 | params.nscore = null 17 | params.limit = 0 18 | params.digits = 4 19 | 20 | 21 | ligands = file(params.ligands) 22 | protein = file(params.protein) 23 | prmfile = file(params.prmfile) 24 | asfile = file(params.asfile) 25 | 26 | /* Splits the input SD file into multiple files of ${params.chunk} records. 27 | * Each file is sent individually to the ligand_parts channel. 28 | * Parts are named so as to be in correct sorted area. 29 | */ 30 | process sdsplit { 31 | 32 | container 'informaticsmatters/rdkit_pipelines:latest' 33 | 34 | input: 35 | file ligands 36 | 37 | output: 38 | file 'ligands_part*.sdf' into ligand_parts mode flatten 39 | 40 | 41 | """ 42 | python -m pipelines_utils_rdkit.filter -i $ligands -c $params.chunk -d $params.digits -o ligands_part -of sdf --no-gzip 43 | """ 44 | } 45 | 46 | /* Docks each file from the ligand_parts channel sending each resulting SD file to the results channel 47 | */ 48 | process rdock { 49 | 50 | input: 51 | file part from ligand_parts 52 | file protein 53 | file prmfile 54 | file asfile 55 | 56 | output: 57 | file 'docked_part*.sd' into docked_parts 58 | 59 | """ 60 | rbdock -r $prmfile -p dock.prm -n $params.num_dockings -i $part -o ${part.name.replace('ligands', 'docked')[0..-5]} > docked_out.log 61 | """ 62 | } 63 | 64 | /* Filter, combine and publish the results 65 | */ 66 | process results { 67 | 68 | input: 69 | file part from docked_parts.collect() 70 | 71 | output: 72 | file 'results.sdf' into results 73 | 74 | """ 75 | sdsort -n -s -fSCORE docked_part*.sd |${params.score == null ? '' : " sdfilter -f'\$SCORE <= $params.score' |"}${params.nscore == null ? '' : " sdfilter -f'\$SCORE.norm <= $params.nscore' |"} sdfilter -f'\$_COUNT <= ${params.top}' > results.sdf 76 | """ 77 | } 78 | 79 | results.println { "Results: $it" } 80 | -------------------------------------------------------------------------------- /src/nextflow/docking/rdock.nsd.config: -------------------------------------------------------------------------------- 1 | // Intentionally Empty -------------------------------------------------------------------------------- /src/nextflow/docking/rdock.nsd.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* Squonk Nextflow pipline that runs Docking using rDock. 4 | * The contents of the zip file specified by params.receptor must contain the following: 5 | * 1. receptor.mol2 - the prepared protein in mol2 format 6 | * 2. receptor.as - the receptor active site definition 7 | * 3. receptor.prm - the rDock configuration file that refers to receptor.mol2 in its RECEPTOR_FILE property. 8 | * This zip file is unzipped and the contents used by rDock. 9 | * To test this manually run something like this: 10 | * nextflow run src/nextflow/docking/rdock.nsd.nf --ligands data/dhfr_3d.data.gz --receptor data/hivpr.config.zip --num_dockings 5 -with-docker informaticsmatters/rdkit_pipelines 11 | */ 12 | 13 | params.ligands = "$baseDir/ligands.data.gz" 14 | params.receptor = "$baseDir/config.zip" 15 | params.chunk = 25 16 | params.num_dockings = 100 17 | params.top = 1 18 | params.score = null 19 | params.nscore = null 20 | params.limit = 0 21 | params.digits = 4 22 | 23 | 24 | ligands = file(params.ligands) 25 | receptorzip = file(params.receptor) 26 | 27 | process unzip_config { 28 | 29 | beforeScript 'chmod g+w .' 30 | container 'informaticsmatters/rdkit_pipelines:latest' 31 | 32 | input: 33 | file receptorzip 34 | 35 | output: 36 | file 'receptor.prm' into prmfile 37 | file 'receptor.mol2' into protein 38 | file 'receptor.as' into asfile 39 | 40 | """ 41 | unzip $receptorzip 42 | """ 43 | 44 | } 45 | 46 | /* Splits the input into multiple files of ${params.chunk} records. 47 | */ 48 | process splitter { 49 | 50 | beforeScript 'chmod g+w .' 51 | container 'informaticsmatters/rdkit_pipelines:latest' 52 | 53 | input: 54 | file ligands 55 | 56 | output: 57 | file 'ligands_part*.sdf' into ligands_parts mode flatten 58 | file 'ligands_part_metrics.txt' into splitter_metrics 59 | 60 | """ 61 | python -m pipelines_utils_rdkit.filter -i $ligands -c $params.chunk -l $params.limit -d $params.digits -o ligands_part -of sdf --no-gzip --meta 62 | """ 63 | } 64 | 65 | /* Docks each file from the ligand_parts channel sending each resulting SD file to the results channel 66 | */ 67 | process rdock { 68 | 69 | container 'informaticsmatters/rdock-mini:latest' 70 | // change permissions on the work dir so that the rdock user in the container 71 | // can write to the directory that is owned by root 72 | beforeScript 'chmod g+w .' 73 | 74 | input: 75 | file part from ligands_parts 76 | file prmfile 77 | file protein 78 | file asfile 79 | 80 | output: 81 | file 'docked_part*.sd' into docked_parts 82 | 83 | """ 84 | rbdock -r $prmfile -p dock.prm -n $params.num_dockings -i $part -o ${part.name.replace('ligands', 'docked')[0..-5]} > docked_out.log 85 | """ 86 | } 87 | 88 | /* Filter, combine and publish the results 89 | */ 90 | process results { 91 | 92 | container 'informaticsmatters/rdock-mini' 93 | // change permissions - see above 94 | beforeScript 'chmod g+w .' 95 | 96 | input: 97 | file ligands 98 | file part from docked_parts.collect() 99 | 100 | output: 101 | file 'results.sdf' into results 102 | 103 | """ 104 | sdsort -n -s -fSCORE docked_part*.sd |${params.score == null ? '' : " sdfilter -f'\$SCORE <= $params.score' |"}${params.nscore == null ? '' : " sdfilter -f'\$SCORE.norm <= $params.nscore' |"} sdfilter -f'\$_COUNT <= ${params.top}' > results.sdf 105 | """ 106 | } 107 | 108 | process metrics { 109 | 110 | beforeScript 'chmod g+w .' 111 | container 'informaticsmatters/rdkit_pipelines:latest' 112 | 113 | publishDir "$baseDir/results", mode: 'move' 114 | 115 | input: 116 | file 'results.sdf' from results 117 | file 'splitter_metrics.txt' from splitter_metrics 118 | 119 | output: 120 | file 'output.data.gz' 121 | file 'output.metadata' 122 | file 'output_metrics.txt' 123 | 124 | """ 125 | python -m pipelines_utils_rdkit.filter -i results.sdf -of json -o output --meta 126 | mv output_metrics.txt old_metrics.txt 127 | grep '__InputCount__' splitter_metrics.txt | sed s/__InputCount__/DockingRDock/ > output_metrics.txt 128 | grep '__InputCount__' splitter_metrics.txt >> output_metrics.txt 129 | grep '__OutputCount__' old_metrics.txt >> output_metrics.txt 130 | """ 131 | } 132 | -------------------------------------------------------------------------------- /src/nextflow/docking/rdock.nsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ! 3 | serviceConfig: 4 | id: "pipelines.docking.rdock.basic.v2" 5 | "name": "rDock docking" 6 | "description": "Dock ligands with rDock" 7 | tags: 8 | - "rdock" 9 | - "docking" 10 | - "scoring" 11 | - "3d" 12 | - "docker" 13 | - "nextflow" 14 | resourceUrl: null 15 | icon: "icons/filter_molecules.png" 16 | inputDescriptors: 17 | - name: "ligands" 18 | mediaType: "application/x-squonk-dataset-molecule+json" 19 | primaryType: "org.squonk.dataset.Dataset" 20 | secondaryType: "org.squonk.types.MoleculeObject" 21 | - name: "config" 22 | mediaType: "application/zip" 23 | primaryType: "org.squonk.types.ZipFile" 24 | outputDescriptors: 25 | - name: "output" 26 | mediaType: "application/x-squonk-dataset-molecule+json" 27 | primaryType: "org.squonk.dataset.Dataset" 28 | secondaryType: "org.squonk.types.MoleculeObject" 29 | optionDescriptors: 30 | - ! 31 | typeDescriptor: ! 32 | type: "java.lang.Integer" 33 | key: "arg.num" 34 | label: "Number of dockings" 35 | description: "Execute this many docking runs" 36 | defaultValue: 10 37 | minValues: 1 38 | maxValues: 1 39 | visible: true 40 | editable: true 41 | modes: 42 | - "User" 43 | - ! 44 | typeDescriptor: ! 45 | type: "java.lang.Integer" 46 | key: "arg.top" 47 | label: "Top scoring poses" 48 | description: "Keep this many top scoring poses" 49 | defaultValue: 1 50 | minValues: 1 51 | maxValues: 1 52 | visible: true 53 | editable: true 54 | modes: 55 | - "User" 56 | - ! 57 | typeDescriptor: ! 58 | type: "java.lang.Float" 59 | key: "arg.score" 60 | label: "Score filter" 61 | description: "Filter out scores above this value" 62 | minValues: 0 63 | maxValues: 1 64 | visible: true 65 | editable: true 66 | modes: 67 | - "User" 68 | - ! 69 | typeDescriptor: ! 70 | type: "java.lang.Float" 71 | key: "arg.nscore" 72 | label: "Normalised score filter" 73 | description: "Filter out scores normalised by heavy atom count above this value" 74 | minValues: 0 75 | maxValues: 1 76 | visible: true 77 | editable: true 78 | modes: 79 | - "User" 80 | 81 | executorClassName: "org.squonk.execution.steps.impl.ThinDatasetNextflowInDockerExecutorStep" 82 | thinDescriptors: 83 | - input: "ligands" 84 | inputRoutes: 85 | - route: "FILE" 86 | - route: "FILE" 87 | outputRoutes: 88 | - route: "FILE" 89 | nextflowParams: | 90 | params.ligands = 'ligands.data.gz' 91 | params.receptor = 'config.zip' 92 | params.num_dockings = $num 93 | params.top = $top 94 | ${binding.variables.containsKey('score') ? 'params.score = ' + score : ''} 95 | ${binding.variables.containsKey('nscore') ? 'params.nscore = ' + nscore : ''} 96 | -------------------------------------------------------------------------------- /src/nextflow/docking/smog.config: -------------------------------------------------------------------------------- 1 | // Intentionally Empty -------------------------------------------------------------------------------- /src/nextflow/docking/smog.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* Example Nextflow pipline that runs SMoG2016 scoring 4 | */ 5 | 6 | 7 | params.ligands = 'ligands.sdf.gz' 8 | params.protein = 'protein.pdb' 9 | params.chunk = 25 10 | params.score = null 11 | 12 | 13 | ligands = file(params.ligands) 14 | protein = file(params.protein) 15 | 16 | /* Splits the input SD file into multiple files of ${params.chunk} records. 17 | * Each file is sent individually to the ligand_parts channel. 18 | * Parts are renamed so as to be in correct sorted area. 19 | */ 20 | process sdsplit { 21 | 22 | input: 23 | file ligands 24 | 25 | output: 26 | file 'ligands_part*' into ligand_parts mode flatten 27 | 28 | 29 | """ 30 | python -m pipelines_utils_rdkit.filter -i $ligands -c $params.chunk -d 5 -o ligands_part -of sdf 31 | """ 32 | } 33 | 34 | /* Scores each file from the ligand_parts channel sending each resulting SD file to the results channel 35 | */ 36 | process smog_scoring { 37 | 38 | input: 39 | file part from ligand_parts 40 | file protein 41 | 42 | output: 43 | file 'scored_part*.sdf' into scored_parts 44 | 45 | """ 46 | python -m pipelines.docking.smog2016 -i $part -pdb $protein -o ${part.name.replace('ligands', 'scored')[0..-8]} -of sdf --thin --no-gzip ${params.score ? ' -t ' + params.score : ''} --threads 1 &> scored_out.log 47 | """ 48 | } 49 | 50 | /* Recombine the results 51 | */ 52 | process results { 53 | 54 | 55 | input: 56 | file ligands 57 | file part from scored_parts.collect() 58 | 59 | output: 60 | file 'output.sdf.gz' into results 61 | 62 | 63 | """ 64 | cat scored_part*.sdf | gzip > output.sdf.gz 65 | """ 66 | } 67 | 68 | results.println { "Results: $it" } -------------------------------------------------------------------------------- /src/nextflow/docking/smog.nsd.config: -------------------------------------------------------------------------------- 1 | // Intentionally Empty -------------------------------------------------------------------------------- /src/nextflow/docking/smog.nsd.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | params.ligands = 'ligands.data.gz' 4 | params.protein = 'protein.pdb.gz' 5 | params.chunk = 25 6 | params.score = null 7 | params.limit = 0 8 | params.digits = 4 9 | 10 | ligands = file(params.ligands) 11 | protein = file(params.protein) 12 | 13 | process splitter { 14 | 15 | container 'informaticsmatters/smog:latest' 16 | beforeScript 'chmod g+w .' 17 | 18 | input: 19 | file ligands 20 | 21 | output: 22 | file 'ligand_part*.sdf.gz' into ligand_parts mode flatten 23 | file 'ligand_part_metrics.txt' into splitter_metrics 24 | 25 | """ 26 | python -m pipelines_utils_rdkit.filter -i $ligands -c $params.chunk -l $params.limit -d $params.digits -o ligand_part -of sdf --meta 27 | """ 28 | } 29 | 30 | 31 | /* Scores each file from the ligand_parts channel sending each resulting SD file to the results channel 32 | */ 33 | process smog_scoring { 34 | 35 | container 'informaticsmatters/smog:latest' 36 | beforeScript 'chmod g+w .' 37 | 38 | input: 39 | file part from ligand_parts 40 | file protein 41 | 42 | output: 43 | file 'scored_part*.sdf' into scored_parts 44 | 45 | """ 46 | python -m pipelines.docking.smog2016 -i $part -pdb $protein -o ${part.name.replace('ligand', 'scored')[0..-8]} -of sdf --no-gzip ${params.score ? ' -t ' + params.score : ''} --threads 1 &> scored_out.log 47 | """ 48 | } 49 | 50 | process joiner { 51 | 52 | container 'informaticsmatters/smog:latest' 53 | beforeScript 'chmod g+w .' 54 | publishDir "$baseDir/results", mode: 'move' 55 | 56 | input: 57 | file parts from scored_parts.collect() 58 | file 'splitter_metrics.txt' from splitter_metrics 59 | 60 | output: 61 | file 'output_metrics.txt' 62 | file 'output.data.gz' 63 | file 'output.metadata' 64 | 65 | """ 66 | cat scored_part*.sdf | python -m pipelines_utils_rdkit.filter -if sdf -of json -o output --meta --thin 67 | mv output_metrics.txt joiner_metrics.txt 68 | grep '__InputCount__' splitter_metrics.txt | sed s/__InputCount__/SMOG/ > output_metrics.txt 69 | grep '__InputCount__' splitter_metrics.txt >> output_metrics.txt 70 | grep '__OutputCount__' joiner_metrics.txt >> output_metrics.txt 71 | echo '{"type":"org.squonk.types.BasicObject","valueClassMappings":{"SMoG2016_SCORE":"java.lang.Float","EmbedRMS":"java.lang.Float"}}' > output.metadata 72 | """ 73 | } 74 | -------------------------------------------------------------------------------- /src/nextflow/docking/smog.nsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ! 3 | serviceConfig: 4 | id: "pipelines.docking.smog2016.v1" 5 | "name": "SMoG2016 docking scoring" 6 | "description": "Score docked ligands with SMoG2016" 7 | tags: 8 | - "smog2016" 9 | - "docking" 10 | - "scoring" 11 | - "3d" 12 | - "docker" 13 | - "nextflow" 14 | resourceUrl: null 15 | icon: "icons/filter_molecules.png" 16 | inputDescriptors: 17 | - name: "ligands" 18 | mediaType: "application/x-squonk-dataset-molecule+json" 19 | primaryType: "org.squonk.dataset.Dataset" 20 | secondaryType: "org.squonk.types.MoleculeObject" 21 | - name: "protein" 22 | mediaType: "chemical/x-pdb" 23 | primaryType: "org.squonk.types.PDBFile" 24 | outputDescriptors: 25 | - name: "output" 26 | mediaType: "application/x-squonk-dataset-molecule+json" 27 | primaryType: "org.squonk.dataset.Dataset" 28 | secondaryType: "org.squonk.types.MoleculeObject" 29 | optionDescriptors: 30 | - ! 31 | typeDescriptor: ! 32 | type: "java.lang.Float" 33 | key: "arg.score" 34 | label: "Score threshold" 35 | description: "Keep only molecules with scores better than this value" 36 | minValues: 0 37 | maxValues: 1 38 | visible: true 39 | editable: true 40 | modes: 41 | - "User" 42 | 43 | executorClassName: "org.squonk.execution.steps.impl.ThinDatasetNextflowInDockerExecutorStep" 44 | thinDescriptors: 45 | - input: ligands 46 | output: output 47 | inputRoutes: 48 | - route: "FILE" 49 | - route: "FILE" 50 | outputRoutes: 51 | - route: "FILE" 52 | nextflowParams: | 53 | ${binding.variables.containsKey('score') ? 'params.score = ' + score : ''} 54 | -------------------------------------------------------------------------------- /src/nextflow/nextflow-docker.config: -------------------------------------------------------------------------------- 1 | docker.enabled = true 2 | docker.mountFlags = 'z' 3 | docker.runOptions = '-u $(id -u):$(id -g)' 4 | process.container = 'busybox' 5 | -------------------------------------------------------------------------------- /src/nextflow/rdkit/screen+conformers.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* Example Nextflow pipline that runs screen.py followed by conformers.py 4 | */ 5 | 6 | params.qsmiles = 'OC(=O)C1=CC=C(NC2=NC3=C(CN=C(C4=CC(Cl)=CC=C34)C3=C(F)C=CC=C3F)C=N2)C=C1' 7 | params.target = 'data/Kinase_inhibs.sdf.gz' 8 | params.simmin = 0.7 9 | params.simmax = 1.0 10 | params.descriptor = 'rdkit' 11 | params.metric = 'tanimoto' 12 | params.num = 1 13 | params.attempts = 0 14 | 15 | target = file(params.target) 16 | 17 | process rdkitScreen { 18 | 19 | input: 20 | file target 21 | 22 | output: 23 | stdout screenOutput 24 | 25 | 26 | """ 27 | python -m pipelines.rdkit.screen --qsmiles '$params.qsmiles' --simmin $params.simmin --simmax $params.simmax -d $params.descriptor -m $params.metric -i $target 28 | """ 29 | 30 | } 31 | 32 | process rdkitConformer { 33 | 34 | input: 35 | stdin screenOutput 36 | 37 | output: 38 | file 'results.sdf.gz' into results 39 | 40 | 41 | """ 42 | python -m pipelines.rdkit.conformers -if sdf -n $params.num -a $params.attempts -o results 43 | """ 44 | 45 | } 46 | 47 | results.println { "Results: $it" } 48 | -------------------------------------------------------------------------------- /src/nextflow/rdkit/screen-dataset.nsd.config: -------------------------------------------------------------------------------- 1 | // Intentionally Empty -------------------------------------------------------------------------------- /src/nextflow/rdkit/screen-dataset.nsd.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | params.input = "$baseDir/input.data.gz" 4 | params.qsmiles 5 | params.simmin = 0.7 6 | params.simmax = 1.0 7 | params.descriptor = 'rdkit' 8 | params.metric = 'tanimoto' 9 | params.chunk = 2500 10 | params.limit = 0 11 | params.digits = 4 12 | 13 | target = file(params.input) 14 | 15 | process splitter { 16 | 17 | container 'informaticsmatters/rdkit_pipelines:latest' 18 | 19 | input: 20 | file target 21 | 22 | output: 23 | file 'target_part*.sdf.gz' into target_parts mode flatten 24 | file 'target_part_metrics.txt' into splitter_metrics 25 | 26 | """ 27 | python -m pipelines_utils_rdkit.filter -i $target -c $params.chunk -l $params.limit -d $params.digits -o target_part -of sdf --meta 28 | """ 29 | } 30 | 31 | process rdkitScreen { 32 | 33 | container 'informaticsmatters/rdkit_pipelines' 34 | 35 | input: 36 | file part from target_parts 37 | 38 | output: 39 | file 'screened_part*.sdf.gz' into screened_parts 40 | 41 | """ 42 | python -m pipelines.rdkit.screen --qsmiles '$params.qsmiles' --simmin $params.simmin --simmax $params.simmax -d $params.descriptor -m $params.metric -i $part -o ${part.name.replace('target', 'screened')[0..-8]} -of sdf 43 | """ 44 | } 45 | 46 | process joiner { 47 | 48 | container 'informaticsmatters/rdkit_pipelines:latest' 49 | 50 | publishDir "$baseDir/results", mode: 'move' 51 | 52 | input: 53 | file 'splitter_metrics.txt' from splitter_metrics 54 | file parts from screened_parts.collect() 55 | 56 | output: 57 | file 'output_metrics.txt' 58 | file 'output.data.gz' 59 | file 'output.metadata' 60 | 61 | """ 62 | zcat $parts | python -m pipelines_utils_rdkit.filter -if sdf -of json -o output --meta 63 | mv output_metrics.txt joiner_metrics.txt 64 | grep '__InputCount__' splitter_metrics.txt | sed s/__InputCount__/RDKitScreen/ > output_metrics.txt 65 | grep '__InputCount__' splitter_metrics.txt >> output_metrics.txt 66 | grep '__OutputCount__' joiner_metrics.txt >> output_metrics.txt 67 | """ 68 | } 69 | -------------------------------------------------------------------------------- /src/nextflow/rdkit/screen-dataset.nsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ! 3 | serviceConfig: 4 | id: "pipelines.rdkit.screen.basic" 5 | name: "RDKitSimilarityScreening" 6 | description: "RDKit Similarity Screening" 7 | tags: 8 | - "rdkit" 9 | - "screening" 10 | - "similarity" 11 | - "docker" 12 | - "nextflow" 13 | resourceUrl: null 14 | icon: "icons/filter_molecules.png" 15 | inputDescriptors: 16 | - name: "input" 17 | mediaType: "application/x-squonk-dataset-molecule+json" 18 | primaryType: "org.squonk.dataset.Dataset" 19 | secondaryType: "org.squonk.types.MoleculeObject" 20 | outputDescriptors: 21 | - name: "output" 22 | mediaType: "application/x-squonk-dataset-molecule+json" 23 | primaryType: "org.squonk.dataset.Dataset" 24 | secondaryType: "org.squonk.types.MoleculeObject" 25 | optionDescriptors: 26 | - ! 27 | typeDescriptor: ! 28 | type: "org.squonk.options.types.Structure" 29 | formats: ["smiles"] 30 | molType: "DISCRETE" 31 | key: "arg.query" 32 | label: "Query molecule" 33 | description: "Query molecule as smiles" 34 | visible: true 35 | editable: true 36 | modes: 37 | - "User" 38 | - ! 39 | typeDescriptor: ! 40 | type: "org.squonk.types.NumberRange$Float" 41 | key: "arg.sim" 42 | label: "Similarity" 43 | description: "Similarity threshold (1.0 is identical)" 44 | defaultValue: ! 45 | minValue: 0.7 46 | maxValue: 1.0 47 | visible: true 48 | editable: true 49 | modes: 50 | - "User" 51 | - ! 52 | typeDescriptor: ! 53 | type: "java.lang.String" 54 | key: "arg.descriptor" 55 | label: "Descriptor" 56 | description: "Descriptor/fingerprint to use" 57 | values: 58 | - "maccs" 59 | - "morgan2" 60 | - "morgan3" 61 | - "rdkit" 62 | defaultValue: "rdkit" 63 | visible: true 64 | editable: true 65 | modes: 66 | - "User" 67 | - ! 68 | typeDescriptor: ! 69 | type: "java.lang.String" 70 | key: "arg.metric" 71 | label: "Metric" 72 | description: "Similarity metric to use" 73 | values: 74 | - "asymmetric" 75 | - "braunblanquet" 76 | - "cosine" 77 | - "dice" 78 | - "kulczynski" 79 | - "mcconnaughey" 80 | - "rogotgoldberg" 81 | - "russel" 82 | - "sokal" 83 | - "tanimoto" 84 | defaultValue: "tanimoto" 85 | visible: true 86 | editable: true 87 | modes: 88 | - "User" 89 | executorClassName: "org.squonk.execution.steps.impl.ThinDatasetNextflowInDockerExecutorStep" 90 | thinDescriptors: 91 | - input: "input" 92 | output: "output" 93 | filtering: true 94 | inputRoutes: 95 | - route: "FILE" 96 | outputRoutes: 97 | - route: "FILE" 98 | nextflowParams: | 99 | params.qsmiles = '$query_source' 100 | params.simmin = $sim.minValue 101 | params.simmax = $sim.maxValue 102 | params.descriptor = '$descriptor' 103 | params.metric = '$metric' 104 | -------------------------------------------------------------------------------- /src/nextflow/rdkit/screen-multi-dataset.nsd.config: -------------------------------------------------------------------------------- 1 | // Intentionally Empty -------------------------------------------------------------------------------- /src/nextflow/rdkit/screen-multi-dataset.nsd.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | params.target = "$baseDir/target.data.gz" 4 | params.query = "$baseDir/query.data.gz" 5 | params.simmin = 0.7 6 | params.simmax = 1.0 7 | params.descriptor = 'rdkit' 8 | params.metric = 'tanimoto' 9 | params.chunk = 500 10 | params.limit = 0 11 | params.digits = 4 12 | 13 | target = file(params.target) 14 | query = file(params.query) 15 | 16 | process splitter { 17 | 18 | container 'informaticsmatters/rdkit_pipelines:latest' 19 | 20 | input: 21 | file target 22 | 23 | output: 24 | file 'target_part*.sdf.gz' into target_parts mode flatten 25 | file 'target_part_metrics.txt' into splitter_metrics 26 | 27 | """ 28 | python -m pipelines_utils_rdkit.filter -i $target -c $params.chunk -l $params.limit -d $params.digits -o target_part -of sdf --meta 29 | """ 30 | } 31 | 32 | process rdkitScreen { 33 | 34 | container 'informaticsmatters/rdkit_pipelines:latest' 35 | 36 | input: 37 | file part from target_parts 38 | file 'query.data.gz' from query 39 | 40 | output: 41 | file 'screened_part*.sdf.gz' into screened_parts 42 | 43 | """ 44 | python -m pipelines.rdkit.screen_multi -i $part --qjson query.data.gz --simmin $params.simmin --simmax $params.simmax -d $params.descriptor -m $params.metric -o ${part.name.replace('target', 'screened')[0..-8]} -of sdf 45 | """ 46 | } 47 | 48 | process joiner { 49 | 50 | container 'informaticsmatters/rdkit_pipelines:latest' 51 | 52 | publishDir "$baseDir/results", mode: 'move' 53 | 54 | input: 55 | file 'splitter_metrics.txt' from splitter_metrics 56 | file parts from screened_parts.collect() 57 | 58 | output: 59 | file 'output_metrics.txt' 60 | file 'output.data.gz' 61 | file 'output.metadata' 62 | 63 | """ 64 | zcat $parts | python -m pipelines_utils_rdkit.filter -if sdf -of json -o output --meta 65 | mv output_metrics.txt joiner_metrics.txt 66 | grep '__InputCount__' splitter_metrics.txt | sed s/__InputCount__/RDKitScreen/ > output_metrics.txt 67 | grep '__InputCount__' splitter_metrics.txt >> output_metrics.txt 68 | grep '__OutputCount__' joiner_metrics.txt >> output_metrics.txt 69 | """ 70 | } 71 | -------------------------------------------------------------------------------- /src/nextflow/rdkit/screen-multi-dataset.nsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ! 3 | serviceConfig: 4 | id: "pipelines.rdkit.screen.multi" 5 | name: "RDKitMultiSimilarityScreening" 6 | description: "RDKit Similarity Screening against multiple query structures" 7 | tags: 8 | - "rdkit" 9 | - "screening" 10 | - "similarity" 11 | - "docker" 12 | - "nextflow" 13 | resourceUrl: null 14 | icon: "icons/filter_molecules.png" 15 | inputDescriptors: 16 | - name: "target" 17 | mediaType: "application/x-squonk-dataset-molecule+json" 18 | primaryType: "org.squonk.dataset.Dataset" 19 | secondaryType: "org.squonk.types.MoleculeObject" 20 | - name: "query" 21 | mediaType: "application/x-squonk-dataset-molecule+json" 22 | primaryType: "org.squonk.dataset.Dataset" 23 | secondaryType: "org.squonk.types.MoleculeObject" 24 | outputDescriptors: 25 | - name: "output" 26 | mediaType: "application/x-squonk-dataset-molecule+json" 27 | primaryType: "org.squonk.dataset.Dataset" 28 | secondaryType: "org.squonk.types.MoleculeObject" 29 | optionDescriptors: 30 | - ! 31 | typeDescriptor: ! 32 | type: "org.squonk.types.NumberRange$Float" 33 | key: "arg.sim" 34 | label: "Similarity" 35 | description: "Similarity threshold (1.0 is identical)" 36 | defaultValue: ! 37 | minValue: 0.7 38 | maxValue: 1.0 39 | visible: true 40 | editable: true 41 | modes: 42 | - "User" 43 | - ! 44 | typeDescriptor: ! 45 | type: "java.lang.String" 46 | key: "arg.descriptor" 47 | label: "Descriptor" 48 | description: "Descriptor/fingerprint to use" 49 | values: 50 | - "maccs" 51 | - "morgan2" 52 | - "morgan3" 53 | - "rdkit" 54 | defaultValue: "rdkit" 55 | visible: true 56 | editable: true 57 | modes: 58 | - "User" 59 | - ! 60 | typeDescriptor: ! 61 | type: "java.lang.String" 62 | key: "arg.metric" 63 | label: "Metric" 64 | description: "Similarity metric to use" 65 | values: 66 | - "asymmetric" 67 | - "braunblanquet" 68 | - "cosine" 69 | - "dice" 70 | - "kulczynski" 71 | - "mcconnaughey" 72 | - "rogotgoldberg" 73 | - "russel" 74 | - "sokal" 75 | - "tanimoto" 76 | defaultValue: "tanimoto" 77 | visible: true 78 | editable: true 79 | modes: 80 | - "User" 81 | executorClassName: "org.squonk.execution.steps.impl.ThinDatasetNextflowInDockerExecutorStep" 82 | thinDescriptors: 83 | - input: "target" 84 | output: "output" 85 | filtering: true 86 | - input: "query" 87 | inputRoutes: 88 | - route: "FILE" 89 | - route: "FILE" 90 | outputRoutes: 91 | - route: "FILE" 92 | nextflowParams: | 93 | params.simmin = $sim.minValue 94 | params.simmax = $sim.maxValue 95 | params.descriptor = '$descriptor' 96 | params.metric = '$metric' 97 | -------------------------------------------------------------------------------- /src/nextflow/rdkit/screen.config: -------------------------------------------------------------------------------- 1 | // Intentionally Empty -------------------------------------------------------------------------------- /src/nextflow/rdkit/screen.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | params.qsmiles = 'OC(=O)C1=CC=C(NC2=NC3=C(CN=C(C4=CC(Cl)=CC=C34)C3=C(F)C=CC=C3F)C=N2)C=C1' 4 | params.target = 'data/Kinase_inhibs.sdf.gz' 5 | params.simmin = 0.7 6 | params.simmax = 1.0 7 | params.descriptor = 'rdkit' 8 | params.metric = 'tanimoto' 9 | 10 | target = file(params.target) 11 | 12 | process rdkitScreen { 13 | 14 | input: 15 | file target 16 | 17 | output: 18 | file 'results.sdf.gz' into results 19 | 20 | """ 21 | python -m pipelines.rdkit.screen --qsmiles '$params.qsmiles' --simmin $params.simmin --simmax $params.simmax -d $params.descriptor -m $params.metric -i $target -o results 22 | """ 23 | 24 | } 25 | 26 | results.println { "Results: $it" } -------------------------------------------------------------------------------- /src/nextflow/xchem/expand.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | // expand params 4 | params.hits = 'data/mpro/hits-5.sdf.gz' 5 | params.token = null 6 | params.hac_min = 3 7 | params.hac_max = 3 8 | params.rac_min = 1 9 | params.rac_max = 1 10 | params.hops = 1 11 | params.server = null 12 | 13 | 14 | // files 15 | hits = file(params.hits) 16 | 17 | process fragnet_expand { 18 | 19 | container 'informaticsmatters/rdkit_pipelines:latest' 20 | 21 | publishDir ".", mode: 'copy' 22 | 23 | input: 24 | file hits 25 | 26 | output: 27 | file '*.smi' into smiles 28 | file '*.mol' into mols 29 | 30 | """ 31 | python -m pipelines.xchem.fragnet_expand -i '$hits' ${params.token ? '--token ' + params.token : ''}\ 32 | --hops $params.hops\ 33 | --hac-min $params.hac_min\ 34 | --hac-max $params.hac_max\ 35 | --rac-min $params.rac_min\ 36 | --rac-max $params.rac_max\ 37 | ${params.server ? '--server ' + params.server : ''} 38 | """ 39 | } -------------------------------------------------------------------------------- /src/nextflow/xchem/featurestein.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | params.inputs = "data/mpro/poses.sdf.gz" 4 | params.fragments = "data/mpro/hits-17.sdf.gz" 5 | params.chunk = 5000 6 | params.limit = 0 7 | params.digits = 4 8 | 9 | inputs = file(params.inputs) 10 | fragments = file(params.fragments) 11 | 12 | process generate_feat_maps { 13 | 14 | container 'informaticsmatters/rdkit_pipelines:latest' 15 | 16 | input: 17 | file fragments 18 | 19 | output: 20 | file 'featurestein.p' into fmaps 21 | 22 | """ 23 | python -m pipelines.xchem.featurestein_generate -i '$fragments' -f featurestein.p 24 | """ 25 | } 26 | 27 | process splitter { 28 | 29 | container 'informaticsmatters/rdkit_pipelines:latest' 30 | 31 | input: 32 | file inputs 33 | 34 | output: 35 | file 'inputs_part*.sdf.gz' into inputs_parts mode flatten 36 | 37 | """ 38 | python -m pipelines_utils_rdkit.filter -i '$inputs' -c $params.chunk -l $params.limit -d $params.digits -o 'inputs_part_' -of sdf 39 | """ 40 | } 41 | 42 | process score { 43 | 44 | container 'informaticsmatters/rdkit_pipelines:latest' 45 | 46 | input: 47 | file part from inputs_parts 48 | file fmaps 49 | 50 | output: 51 | file 'scored_part*.sdf' into scored_parts 52 | 53 | """ 54 | python -m pipelines.xchem.featurestein_score -i '$part' -f '$fmaps' -o '${part.name.replace('inputs', 'scored')[0..-8]}' -of sdf --no-gzip 55 | """ 56 | } 57 | 58 | process joiner { 59 | 60 | container 'informaticsmatters/rdkit_pipelines:latest' 61 | 62 | publishDir ".", mode: 'link' 63 | 64 | input: 65 | file parts from scored_parts.collect() 66 | 67 | output: 68 | file 'featurestein_scored.sdf.gz' 69 | 70 | """ 71 | cat $parts | gzip > featurestein_scored.sdf.gz 72 | """ 73 | } 74 | -------------------------------------------------------------------------------- /src/nextflow/xchem/prepare-tether-featurestein.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | params.smiles = '*.smi' 4 | params.molfiles = '*.mol' 5 | params.fragments = "data/mpro/hits-17.sdf.gz" 6 | params.chunk_tether = 250 7 | params.chunk_score = 10000 8 | params.limit = 0 9 | params.digits = 4 10 | params.generate_filenames = false 11 | params.num_conformers = 10 12 | 13 | // files 14 | smilesfiles = file(params.smiles) 15 | molfiles = file(params.molfiles) 16 | fragments = file(params.fragments) 17 | 18 | process generate_feat_maps { 19 | 20 | container 'informaticsmatters/rdkit_pipelines:latest' 21 | 22 | input: 23 | file fragments 24 | 25 | output: 26 | file 'featurestein.p' into fmaps 27 | 28 | """ 29 | python -m pipelines.xchem.featurestein_generate -i '$fragments' -f featurestein.p 30 | """ 31 | } 32 | 33 | process splitter { 34 | 35 | container 'informaticsmatters/rdkit_pipelines:latest' 36 | 37 | input: 38 | file smiles from smilesfiles.flatten() 39 | file mol from molfiles.flatten() 40 | 41 | output: 42 | file '*.mol' into mols 43 | file '*.smi' into smiles 44 | 45 | """ 46 | stem=${smiles.name[0..-5]} 47 | split -l $params.chunk_tether -d -a 3 --additional-suffix .smi $smiles \${stem}_ 48 | mv $smiles ${smiles}.orig 49 | for f in *.smi 50 | do 51 | cp $mol \${f:0:-4}.mol 52 | done 53 | mv $mol ${mol}.orig 54 | """ 55 | } 56 | 57 | process tether { 58 | 59 | container 'informaticsmatters/rdkit_pipelines:latest' 60 | 61 | input: 62 | file smiles from smiles.flatten() 63 | file mol from mols.flatten() 64 | 65 | output: 66 | file 'Tethered_*.sdf' into tethered_parts 67 | 68 | """ 69 | python -m pipelines.xchem.prepare_tether --smi '$smiles' --mol '$mol' --chunk-size $params.chunk_score --num-conformers $params.num_conformers -o 'Tethered_${smiles.name[0..-5]}' 70 | """ 71 | } 72 | 73 | process score { 74 | 75 | container 'informaticsmatters/rdkit_pipelines:latest' 76 | publishDir '.' 77 | 78 | input: 79 | file part from tethered_parts.flatten() 80 | file fmaps 81 | 82 | output: 83 | file 'Scored_*.sdf' into scored_parts 84 | 85 | """ 86 | python -m pipelines.xchem.featurestein_score -i '$part' -f '$fmaps' -o 'Scored_${part.name[0..-5]}' -of sdf --no-gzip 87 | """ 88 | } 89 | -------------------------------------------------------------------------------- /src/nextflow/xchem/prepare-tether.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | params.smiles = '*.smi' 4 | params.molfiles = '*.mol' 5 | params.chunk_tether = 250 6 | params.chunk_score = 1000 7 | params.limit = 0 8 | params.num_conformers = 1 9 | params.ph_min = null 10 | params.ph_max = null 11 | params.atom_compare = 'CompareElements' 12 | params.bond_compare = 'CompareOrder' 13 | params.complete_rings_only = true 14 | params.ring_matches_ring_only = true 15 | params.minimize = 4 16 | params.timeout_embed = null 17 | 18 | 19 | smilesfiles = file(params.smiles) 20 | molfiles = file(params.molfiles) 21 | 22 | process splitter { 23 | 24 | container 'informaticsmatters/rdkit_pipelines:latest' 25 | 26 | input: 27 | file smiles from smilesfiles.flatten() 28 | file mol from molfiles.flatten() 29 | 30 | output: 31 | file '*.mol' into mols 32 | file '*.smi' into smiles 33 | 34 | """ 35 | stem=${smiles.name[0..-5]} 36 | split -l $params.chunk_tether -d -a 3 --additional-suffix .smi $smiles \${stem}_ 37 | mv $smiles ${smiles}.orig 38 | for f in *.smi 39 | do 40 | cp $mol \${f:0:-4}.mol 41 | done 42 | mv $mol ${mol}.orig 43 | """ 44 | } 45 | 46 | process tether { 47 | 48 | container 'informaticsmatters/rdkit_pipelines:latest' 49 | publishDir '.' 50 | 51 | input: 52 | file smiles from smiles.flatten() 53 | file mol from mols.flatten() 54 | 55 | output: 56 | file 'Tethered_*.sdf' into tethered_parts 57 | 58 | """ 59 | python -m pipelines.xchem.prepare_tether --smi '$smiles' --mol '$mol' --chunk-size $params.chunk_score\ 60 | -o 'Tethered_${smiles.name[0..-5]}'\ 61 | --num-conformers $params.num_conformers\ 62 | --atom-compare $params.atom_compare --bond-compare $params.bond_compare\ 63 | ${params.complete_rings_only ? '--complete-rings-only' : ''}\ 64 | ${params.ring_matches_ring_only ? '--ring-matches-ring-only' : ''}\ 65 | --minimize $params.minimize\ 66 | ${params.ph_min != null ? '--min-ph ' + params.ph_min : ''}\ 67 | ${params.ph_max != null ? '--max-ph ' + params.ph_max : ''}\ 68 | ${params.timeout_embed != null ? '--timeout-embed ' + params.timeout_embed : ''} 69 | """ 70 | } 71 | -------------------------------------------------------------------------------- /src/nextflow/xchem/xcos.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | params.inputs = 'data/mpro/poses.sdf.gz' 4 | params.fragments = 'data/mpro/hits-17.sdf.gz' 5 | params.threshold = 0.4 // XCos score theshold 6 | params.chunk = 500 // chunk size to split input into 7 | params.limit = 0 // max number of molecules to process 8 | params.digits = 4 // number of digits for the split file name number 9 | 10 | inputs = file(params.inputs) 11 | fragments = file(params.fragments) 12 | 13 | process splitter { 14 | 15 | container 'informaticsmatters/rdkit_pipelines:latest' 16 | 17 | input: 18 | file inputs 19 | 20 | output: 21 | file 'inputs_part*.sdf.gz' into inputs_parts mode flatten 22 | 23 | """ 24 | python -m pipelines_utils_rdkit.filter -i '$inputs' -c $params.chunk -l $params.limit -d $params.digits -o 'inputs_part_' -of sdf 25 | """ 26 | } 27 | 28 | process xcos { 29 | 30 | container 'informaticsmatters/rdkit_pipelines:latest' 31 | 32 | input: 33 | file part from inputs_parts 34 | file fragments 35 | 36 | output: 37 | file 'scored_part*.sdf' into scored_parts 38 | 39 | """ 40 | python -m pipelines.xchem.xcos -i '$part' -f '$fragments' -t $params.threshold -o '${part.name.replace('inputs', 'scored')[0..-8]}' -of sdf --no-gzip 41 | """ 42 | } 43 | 44 | process joiner { 45 | 46 | container 'informaticsmatters/rdkit_pipelines:latest' 47 | 48 | publishDir ".", mode: 'link' 49 | 50 | input: 51 | file parts from scored_parts.collect() 52 | 53 | output: 54 | file 'xcos_scored.sdf.gz' 55 | 56 | """ 57 | cat $parts | gzip > xcos_scored.sdf.gz 58 | """ 59 | } 60 | -------------------------------------------------------------------------------- /src/python/NNScore_pdbbind2016.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/NNScore_pdbbind2016.pickle -------------------------------------------------------------------------------- /src/python/README.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/README.rst -------------------------------------------------------------------------------- /src/python/RFScore_v1_pdbbind2016.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/RFScore_v1_pdbbind2016.pickle -------------------------------------------------------------------------------- /src/python/RFScore_v2_pdbbind2016.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/RFScore_v2_pdbbind2016.pickle -------------------------------------------------------------------------------- /src/python/RFScore_v3_pdbbind2016.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/RFScore_v3_pdbbind2016.pickle -------------------------------------------------------------------------------- /src/python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/__init__.py -------------------------------------------------------------------------------- /src/python/notebooks/default.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from collections import defaultdict\n", 12 | "import numpy as np\n", 13 | "from rdkit import Chem\n", 14 | "from rdkit.Chem import AllChem,Draw\n", 15 | "from rdkit.Chem.Draw import IPythonConsole\n", 16 | "IPythonConsole.ipython_3d=True\n", 17 | "%pylab inline\n", 18 | "from pipelines.utils import load_data\n", 19 | "\n", 20 | "# Load_data loads in all the data placed in the normal places\n", 21 | "input_data = load_data()" 22 | ] 23 | } 24 | ], 25 | "metadata": { 26 | "kernelspec": { 27 | "display_name": "Python 2", 28 | "language": "python", 29 | "name": "python2" 30 | }, 31 | "language_info": { 32 | "codemirror_mode": { 33 | "name": "ipython", 34 | "version": 2.0 35 | }, 36 | "file_extension": ".py", 37 | "mimetype": "text/x-python", 38 | "name": "python", 39 | "nbconvert_exporter": "python", 40 | "pygments_lexer": "ipython2", 41 | "version": "2.7.6" 42 | } 43 | }, 44 | "nbformat": 4, 45 | "nbformat_minor": 0 46 | } -------------------------------------------------------------------------------- /src/python/pipelines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/pipelines/__init__.py -------------------------------------------------------------------------------- /src/python/pipelines/dimorphite/README.txt: -------------------------------------------------------------------------------- 1 | This package uses dimorphite-dl from the Durrant lab at University of Pittsburg. 2 | For details see: 3 | Ropp PJ, Kaminsky JC, Yablonski S, Durrant JD (2019) Dimorphite-DL: An open-source 4 | program for enumerating the ionization states of drug-like small molecules. 5 | J Cheminform 11:14. doi:10.1186/s13321-019-0336-9. 6 | 7 | The original Dimorphite-dl code can be found here: 8 | https://git.durrantlab.pitt.edu/jdurrant/dimorphite_dl 9 | The two files dimorphite_dl.py and site_substructures.smarts are copied here. -------------------------------------------------------------------------------- /src/python/pipelines/dimorphite/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/pipelines/dimorphite/__init__.py -------------------------------------------------------------------------------- /src/python/pipelines/dimorphite/enumerate_charges.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.dimorphite.enumerate_charges.v1 5 | name: EnumerateCharges 6 | description: Generate charge forms using Dimporphite_DL 7 | tags: 8 | - rdkit 9 | - dimorphite 10 | - charges 11 | - enumerate 12 | - docker 13 | resourceUrl: 14 | icon: icons/molecule_generator.png 15 | inputDescriptors: 16 | - primaryType: org.squonk.dataset.Dataset 17 | secondaryType: org.squonk.types.MoleculeObject 18 | mediaType: application/x-squonk-dataset-molecule+json 19 | name: input 20 | outputDescriptors: 21 | - primaryType: org.squonk.dataset.Dataset 22 | secondaryType: org.squonk.types.MoleculeObject 23 | mediaType: application/x-squonk-dataset-molecule+json 24 | name: output 25 | optionDescriptors: 26 | - "@class": org.squonk.options.OptionDescriptor 27 | modes: 28 | - User 29 | typeDescriptor: 30 | type: java.lang.Float 31 | "@class": org.squonk.options.SimpleTypeDescriptor 32 | editable: true 33 | visible: true 34 | description: Minimum pH 35 | defaultValue: 5.0 36 | label: Minimum pH value 37 | key: arg.min_ph 38 | minValues: 1 39 | maxValues: 1 40 | - "@class": org.squonk.options.OptionDescriptor 41 | modes: 42 | - User 43 | typeDescriptor: 44 | type: java.lang.Float 45 | "@class": org.squonk.options.SimpleTypeDescriptor 46 | editable: true 47 | visible: true 48 | description: Maximum pH 49 | defaultValue: 9.0 50 | label: Maximum pH value 51 | key: arg.max_ph 52 | minValues: 1 53 | maxValues: 1 54 | - modes: 55 | - User 56 | editable: true 57 | "@class": org.squonk.options.OptionDescriptor 58 | typeDescriptor: 59 | type: java.lang.String 60 | "@class": org.squonk.options.SimpleTypeDescriptor 61 | key: arg.fragment_method 62 | label: Fragment method 63 | description: Approach to use for picking biggest molecular fragment 64 | values: 65 | - hac 66 | - mw 67 | defaultValue: hac 68 | visible: true 69 | executorClassName: org.squonk.execution.steps.impl.DefaultDockerExecutorStep 70 | #thinDescriptors: 71 | #- input: input 72 | inputRoutes: 73 | - route: FILE 74 | outputRoutes: 75 | - route: FILE 76 | imageName: informaticsmatters/rdkit_pipelines 77 | command: >- 78 | python -m pipelines.dimorphite.enumerate_charges -i ${PIN}input.data.gz -if json -o ${POUT}output -of json --meta 79 | ${binding.variables.containsKey('min_ph') ? ' --min-ph ' + min_ph : ''} 80 | ${binding.variables.containsKey('max_ph') ? ' --max-ph ' + max_ph : ''} 81 | ${binding.variables.containsKey('fragment_method') ? ' --fragment-method ' + fragment_method : ''} -------------------------------------------------------------------------------- /src/python/pipelines/dimorphite/enumerate_charges.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // Testing cluster_butina.py reading from STDIN and writing to STDOUT 8 | test_dimorphite_simple = [ 9 | 10 | command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz | 11 | python -m pipelines.dimorphite.enumerate_charges -if sdf''', 12 | 13 | stderr: [ 'No output format specified - using sdf', 14 | '36 1265 0'] 15 | 16 | ] 17 | ] 18 | -------------------------------------------------------------------------------- /src/python/pipelines/dimorphite/run.py: -------------------------------------------------------------------------------- 1 | from rdkit import Chem 2 | import dimorphite_dl 3 | 4 | #suppl = [Chem.MolFromSmiles(s) for s in ["C[C@](F)(Br)CC(O)=O", "CCCCCN"]] 5 | suppl = Chem.SDMolSupplier('Kinase_inhibs.sdf') 6 | #suppl = Chem.SDMolSupplier('dhfr_standardized.sdf') 7 | 8 | print(suppl) 9 | 10 | protonated_mols = dimorphite_dl.run_with_mol_list( 11 | suppl, 12 | min_ph=5.0, 13 | max_ph=9.0, 14 | ) 15 | 16 | 17 | print("Charged mols ------------------------------------------------") 18 | 19 | for m in protonated_mols: 20 | if m: 21 | print(Chem.MolToSmiles(m) + " " + ",".join(m.GetPropNames())) 22 | -------------------------------------------------------------------------------- /src/python/pipelines/dimorphite/site_substructures.smarts: -------------------------------------------------------------------------------- 1 | *Azide [N+0:1]=[N+:2]=[N+0:3]-[H] 2 4.65 0.07071067811865513 2 | Nitro [C,c,N,n,O,o:1]-[NX3:2](=[O:3])-[O:4]-[H] 3 -1000.0 0 3 | AmidineGuanidine1 [N:1]-[C:2](-[N:3])=[NX2:4]-[H:5] 3 12.025333333333334 1.5941046150769165 4 | AmidineGuanidine2 [C:1](-[N:2])=[NX2+0:3] 2 10.035538461538462 2.1312826469414716 5 | Sulfate [SX4:1](=[O:2])(=[O:3])([O:4]-[C,c,N,n:5])-[OX2:6]-[H] 5 -2.36 1.3048043093561141 6 | Sulfonate [SX4:1](=[O:2])(=[O:3])(-[C,c,N,n:4])-[OX2:5]-[H] 4 -1.8184615384615386 1.4086213481855594 7 | Sulfinic_acid [SX3:1](=[O:2])-[O:3]-[H] 2 1.7933333333333332 0.4372070447739835 8 | Phenyl_carboxyl [c,n,o:1]-[C:2](=[O:3])-[O:4]-[H] 3 3.463441968255319 1.2518054407928614 9 | Carboxyl [C:1](=[O:2])-[O:3]-[H] 2 3.456652971502591 1.2871420886834017 10 | Thioic_acid [C,c,N,n:1](=[O,S:2])-[SX2,OX2:3]-[H] 2 0.678267 1.497048763660801 11 | Phenyl_Thiol [c,n:1]-[SX2:2]-[H] 1 4.978235294117647 2.6137000480499806 12 | Thiol [C,N:1]-[SX2:2]-[H] 1 9.12448275862069 1.3317968158171463 13 | Phosphate [PX4:1](=[O:2])(-[OX2:3]-[H])(-[O+0:4])-[OX2:5]-[H] 2 2.4182608695652172 1.1091177991945305 5 6.5055 0.9512787792174668 14 | Phosphonate [PX4:1](=[O:2])(-[OX2:3]-[H])(-[C,c,N,n:4])-[OX2:5]-[H] 2 1.8835714285714287 0.5925999820080644 5 7.247254901960784 0.8511476450801531 15 | Phenol [c,n,o:1]-[O:2]-[H] 1 7.065359866910526 3.277356122295936 16 | Peroxide1 [O:1]([$(C=O),$(C[Cl]),$(CF),$(C[Br]),$(CC#N):2])-[O:3]-[H] 2 8.738888888888889 0.7562592839596507 17 | Peroxide2 [C:1]-[O:2]-[O:3]-[H] 2 11.978235294117647 0.8697645895163075 18 | O=C-C=C-OH [O:1]=[C;R:2]-[C;R:3]=[C;R:4]-[O:5]-[H] 4 3.554 0.803339458581667 19 | Vinyl_alcohol [C:1]=[C:2]-[O:3]-[H] 2 8.871850714285713 1.660200255394124 20 | Alcohol [C:1]-[O:2]-[H] 1 14.780384615384616 2.546464970533435 21 | N-hydroxyamide [C:1](=[O:2])-[N:3]-[O:4]-[H] 3 9.301904761904762 1.2181897185891002 22 | *Ringed_imide1 [O,S:1]=[C;R:2]([$([#8]),$([#7]),$([#16]),$([#6][Cl]),$([#6]F),$([#6][Br]):3])-[N;R:4]([C;R:5]=[O,S:6])-[H] 3 6.4525 0.5555627777308341 23 | *Ringed_imide2 [O,S:1]=[C;R:2]-[N;R:3]([C;R:4]=[O,S:5])-[H] 2 8.681666666666667 1.8657779975741713 24 | *Imide [F,Cl,Br,S,s,P,p:1][#6:2][CX3:3](=[O,S:4])-[NX3+0:5]([CX3:6]=[O,S:7])-[H] 4 2.466666666666667 1.4843629385474877 25 | *Imide2 [O,S:1]=[CX3:2]-[NX3+0:3]([CX3:4]=[O,S:5])-[H] 2 10.23 1.1198214143335534 26 | *Amide_electronegative [C:1](=[O:2])-[N:3](-[Br,Cl,I,F,S,O,N,P:4])-[H] 2 3.4896 2.688124315081677 27 | *Amide [C:1](=[O:2])-[N:3]-[H] 2 12.00611111111111 4.512491341218857 28 | *Sulfonamide [SX4:1](=[O:2])(=[O:3])-[NX3+0:4]-[H] 3 7.9160326086956525 1.9842121316708763 29 | Anilines_primary [c:1]-[NX3+0:2]([H:3])[H:4] 1 3.899298673194805 2.068768503987161 30 | Anilines_secondary [c:1]-[NX3+0:2]([H:3])[!H:4] 1 4.335408163265306 2.1768842022330843 31 | Anilines_tertiary [c:1]-[NX3+0:2]([!H:3])[!H:4] 1 4.16690685045614 2.005865735782679 32 | Aromatic_nitrogen_unprotonated [n+0&H0:1] 0 4.3535441240733945 2.0714072661859584 33 | Amines_primary_secondary_tertiary [C:1]-[NX3+0:2] 1 8.159107682388349 2.5183597445318147 34 | Phosphinic_acid [PX4:1](=[O:2])(-[C,c,N,n,F,Cl,Br,I:3])(-[C,c,N,n,F,Cl,Br,I:4])-[OX2:5]-[H] 4 2.9745 0.6867886750744557 35 | Phosphate_diester [PX4:1](=[O:2])(-[OX2:3]-[C,c,N,n,F,Cl,Br,I:4])(-[O+0:5]-[C,c,N,n,F,Cl,Br,I:4])-[OX2:6]-[H] 6 2.7280434782608696 2.5437448856908316 36 | Phosphonate_ester [PX4:1](=[O:2])(-[OX2:3]-[C,c,N,n,F,Cl,Br,I:4])(-[C,c,N,n,F,Cl,Br,I:5])-[OX2:6]-[H] 5 2.0868 0.4503028610465036 37 | Primary_hydroxyl_amine [C,c:1]-[O:2]-[NH2:3] 2 4.035714285714286 0.8463816543155368 38 | *Indole_pyrrole [c;R:1]1[c;R:2][c;R:3][c;R:4][n;R:5]1[H] 4 14.52875 4.06702491591416 39 | *Aromatic_nitrogen_protonated [n:1]-[H] 0 7.17 2.94602395490212 40 | -------------------------------------------------------------------------------- /src/python/pipelines/dmpk/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/pipelines/dmpk/__init__.py -------------------------------------------------------------------------------- /src/python/pipelines/dmpk/pk_tmax_cmax_sim.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.dmpk.sygnature.tmax_cmax_sim.1 5 | name: TmaxCmaxSimulation 6 | description: Simulation of Tmax and Cmax 7 | tags: 8 | - sygnature 9 | - dmpk 10 | - pk 11 | - cmax 12 | - tmax 13 | - simulation 14 | - prediction 15 | - docker 16 | resourceUrl: 17 | icon: icons/program.png 18 | outputDescriptors: 19 | - primaryType: org.squonk.types.PngImageFile 20 | mediaType: image/png 21 | name: output 22 | optionDescriptors: 23 | - modes: 24 | - User 25 | editable: true 26 | visible: true 27 | description: Half life(elim)(hr) 28 | label: Half life(elim)(hr) 29 | key: arg.halfLife 30 | minValues: 1 31 | maxValues: 1 32 | typeDescriptor: 33 | type: java.lang.Float 34 | "@class": org.squonk.options.SimpleTypeDescriptor 35 | "@class": org.squonk.options.OptionDescriptor 36 | - modes: 37 | - User 38 | editable: true 39 | visible: true 40 | description: Half life(abs)(hr) 41 | label: Half life(abs)(hr) 42 | key: arg.absorption 43 | minValues: 1 44 | maxValues: 1 45 | typeDescriptor: 46 | type: java.lang.Float 47 | "@class": org.squonk.options.SimpleTypeDescriptor 48 | "@class": org.squonk.options.OptionDescriptor 49 | - modes: 50 | - User 51 | editable: true 52 | visible: true 53 | description: Dose(mg) 54 | label: Dose(mg) 55 | key: arg.dose 56 | minValues: 1 57 | maxValues: 1 58 | typeDescriptor: 59 | type: java.lang.Float 60 | "@class": org.squonk.options.SimpleTypeDescriptor 61 | "@class": org.squonk.options.OptionDescriptor 62 | - modes: 63 | - User 64 | editable: true 65 | visible: true 66 | description: AUC(mg/L.hr) 67 | label: AUC(mg/L.hr) 68 | key: arg.auc 69 | minValues: 1 70 | maxValues: 1 71 | typeDescriptor: 72 | type: java.lang.Float 73 | "@class": org.squonk.options.SimpleTypeDescriptor 74 | "@class": org.squonk.options.OptionDescriptor 75 | - modes: 76 | - User 77 | editable: true 78 | visible: true 79 | description: Simulation Time Length(hr) 80 | label: Simulation Time Length(hr) 81 | key: arg.time 82 | minValues: 1 83 | maxValues: 1 84 | typeDescriptor: 85 | type: java.lang.Float 86 | "@class": org.squonk.options.SimpleTypeDescriptor 87 | "@class": org.squonk.options.OptionDescriptor 88 | executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep 89 | outputRoutes: 90 | - route: FILE 91 | imageName: informaticsmatters/rdkit_pipelines 92 | command: >- 93 | python -m pipelines.dmpk.pk_tmax_cmax_sim -o ${POUT}output --half-life $halfLife 94 | --absorption $absorption --dose $dose --auc $auc --time $time 95 | -------------------------------------------------------------------------------- /src/python/pipelines/dmpk/pk_tmax_cmax_sim.test: -------------------------------------------------------------------------------- 1 | // The `pk_tmax_cmax_sim` automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | setup_collection = [ 8 | creates: [ 'output.png', 9 | 'output_metrics.txt'] 10 | ], 11 | 12 | test_absorption_0_5 = [ 13 | 14 | params: [ halfLife: 0.79, 15 | absorption: 0.5, 16 | dose: 0.14, 17 | auc: 0.88, 18 | time: 8 ], 19 | 20 | stderr: [ 'kel 0.8774014943', 21 | 'ka 1.3862943611', 22 | 'Tmax 0.8988627605', 23 | 'Cmax 0.3508882480', 24 | 'V_F 0.1813205358' ], 25 | 26 | metrics: [ '__StatusMessage__': 'Tmax(hr): 0.899, Cmax(mg/L): 0.351, kel(hr-1): 0.877, ka(hr-1): 1.39, V/F(L): 0.181', 27 | 'DMPK.Syg.TmaxCmax': '1' ] 28 | 29 | ], 30 | 31 | test_raw_command = [ 32 | 33 | command: '''python -m pipelines.dmpk.pk_tmax_cmax_sim -o ${POUT}output 34 | --half-life 0.79 --absorption 0.5 --dose 0.14 35 | --auc 0.88 --time 8''', 36 | 37 | stderr: [ 'kel 0.8774014943', 38 | 'ka 1.3862943611', 39 | 'Tmax 0.8988627605', 40 | 'Cmax 0.3508882480', 41 | 'V_F 0.1813205358' ], 42 | 43 | metrics: [ '__StatusMessage__': 'Tmax(hr): 0.899, Cmax(mg/L): 0.351, kel(hr-1): 0.877, ka(hr-1): 1.39, V/F(L): 0.181', 44 | 'DMPK.Syg.TmaxCmax': '1' ] 45 | 46 | ], 47 | 48 | ] 49 | -------------------------------------------------------------------------------- /src/python/pipelines/docking/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/pipelines/docking/__init__.py -------------------------------------------------------------------------------- /src/python/pipelines/docking/obabel_prepare_pdb.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.xchem.obabel.prepare.pdb.v1 5 | name: OBabelPreparePDB 6 | description: Prepare PDB file using Open Babel 7 | tags: 8 | - rdkit 9 | - xchem 10 | - docker 11 | - openbabel 12 | - obabel 13 | - prepare 14 | - pdb 15 | - convert 16 | - protein 17 | resourceUrl: 18 | icon: icons/transform_molecule.png 19 | inputDescriptors: 20 | - primaryType: org.squonk.types.PDBFile 21 | mediaType: chemical/x-pdb 22 | name: pdb 23 | outputDescriptors: 24 | - primaryType: org.squonk.types.Mol2File 25 | mediaType: chemical/x-mol2 26 | name: mol2 27 | optionDescriptors: 28 | - modes: 29 | - User 30 | editable: true 31 | "@class": org.squonk.options.OptionDescriptor 32 | typeDescriptor: 33 | type: java.lang.Float 34 | "@class": org.squonk.options.SimpleTypeDescriptor 35 | key: arg.protonate 36 | label: Protonate at pH 37 | description: Optionally protonate side chains at this pH 38 | visible: true 39 | minValues: 0 40 | maxValues: 1 41 | executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep 42 | inputRoutes: 43 | - route: FILE 44 | outputRoutes: 45 | - route: FILE 46 | imageName: informaticsmatters/pipelines-obabel:latest 47 | command: >- 48 | python -m pipelines.docking.obabel_prepare_pdb -i ${PIN}pdb.pdb.gz -o ${POUT}mol2 -mol2 49 | ${binding.variables.containsKey('protonate') ? '-prot ' + protonate : ''} 50 | --meta -------------------------------------------------------------------------------- /src/python/pipelines/docking/obabel_prepare_pdb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2018 Informatics Matters Ltd. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import argparse 18 | import sys, subprocess 19 | 20 | from pipelines_utils import utils 21 | 22 | 23 | def execute(input, output, extension, format, ph, noGzip): 24 | 25 | # TODO - convert this to use the Python API rather than an external process 26 | 27 | filename = output + "." + extension 28 | base_args = ["obabel", "-ipdb", input, format, "-O", filename] 29 | if ph: 30 | base_args.append("-p") 31 | base_args.append(str(ph)) 32 | utils.log("Command: " + " ".join(base_args)) 33 | 34 | subprocess.check_call(base_args, stdout=sys.stderr, stderr=sys.stderr) 35 | 36 | # NOTE the -z argument does not seem to work correctly with obabel (truncated files generated) so we 37 | # fall back to good old gzip to handle the compression once the uncompressed file is created 38 | if not noGzip: 39 | subprocess.check_call(['gzip', filename], stdout=sys.stderr, stderr=sys.stderr) 40 | 41 | def main(): 42 | global PDB_PATH,WRITER,THRESHOLD 43 | parser = argparse.ArgumentParser(description='Open babel PDB prepare') 44 | parser.add_argument('--no-gzip', action='store_true', help='Do not compress the output') 45 | parser.add_argument('-i', '--input', help="PDB file for converting") 46 | parser.add_argument('-o', '--output', help="Base name for output files (no extension).") 47 | parser.add_argument('-mol2', '--mol2', action='store_true', help='Output as Mol2 format.') 48 | parser.add_argument('-pdbqt', '--pdbqt', action='store_true', help='Output as pdbqt format.') 49 | parser.add_argument('--meta', action='store_true', help='Write metrics files') 50 | parser.add_argument('-prot', '--protonate', type=float, help="protonate at this pH (optional)") 51 | 52 | args = parser.parse_args() 53 | 54 | utils.log("Prepare Args: ", args) 55 | 56 | if not (args.mol2 or args.pdbqt): 57 | raise ValueError("Must specify at least one output fromat: mol2 and/or pdbqt") 58 | 59 | 60 | if args.pdbqt: 61 | utils.log("Preparing as pdbqt") 62 | execute(args.input, args.output, "pdbqt", "-opdbqt", args.protonate, args.no_gzip) 63 | 64 | if args.mol2: 65 | utils.log("Preparing as mol2") 66 | execute(args.input, args.output, "mol2", "-omol2", args.protonate, args.no_gzip) 67 | 68 | utils.log("Preparation complete") 69 | 70 | 71 | if __name__ == "__main__": 72 | main() 73 | -------------------------------------------------------------------------------- /src/python/pipelines/docking/obabel_prepare_pdb.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // A basic start-up test for the module. 8 | // Simply makes sure it starts cleanly. 9 | ignore_test_help = [ 10 | 11 | command: '''python -m pipelines.docking.obabel_prepare_pdb -h''', 12 | 13 | stdout: [ 'usage: obabel_prepare_pdb.py' ] 14 | 15 | ], 16 | 17 | ] 18 | -------------------------------------------------------------------------------- /src/python/pipelines/docking/plip.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // A basic start-up test for the module. 8 | // Simply makes sure it starts cleanly. 9 | ignore_test_help = [ 10 | 11 | command: '''python -m pipelines.docking.plip -h''', 12 | 13 | stdout: [ 'usage: plip.py' ] 14 | 15 | ], 16 | 17 | ] 18 | -------------------------------------------------------------------------------- /src/python/pipelines/docking/smog2016.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // A basic start-up test for the module. 8 | // Simply makes sure it starts cleanly. 9 | ignore_test_help = [ 10 | 11 | command: '''python -m pipelines.docking.smog2016 -h''', 12 | 13 | stdout: [ 'usage: smog2016.py' ] 14 | 15 | ], 16 | 17 | ] 18 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/pipelines/rdkit/__init__.py -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/cluster_butina.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.rdkit.cluster.butina 5 | name: RDKitButinaClustering 6 | description: Clustering similar molecules using RDKit Butina clustering 7 | tags: 8 | - rdkit 9 | - clustering 10 | - similarity 11 | - butina 12 | - docker 13 | resourceUrl: 14 | icon: icons/clustering.png 15 | inputDescriptors: 16 | - primaryType: org.squonk.dataset.Dataset 17 | secondaryType: org.squonk.types.MoleculeObject 18 | mediaType: application/x-squonk-dataset-molecule+json 19 | name: input 20 | outputDescriptors: 21 | - primaryType: org.squonk.dataset.Dataset 22 | secondaryType: org.squonk.types.MoleculeObject 23 | mediaType: application/x-squonk-dataset-molecule+json 24 | name: output 25 | optionDescriptors: 26 | - modes: 27 | - User 28 | "@class": org.squonk.options.OptionDescriptor 29 | typeDescriptor: 30 | type: java.lang.Float 31 | "@class": org.squonk.options.SimpleTypeDescriptor 32 | key: arg.threshold 33 | label: Threshold 34 | description: Similarity threshold 35 | defaultValue: 36 | - java.lang.Float 37 | - 0.7 38 | visible: true 39 | editable: true 40 | - modes: 41 | - User 42 | editable: true 43 | "@class": org.squonk.options.OptionDescriptor 44 | typeDescriptor: 45 | type: java.lang.String 46 | "@class": org.squonk.options.SimpleTypeDescriptor 47 | key: arg.fragment_method 48 | label: Fragment method 49 | description: Approach to use for picking biggest molecular fragment 50 | values: 51 | - hac 52 | - mw 53 | defaultValue: hac 54 | visible: true 55 | - modes: 56 | - User 57 | editable: true 58 | "@class": org.squonk.options.OptionDescriptor 59 | typeDescriptor: 60 | type: java.lang.Boolean 61 | "@class": org.squonk.options.SimpleTypeDescriptor 62 | key: arg.output_fragment 63 | label: Output biggest fragment 64 | description: Output the biggest fragment rather than the whole molecule 65 | defaultValue: false 66 | visible: true 67 | - modes: 68 | - User 69 | editable: true 70 | "@class": org.squonk.options.OptionDescriptor 71 | typeDescriptor: 72 | type: java.lang.String 73 | "@class": org.squonk.options.SimpleTypeDescriptor 74 | key: arg.descriptor 75 | label: Descriptor 76 | description: Descriptor/fingerprint to use 77 | values: 78 | - maccs 79 | - morgan2 80 | - morgan3 81 | - rdkit 82 | defaultValue: rdkit 83 | visible: true 84 | - modes: 85 | - User 86 | editable: true 87 | "@class": org.squonk.options.OptionDescriptor 88 | typeDescriptor: 89 | type: java.lang.String 90 | "@class": org.squonk.options.SimpleTypeDescriptor 91 | key: arg.metric 92 | label: Metric 93 | description: Similarity metric to use 94 | values: 95 | - asymmetric 96 | - braunblanquet 97 | - cosine 98 | - dice 99 | - kulczynski 100 | - mcconnaughey 101 | - rogotgoldberg 102 | - russel 103 | - sokal 104 | - tanimoto 105 | defaultValue: tanimoto 106 | visible: true 107 | executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep 108 | thinDescriptors: 109 | - input: input 110 | output: output 111 | preserve: false 112 | inputRoutes: 113 | - route: FILE 114 | outputRoutes: 115 | - route: FILE 116 | imageName: informaticsmatters/rdkit_pipelines 117 | command: >- 118 | python -m pipelines.rdkit.cluster_butina -i ${PIN}input.data.gz -if json 119 | -o ${POUT}output -of json -t ${threshold} -d ${descriptor} -m ${metric} 120 | ${binding.variables.containsKey('fragment_method') ? ' --fragment-method ' + fragment_method : ''} 121 | ${binding.variables.containsKey('output_fragment') && output_fragment ? ' --output-fragment' : ''} --thin --meta 122 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/cluster_butina.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // Testing cluster_butina.py reading from STDIN and writing to STDOUT 8 | test_raw_cluster_butina_from_sdf_to_default = [ 9 | 10 | command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz | 11 | python -m pipelines.rdkit.cluster_butina -t 0.6 -if sdf''', 12 | 13 | stderr: [ 'No output format specified - using sdf', 14 | 'Found [1-9]\\d+ clusters', 15 | 'Final Clusters:'], 16 | 17 | ], 18 | 19 | // Testing cluster_butina.py reading from STDIN and writing to STDOUT 20 | test_raw_cluster_butina_from_sdf_to_sdf = [ 21 | 22 | command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz | 23 | python -m pipelines.rdkit.cluster_butina -t 0.6 -if sdf -of sdf''', 24 | 25 | stderr: [ "outformat='sdf'", 26 | 'Found [1-9]\\d+ clusters', 27 | 'Final Clusters:'], 28 | 29 | ], 30 | 31 | // Testing cluster_butina.py reading from STDIN and writing to STDOUT 32 | test_raw_cluster_butina_from_sdf_to_json = [ 33 | 34 | command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz | 35 | python -m pipelines.rdkit.cluster_butina -t 0.6 -if sdf -of json''', 36 | 37 | stderr: [ "outformat='json'", 38 | 'Found [1-9]\\d+ clusters', 39 | 'Final Clusters:'], 40 | 41 | ], 42 | 43 | // Testing cluster_butina.py reading from file 44 | test_raw_cluster_butina_from_json_file_to_sdf = [ 45 | 46 | command: '''python -m pipelines.rdkit.cluster_butina -t 0.6 -i ${PIN}nci100.data.gz -if json -of sdf''', 47 | 48 | stderr: [ "outformat='sdf'", 49 | 'Found [1-9]\\d+ clusters', 50 | 'Final Clusters:'], 51 | 52 | ], 53 | 54 | // Testing cluster_butina.py reading from file 55 | test_raw_cluster_butina_from_json_file_to_json = [ 56 | 57 | command: '''gunzip -c ${PIN}nci100.data.gz | 58 | python -m pipelines.rdkit.cluster_butina -t 0.6 -i ${PIN}nci100.data.gz -if json -of json''', 59 | 60 | stderr: [ "outformat='json'", 61 | 'Found [1-9]\\d+ clusters', 62 | 'Final Clusters:'], 63 | 64 | ], 65 | 66 | // Testing cluster_butina.py reading and writing from/to file 67 | test_raw_cluster_butina_from_json_to_json_file = [ 68 | 69 | command: '''gunzip -c ${PIN}nci100.data.gz | 70 | python -m pipelines.rdkit.cluster_butina -t 0.6 -i ${PIN}nci100.data.gz -if json -o ${POUT}output -of json''', 71 | 72 | stderr: [ "outformat='json'", 73 | 'Found [1-9]\\d+ clusters', 74 | 'Final Clusters:'], 75 | 76 | creates: [ 'output.data.gz' ], 77 | 78 | ], 79 | 80 | // Testing cluster_butina.py reading and writing from/to file 81 | test_raw_cluster_butina_from_sdf_to_json_file = [ 82 | 83 | command: '''gunzip -c ${PIN}nci100.data.gz | 84 | python -m pipelines.rdkit.cluster_butina -t 0.6 -i ${PIN}Kinase_inhibs.sdf.gz -if json -o ${POUT}output -of json''', 85 | 86 | stderr: [ "outformat='json'", 87 | 'Found [1-9]\\d+ clusters', 88 | 'Final Clusters:'], 89 | 90 | creates: [ 'output.data.gz' ], 91 | 92 | ], 93 | 94 | ] 95 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/cluster_butina_matrix.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.rdkit.cluster.butina.matrix.v1 5 | name: RDKitClusterMatrix 6 | description: Generate similarity matrix using RDKit Butina clustering 7 | tags: 8 | - rdkit 9 | - clustering 10 | - similarity 11 | - butina 12 | - matrix 13 | - docker 14 | resourceUrl: 15 | icon: icons/clustering.png 16 | inputDescriptors: 17 | - primaryType: org.squonk.dataset.Dataset 18 | secondaryType: org.squonk.types.MoleculeObject 19 | mediaType: application/x-squonk-dataset-molecule+json 20 | name: input 21 | outputDescriptors: 22 | - primaryType: org.squonk.dataset.Dataset 23 | secondaryType: org.squonk.types.BasicObject 24 | mediaType: application/x-squonk-dataset-basic+json 25 | name: output 26 | optionDescriptors: 27 | - modes: 28 | - User 29 | "@class": org.squonk.options.OptionDescriptor 30 | typeDescriptor: 31 | type: java.lang.Float 32 | "@class": org.squonk.options.SimpleTypeDescriptor 33 | key: arg.threshold 34 | label: Threshold 35 | description: Similarity threshold for clustering 36 | defaultValue: 37 | - java.lang.Float 38 | - 0.7 39 | visible: true 40 | editable: true 41 | - modes: 42 | - User 43 | "@class": org.squonk.options.OptionDescriptor 44 | typeDescriptor: 45 | type: java.lang.Float 46 | "@class": org.squonk.options.SimpleTypeDescriptor 47 | key: arg.matrixThreshold 48 | label: Matrix Threshold 49 | description: Only output values above this similarity threshold 50 | defaultValue: 51 | - java.lang.Float 52 | - 0.5 53 | visible: true 54 | editable: true 55 | - modes: 56 | - User 57 | editable: true 58 | "@class": org.squonk.options.OptionDescriptor 59 | typeDescriptor: 60 | type: java.lang.String 61 | "@class": org.squonk.options.SimpleTypeDescriptor 62 | key: arg.descriptor 63 | label: Descriptor 64 | description: Descriptor/fingerprint to use 65 | values: 66 | - maccs 67 | - morgan2 68 | - morgan3 69 | - rdkit 70 | defaultValue: rdkit 71 | visible: true 72 | - modes: 73 | - User 74 | editable: true 75 | "@class": org.squonk.options.OptionDescriptor 76 | typeDescriptor: 77 | type: java.lang.String 78 | "@class": org.squonk.options.SimpleTypeDescriptor 79 | key: arg.metric 80 | label: Metric 81 | description: Similarity metric to use 82 | values: 83 | - asymmetric 84 | - braunblanquet 85 | - cosine 86 | - dice 87 | - kulczynski 88 | - mcconnaughey 89 | - rogotgoldberg 90 | - russel 91 | - sokal 92 | - tanimoto 93 | defaultValue: tanimoto 94 | visible: true 95 | executorClassName: org.squonk.execution.steps.impl.DefaultDockerExecutorStep 96 | inputRoutes: 97 | - route: FILE 98 | outputRoutes: 99 | - route: FILE 100 | imageName: informaticsmatters/rdkit_pipelines 101 | command: >- 102 | python -m pipelines.rdkit.cluster_butina_matrix -i ${PIN}input.data.gz -if json -o ${POUT}output -of json 103 | -t ${threshold} -mt ${matrixThreshold} -d ${descriptor} 104 | -m ${metric} --meta 105 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/cluster_butina_matrix.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // Testing cluster_butina_matrix.py 8 | // reading from STDIN and writing TSV to file 9 | test_raw_cluster_butina_matirx_to_tsv = [ 10 | 11 | command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz | 12 | python -m pipelines.rdkit.cluster_butina_matrix 13 | -t 0.6 -if sdf -of tsv -o ${POUT}output''', 14 | 15 | stderr: [ 'Found [1-9]\\d+ clusters', 16 | 'Clusters:'], 17 | 18 | creates: [ 'output.tsv.gz' ], 19 | 20 | ], 21 | 22 | // Testing cluster_butina_matrix.py 23 | // reading from SDF file and writing TSV to file 24 | test_raw_cluster_butina_matirx_sdf_file_to_tsv = [ 25 | 26 | command: '''python -m pipelines.rdkit.cluster_butina_matrix 27 | -t 0.6 -i ${PIN}Kinase_inhibs.sdf.gz -if sdf -of tsv -o ${POUT}output''', 28 | 29 | stderr: [ 'Found [1-9]\\d+ clusters', 30 | 'Clusters:'], 31 | 32 | creates: [ 'output.tsv.gz' ], 33 | 34 | ], 35 | 36 | // Testing cluster_butina_matrix.py 37 | // reading from JSON file and writing TSV to file 38 | test_raw_cluster_butina_matirx_json_file_to_tsv = [ 39 | 40 | command: '''python -m pipelines.rdkit.cluster_butina_matrix 41 | -t 0.6 -i ${PIN}nci100.data.gz -if json -of tsv -o ${POUT}output''', 42 | 43 | stderr: [ 'Found [1-9]\\d+ clusters', 44 | 'Clusters:'], 45 | 46 | creates: [ 'output.tsv.gz' ], 47 | 48 | ], 49 | 50 | // Testing cluster_butina_matrix.py 51 | // reading from STDIN and writing JSON to file 52 | test_raw_cluster_butina_matirx_to_json = [ 53 | 54 | command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz | 55 | python -m pipelines.rdkit.cluster_butina_matrix 56 | -t 0.6 -if sdf -of json -o ${POUT}output''', 57 | 58 | stderr: [ 'Found [1-9]\\d+ clusters', 59 | 'Clusters:'], 60 | 61 | creates: [ 'output.data.gz', 62 | 'output.metadata' ], 63 | 64 | ], 65 | 66 | // Testing cluster_butina_matrix.py 67 | // reading from file (SDF) and writing JSON to file 68 | test_raw_cluster_butina_matirx_sdf_file_to_json = [ 69 | 70 | command: '''python -m pipelines.rdkit.cluster_butina_matrix 71 | -t 0.6 -i ${PIN}Kinase_inhibs.sdf.gz -if sdf -of json -o ${POUT}output''', 72 | 73 | stderr: [ 'Found [1-9]\\d+ clusters', 74 | 'Clusters:'], 75 | 76 | creates: [ 'output.data.gz', 77 | 'output.metadata' ], 78 | 79 | ], 80 | 81 | ] 82 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/conformers.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.rdkit.conformer.basic 5 | name: RDKitConformers 6 | description: Generate 3D conformers using RDKit 7 | tags: 8 | - rdkit 9 | - conformer 10 | - 3d 11 | - docker 12 | resourceUrl: 13 | icon: icons/molecule_generator.png 14 | inputDescriptors: 15 | - primaryType: org.squonk.dataset.Dataset 16 | secondaryType: org.squonk.types.MoleculeObject 17 | mediaType: application/x-squonk-dataset-molecule+json 18 | name: input 19 | outputDescriptors: 20 | - primaryType: org.squonk.dataset.Dataset 21 | secondaryType: org.squonk.types.MoleculeObject 22 | mediaType: application/x-squonk-dataset-molecule+json 23 | name: output 24 | optionDescriptors: 25 | - "@class": org.squonk.options.OptionDescriptor 26 | modes: 27 | - User 28 | typeDescriptor: 29 | type: java.lang.Integer 30 | "@class": org.squonk.options.SimpleTypeDescriptor 31 | editable: true 32 | visible: true 33 | description: Number of conformers to aim to generate 34 | defaultValue: 1 35 | label: Number of conformers 36 | key: arg.num 37 | minValues: 1 38 | maxValues: 1 39 | - modes: 40 | - User 41 | editable: true 42 | visible: true 43 | description: Number of attempts to generate conformers 44 | label: Number of attempts 45 | key: arg.attempts 46 | minValues: 0 47 | maxValues: 1 48 | typeDescriptor: 49 | type: java.lang.Integer 50 | "@class": org.squonk.options.SimpleTypeDescriptor 51 | "@class": org.squonk.options.OptionDescriptor 52 | - modes: 53 | - User 54 | editable: true 55 | visible: true 56 | description: Prune RMSD threshold for removing similar conformers 57 | label: Prune RMSD threshold 58 | key: arg.prune 59 | minValues: 0 60 | maxValues: 1 61 | typeDescriptor: 62 | type: java.lang.Float 63 | "@class": org.squonk.options.SimpleTypeDescriptor 64 | "@class": org.squonk.options.OptionDescriptor 65 | - modes: 66 | - User 67 | editable: true 68 | visible: true 69 | description: Cluster method (RMSD or TFD) 70 | label: Cluster method 71 | key: arg.method 72 | values: 73 | - RMSD 74 | - TFD 75 | defaultValue: RMSD 76 | minValues: 1 77 | maxValues: 1 78 | typeDescriptor: 79 | type: java.lang.String 80 | "@class": org.squonk.options.SimpleTypeDescriptor 81 | "@class": org.squonk.options.OptionDescriptor 82 | - modes: 83 | - User 84 | editable: true 85 | visible: true 86 | description: Cluster threshold 87 | label: Cluster threshold 88 | key: arg.threshold 89 | minValues: 0 90 | maxValues: 1 91 | typeDescriptor: 92 | type: java.lang.Float 93 | "@class": org.squonk.options.SimpleTypeDescriptor 94 | "@class": org.squonk.options.OptionDescriptor 95 | - modes: 96 | - User 97 | editable: true 98 | visible: true 99 | description: Number of energy minimization iterations 100 | defaultValue: 0 101 | label: Energy minimization iterations 102 | key: arg.minimize 103 | minValues: 1 104 | maxValues: 1 105 | typeDescriptor: 106 | type: java.lang.Integer 107 | "@class": org.squonk.options.SimpleTypeDescriptor 108 | "@class": org.squonk.options.OptionDescriptor 109 | executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep 110 | thinDescriptors: 111 | - input: input 112 | inputRoutes: 113 | - route: FILE 114 | outputRoutes: 115 | - route: FILE 116 | imageName: informaticsmatters/rdkit_pipelines 117 | command: >- 118 | python -m pipelines.rdkit.conformers -i ${PIN}input.data.gz -if json -o ${POUT}output -of json -n $num -c $method 119 | ${binding.variables.containsKey('attempts') ? ' -a ' + attempts : ''} 120 | ${binding.variables.containsKey('prune') ? ' -r ' + prune : ''} 121 | ${binding.variables.containsKey('threshold') ? ' -t ' + threshold : ''} 122 | ${binding.variables.containsKey('minimize') ? ' -e ' + minimize : ''} --meta -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/conformers.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // Testing conformers.py reading from STDIN and writing to STDOUT 8 | test_raw_conformers = [ 9 | 10 | command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz | 11 | python -m pipelines.rdkit.conformers -n 2 -if sdf''', 12 | 13 | stderr: [ 'No output format specified - using sdf', 14 | 'Molecule 36 generated [1-9]\\d* conformers' ], 15 | 16 | ], 17 | 18 | test_raw_conformers_to_sdf = [ 19 | 20 | command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz | 21 | python -m pipelines.rdkit.conformers -n 2 -if sdf -of sdf''', 22 | 23 | stderr: [ 'Molecule 36 generated [1-9]\\d* conformers' ], 24 | 25 | ], 26 | 27 | // Testing conformers.py with clustering 28 | // reading from STDIN and writing to STDOUT 29 | test_raw_conformers_with_rmsd_clustering = [ 30 | 31 | command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz | 32 | python -m pipelines.rdkit.conformers -n 2 -c RMSD -if sdf''', 33 | 34 | stderr: [ "cluster='rmsd'", 35 | 'Molecule 35 generated [1-9]\\d* conformers and [1-9]\\d* clusters' ], 36 | 37 | ], 38 | 39 | // Testing conformers.py with clustering 40 | // reading from file and writing to file 41 | test_raw_conformers_with_tfd_clustering_from_sdf_file = [ 42 | 43 | command: '''python -m pipelines.rdkit.conformers -n 2 -c TFD 44 | -i ${PIN}Kinase_inhibs.sdf.gz -if sdf''', 45 | 46 | stderr: [ "cluster='tfd'", 47 | 'Molecule 35 generated [1-9]\\d* conformers and [1-9]\\d* clusters' ], 48 | 49 | ], 50 | 51 | // Testing conformers.py with clustering 52 | // reading from file and writing to file 53 | test_raw_conformers_with_tfd_clustering_from_sdf_file_to_json_file = [ 54 | 55 | command: '''python -m pipelines.rdkit.conformers -n 2 -c TFD 56 | -i ${PIN}Kinase_inhibs.sdf.gz -if sdf -o ${POUT}output -of json''', 57 | 58 | stderr: [ "cluster='tfd'", 59 | 'Molecule 35 generated [1-9]\\d* conformers and [1-9]\\d* clusters' ], 60 | 61 | creates: [ 'output.data.gz', 62 | 'output.metadata' ], 63 | 64 | ], 65 | 66 | ] 67 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/constrained_conf_gen.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.rdkit.conformer.constrained 5 | name: RDKitConstrainedConformers 6 | description: Generate constrained 3D conformers using RDKit 7 | tags: 8 | - rdkit 9 | - conformer 10 | - 3d 11 | - docker 12 | resourceUrl: 13 | icon: icons/molecule_generator.png 14 | inputDescriptors: 15 | - primaryType: org.squonk.dataset.Dataset 16 | secondaryType: org.squonk.types.MoleculeObject 17 | mediaType: application/x-squonk-dataset-molecule+json 18 | name: input 19 | - primaryType: org.squonk.dataset.Dataset 20 | secondaryType: org.squonk.types.MoleculeObject 21 | mediaType: application/x-squonk-dataset-molecule+json 22 | name: refMol 23 | outputDescriptors: 24 | - primaryType: org.squonk.dataset.Dataset 25 | secondaryType: org.squonk.types.MoleculeObject 26 | mediaType: application/x-squonk-dataset-molecule+json 27 | name: output 28 | optionDescriptors: 29 | - "@class": org.squonk.options.OptionDescriptor 30 | modes: 31 | - User 32 | typeDescriptor: 33 | type: java.lang.Integer 34 | "@class": org.squonk.options.SimpleTypeDescriptor 35 | editable: true 36 | visible: true 37 | description: Number of conformers to generate 38 | defaultValue: 1 39 | label: Number of conformers 40 | key: arg.num 41 | minValues: 1 42 | maxValues: 1 43 | - modes: 44 | - User 45 | editable: true 46 | visible: true 47 | description: Reference molecule index 48 | label: Reference mol index 49 | key: arg.refmolidx 50 | minValues: 1 51 | maxValues: 1 52 | defaultValue: 1 53 | typeDescriptor: 54 | type: java.lang.Integer 55 | "@class": org.squonk.options.SimpleTypeDescriptor 56 | "@class": org.squonk.options.OptionDescriptor 57 | - modes: 58 | - User 59 | editable: true 60 | visible: true 61 | description: Core smiles to constrain (optional) 62 | label: Core smiles 63 | key: arg.coresmiles 64 | minValues: 0 65 | maxValues: 1 66 | typeDescriptor: 67 | type: java.lang.String 68 | "@class": org.squonk.options.SimpleTypeDescriptor 69 | "@class": org.squonk.options.OptionDescriptor 70 | executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep 71 | thinDescriptors: 72 | - input: input 73 | inputRoutes: 74 | - route: FILE 75 | - route: FILE 76 | outputRoutes: 77 | - route: FILE 78 | imageName: informaticsmatters/rdkit_pipelines 79 | command: >- 80 | python -m pipelines.rdkit.constrained_conf_gen -i ${PIN}input.data.gz -o ${POUT}output -of json -n $num 81 | -r refMol.data.gz --refmolidx $refmolidx 82 | ${binding.variables.containsKey('coresmiles') ? ' -c "' + coresmiles + '"' : ''} --meta -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/constrained_conf_gen.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // Testing constrained_conf_gen.py 8 | // reading from STDIN and writing to STDOUT 9 | test_raw_constrained_default_of = [ 10 | 11 | command: '''python -m pipelines.rdkit.constrained_conf_gen 12 | -n 2 -i ${PIN}XChemReactionMaker1.sdf.gz 13 | -r ${PIN}ref_mol.sdf.gz 14 | -o ${POUT}constrained_conf_gen''', 15 | 16 | stderr: [ 'No output format specified - using sdf' ], 17 | 18 | creates: [ 'constrained_conf_gen.sdf.gz' ], 19 | ], 20 | 21 | test_raw_constrained_sdf_of = [ 22 | 23 | command: '''python -m pipelines.rdkit.constrained_conf_gen 24 | -n 2 -i ${PIN}XChemReactionMaker1.sdf.gz 25 | -r ${PIN}ref_mol.sdf.gz 26 | -o ${POUT}constrained_conf_gen -of sdf''', 27 | 28 | creates: [ 'constrained_conf_gen.sdf.gz' ], 29 | ], 30 | 31 | test_raw_constrained_json_of = [ 32 | 33 | command: '''python -m pipelines.rdkit.constrained_conf_gen 34 | -n 2 -i ${PIN}XChemReactionMaker1.sdf.gz 35 | -r ${PIN}ref_mol.sdf.gz 36 | -o ${POUT}constrained_conf_gen -of json''', 37 | 38 | creates: [ 'constrained_conf_gen.data.gz', 39 | 'constrained_conf_gen.metadata'], 40 | ], 41 | 42 | ] 43 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/max_min_picker.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // A basic start-up test for the module. 8 | // Simply makes sure it starts cleanly. 9 | test_help = [ 10 | 11 | command: '''python -m pipelines.rdkit.max_min_picker -h''', 12 | 13 | stdout: [ 'usage: max_min_picker.py' ], 14 | 15 | ], 16 | 17 | test_missing_threshold = [ 18 | 19 | command: '''python -m pipelines.rdkit.max_min_picker''', 20 | 21 | exit_error: '--num or --threshold arguments must be specified, or both', 22 | 23 | ], 24 | 25 | test_missing_file = [ 26 | 27 | command: '''python -m pipelines.rdkit.max_min_picker -t 1.0''', 28 | 29 | exit_error: 'Must specify either an input file name or an input format (or both)', 30 | 31 | ], 32 | 33 | test_missing_sdf_input = [ 34 | 35 | command: '''python -m pipelines.rdkit.max_min_picker 36 | -i ${PIN}Kinase_inhibs.sdf.gz -if sdf -t 1.0''', 37 | 38 | stderr: [ 'No output format specified - using sdf', 39 | 'MaxMinPicking with descriptor morgan2 and threshold 1.0', 40 | 'Output 1 molecules' ], 41 | 42 | ], 43 | 44 | test_missing_json_input_sdf_output = [ 45 | 46 | command: '''python -m pipelines.rdkit.max_min_picker 47 | -i ${PIN}nci100.data.gz -if json -t 1.0 -o ${POUT}output -of sdf''', 48 | 49 | stderr: [ "outformat='sdf'", 50 | 'MaxMinPicking with descriptor morgan2 and threshold 1.0', 51 | 'Output 1 molecules' ], 52 | 53 | creates: [ 'output.sdf.gz' ], 54 | 55 | ], 56 | 57 | ] 58 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/max_min_picker_enrich.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.rdkit.maxminpicker.enrich.1 5 | name: RDKitMaxMinPickerEnrich 6 | description: RDKit MaxMin picker for enriching a dataset with a diverse selection 7 | tags: 8 | - rdkit 9 | - maxmin 10 | - picker 11 | - diverse 12 | - subset 13 | - enrich 14 | - docker 15 | resourceUrl: 16 | icon: icons/filter_molecules.png 17 | inputDescriptors: 18 | - primaryType: org.squonk.dataset.Dataset 19 | secondaryType: org.squonk.types.MoleculeObject 20 | mediaType: application/x-squonk-dataset-molecule+json 21 | name: input 22 | - primaryType: org.squonk.dataset.Dataset 23 | secondaryType: org.squonk.types.MoleculeObject 24 | mediaType: application/x-squonk-dataset-molecule+json 25 | name: seeds 26 | outputDescriptors: 27 | - primaryType: org.squonk.dataset.Dataset 28 | secondaryType: org.squonk.types.MoleculeObject 29 | mediaType: application/x-squonk-dataset-molecule+json 30 | name: output 31 | optionDescriptors: 32 | - "@class": org.squonk.options.OptionDescriptor 33 | modes: 34 | - User 35 | typeDescriptor: 36 | type: java.lang.Integer 37 | "@class": org.squonk.options.SimpleTypeDescriptor 38 | key: arg.num 39 | label: Number to pick 40 | description: Number of molecules to pick 41 | minValues: 0 42 | maxValues: 1 43 | visible: true 44 | editable: true 45 | - "@class": org.squonk.options.OptionDescriptor 46 | modes: 47 | - User 48 | typeDescriptor: 49 | type: java.lang.Float 50 | "@class": org.squonk.options.SimpleTypeDescriptor 51 | key: arg.threshold 52 | label: Threshold 53 | description: Dissimilarity threshold (0.0 is identical) 54 | minValues: 0 55 | maxValues: 1 56 | visible: true 57 | editable: true 58 | - modes: 59 | - User 60 | editable: true 61 | "@class": org.squonk.options.OptionDescriptor 62 | typeDescriptor: 63 | type: java.lang.String 64 | "@class": org.squonk.options.SimpleTypeDescriptor 65 | key: arg.fragment_method 66 | label: Fragment method 67 | description: Approach to use for picking biggest molecular fragment 68 | values: 69 | - hac 70 | - mw 71 | defaultValue: hac 72 | visible: true 73 | - modes: 74 | - User 75 | editable: true 76 | "@class": org.squonk.options.OptionDescriptor 77 | typeDescriptor: 78 | type: java.lang.Boolean 79 | "@class": org.squonk.options.SimpleTypeDescriptor 80 | key: arg.output_fragment 81 | label: Output biggest fragment 82 | description: Output the biggest fragment rather than the whole molecule 83 | defaultValue: false 84 | visible: true 85 | - modes: 86 | - User 87 | editable: true 88 | "@class": org.squonk.options.OptionDescriptor 89 | typeDescriptor: 90 | type: java.lang.String 91 | "@class": org.squonk.options.SimpleTypeDescriptor 92 | key: arg.descriptor 93 | label: Descriptor 94 | description: Descriptor/fingerprint to use 95 | values: 96 | - maccs 97 | - morgan2 98 | - morgan3 99 | defaultValue: morgan2 100 | visible: true 101 | executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep 102 | thinDescriptors: 103 | - input: input 104 | output: output 105 | filtering: true 106 | preserve: false 107 | inputRoutes: 108 | - route: FILE 109 | - route: FILE 110 | outputRoutes: 111 | - route: FILE 112 | imageName: informaticsmatters/rdkit_pipelines 113 | command: >- 114 | python -m pipelines.rdkit.max_min_picker -i ${PIN}input.data.gz -if json -s seeds.data.gz 115 | -o ${POUT}output -of json 116 | ${binding.variables.containsKey('num') ? ' --num ' + num : ''} 117 | ${binding.variables.containsKey('threshold') ? '--threshold ' + threshold : ''} 118 | --fragment-method ${fragment_method} 119 | ${output_fragment ? '--output-fragment' : ''} 120 | --descriptor ${descriptor} 121 | --meta --quiet 122 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/max_min_picker_simple.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.rdkit.maxminpicker.simple.1 5 | name: RDKitMaxMinPickerSimple 6 | description: RDKit MaxMin picker for diverse subset selection 7 | tags: 8 | - rdkit 9 | - maxmin 10 | - picker 11 | - diverse 12 | - subset 13 | - docker 14 | resourceUrl: 15 | icon: icons/filter_molecules.png 16 | inputDescriptors: 17 | - primaryType: org.squonk.dataset.Dataset 18 | secondaryType: org.squonk.types.MoleculeObject 19 | mediaType: application/x-squonk-dataset-molecule+json 20 | name: input 21 | outputDescriptors: 22 | - primaryType: org.squonk.dataset.Dataset 23 | secondaryType: org.squonk.types.MoleculeObject 24 | mediaType: application/x-squonk-dataset-molecule+json 25 | name: output 26 | optionDescriptors: 27 | - "@class": org.squonk.options.OptionDescriptor 28 | modes: 29 | - User 30 | typeDescriptor: 31 | type: java.lang.Integer 32 | "@class": org.squonk.options.SimpleTypeDescriptor 33 | key: arg.num 34 | label: Number to pick 35 | description: Number of molecules to pick 36 | minValues: 0 37 | maxValues: 1 38 | visible: true 39 | editable: true 40 | - "@class": org.squonk.options.OptionDescriptor 41 | modes: 42 | - User 43 | typeDescriptor: 44 | type: java.lang.Float 45 | "@class": org.squonk.options.SimpleTypeDescriptor 46 | key: arg.threshold 47 | label: Threshold 48 | description: Dissimilarity threshold (0.0 is identical) 49 | minValues: 0 50 | maxValues: 1 51 | visible: true 52 | editable: true 53 | - modes: 54 | - User 55 | editable: true 56 | "@class": org.squonk.options.OptionDescriptor 57 | typeDescriptor: 58 | type: java.lang.String 59 | "@class": org.squonk.options.SimpleTypeDescriptor 60 | key: arg.fragment_method 61 | label: Fragment method 62 | description: Approach to use for picking biggest molecular fragment 63 | values: 64 | - hac 65 | - mw 66 | defaultValue: hac 67 | visible: true 68 | - modes: 69 | - User 70 | editable: true 71 | "@class": org.squonk.options.OptionDescriptor 72 | typeDescriptor: 73 | type: java.lang.Boolean 74 | "@class": org.squonk.options.SimpleTypeDescriptor 75 | key: arg.output_fragment 76 | label: Output biggest fragment 77 | description: Output the biggest fragment rather than the whole molecule 78 | defaultValue: false 79 | visible: true 80 | - modes: 81 | - User 82 | editable: true 83 | "@class": org.squonk.options.OptionDescriptor 84 | typeDescriptor: 85 | type: java.lang.String 86 | "@class": org.squonk.options.SimpleTypeDescriptor 87 | key: arg.descriptor 88 | label: Descriptor 89 | description: Descriptor/fingerprint to use 90 | values: 91 | - maccs 92 | - morgan2 93 | - morgan3 94 | defaultValue: morgan2 95 | visible: true 96 | executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep 97 | thinDescriptors: 98 | - input: input 99 | output: output 100 | filtering: true 101 | preserve: false 102 | inputRoutes: 103 | - route: FILE 104 | outputRoutes: 105 | - route: FILE 106 | imageName: informaticsmatters/rdkit_pipelines 107 | command: >- 108 | python -m pipelines.rdkit.max_min_picker -i input.data.gz -if json -o output -of json 109 | ${binding.variables.containsKey('num') ? '--num ' + num : ''} 110 | ${binding.variables.containsKey('threshold') ? '--threshold ' + threshold : ''} 111 | --fragment-method ${fragment_method} 112 | ${output_fragment ? '--output-fragment' : ''} 113 | --descriptor ${descriptor} 114 | --meta --quiet 115 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/o3dAlign.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.rdkit.o3da.basic 5 | name: RDKitOpen3DAlign 6 | description: Generate 3D alignments using Open3DAlign in RDKit 7 | tags: 8 | - rdkit 9 | - conformer 10 | - alignment 11 | - open3dalign 12 | - 3d 13 | - docker 14 | resourceUrl: 15 | icon: icons/filter_molecules.png 16 | inputDescriptors: 17 | - primaryType: org.squonk.dataset.Dataset 18 | secondaryType: org.squonk.types.MoleculeObject 19 | mediaType: application/x-squonk-dataset-molecule+json 20 | name: input 21 | - primaryType: org.squonk.dataset.Dataset 22 | secondaryType: org.squonk.types.MoleculeObject 23 | mediaType: application/x-squonk-dataset-molecule+json 24 | name: queryMol 25 | outputDescriptors: 26 | - primaryType: org.squonk.dataset.Dataset 27 | secondaryType: org.squonk.types.MoleculeObject 28 | mediaType: application/x-squonk-dataset-molecule+json 29 | name: output 30 | optionDescriptors: 31 | - modes: 32 | - User 33 | editable: true 34 | visible: true 35 | description: Query molecule index 36 | label: Query mol index 37 | key: arg.qmolidx 38 | minValues: 0 39 | maxValues: 1 40 | typeDescriptor: 41 | type: java.lang.Integer 42 | "@class": org.squonk.options.SimpleTypeDescriptor 43 | "@class": org.squonk.options.OptionDescriptor 44 | - modes: 45 | - User 46 | editable: true 47 | visible: true 48 | label: O3DAlign score threshold 49 | description: Keep molecules with O3DAlign scores within this range of the score 50 | for aligning the query to itself 51 | key: arg.threshold 52 | minValues: 0 53 | maxValues: 1 54 | typeDescriptor: 55 | type: java.lang.Float 56 | "@class": org.squonk.options.SimpleTypeDescriptor 57 | "@class": org.squonk.options.OptionDescriptor 58 | - modes: 59 | - User 60 | editable: true 61 | visible: true 62 | description: Use Crippen (logP) contributions 63 | label: Use Crippen (logP) contributions 64 | key: arg.crippen 65 | minValues: 1 66 | maxValues: 1 67 | typeDescriptor: 68 | type: java.lang.Boolean 69 | "@class": org.squonk.options.SimpleTypeDescriptor 70 | "@class": org.squonk.options.OptionDescriptor 71 | - "@class": org.squonk.options.OptionDescriptor 72 | modes: 73 | - User 74 | typeDescriptor: 75 | type: java.lang.Integer 76 | "@class": org.squonk.options.SimpleTypeDescriptor 77 | editable: true 78 | visible: true 79 | description: Number of conformers to generate if not already 3D 80 | label: Number of conformers 81 | key: arg.num 82 | minValues: 0 83 | maxValues: 1 84 | executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep 85 | thinDescriptors: 86 | - input: input 87 | inputRoutes: 88 | - route: FILE 89 | - route: FILE 90 | outputRoutes: 91 | - route: FILE 92 | imageName: informaticsmatters/rdkit_pipelines 93 | command: >- 94 | python -m pipelines.rdkit.o3dAlign queryMol.data.gz -i ${PIN}input.data.gz -o ${POUT}output -of json 95 | ${binding.variables.containsKey('qmolidx') ? '--qmolidx ' + qmolidx : ''} 96 | ${binding.variables.containsKey('num') ? '--num ' + num : ''} 97 | ${binding.variables.containsKey('threshold') ? '--threshold ' + threshold : ''} 98 | ${crippen ? '--crippen' : ''} 99 | --meta -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/o3dAlign.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // Testing o3dAlign.py reading from STDIN and writing to STDOUT 8 | test_raw_o3dalign = [ 9 | 10 | command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz | 11 | python -m pipelines.rdkit.o3dAlign 12 | ${PIN}pyrimethamine.mol -n 2 -t 10 -if sdf''', 13 | 14 | stderr: [ 'No output format specified - using sdf', 15 | 'Molecule 36 generated [1-9]\\d* conformers' ], 16 | 17 | ], 18 | 19 | // Testing o3dAlign.py reading from file and writing to file 20 | test_raw_o3dalign_sdf_to_sdf = [ 21 | 22 | command: '''python -m pipelines.rdkit.o3dAlign 23 | ${PIN}pyrimethamine.mol -n 2 -t 10 24 | -i ${PIN}Kinase_inhibs.sdf.gz -if sdf 25 | -o ${POUT}output -of sdf''', 26 | 27 | stderr: [ 'Molecule 36 generated [1-9]\\d* conformers' ], 28 | 29 | creates: [ 'output.sdf.gz' ], 30 | 31 | ], 32 | 33 | // Testing o3dAlign.py reading from file and writing to file 34 | test_raw_o3dalign_sdf_to_json = [ 35 | 36 | command: '''python -m pipelines.rdkit.o3dAlign 37 | ${PIN}pyrimethamine.mol -n 2 -t 10 38 | -i ${PIN}Kinase_inhibs.sdf.gz -if sdf 39 | -o ${POUT}output -of json''', 40 | 41 | stderr: [ 'Molecule 36 generated [1-9]\\d* conformers' ], 42 | 43 | creates: [ 'output.data.gz', 44 | 'output.metadata'], 45 | 46 | ], 47 | 48 | // Testing o3dAlign.py using crippen contributions 49 | test_raw_o3dalign_sdf_to_json = [ 50 | 51 | command: '''python -m pipelines.rdkit.o3dAlign --crippen 52 | ${PIN}pyrimethamine.mol -n 2 -t 10 53 | -i ${PIN}Kinase_inhibs.sdf.gz -if sdf 54 | -o ${POUT}output -of json''', 55 | 56 | stderr: [ 'Molecule 36 generated [1-9]\\d* conformers' ], 57 | 58 | creates: [ 'output.data.gz', 59 | 'output.metadata'], 60 | 61 | ], 62 | 63 | // Uses an invalid file - but here just to improve coverage 64 | // and trap any bad Python links. 65 | test_raw_o3dalign_json_to_sdf = [ 66 | 67 | command: '''python -m pipelines.rdkit.o3dAlign 68 | ${PIN}pyrimethamine.mol -n 2 -t 10 69 | -i ${PIN}nci100.data.gz -if json 70 | -o ${POUT}output -of sdf''', 71 | 72 | exit_error: 'missing MMFF94 parameters for probe molecule', 73 | 74 | ], 75 | 76 | ] 77 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/pbf_ev.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.xchem.pbf_ev.v1 5 | name: PBF EV analyisis 6 | description: 'PBF EV analysis ' 7 | tags: 8 | - rdkit 9 | - xchem 10 | - docker 11 | - pbfev 12 | - icr 13 | - joshuameyers 14 | resourceUrl: 15 | icon: icons/transform_molecule.png 16 | inputDescriptors: 17 | - primaryType: org.squonk.dataset.Dataset 18 | secondaryType: org.squonk.types.MoleculeObject 19 | mediaType: application/x-squonk-dataset-molecule+json 20 | name: input 21 | outputDescriptors: 22 | - primaryType: org.squonk.dataset.Dataset 23 | secondaryType: org.squonk.types.MoleculeObject 24 | mediaType: application/x-squonk-dataset-molecule+json 25 | name: output 26 | optionDescriptors: [] 27 | executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep 28 | inputRoutes: 29 | - route: FILE 30 | outputRoutes: 31 | - route: FILE 32 | imageName: informaticsmatters/rdkit_pipelines 33 | command: >- 34 | python -m pipelines.rdkit.pbf_ev -i ${PIN}input.data.gz -if json -o ${POUT}output -of json --meta -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/pbf_ev.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // Testing pbf_ev.py reading from files 8 | test_pbf_ev_raw = [ 9 | 10 | command: '''python -m pipelines.rdkit.pbf_ev 11 | -i ${PIN}dhfr_3d.sdf -o ${POUT}output''', 12 | 13 | stderr: [ 'No output format specified - using sdf', 14 | 'Handled [1-9]\\d+ molecules, resulting in' ], 15 | 16 | creates: [ 'output.sdf.gz' ], 17 | 18 | ], 19 | 20 | // Testing from file 21 | test_pbf_ev_to_sdf = [ 22 | 23 | command: '''python -m pipelines.rdkit.pbf_ev 24 | -i ${PIN}dhfr_3d.sdf -o ${POUT}output -of sdf''', 25 | 26 | stderr: [ 'Handled [1-9]\\d+ molecules, resulting in' ], 27 | 28 | creates: [ 'output.sdf.gz' ], 29 | 30 | ], 31 | 32 | // Testing from file 33 | test_pbf_ev_to_sdf = [ 34 | 35 | command: '''python -m pipelines.rdkit.pbf_ev 36 | -i ${PIN}dhfr_3d.sdf -o ${POUT}output -of json''', 37 | 38 | stderr: [ 'Handled [1-9]\\d+ molecules, resulting in' ], 39 | 40 | creates: [ 'output.data.gz', 41 | 'output.metadata'], 42 | 43 | ], 44 | 45 | ] 46 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/rxn_maker.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.xchem.reaction.maker.v1 5 | name: XChemReactionMaker 6 | description: XChem react molecules and reactants 7 | tags: 8 | - rdkit 9 | - xchem 10 | - enumeration 11 | - reaction 12 | - docker 13 | resourceUrl: 14 | icon: icons/chemreaction.png 15 | inputDescriptors: 16 | - primaryType: org.squonk.dataset.Dataset 17 | secondaryType: org.squonk.types.MoleculeObject 18 | mediaType: application/x-squonk-dataset-molecule+json 19 | name: input 20 | - primaryType: org.squonk.dataset.Dataset 21 | secondaryType: org.squonk.types.MoleculeObject 22 | mediaType: application/x-squonk-dataset-molecule+json 23 | name: reactants 24 | outputDescriptors: 25 | - primaryType: org.squonk.dataset.Dataset 26 | secondaryType: org.squonk.types.MoleculeObject 27 | mediaType: application/x-squonk-dataset-molecule+json 28 | name: output 29 | optionDescriptors: 30 | - modes: 31 | - User 32 | editable: true 33 | "@class": org.squonk.options.OptionDescriptor 34 | typeDescriptor: 35 | type: java.lang.String 36 | "@class": org.squonk.options.SimpleTypeDescriptor 37 | key: arg.reaction 38 | label: Reaction 39 | description: Reaction to use 40 | values: 41 | - Amides 42 | - Benzimidazole 43 | - Benzoxazole 44 | - Ester_Coupling 45 | - Ether_Coupling 46 | - Indole 47 | - N-Alkylation 48 | - Oxadiazole 49 | - Reductive_Amination 50 | - SNAr 51 | - Sonogashira 52 | - Sulfonamide 53 | - Suzuki_Coupling 54 | - Triazole 55 | - Urea 56 | visible: true 57 | minValues: 1 58 | maxValues: 1 59 | executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep 60 | thinDescriptors: 61 | - input: input 62 | - input: reactants 63 | inputRoutes: 64 | - route: FILE 65 | - route: FILE 66 | outputRoutes: 67 | - route: FILE 68 | imageName: informaticsmatters/rdkit_pipelines 69 | command: >- 70 | python -m pipelines.rdkit.rxn_maker -i ${PIN}input.data.gz --reagent_lib reactants.data.gz 71 | -o ${POUT}output --reaction ${reaction} -of json --meta 72 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/rxn_maker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2017 Informatics Matters Ltd. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import argparse 18 | import os 19 | 20 | from pipelines_utils import parameter_utils, utils 21 | from pipelines_utils_rdkit import rdkit_utils 22 | 23 | 24 | ### start main execution ######################################### 25 | 26 | def main(): 27 | ### command line args defintions ######################################### 28 | 29 | ### Define the reactions available 30 | poised_filter = True 31 | if poised_filter == True: 32 | from .poised_filter import Filter 33 | filter_to_use = Filter() 34 | 35 | 36 | parser = argparse.ArgumentParser(description='RDKit rxn process') 37 | parameter_utils.add_default_io_args(parser) 38 | parser.add_argument('-q', '--quiet', action='store_true', help='Quiet mode') 39 | parser.add_argument('-m', '--multi', action='store_true', help='Output one file for each reaction') 40 | parser.add_argument('-r', '--reaction', choices=filter_to_use.poised_reactions.keys(), help='Name of reaction to be run') 41 | parser.add_argument('-rl', '--reagent_lib', help="Reagent file, if not defined the STDIN is used") 42 | parser.add_argument('-rlf', '--reagent_lib_format', choices=['sdf', 'json'], help="Reagent file format. When using STDIN this must be specified.") 43 | 44 | 45 | args = parser.parse_args() 46 | utils.log("Screen Args: ", args) 47 | 48 | if not args.output and args.multi: 49 | raise ValueError("Must specify output location when writing individual result files") 50 | 51 | input, suppl = rdkit_utils.default_open_input(args.input, args.informat) 52 | output, writer, output_base = rdkit_utils.default_open_output(args.output, "rxn_maker", args.outformat) 53 | 54 | 55 | i = 0 56 | count = 0 57 | 58 | if args.multi: 59 | dir_base = os.path.dirname(args.output) 60 | writer_dict = filter_to_use.get_writers(dir_base) 61 | else: 62 | writer_dict = None 63 | dir_base = None 64 | 65 | for mol in suppl: 66 | i+=1 67 | if mol is None: continue 68 | reagent_input, reagent_suppl = rdkit_utils.default_open_input(args.reagent_lib, args.reagent_lib_format) 69 | for r_mol in reagent_suppl: 70 | if r_mol is None: 71 | continue 72 | # Return a dict/class here - indicating which filters passed 73 | count = filter_to_use.perform_reaction(mol,args.reaction,r_mol,writer,count) 74 | 75 | 76 | utils.log("Created", count, "molecules from a total of ", i, "input molecules") 77 | 78 | writer.flush() 79 | writer.close() 80 | if input: 81 | input.close() 82 | if reagent_input: 83 | reagent_input.close() 84 | if output: 85 | output.close() 86 | # close the individual writers 87 | if writer_dict: 88 | for key in writer_dict: 89 | writer_dict[key].close() 90 | 91 | if args.meta: 92 | utils.write_metrics(output_base, {'__InputCount__': i, '__OutputCount__': count, 'RxnMaker': count}) 93 | 94 | 95 | if __name__ == "__main__": 96 | main() 97 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/rxn_maker.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // Testing rxn_maker.py reading from files 8 | test_rxn_maker_raw = [ 9 | 10 | command: '''python -m pipelines.rdkit.rxn_maker 11 | -i ${PIN}sulfonyl_chloride.sdf 12 | -r Sulfonamide 13 | -rl ${PIN}sdf-aliphatic-primary-amines-175.sdf.gz 14 | -o ${POUT}output''', 15 | 16 | stderr: [ 'No output format specified - using sdf', 17 | 'Created [1-9]\\d+ molecules from a total of' ], 18 | 19 | creates: [ 'output.sdf.gz' ], 20 | 21 | ], 22 | 23 | test_rxn_maker_raw_to_sdf = [ 24 | 25 | command: '''python -m pipelines.rdkit.rxn_maker 26 | -i ${PIN}sulfonyl_chloride.sdf 27 | -r Sulfonamide 28 | -rl ${PIN}sdf-aliphatic-primary-amines-175.sdf.gz 29 | -o ${POUT}output -of sdf''', 30 | 31 | stderr: [ 'Created [1-9]\\d+ molecules from a total of' ], 32 | 33 | creates: [ 'output.sdf.gz' ], 34 | 35 | ], 36 | 37 | test_rxn_maker_raw_to_json = [ 38 | 39 | command: '''python -m pipelines.rdkit.rxn_maker 40 | -i ${PIN}sulfonyl_chloride.sdf 41 | -r Sulfonamide 42 | -rl ${PIN}sdf-aliphatic-primary-amines-175.sdf.gz 43 | -o ${POUT}output -of json''', 44 | 45 | stderr: [ 'Created [1-9]\\d+ molecules from a total of' ], 46 | 47 | creates: [ 'output.data.gz', 48 | 'output.metadata'], 49 | 50 | ], 51 | 52 | ] 53 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/rxn_selector.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import os 5 | 6 | from pipelines_utils import parameter_utils, utils 7 | from pipelines_utils_rdkit import rdkit_utils 8 | 9 | 10 | ### start main execution ######################################### 11 | 12 | def main(): 13 | ### command line args defintions ######################################### 14 | 15 | ### Define the reactions available 16 | poised_filter = True 17 | if poised_filter == True: 18 | from .poised_filter import Filter 19 | filter_to_use = Filter() 20 | 21 | 22 | parser = argparse.ArgumentParser(description='RDKit rxn process') 23 | parameter_utils.add_default_io_args(parser) 24 | parser.add_argument('-q', '--quiet', action='store_true', help='Quiet mode') 25 | parser.add_argument('-m', '--multi', action='store_true', help='Output one file for each reaction') 26 | parser.add_argument('-r', '--reaction', choices=filter_to_use.poised_reactions.keys(), help='Name of reaction to be run') 27 | parser.add_argument('-rl', '--reagent_lib', help="Input SD file, if not defined the STDIN is used") 28 | parser.add_argument('-rlf', '--reagent_lib_format', choices=['sdf', 'json'], help="Input format. When using STDIN this must be specified.") 29 | 30 | args = parser.parse_args() 31 | utils.log("Screen Args: ", args) 32 | 33 | if not args.output and args.multi: 34 | raise ValueError("Must specify output location when writing individual result files") 35 | 36 | input, suppl = rdkit_utils.default_open_input(args.input, args.informat) 37 | reagent_input, reagent_suppl = rdkit_utils.default_open_input(args.reagent_lib, args.reagent_lib_format) 38 | output, writer, output_base = rdkit_utils.default_open_output(args.output, "rxn_maker", args.outformat) 39 | 40 | i = 0 41 | count = 0 42 | 43 | if args.multi: 44 | dir_base = os.path.dirname(args.output) 45 | writer_dict = filter_to_use.get_writers(dir_base) 46 | else: 47 | writer_dict = None 48 | dir_base = None 49 | 50 | for mol in suppl: 51 | i+=1 52 | if mol is None: continue 53 | # Return a dict/class here - indicating which filters passed 54 | count = filter_to_use.perform_reaction(mol,args.reaction,reagent_suppl,writer,count) 55 | 56 | utils.log("Created", count, "molecules from a total of ", i, "input molecules") 57 | 58 | writer.flush() 59 | writer.close() 60 | if input: 61 | input.close() 62 | if output: 63 | output.close() 64 | # close the individual writers 65 | if writer_dict: 66 | for key in writer_dict: 67 | writer_dict[key].close() 68 | 69 | if args.meta: 70 | utils.write_metrics(output_base, {'__InputCount__': i, '__OutputCount__': count, 'RxnSmartsFilter': count}) 71 | 72 | 73 | if __name__ == "__main__": 74 | main() 75 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/rxn_selector.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // A basic start-up test for the module. 8 | // Simply makes sure it starts cleanly. 9 | test_help = [ 10 | 11 | command: '''python -m pipelines.rdkit.rxn_selector -h''', 12 | 13 | stdout: [ 'usage: rxn_selector.py' ], 14 | 15 | ], 16 | 17 | test_basic = [ 18 | 19 | command: '''python -m pipelines.rdkit.rxn_selector 20 | -rl ${PIN}sdf-aliphatic-primary-amines-175.sdf.gz -rlf sdf''', 21 | 22 | exit_error: 'Must specify either an input file name or an input format (or both)', 23 | 24 | ], 25 | 26 | // Not sure this test makes any sense (abc) 27 | // Added simply to trey and improve coverage 28 | test_basic_to_sdf = [ 29 | 30 | command: '''python -m pipelines.rdkit.rxn_selector 31 | -i ${PIN}sulfonyl_chloride.sdf -if sdf 32 | -rl ${PIN}sdf-aliphatic-primary-amines-175.sdf.gz -rlf sdf 33 | -o output -of sdf''', 34 | 35 | exit_error: 'KeyError: None', 36 | 37 | ], 38 | 39 | ] 40 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/rxn_smarts_filter.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.xchem.smarts_filter.reaction 5 | name: XChemReactionSmartsFilter 6 | description: XChem Reaction Smarts Filter 7 | tags: 8 | - rdkit 9 | - xchem 10 | - docker 11 | - smarts 12 | - reaction 13 | - filter 14 | - poised 15 | resourceUrl: 16 | icon: icons/filter_molecules.png 17 | inputDescriptors: 18 | - primaryType: org.squonk.dataset.Dataset 19 | secondaryType: org.squonk.types.MoleculeObject 20 | mediaType: application/x-squonk-dataset-molecule+json 21 | name: input 22 | outputDescriptors: 23 | - primaryType: org.squonk.dataset.Dataset 24 | secondaryType: org.squonk.types.MoleculeObject 25 | mediaType: application/x-squonk-dataset-molecule+json 26 | name: output 27 | optionDescriptors: [] 28 | executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep 29 | inputRoutes: 30 | - route: FILE 31 | outputRoutes: 32 | - route: FILE 33 | imageName: informaticsmatters/rdkit_pipelines 34 | command: >- 35 | python -m pipelines.rdkit.rxn_smarts_filter -i ${PIN}input.data.gz -if json 36 | -o ${POUT}output -of json --thin --meta 37 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/rxn_smarts_filter.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // Testing rxn_smarts_filter.py reading from sd file and writing to multiple files 8 | test_rxn_smarts_filter_raw = [ 9 | 10 | command: '''python -m pipelines.rdkit.rxn_smarts_filter 11 | -i ${PIN}Kinase_inhibs.sdf.gz -o ${POUT}output --multi''', 12 | 13 | stderr: [ 'Using 15 reaction filters', 14 | 'No output format specified - using sdf', 15 | 'Matched [1-9]\\d+ molecules from a total of' ], 16 | 17 | creates: [ 'output.sdf.gz', 18 | 'Amides.sdf', 19 | 'Ether_Coupling.sdf', 20 | 'Reductive_Amination.sdf', 21 | 'Suzuki_Coupling.sdf', 22 | 'Benzimidazole.sdf', 23 | 'Indole.sdf', 24 | 'SNAr.sdf', 25 | 'Triazole.sdf', 26 | 'Benzoxazole.sdf', 27 | 'N-Alkylation.sdf', 28 | 'Sonogashira.sdf', 29 | 'Urea.sdf', 30 | 'Ester_Coupling.sdf', 31 | 'Oxadiazole.sdf', 32 | 'Sulfonamide.sdf', 33 | 'output.sdf.gz' ], 34 | 35 | ], 36 | 37 | // Testing rxn_smarts_filter.py reading from STDIN 38 | // and writing to files using SDF 39 | test_rxn_smarts_filter_raw_stdin_to_sdf = [ 40 | 41 | command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz | 42 | python -m pipelines.rdkit.rxn_smarts_filter 43 | -if sdf -o ${POUT}output''', 44 | 45 | stderr: [ 'Using 15 reaction filters', 46 | 'No output format specified - using sdf', 47 | 'Matched [1-9]\\d+ molecules from a total of' ], 48 | 49 | creates: [ 'output.sdf.gz' ], 50 | 51 | ], 52 | 53 | // Testing rxn_smarts_filter.py reading from file 54 | // and writing to files using SDF 55 | test_rxn_smarts_filter_raw_sdf_file_to_sdf = [ 56 | 57 | command: '''python -m pipelines.rdkit.rxn_smarts_filter 58 | -i ${PIN}Kinase_inhibs.sdf.gz -if sdf -o ${POUT}output -of sdf''', 59 | 60 | stderr: [ 'Using 15 reaction filters', 61 | 'Matched [1-9]\\d+ molecules from a total of' ], 62 | 63 | creates: [ 'output.sdf.gz' ], 64 | 65 | ], 66 | 67 | // Testing rxn_smarts_filter.py 68 | // reading from STDIN and writing to files using JSON 69 | test_rxn_smarts_filter_raw_stdin_to_json = [ 70 | 71 | command: '''gunzip -c ${PIN}nci100.data.gz | 72 | python -m pipelines.rdkit.rxn_smarts_filter 73 | -if json -o ${POUT}output -of json --meta --thin''', 74 | 75 | stderr: [ 'Using 15 reaction filters', 76 | 'Matched [1-9]\\d+ molecules from a total of 100' ], 77 | 78 | creates: [ 'output.data.gz', 79 | 'output_metrics.txt', 80 | 'output.metadata' ], 81 | 82 | metrics: [ 'RxnSmartsFilter': '\\d\\d', 83 | '__InputCount__': '100', 84 | '__OutputCount__': '\\d\\d' ], 85 | 86 | 87 | ], 88 | 89 | // Testing rxn_smarts_filter.py 90 | // reading from STDIN and writing to files using JSON 91 | test_rxn_smarts_filter_raw_file_to_json = [ 92 | 93 | command: '''python -m pipelines.rdkit.rxn_smarts_filter 94 | -i ${PIN}nci100.data.gz -if json 95 | -o ${POUT}output -of json --meta --thin''', 96 | 97 | stderr: [ 'Using 15 reaction filters', 98 | 'Matched [1-9]\\d+ molecules from a total of 100' ], 99 | 100 | creates: [ 'output.data.gz', 101 | 'output_metrics.txt', 102 | 'output.metadata' ], 103 | 104 | metrics: [ 'RxnSmartsFilter': '\\d\\d', 105 | '__InputCount__': '100', 106 | '__OutputCount__': '\\d\\d' ], 107 | 108 | 109 | ], 110 | 111 | ] 112 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/sanifier.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // A basic start-up test for the module. 8 | // Simply makes sure it starts cleanly. 9 | // 10 | // For this to work you will need to make sure a number 11 | // of other modules are installed in your conda environemnt. 12 | // - molvs (pip install molvs) 13 | // - standardiser 14 | test_help = [ 15 | 16 | command: '''python -m pipelines.rdkit.sanifier -h''', 17 | 18 | stdout: [ 'usage: sanifier.py' ] 19 | 20 | ], 21 | 22 | ] 23 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/sanifier_enumerator.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.xchem.sanify.enumerate.v1 5 | name: MoleculeEnumerator 6 | description: Enumerate stereoisomers and tautomers 7 | tags: 8 | - rdkit 9 | - xchem 10 | - docker 11 | - sanify 12 | - reaction 13 | - molvs 14 | - enumerate 15 | - stereoisomer 16 | - tautomer 17 | resourceUrl: 18 | icon: icons/molecule_generator.png 19 | inputDescriptors: 20 | - primaryType: org.squonk.dataset.Dataset 21 | secondaryType: org.squonk.types.MoleculeObject 22 | mediaType: application/x-squonk-dataset-molecule+json 23 | name: input 24 | outputDescriptors: 25 | - primaryType: org.squonk.dataset.Dataset 26 | secondaryType: org.squonk.types.MoleculeObject 27 | mediaType: application/x-squonk-dataset-molecule+json 28 | name: output 29 | optionDescriptors: 30 | - modes: 31 | - User 32 | editable: true 33 | "@class": org.squonk.options.OptionDescriptor 34 | typeDescriptor: 35 | type: java.lang.Boolean 36 | "@class": org.squonk.options.SimpleTypeDescriptor 37 | key: arg.tautomers 38 | label: Enumerate tautomers 39 | description: Enumerate tautomers 40 | defaultValue: true 41 | visible: true 42 | - modes: 43 | - User 44 | editable: true 45 | "@class": org.squonk.options.OptionDescriptor 46 | typeDescriptor: 47 | type: java.lang.Boolean 48 | "@class": org.squonk.options.SimpleTypeDescriptor 49 | key: arg.stereoisomers 50 | label: Enumerate stereoisomers 51 | description: Enumerate stereoisomers 52 | defaultValue: true 53 | visible: true 54 | - modes: 55 | - User 56 | editable: true 57 | "@class": org.squonk.options.OptionDescriptor 58 | typeDescriptor: 59 | type: java.lang.String 60 | "@class": org.squonk.options.SimpleTypeDescriptor 61 | key: arg.molFormat 62 | label: Molecule format 63 | description: Output format for molecules 64 | values: 65 | - smiles 66 | - mol_2d 67 | - mol_3d 68 | defaultValue: smiles 69 | minValues: 1 70 | maxValues: 1 71 | visible: true 72 | executorClassName: org.squonk.execution.steps.impl.DefaultDockerExecutorStep 73 | inputRoutes: 74 | - route: FILE 75 | outputRoutes: 76 | - route: FILE 77 | imageName: informaticsmatters/rdkit_pipelines 78 | command: >- 79 | python -m pipelines.rdkit.sanifier -i ${PIN}input.data.gz -if json -o ${POUT}output -of json -mf $molFormat 80 | ${tautomers ? '-et' : ''} 81 | ${stereoisomers ? '-es' : ''} 82 | --meta -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/sanifier_standardiser_flatkinson.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.xchem.sanify.standardise.flatkinson.v1 5 | name: FlatkinsonStandardiser 6 | description: Molecule standardiser from Francis Atkinson 7 | tags: 8 | - rdkit 9 | - xchem 10 | - docker 11 | - sanify 12 | - reaction 13 | - flatkinson 14 | - standardiser 15 | - standardizer 16 | resourceUrl: 17 | icon: icons/transform_molecule.png 18 | inputDescriptors: 19 | - primaryType: org.squonk.dataset.Dataset 20 | secondaryType: org.squonk.types.MoleculeObject 21 | mediaType: application/x-squonk-dataset-molecule+json 22 | name: input 23 | outputDescriptors: 24 | - primaryType: org.squonk.dataset.Dataset 25 | secondaryType: org.squonk.types.MoleculeObject 26 | mediaType: application/x-squonk-dataset-molecule+json 27 | name: output 28 | optionDescriptors: 29 | - modes: 30 | - User 31 | editable: true 32 | "@class": org.squonk.options.OptionDescriptor 33 | typeDescriptor: 34 | type: java.lang.String 35 | "@class": org.squonk.options.SimpleTypeDescriptor 36 | key: dockerImageVersion 37 | label: RDKit version 38 | description: Version of the RDKit Docker image to execute 39 | values: 40 | - latest 41 | - Release_2017_03_1 42 | - Release_2016_09_2 43 | - Release_2016_03_1 44 | - Release_2015_09_2 45 | - Release_2015_09_1 46 | defaultValue: latest 47 | visible: true 48 | executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep 49 | thinDescriptors: 50 | - input: input 51 | preserve: false 52 | inputRoutes: 53 | - route: FILE 54 | outputRoutes: 55 | - route: FILE 56 | imageName: informaticsmatters/rdkit_pipelines 57 | command: >- 58 | python -m pipelines.rdkit.sanifier -i ${PIN}input.data.gz -if json -o ${POUT}output -of json -st -stm flatkinson --meta 59 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/sanifier_standardiser_molvs.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.xchem.sanify.standardise.molvs.v1 5 | name: MolVSStandardiser 6 | description: Molecule standardiser using MolVS 7 | tags: 8 | - rdkit 9 | - xchem 10 | - docker 11 | - sanify 12 | - reaction 13 | - molvs 14 | - standardiser 15 | - standardizer 16 | resourceUrl: 17 | icon: icons/transform_molecule.png 18 | inputDescriptors: 19 | - primaryType: org.squonk.dataset.Dataset 20 | secondaryType: org.squonk.types.MoleculeObject 21 | mediaType: application/x-squonk-dataset-molecule+json 22 | name: input 23 | outputDescriptors: 24 | - primaryType: org.squonk.dataset.Dataset 25 | secondaryType: org.squonk.types.MoleculeObject 26 | mediaType: application/x-squonk-dataset-molecule+json 27 | name: output 28 | optionDescriptors: 29 | - modes: 30 | - User 31 | editable: true 32 | "@class": org.squonk.options.OptionDescriptor 33 | typeDescriptor: 34 | type: java.lang.String 35 | "@class": org.squonk.options.SimpleTypeDescriptor 36 | key: dockerImageVersion 37 | label: RDKit version 38 | description: Version of the RDKit Docker image to execute 39 | values: 40 | - latest 41 | - Release_2017_03_1 42 | - Release_2016_09_2 43 | - Release_2016_03_1 44 | - Release_2015_09_2 45 | - Release_2015_09_1 46 | defaultValue: latest 47 | visible: true 48 | executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep 49 | thinDescriptors: 50 | - input: input 51 | preserve: false 52 | inputRoutes: 53 | - route: FILE 54 | outputRoutes: 55 | - route: FILE 56 | imageName: informaticsmatters/rdkit_pipelines 57 | command: >- 58 | python -m pipelines.rdkit.sanifier -i ${PIN}input.data.gz -if json -o ${POUT}output -of json -st -stm molvs --meta 59 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/sanify_utils.py: -------------------------------------------------------------------------------- 1 | from rdkit import Chem 2 | from copy import copy 3 | 4 | from pipelines_utils import utils 5 | 6 | from molvs import enumerate_tautomers_smiles,canonicalize_tautomer_smiles,Standardizer 7 | from molvs.charge import Uncharger,Reionizer 8 | from standardiser import standardise 9 | 10 | standardizer = Standardizer() 11 | 12 | def _spam(n): 13 | out=[] 14 | for perm in _getPerms(n): 15 | elem = [ int(i) for i in list(perm) ] 16 | out.append(elem) 17 | return out 18 | 19 | def _getPerms(n): 20 | from itertools import permutations 21 | for i in _getCandidates(n): 22 | for perm in set(permutations(i)): 23 | yield ''.join(perm) 24 | 25 | def _getCandidates(n): 26 | for i in range(0, n+1): 27 | res = "1" * i + "0" * (n - i) 28 | yield res 29 | 30 | def enumerateTautomers(mol): 31 | """ 32 | Get all of the Tautomers of a given molecule 33 | :param mol: the input molecule 34 | :return: a list of Tautomers 35 | """ 36 | smiles = Chem.MolToSmiles(mol,isomericSmiles=True) 37 | tauts = enumerate_tautomers_smiles(smiles) 38 | ##TODO Append Parent molecule name 39 | return [Chem.MolFromSmiles(x) for x in tauts] 40 | 41 | def getCanonTautomer(mol): 42 | """ 43 | Get the canonical tautomer form 44 | :param mol: the input molecule 45 | :return: a list of Tautomers 46 | """ 47 | smiles = Chem.MolToSmiles(mol,isomericSmiles=True) 48 | x = canonicalize_tautomer_smiles(smiles) 49 | return Chem.MolFromSmiles(x) 50 | 51 | 52 | def enumerateStereoIsomers(mol): 53 | out = [] 54 | chiralCentres = Chem.FindMolChiralCenters(mol, includeUnassigned=True) 55 | #return the molecule object when no chiral centres where identified 56 | if chiralCentres == []: 57 | return [mol] 58 | 59 | #All bit permutations with number of bits equals number of chiralCentres 60 | elements = _spam(len(chiralCentres)) 61 | 62 | for isoId,element in enumerate(elements): 63 | for centreId,i in enumerate(element): 64 | atomId = chiralCentres[centreId][0] 65 | if i == 0: 66 | mol.GetAtomWithIdx(atomId).SetChiralTag(Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW) 67 | elif i == 1: 68 | mol.GetAtomWithIdx(atomId).SetChiralTag(Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW) 69 | outmol = copy(mol) 70 | utils.log("Enumerated ", Chem.MolToSmiles(mol, isomericSmiles=True)) 71 | out.append(outmol) 72 | return out 73 | 74 | 75 | def molVsStandardizer(mol): 76 | return standardizer.standardize(mol) 77 | 78 | def flatkinsonStandardizer(mol): 79 | return standardise.run(mol) 80 | 81 | STANDARD_MOL_METHODS = {"molvs": molVsStandardizer, "flatkinson": flatkinsonStandardizer} 82 | 83 | def getNeutralMolecule(mol): 84 | uncharger = Uncharger() 85 | return uncharger.uncharge(mol) 86 | 87 | def getReionisedMolecule(mol): 88 | reioniser = Reionizer() 89 | return reioniser.reionize(mol) -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/screen_multi.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // Testing screen_multi.py reading target form sdf file, 8 | // query as json file and writing to STDOUT 9 | test_multi = [ 10 | 11 | command: '''gunzip -c ${PIN}dhfr_3d.sdf.gz | 12 | python -m pipelines.rdkit.screen_multi 13 | -if sdf --qjson ${PIN}nci100.data.gz --simmin 0.55''', 14 | 15 | stderr: [ 'Found [1-9]\\d* similar molecules' ], 16 | 17 | ], 18 | 19 | // Testing screen_multi.py reading target form sdf file, 20 | // query as json file and writing to STDOUT 21 | test_multi_from_file = [ 22 | 23 | command: '''gunzip -c ${PIN}dhfr_3d.sdf.gz | 24 | python -m pipelines.rdkit.screen_multi 25 | -i ${PIN}dhfr_3d.sdf.gz -if sdf 26 | --qjson ${PIN}nci100.data.gz --simmin 0.55''', 27 | 28 | stderr: [ 'Found [1-9]\\d* similar molecules' ], 29 | 30 | ], 31 | 32 | // Testing screen_multi.py reading target form sdf file, 33 | // query as json file and writing to file 34 | test_multi_from_file_to_sdf = [ 35 | 36 | command: '''gunzip -c ${PIN}dhfr_3d.sdf.gz | 37 | python -m pipelines.rdkit.screen_multi 38 | -i ${PIN}dhfr_3d.sdf.gz -if sdf 39 | --qjson ${PIN}nci100.data.gz --simmin 0.55 40 | -o ${POUT}output -of sdf''', 41 | 42 | stderr: [ 'Found [1-9]\\d* similar molecules' ], 43 | 44 | creates: [ 'output.sdf.gz' ], 45 | ], 46 | 47 | // Testing screen_multi.py reading target form sdf file, 48 | // query as json file and writing to file 49 | test_multi_from_file_to_json = [ 50 | 51 | command: '''gunzip -c ${PIN}dhfr_3d.sdf.gz | 52 | python -m pipelines.rdkit.screen_multi 53 | -i ${PIN}dhfr_3d.sdf.gz -if sdf 54 | --qjson ${PIN}nci100.data.gz --simmin 0.55 55 | -o ${POUT}output -of json''', 56 | 57 | stderr: [ 'Found [1-9]\\d* similar molecules' ], 58 | 59 | creates: [ 'output.data.gz', 60 | 'output.metadata' ], 61 | ], 62 | 63 | ] 64 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/standardize.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.rdkit.standardizer.v1 5 | name: RDKitStandardizer 6 | description: Standardize molecules 7 | tags: 8 | - rdkit 9 | - docker 10 | - standardise 11 | - standardize 12 | resourceUrl: 13 | icon: icons/molecule_generator.png 14 | inputDescriptors: 15 | - primaryType: org.squonk.dataset.Dataset 16 | secondaryType: org.squonk.types.MoleculeObject 17 | mediaType: application/x-squonk-dataset-molecule+json 18 | name: input 19 | outputDescriptors: 20 | - primaryType: org.squonk.dataset.Dataset 21 | secondaryType: org.squonk.types.MoleculeObject 22 | mediaType: application/x-squonk-dataset-molecule+json 23 | name: output 24 | optionDescriptors: 25 | - modes: 26 | - User 27 | editable: true 28 | "@class": org.squonk.options.OptionDescriptor 29 | typeDescriptor: 30 | type: java.lang.String 31 | "@class": org.squonk.options.SimpleTypeDescriptor 32 | key: arg.fragment_method 33 | label: Fragment method 34 | description: Approach to use for picking biggest molecular fragment 35 | values: 36 | - hac 37 | - mw 38 | defaultValue: hac 39 | visible: true 40 | - modes: 41 | - User 42 | editable: true 43 | "@class": org.squonk.options.OptionDescriptor 44 | typeDescriptor: 45 | type: java.lang.Boolean 46 | "@class": org.squonk.options.SimpleTypeDescriptor 47 | key: arg.neutralize 48 | label: Neutralize molecules 49 | description: Convert charged groups to neutral form where possible 50 | defaultValue: true 51 | visible: true 52 | executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep 53 | thinDescriptors: 54 | - input: input 55 | inputRoutes: 56 | - route: FILE 57 | outputRoutes: 58 | - route: FILE 59 | imageName: informaticsmatters/rdkit_pipelines 60 | command: >- 61 | python -m pipelines.rdkit.standardize -i ${PIN}input.data.gz -if json -o ${POUT}output -of json 62 | --fragment-method $fragment_method 63 | ${neutralize ? '--neutralize' : ''} 64 | --meta -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/standardize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2018 Informatics Matters Ltd. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import argparse 18 | 19 | from rdkit import DataStructs, rdBase 20 | from rdkit.Chem.MolStandardize import rdMolStandardize 21 | 22 | from pipelines_utils import parameter_utils, utils 23 | from pipelines_utils_rdkit import rdkit_utils, mol_utils 24 | 25 | 26 | ### functions ######################################### 27 | 28 | #lfc = rdMolStandardize.LargestFragmentChooser() 29 | uncharger = rdMolStandardize.Uncharger() 30 | 31 | 32 | def standardize(mol, neutralize, fragment): 33 | """ 34 | 35 | :param mol: The molecule to standardize 36 | :param neutralize: Boolean for whether to neutralize the molecule 37 | :param fragment: The approach for choosing the largest fragment. Either 'hac' or 'mw'. If not specified the whole 38 | molecule is used. 39 | :return: The standardized molecule 40 | """ 41 | mol = rdMolStandardize.Cleanup(mol) 42 | #mol = lfc.choose(mol) 43 | # We use our own largest fragment picker as the RDKit one behaves slightly differently 44 | if fragment: 45 | mol = mol_utils.fragment(mol, fragment) 46 | if neutralize: 47 | mol = uncharger.uncharge(mol) 48 | return mol 49 | 50 | 51 | ### start main execution ######################################### 52 | 53 | def main(): 54 | 55 | ### command line args definitions ######################################### 56 | 57 | parser = argparse.ArgumentParser(description='RDKit Standardize') 58 | parser.add_argument('--fragment-method', choices=['hac', 'mw'], help='Approach to find biggest fragment if more than one (hac = biggest by heavy atom count, mw = biggest by mol weight)') 59 | parser.add_argument('--neutralize', action='store_true', help='Neutralize the molecule') 60 | 61 | parameter_utils.add_default_io_args(parser) 62 | parser.add_argument('-q', '--quiet', action='store_true', help='Quiet mode') 63 | parser.add_argument('--thin', action='store_true', help='Thin output mode') 64 | 65 | args = parser.parse_args() 66 | utils.log("Standardize Args: ", args) 67 | 68 | # handle metadata 69 | source = "standardize.py" 70 | datasetMetaProps = {"source":source, "description": "Standardize using RDKit " + rdBase.rdkitVersion} 71 | clsMappings = {} 72 | fieldMetaProps = [] 73 | 74 | 75 | input,output,suppl,writer,output_base = rdkit_utils.\ 76 | default_open_input_output(args.input, args.informat, args.output, 77 | 'standardize', args.outformat, 78 | thinOutput=False, valueClassMappings=clsMappings, 79 | datasetMetaProps=datasetMetaProps, 80 | fieldMetaProps=fieldMetaProps) 81 | count = 0 82 | total = 0 83 | errors = 0 84 | for mol in suppl: 85 | count += 1 86 | if mol is None: 87 | errors += 1 88 | continue 89 | m = standardize(mol, args.neutralize, args.fragment_method) 90 | writer.write(m) 91 | total += 1 92 | 93 | input.close() 94 | writer.flush() 95 | writer.close() 96 | output.close() 97 | 98 | if args.meta: 99 | utils.write_metrics(output_base, {'__InputCount__':count, '__OutputCount__':total, '__ErrorCount__':errors, 'RDKitStandardize':total}) 100 | 101 | if __name__ == "__main__": 102 | main() 103 | 104 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/standardize.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // A basic start-up test for the module. 8 | // Simply makes sure it starts cleanly. 9 | // 10 | test_help = [ 11 | 12 | command: '''python -m pipelines.rdkit.standardize -h''', 13 | 14 | stdout: [ 'usage: standardize.py' ] 15 | 16 | ], 17 | 18 | ] 19 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/sucos-max.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.rdkit.sucos.basic 5 | name: RDKitSuCOSMax 6 | description: Generate 3D overlays using SuCOSMax in RDKit 7 | tags: 8 | - rdkit 9 | - alignment 10 | - sucos 11 | - 3d 12 | - docker 13 | resourceUrl: 14 | icon: icons/filter_molecules.png 15 | inputDescriptors: 16 | - primaryType: org.squonk.dataset.Dataset 17 | secondaryType: org.squonk.types.MoleculeObject 18 | mediaType: application/x-squonk-dataset-molecule+json 19 | name: input 20 | - primaryType: org.squonk.dataset.Dataset 21 | secondaryType: org.squonk.types.MoleculeObject 22 | mediaType: application/x-squonk-dataset-molecule+json 23 | name: target 24 | outputDescriptors: 25 | - primaryType: org.squonk.dataset.Dataset 26 | secondaryType: org.squonk.types.MoleculeObject 27 | mediaType: application/x-squonk-dataset-molecule+json 28 | name: output 29 | optionDescriptors: 30 | - modes: 31 | - User 32 | editable: true 33 | visible: true 34 | description: Target molecule index (default is the first) 35 | label: Target mol index 36 | key: arg.targetidx 37 | minValues: 0 38 | maxValues: 1 39 | typeDescriptor: 40 | type: java.lang.Integer 41 | "@class": org.squonk.options.SimpleTypeDescriptor 42 | "@class": org.squonk.options.OptionDescriptor 43 | - modes: 44 | - User 45 | editable: true 46 | visible: true 47 | description: Name field in targets 48 | label: Target name field 49 | key: arg.name 50 | minValues: 0 51 | maxValues: 1 52 | typeDescriptor: 53 | type: java.lang.String 54 | "@class": org.squonk.options.SimpleTypeDescriptor 55 | "@class": org.squonk.options.OptionDescriptor 56 | executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep 57 | thinDescriptors: 58 | - input: input 59 | inputRoutes: 60 | - route: FILE 61 | - route: FILE 62 | outputRoutes: 63 | - route: FILE 64 | imageName: informaticsmatters/rdkit_pipelines 65 | command: >- 66 | python -m pipelines.rdkit.sucos --target-molecule ${PIN}target.data.gz -i ${PIN}input.data.gz -o ${POUT}output -of json 67 | ${binding.variables.containsKey('targetidx') ? '--target-index ' + targetidx : ''} 68 | ${binding.variables.containsKey('name') ? '--name-field ' + name : ''} 69 | --meta -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/sucos-max.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // Testing sucos_max.py reading from file and writing to file 8 | test_sucos_max = [ 9 | 10 | command: '''python -m pipelines.rdkit.sucos_max 11 | --target-molecules ${PIN}/sucos/hits.sdf 12 | -i ${PIN}/sucos/poses.sdf -if sdf 13 | -o ${POUT}output -of sdf''', 14 | 15 | stderr: [ 'Completed 305 comparisons' ], 16 | 17 | creates: [ 'output.sdf.gz' ] 18 | 19 | ] 20 | ] 21 | -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/sucos.dsd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | "@class": org.squonk.core.DockerServiceDescriptor 3 | serviceConfig: 4 | id: pipelines.rdkit.sucos.basic 5 | name: RDKitSuCOS 6 | description: Generate 3D overlay using SuCOS in RDKit 7 | tags: 8 | - rdkit 9 | - alignment 10 | - sucos 11 | - 3d 12 | - docker 13 | resourceUrl: 14 | icon: icons/filter_molecules.png 15 | inputDescriptors: 16 | - primaryType: org.squonk.dataset.Dataset 17 | secondaryType: org.squonk.types.MoleculeObject 18 | mediaType: application/x-squonk-dataset-molecule+json 19 | name: input 20 | - primaryType: org.squonk.dataset.Dataset 21 | secondaryType: org.squonk.types.MoleculeObject 22 | mediaType: application/x-squonk-dataset-molecule+json 23 | name: target 24 | outputDescriptors: 25 | - primaryType: org.squonk.dataset.Dataset 26 | secondaryType: org.squonk.types.MoleculeObject 27 | mediaType: application/x-squonk-dataset-molecule+json 28 | name: output 29 | optionDescriptors: 30 | - modes: 31 | - User 32 | editable: true 33 | visible: true 34 | description: Target molecule index (default is the first) 35 | label: Target mol index 36 | key: arg.targetidx 37 | minValues: 0 38 | maxValues: 1 39 | typeDescriptor: 40 | type: java.lang.Integer 41 | "@class": org.squonk.options.SimpleTypeDescriptor 42 | "@class": org.squonk.options.OptionDescriptor 43 | executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep 44 | thinDescriptors: 45 | - input: input 46 | inputRoutes: 47 | - route: FILE 48 | - route: FILE 49 | outputRoutes: 50 | - route: FILE 51 | imageName: informaticsmatters/rdkit_pipelines 52 | command: >- 53 | python -m pipelines.rdkit.sucos --target-molecule ${PIN}target.data.gz -i ${PIN}input.data.gz -o ${POUT}output -of json 54 | ${binding.variables.containsKey('targetidx') ? '--target-index ' + targetidx : ''} 55 | --meta -------------------------------------------------------------------------------- /src/python/pipelines/rdkit/sucos.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // Testing sucos.py reading from file and writing to file 8 | test_raw_sucos_sdf_to_mol = [ 9 | 10 | command: '''python -m pipelines.rdkit.sucos 11 | --target ${PIN}/sucos/4e3g_lig.mol 12 | -i ${PIN}/sucos/benzene.sdf -if sdf 13 | -o ${POUT}output -of sdf''', 14 | 15 | stderr: [ 'Scores: 0.8' ], 16 | 17 | creates: [ 'output.sdf.gz' ], 18 | 19 | ], 20 | 21 | test_raw_sucos_sdf_pick_target_1 = [ 22 | 23 | command: '''python -m pipelines.rdkit.sucos 24 | --target ${PIN}/sucos/mols.sdf 25 | --targetidx 1 26 | -i ${PIN}/sucos/mols.sdf -if sdf 27 | -o ${POUT}output -of sdf''', 28 | 29 | stderr: [ 'Scores: 0.8' ], 30 | 31 | creates: [ 'output.sdf.gz' ], 32 | 33 | ], 34 | 35 | test_raw_sucos_sdf_pick_target_6 = [ 36 | 37 | command: '''python -m pipelines.rdkit.sucos 38 | --target ${PIN}/sucos/mols.sdf 39 | --targetidx 6 40 | -i ${PIN}/sucos/mols.sdf -if sdf 41 | -o ${POUT}output -of sdf''', 42 | 43 | stderr: [ 'Scores: 1.0 1.0 1.0' ], 44 | 45 | creates: [ 'output.sdf.gz' ], 46 | 47 | ] 48 | 49 | ] 50 | -------------------------------------------------------------------------------- /src/python/pipelines/xchem/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/pipelines/xchem/__init__.py -------------------------------------------------------------------------------- /src/python/pipelines/xchem/build_oddt_models.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2020 Informatics Matters Ltd. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | """ 18 | Run this to generate the RFScore and NNScore models. 19 | The files RFScore_v1_pdbbind2016.pickle, RFScore_v2_pdbbind2016.pickle, RFScore_v3_pdbbind2016.pickle and 20 | NNScore_pdbbind2016.pickle are generated. 21 | If you want them to be re-generated they must first be deleted. 22 | """ 23 | 24 | 25 | from oddt.virtualscreening import virtualscreening as vs 26 | 27 | ligands = '../../data/mpro/hits-17.sdf.gz' 28 | protein = '../../data/mpro/Mpro-x0387_0.pdb' 29 | pipeline=vs() 30 | print('Loading') 31 | pipeline.load_ligands('sdf', ligands) 32 | print('Scoring with rfscore') 33 | pipeline.score(function='rfscore_v1', protein=protein) 34 | pipeline.score(function='rfscore_v2', protein=protein) 35 | pipeline.score(function='rfscore_v3', protein=protein) 36 | print('Scoring with nnscore') 37 | pipeline.score(function='nnscore', protein=protein) 38 | # print('Scoring with plecscore') 39 | # pipeline.score(function='pleclinear', protein=protein) 40 | # pipeline.score(function='plecnn', protein=protein) 41 | # pipeline.score(function='plecrf', protein=protein) 42 | print('Writing') 43 | pipeline.write('sdf', 'scored.sdf') 44 | print('Done') -------------------------------------------------------------------------------- /src/python/pipelines/xchem/featurestein_generate.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // Testing featurestein_generate.py 8 | // reading from STDIN and writing to STDOUT 9 | test_featurestein_generate = [ 10 | 11 | command: '''python -m pipelines.xchem.featurestein_generate 12 | -i ${PIN}/mpro/hits-17.sdf.gz 13 | -f ${POUT}featurestein.p''', 14 | 15 | stderr: [ 'Wrote merged feature map with 69 features as pickle to' ], 16 | 17 | creates: [ 'featurestein.p' ] 18 | ] 19 | 20 | ] -------------------------------------------------------------------------------- /src/python/pipelines/xchem/featurestein_generate_and_score.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // Testing featurestein_generate_and_score.py 8 | // reading from STDIN and writing to STDOUT 9 | test_featurestein_generate_and_score = [ 10 | 11 | command: '''python -m pipelines.xchem.featurestein_generate_and_score 12 | -f ${PIN}/mpro/hits-17.sdf.gz 13 | -i ${PIN}/mpro/poses.sdf.gz 14 | -o ${POUT}FSOUT''', 15 | 16 | stderr: [ 'Scored 14 molecules. 0 errors.' ], 17 | 18 | creates: [ 'FSOUT.sdf.gz' ] 19 | ] 20 | 21 | ] -------------------------------------------------------------------------------- /src/python/pipelines/xchem/featurestein_score.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // Testing featurestein_score.py 8 | // reading from STDIN and writing to STDOUT 9 | test_featurestein_score = [ 10 | 11 | command: '''python -m pipelines.xchem.featurestein_score 12 | -i ${PIN}/mpro/poses.sdf.gz 13 | -f ${PIN}/mpro/featurestein.p 14 | -o ${POUT}fstein''', 15 | 16 | stderr: [ 'FeatureMap has 69 features' ], 17 | 18 | creates: [ 'fstein.sdf.gz' ] 19 | ] 20 | 21 | ] -------------------------------------------------------------------------------- /src/python/pipelines/xchem/rdkit_align.py: -------------------------------------------------------------------------------- 1 | from rdkit import Chem 2 | from rdkit.Chem import AllChem 3 | 4 | def find_calphas(protein): 5 | calphas = {} 6 | for atom in protein.GetAtoms(): 7 | resinfo = atom.GetPDBResidueInfo() 8 | moninfo = atom.GetMonomerInfo() 9 | resnum = resinfo.GetResidueNumber() 10 | atomname = moninfo.GetName().strip() 11 | if 'CA' == atomname: 12 | calphas[resnum] = atom.GetIdx() 13 | return calphas 14 | 15 | def align_calphas(probe, reference): 16 | 17 | ref_calphas = find_calphas(reference) 18 | print('Found', len(ref_calphas), 'CAs') 19 | prb_calphas = find_calphas(probe) 20 | print('Found', len(prb_calphas), 'CAs') 21 | atom_map = [] 22 | for resnum, idx in prb_calphas.items(): 23 | if resnum in ref_calphas: 24 | atom_map.append((idx, ref_calphas[resnum])) 25 | else: 26 | print('WARNING: residue', resnum, 'not found in reference') 27 | 28 | print('Mapped', len(atom_map), 'atoms') 29 | rmsd = AllChem.AlignMol(probe, reference, atomMap=atom_map) 30 | 31 | print('RMSD:', rmsd) 32 | 33 | def extract_ligand(protein, resname): 34 | mol = Chem.RWMol(protein) 35 | atoms_to_delete = [] 36 | for atom in mol.GetAtoms(): 37 | resinfo = atom.GetPDBResidueInfo() 38 | if resinfo.GetResidueName().strip() != resname: 39 | atoms_to_delete.append(atom.GetIdx()) 40 | print('Deleting', len(atoms_to_delete), 'atoms') 41 | for idx in reversed(atoms_to_delete): 42 | mol.RemoveAtom(idx) 43 | return mol 44 | 45 | def main(): 46 | reference = Chem.MolFromPDBFile('hits23_complex_init_0.pdb') 47 | probe = Chem.MolFromPDBFile('hits23_complex_mini_0.pdb') 48 | align_calphas(probe, reference) 49 | Chem.MolToPDBFile(probe, 'hits23_complex_algn_0.pdb') 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /src/python/pipelines/xchem/split_fragnet_candidates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2020 Informatics Matters Ltd. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import argparse, os, sys, json, traceback 18 | from pipelines_utils import utils 19 | from pipelines_utils import utils 20 | 21 | def gen_filename(id, generate_filenames): 22 | if generate_filenames: 23 | return str(count) 24 | else: 25 | return id 26 | 27 | def execute(candidates_json, generate_filenames): 28 | 29 | with open(candidates_json, 'r') as f: 30 | candidates = json.load(f) 31 | queries = candidates['queries']['molecules'] 32 | results = candidates['results'] 33 | hitCounts = candidates['hitCounts'] 34 | utils.log('Processing', len(queries), 'queries and', len(results), 'results') 35 | 36 | num_mols = 0 37 | num_hits = 0 38 | 39 | count = 0 40 | ids2Filenames = {} 41 | for query in queries: 42 | id = query['id'] 43 | if id in hitCounts: 44 | molfile = query['originalMol'] 45 | if generate_filenames: 46 | fname = str(count).zfil(3) 47 | else: 48 | fname = id 49 | utils.log('Using file name of', fname) 50 | 51 | with open(fname + '.mol', 'w') as f: 52 | f.write(molfile) 53 | num_hits += 1 54 | ids2Filenames[id] = fname 55 | count += 1 56 | 57 | writers = {} 58 | 59 | for result in results: 60 | 61 | num_mols += 1 62 | 63 | for id in result['sourceMols']: 64 | 65 | if id in writers: 66 | writer = writers[id] 67 | else: 68 | fname = ids2Filenames[id] 69 | writer = open(fname + '.smi', 'w') 70 | writers[id] = writer 71 | 72 | smiles = result['smiles'] 73 | #utils.log('Processing', smiles) 74 | 75 | writer.write(smiles + '\n') 76 | 77 | for w in writers.values(): 78 | w.close() 79 | 80 | utils.log('Totals - hits:', num_hits, 'outputs:', num_mols) 81 | 82 | def main(): 83 | """ 84 | Example usage: 85 | python -m pipelines.xchem.split-fragnet-candidates -i ../../data/mpro/expanded-17.json 86 | 87 | :return: 88 | """ 89 | 90 | parser = argparse.ArgumentParser(description='Split fragnet candidates - Split fragment network expansion into individual sets') 91 | 92 | parser.add_argument('-i', '--input', help='JSON containing the expanded candidates)') 93 | parser.add_argument('-g', '--generate-filenames', action='store_true', help='Use automatically generated file names instead of the title field)') 94 | 95 | args = parser.parse_args() 96 | utils.log("Split fragnet candidates args: ", args) 97 | 98 | infile = args.input 99 | 100 | execute(infile, args.generate_filenames) 101 | 102 | if __name__ == "__main__": 103 | main() 104 | -------------------------------------------------------------------------------- /src/python/pipelines/xchem/xcos.test: -------------------------------------------------------------------------------- 1 | // Automated pipeline test specification. 2 | 3 | [ 4 | 5 | version = 1, 6 | 7 | // Testing xcos.py 8 | // reading from STDIN and writing to STDOUT 9 | test_xcos = [ 10 | 11 | command: '''python -m pipelines.xchem.xcos 12 | -i ${PIN}/mpro/poses.sdf.gz 13 | -f ${PIN}/mpro/hits-17.sdf.gz 14 | -o ${POUT}xcos''', 15 | 16 | stderr: [ 'No output format specified - using sdf' ], 17 | 18 | creates: [ 'xcos.sdf.gz' ] 19 | ] 20 | 21 | ] -------------------------------------------------------------------------------- /src/python/simple_inters.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import oddt 3 | from oddt import interactions 4 | 5 | 6 | if len(sys.argv) != 3: 7 | print("Usage: simple_inters.py protein.pdb ligand.mol") 8 | exit(1) 9 | 10 | def get_canonical_hbond(atom): 11 | # print('classifying', atom['atomtype'], atom['isbackbone'], atom['isacceptor'], atom['isdonor'], atom['isdonorh']) 12 | res = atom['resname'] + str(atom['resnum']) 13 | if atom['isbackbone']: 14 | if atom['atomtype'] == 'N.am' or atom['atomtype'] == 'N.3': 15 | return res + 'BN' 16 | elif atom['atomtype'] == 'O.2': 17 | return res + 'BO' 18 | else: 19 | print('Unexpected H-bond atom', res, atom['atomtype']) 20 | else: 21 | return res + 'SC' 22 | 23 | protein_pdbfile = sys.argv[1] 24 | ligand_molfile = sys.argv[2] 25 | 26 | exact_ligand = True 27 | 28 | ligand = next(oddt.toolkit.readfile('sdf', ligand_molfile)) 29 | protein = next(oddt.toolkit.readfile('pdb', protein_pdbfile)) 30 | protein.protein = True 31 | 32 | print('Protein:', protein_pdbfile) 33 | print('Ligand:' + ligand_molfile) 34 | print('Num protein/ligand atoms:', len(protein.atoms), len(ligand.atoms)) 35 | print('Exact ligand =', exact_ligand) 36 | 37 | protein_atoms, ligand_atoms, strict = interactions.hbonds(protein, ligand, mol1_exact=False, mol2_exact=exact_ligand) 38 | count = 0 39 | for p, l, s in zip(protein_atoms, ligand_atoms, strict): 40 | count += 1 41 | print(' H-bond', get_canonical_hbond(p), '-', l['atomtype'], l['id'].item(), s) 42 | print('Found', count, 'H-bond interactions') 43 | 44 | protein_atoms, ligand_atoms = interactions.salt_bridges(protein, ligand, mol2_exact=exact_ligand) 45 | count = 0 46 | for p, l in zip(protein_atoms, ligand_atoms): 47 | count += 1 48 | print(' SaltBr', p['resname'] + str(p['resnum']), '-', l['atomtype'], l['id'].item()) 49 | print('Found', count, 'SaltBr interactions') 50 | 51 | protein_atoms, ligand_atoms = oddt.interactions.hydrophobic_contacts(protein, ligand) 52 | count = 0 53 | for p, l in zip(protein_atoms, ligand_atoms): 54 | count += 1 55 | print(' Hphobe', p['resname'] + str(p['resnum']), '-', l['atomtype'], l['id'].item()) 56 | print('Found', count, 'Hphobe interactions') 57 | 58 | protein_atoms, ligand_atoms, strict_parallel, strict_perpendicular = oddt.interactions.pi_stacking(protein, ligand) 59 | count = 0 60 | for p, l, s1, s2 in zip(protein_atoms, ligand_atoms, strict_parallel, strict_perpendicular): 61 | count += 1 62 | print(' PiStack', p['resname'] + str(p['resnum']), '-', s1, s2) 63 | print('Found', count, 'pistack interactions') 64 | 65 | count = 0 66 | rings, cation, strict = oddt.interactions.pi_cation(protein, ligand, cation_exact=exact_ligand) 67 | for ring, cat, s in zip(rings, cation, strict): 68 | count += 1 69 | print(' PiCation', ring['resname'] + str(ring['resnum']), 'protein-ligand -', s) 70 | rings, cation, strict = oddt.interactions.pi_cation(ligand, protein, cation_exact=False) 71 | for ring, cat, s in zip(rings, cation, strict): 72 | count += 1 73 | print(' PiCation', cat['resname'] + str(cat['resnum']), 'ligand-protein -', s) 74 | print('Found', count, 'pication interactions') 75 | 76 | protein_atoms, ligand_atoms, strict = oddt.interactions.halogenbonds(protein, ligand) 77 | count = 0 78 | for p, l, s in zip(protein_atoms, ligand_atoms, strict): 79 | count += 1 80 | print(' Halogen', p['resname'] + str(p['resnum']), '-', l['atomtype'], l['id'].item(), s) 81 | print('Found', count, 'halogen interactions') 82 | -------------------------------------------------------------------------------- /test-nextflow.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Simple manual tests for Nextflow workflows 3 | # Before running make sure you have the latest images by running `./gradlew buildDockerImages` 4 | 5 | set -e 6 | 7 | echo 'Running screen+conformers in basic mode' 8 | nextflow run src/nextflow/rdkit/screen+conformers.nf -c src/nextflow/rdkit/screen.config -with-docker 9 | 10 | 11 | echo 'Running SMoG2016 in basic mode' 12 | nextflow run src/nextflow/docking/smog.nf -c src/nextflow/docking/smog.config -with-docker --ligands data/smog/confs.sdf --protein data/smog/DCP2_1.pdb informaticsmatters/smog:latest 13 | 14 | echo 'Running PLI in basic mode' 15 | nextflow run src/nextflow/docking/plip.nf -c src/nextflow/docking/plip.config -with-docker --ligands data/smog/confs.sdf --protein data/smog/DCP2_1.pdb informaticsmatters/pli:latest 16 | 17 | echo 'Running rDock in basic mode' 18 | nextflow run src/nextflow/docking/rdock.nf -c src/nextflow/docking/rdock.config -with-docker\ 19 | --ligands data/hivpr_ligprep_100.sdf.gz\ 20 | --protein data/hivpr_rdock.mol2\ 21 | --asfile data/hivpr_rdock.as\ 22 | --prmfile data/hivpr_rdock.prm\ 23 | --num_dockings 2 24 | 25 | echo 'Running SMoG2016 in squonk mode' 26 | sudo rm -rf tmp/* 27 | cd tmp 28 | ln ../src/nextflow/docking/smog.nsd.nf nextflow.nf 29 | ln ../src/nextflow/docking/smog.nsd.config nextflow.config 30 | gzip -c ../data/smog/DCP2_1.pdb > protein.pdb.gz 31 | ln ../data/smog/confs.data.gz ligands.data.gz 32 | ln ../data/smog/confs.metadata ligands.metadata 33 | docker run -it --rm -v $PWD:$PWD:z -w $PWD -v /var/run/docker.sock:/var/run/docker.sock informaticsmatters/nextflow-docker:0.30.2 sh -c 'nextflow run nextflow.nf -c nextflow.config --score 100.0 -with-docker' 34 | cd .. 35 | 36 | echo 'Running PLI in squonk mode' 37 | sudo rm -rf tmp/* 38 | cd tmp 39 | ln ../src/nextflow/docking/plip.nsd.nf nextflow.nf 40 | ln ../src/nextflow/docking/plip.nsd.config nextflow.config 41 | gzip -c ../data/smog/DCP2_1.pdb > protein.pdb.gz 42 | ln ../data/smog/confs.data.gz ligands.data.gz 43 | ln ../data/smog/confs.metadata ligands.metadata 44 | docker run -it --rm -v $PWD:$PWD:z -w $PWD -v /var/run/docker.sock:/var/run/docker.sock informaticsmatters/nextflow-docker:0.30.2 sh -c 'nextflow run nextflow.nf -c nextflow.config --score 100.0 -with-docker' 45 | cd .. 46 | 47 | echo 'Running rDock in squonk mode' 48 | sudo rm -rf tmp/* 49 | cd tmp 50 | ln ../src/nextflow/docking/rdock.nsd.nf nextflow.nf 51 | ln ../src/nextflow/docking/rdock.nsd.config nextflow.config 52 | ln ../data/hivpr.config.zip config.zip 53 | ln ../data/dhfr_3d.data.gz ligands.data.gz 54 | ln ../data/dhfr_3d.metadata ligands.metadata 55 | docker run -it --rm -v $PWD:$PWD:z -w $PWD -v /var/run/docker.sock:/var/run/docker.sock informaticsmatters/nextflow-docker:0.30.2 sh -c 'nextflow run nextflow.nf -c nextflow.config --num_dockings 1 --limit 40 --chunk 5 -with-docker' 56 | cd .. 57 | 58 | echo 'Running screen in squonk mode' 59 | sudo rm -rf tmp/* 60 | cd tmp 61 | ln ../src/nextflow/rdkit/screen-dataset.nsd.nf nextflow.nf 62 | ln ../src/nextflow/rdkit/screen-dataset.nsd.config nextflow.config 63 | ln ../data/dhfr_3d.data.gz input.data.gz 64 | ln ../data/dhfr_3d.metadata ligands.metadata 65 | docker run -it --rm -v $PWD:$PWD:z -w $PWD -v /var/run/docker.sock:/var/run/docker.sock informaticsmatters/nextflow-docker:0.30.2\ 66 | sh -c 'nextflow run nextflow.nf -c nextflow.config -with-docker --chunk 100 --simmin 0.5 --qsmiles "OC(=O)C1=CC=C(NC2=NC3=C(CN=C(C4=CC(Cl)=CC=C34)C3=C(F)C=CC=C3F)C=N2)C=C1"' 67 | cd .. 68 | 69 | echo 'Running screen-multi in squonk mode' 70 | sudo rm -rf tmp/* 71 | cd tmp 72 | ln ../src/nextflow/rdkit/screen-multi-dataset.nsd.nf nextflow.nf 73 | ln ../src/nextflow/rdkit/screen-multi-dataset.nsd.config nextflow.config 74 | ln ../data/dhfr_3d.data.gz target.data.gz 75 | ln ../data/nci100.data.gz query.data.gz 76 | docker run -it --rm -v $PWD:$PWD:z -w $PWD -v /var/run/docker.sock:/var/run/docker.sock informaticsmatters/nextflow-docker:0.30.2\ 77 | sh -c 'nextflow run nextflow.nf -c nextflow.config -with-docker --chunk 100 --simmin 0.55' 78 | cd .. 79 | 80 | sudo rm -rf tmp/* --------------------------------------------------------------------------------