├── .dockerignore
├── .github
    └── workflows
    │   ├── build-latest.yaml
    │   ├── build-stable.yaml
    │   ├── build-tag.yaml
    │   └── build.yaml
├── .gitignore
├── Dockerfile-obabel
├── Dockerfile-pli
├── Dockerfile-rdkit
├── Dockerfile-rdkit-centos
├── Dockerfile-sdposter
├── Dockerfile-smog
├── Jenkinsfile
├── LICENSE
├── README.md
├── build.gradle
├── data
    ├── DCP2_1.pdb.gz
    ├── DCP2_1_confs.sdf.gz
    ├── Kinase_inhibs.sdf.gz
    ├── XChemReactionMaker1.sdf.gz
    ├── conformers_to_align.data.gz
    ├── dhfr_3d.data.gz
    ├── dhfr_3d.metadata
    ├── dhfr_3d.sdf
    ├── dhfr_3d.sdf.gz
    ├── hivpr.config.zip
    ├── hivpr_ligprep.sdf.gz
    ├── hivpr_ligprep_100.sdf.gz
    ├── hivpr_rdock.as
    ├── hivpr_rdock.mol2
    ├── hivpr_rdock.prm
    ├── mpro
    │   ├── Mpro-x0387_0.mol
    │   ├── Mpro-x0387_0.mol2
    │   ├── Mpro-x0387_0.pdb
    │   ├── Mpro-x0387_0.smi
    │   ├── Mpro-x0678_0_016.mol
    │   ├── Mpro-x0678_0_016.smi
    │   ├── docking-tethered.as
    │   ├── docking-tethered.prm
    │   ├── expanded-17.json
    │   ├── featurestein.p
    │   ├── hits-17.sdf.gz
    │   ├── hits-23.sdf.gz
    │   ├── hits-5.sdf.gz
    │   ├── poses.sdf
    │   └── poses.sdf.gz
    ├── nci10.smiles
    ├── nci100.data.gz
    ├── nudt7
    │   ├── ligands.data.gz
    │   ├── ligands.sdf.gz
    │   ├── receptor.mol2
    │   └── refmol.mol
    ├── pyrimethamine.mol
    ├── ref_mol.sdf.gz
    ├── sdf-aliphatic-primary-amines-175.data.gz
    ├── sdf-aliphatic-primary-amines-175.metadata
    ├── sdf-aliphatic-primary-amines-175.sdf.gz
    ├── smog
    │   ├── DCP2_1.pdb
    │   ├── NUDT22_holo.pdb
    │   ├── confs.data.gz
    │   ├── confs.metadata
    │   └── confs.sdf
    ├── sucos
    │   ├── 4e3g_lig.mol
    │   ├── benzene.sdf
    │   ├── hits.sdf
    │   ├── mols.sdf
    │   └── poses.sdf
    └── sulfonyl_chloride.sdf
├── environment-rdkit-utils.yml
├── execute
├── gradle
    └── wrapper
    │   ├── gradle-wrapper.jar
    │   └── gradle-wrapper.properties
├── gradlew
├── gradlew.bat
├── post-service-descriptors.sh
├── requirements-obabel.txt
├── requirements-rdkit.txt
├── src
    ├── nextflow
    │   ├── README.md
    │   ├── docking
    │   │   ├── .gitignore
    │   │   ├── plip.config
    │   │   ├── plip.nf
    │   │   ├── plip.nsd.config
    │   │   ├── plip.nsd.nf
    │   │   ├── plip.nsd.yml
    │   │   ├── rdock-filter.nsd.config
    │   │   ├── rdock-filter.nsd.nf
    │   │   ├── rdock-filter.nsd.yml
    │   │   ├── rdock.config
    │   │   ├── rdock.nf
    │   │   ├── rdock.nsd.config
    │   │   ├── rdock.nsd.nf
    │   │   ├── rdock.nsd.yml
    │   │   ├── smog.config
    │   │   ├── smog.nf
    │   │   ├── smog.nsd.config
    │   │   ├── smog.nsd.nf
    │   │   └── smog.nsd.yml
    │   ├── nextflow-docker.config
    │   ├── rdkit
    │   │   ├── screen+conformers.nf
    │   │   ├── screen-dataset.nsd.config
    │   │   ├── screen-dataset.nsd.nf
    │   │   ├── screen-dataset.nsd.yml
    │   │   ├── screen-multi-dataset.nsd.config
    │   │   ├── screen-multi-dataset.nsd.nf
    │   │   ├── screen-multi-dataset.nsd.yml
    │   │   ├── screen.config
    │   │   └── screen.nf
    │   └── xchem
    │   │   ├── dock-score.nf
    │   │   ├── expand.nf
    │   │   ├── featurestein.nf
    │   │   ├── prepare-tether-featurestein.nf
    │   │   ├── prepare-tether.nf
    │   │   ├── tether-dock-score.nf
    │   │   └── xcos.nf
    └── python
    │   ├── NNScore_pdbbind2016.pickle
    │   ├── README.rst
    │   ├── RFScore_v1_pdbbind2016.pickle
    │   ├── RFScore_v2_pdbbind2016.pickle
    │   ├── RFScore_v3_pdbbind2016.pickle
    │   ├── __init__.py
    │   ├── notebooks
    │       └── default.ipynb
    │   ├── pipelines
    │       ├── __init__.py
    │       ├── dimorphite
    │       │   ├── LICENSE.txt
    │       │   ├── README.txt
    │       │   ├── __init__.py
    │       │   ├── dimorphite_dl.py
    │       │   ├── enumerate_charges.dsd.yml
    │       │   ├── enumerate_charges.py
    │       │   ├── enumerate_charges.test
    │       │   ├── run.py
    │       │   └── site_substructures.smarts
    │       ├── dmpk
    │       │   ├── __init__.py
    │       │   ├── pk_tmax_cmax_sim.dsd.yml
    │       │   ├── pk_tmax_cmax_sim.py
    │       │   └── pk_tmax_cmax_sim.test
    │       ├── docking
    │       │   ├── __init__.py
    │       │   ├── obabel_prepare_pdb.dsd.yml
    │       │   ├── obabel_prepare_pdb.py
    │       │   ├── obabel_prepare_pdb.test
    │       │   ├── plip.py
    │       │   ├── plip.test
    │       │   ├── smog2016.py
    │       │   └── smog2016.test
    │       ├── rdkit
    │       │   ├── README.md
    │       │   ├── __init__.py
    │       │   ├── cluster_3d.py
    │       │   ├── cluster_butina.dsd.yml
    │       │   ├── cluster_butina.py
    │       │   ├── cluster_butina.test
    │       │   ├── cluster_butina_diverse_subset_picker.dsd.yml
    │       │   ├── cluster_butina_matrix.dsd.yml
    │       │   ├── cluster_butina_matrix.py
    │       │   ├── cluster_butina_matrix.test
    │       │   ├── conformers.dsd.yml
    │       │   ├── conformers.py
    │       │   ├── conformers.test
    │       │   ├── constrained_conf_gen.dsd.yml
    │       │   ├── constrained_conf_gen.py
    │       │   ├── constrained_conf_gen.test
    │       │   ├── enumerate_candidates.py
    │       │   ├── max_min_picker.py
    │       │   ├── max_min_picker.test
    │       │   ├── max_min_picker_enrich.dsd.yml
    │       │   ├── max_min_picker_simple.dsd.yml
    │       │   ├── o3dAlign.dsd.yml
    │       │   ├── o3dAlign.py
    │       │   ├── o3dAlign.test
    │       │   ├── pbf_ev.dsd.yml
    │       │   ├── pbf_ev.py
    │       │   ├── pbf_ev.test
    │       │   ├── poised_filter.py
    │       │   ├── prepare_3d.py
    │       │   ├── rxn_maker.dsd.yml
    │       │   ├── rxn_maker.py
    │       │   ├── rxn_maker.test
    │       │   ├── rxn_selector.py
    │       │   ├── rxn_selector.test
    │       │   ├── rxn_smarts_filter.dsd.yml
    │       │   ├── rxn_smarts_filter.py
    │       │   ├── rxn_smarts_filter.test
    │       │   ├── sanifier.py
    │       │   ├── sanifier.test
    │       │   ├── sanifier_enumerator.dsd.yml
    │       │   ├── sanifier_standardiser_flatkinson.dsd.yml
    │       │   ├── sanifier_standardiser_molvs.dsd.yml
    │       │   ├── sanify_utils.py
    │       │   ├── screen.py
    │       │   ├── screen.test
    │       │   ├── screen_multi.py
    │       │   ├── screen_multi.test
    │       │   ├── show_feats.py
    │       │   ├── standardize.dsd.yml
    │       │   ├── standardize.py
    │       │   ├── standardize.test
    │       │   ├── sucos-max.dsd.yml
    │       │   ├── sucos-max.test
    │       │   ├── sucos.dsd.yml
    │       │   ├── sucos.py
    │       │   ├── sucos.test
    │       │   └── sucos_max.py
    │       └── xchem
    │       │   ├── __init__.py
    │       │   ├── build_oddt_models.py
    │       │   ├── calc_interactions.py
    │       │   ├── featurestein_generate.py
    │       │   ├── featurestein_generate.test
    │       │   ├── featurestein_generate_and_score.py
    │       │   ├── featurestein_generate_and_score.test
    │       │   ├── featurestein_score.py
    │       │   ├── featurestein_score.test
    │       │   ├── filter_interactions.py
    │       │   ├── fragnet_expand.py
    │       │   ├── interactions.py
    │       │   ├── prepare_tether.py
    │       │   ├── rdkit_align.py
    │       │   ├── rmsd_filter.py
    │       │   ├── split_fragnet_candidates.py
    │       │   ├── xcos.py
    │       │   └── xcos.test
    │   ├── setup.py
    │   └── simple_inters.py
├── test-nextflow.sh
└── test-rdkit.sh


/.dockerignore:
--------------------------------------------------------------------------------
1 | data
2 | gradle
3 | .gradle
4 | openshift
5 | work
6 | .nextflow
7 | trace.txt*
8 | report.html*
9 | wip


--------------------------------------------------------------------------------
/.github/workflows/build-latest.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: build latest
 3 | 
 4 | # Actions on the main/master branch for every change.
 5 | 
 6 | # -----------------
 7 | # Control variables (GitHub Secrets)
 8 | # -----------------
 9 | #
10 | # At the GitHub 'organisation' or 'project' level you must have the following
11 | # GitHub 'Repository Secrets' defined (i.e. via 'Settings -> Secrets'): -
12 | #
13 | # DOCKERHUB_USERNAME
14 | # DOCKERHUB_TOKEN
15 | #
16 | # -----------
17 | # Environment (GitHub Environments)
18 | # -----------
19 | #
20 | # Environment         (n/a)
21 | 
22 | on:
23 |   push:
24 |     branches:
25 |     - 'master'
26 |   repository_dispatch:
27 |     types:
28 |     - pipelines-utils-rdkit
29 |     - pipelines-utils
30 | 
31 | jobs:
32 |   build:
33 |     runs-on: ubuntu-latest
34 |     steps:
35 |     - uses: actions/checkout@v2
36 |     # Build the images
37 |     - name: Login to DockerHub
38 |       uses: docker/login-action@v1
39 |       with:
40 |         username: ${{ secrets.DOCKERHUB_USERNAME }}
41 |         password: ${{ secrets.DOCKERHUB_TOKEN }}
42 |     - name: Build pipelines
43 |       uses: docker/build-push-action@v2
44 |       with:
45 |         push: false
46 |         file: Dockerfile-rdkit
47 |         tags: informaticsmatters/rdkit_pipelines:latest
48 |     # Test the images
49 |     # We need Java and Groovy
50 |     - name: Set up Java 11
51 |       uses: actions/setup-java@v1
52 |       with:
53 |         java-version: 11
54 |     - name: Run pipeline tester
55 |       uses: informaticsmatters/pipeline-tester-action@v1
56 |     # Publish the images
57 |     - name: Push
58 |       run: docker push informaticsmatters/rdkit_pipelines:latest
59 |     - name: Build and push pipelines sdposter
60 |       uses: docker/build-push-action@v2
61 |       with:
62 |         push: true
63 |         file: Dockerfile-sdposter
64 |         tags: squonk/rdkit-pipelines-sdposter:latest
65 | 


--------------------------------------------------------------------------------
/.github/workflows/build-stable.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: build stable
 3 | 
 4 | # Actions for an 'official' tag.
 5 | 
 6 | # An official tag is a 2 or 3-digit value (i.e. 'N.N[.N]').
 7 | # We publish images using the tag as a tag and one using 'stable' as a tag.
 8 | 
 9 | # -----------------
10 | # Control variables (GitHub Secrets)
11 | # -----------------
12 | #
13 | # At the GitHub 'organisation' or 'project' level you must have the following
14 | # GitHub 'Repository Secrets' defined (i.e. via 'Settings -> Secrets'): -
15 | #
16 | # DOCKERHUB_USERNAME
17 | # DOCKERHUB_TOKEN
18 | #
19 | # -----------
20 | # Environment (GitHub Environments)
21 | # -----------
22 | #
23 | # Environment         (n/a)
24 | 
25 | on:
26 |   push:
27 |     tags:
28 |     - '[0-9]+.[0-9]+.[0-9]+'
29 |     - '[0-9]+.[0-9]+'
30 | 
31 | jobs:
32 |   publish-stable:
33 |     runs-on: ubuntu-latest
34 |     steps:
35 |     - uses: actions/checkout@v2
36 |     - name: Inject slug/short variables
37 |       uses: rlespinasse/github-slug-action@v3.x
38 |     - name: Login to DockerHub
39 |       uses: docker/login-action@v1
40 |       with:
41 |         username: ${{ secrets.DOCKERHUB_USERNAME }}
42 |         password: ${{ secrets.DOCKERHUB_TOKEN }}
43 |     - name: Build and push pipelines
44 |       uses: docker/build-push-action@v2
45 |       with:
46 |         push: true
47 |         file: Dockerfile-rdkit
48 |         tags: |
49 |           informaticsmatters/rdkit_pipelines:${{ env.GITHUB_REF_SLUG }}
50 |           informaticsmatters/rdkit_pipelines:stable
51 |     # Build and publish the service-descriptor poster image.
52 |     # Dockerfile-sdposter copies its 'image_tag' build argument into the
53 |     # IMAGE_TAG environment variable (default 'latest'), so pass the Git tag
54 |     # to keep the poster in step with the pipelines image published above.
55 |     - name: Build and push pipelines sdposter
56 |       uses: docker/build-push-action@v2
57 |       with:
58 |         push: true
59 |         file: Dockerfile-sdposter
60 |         build-args: |
61 |           image_tag=${{ env.GITHUB_REF_SLUG }}
62 |         tags: |
63 |           squonk/rdkit-pipelines-sdposter:${{ env.GITHUB_REF_SLUG }}
64 |           squonk/rdkit-pipelines-sdposter:stable
65 | 


--------------------------------------------------------------------------------
/.github/workflows/build-tag.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: build tag
 3 | 
 4 | # Actions for any 'unofficial' tag.
 5 | 
 6 | # It's not an official tag if it's not formed from 2 or 3 digits
 7 | # (i.e. is not 'N.N[.N]'). We publish images using the tag as a tag.
 8 | 
 9 | # -----------------
10 | # Control variables (GitHub Secrets)
11 | # -----------------
12 | #
13 | # At the GitHub 'organisation' or 'project' level you must have the following
14 | # GitHub 'Repository Secrets' defined (i.e. via 'Settings -> Secrets'): -
15 | #
16 | # DOCKERHUB_USERNAME
17 | # DOCKERHUB_TOKEN
18 | #
19 | # -----------
20 | # Environment (GitHub Environments)
21 | # -----------
22 | #
23 | # Environment         (n/a)
24 | 
25 | on:
26 |   push:
27 |     tags:
28 |     - '**'
29 |     - '![0-9]+.[0-9]+.[0-9]+'
30 |     - '![0-9]+.[0-9]+'
31 | 
32 | jobs:
33 |   publish-tag:
34 |     runs-on: ubuntu-latest
35 |     steps:
36 |     - uses: actions/checkout@v2
37 |     # Build and push the images with a tag.
38 |     # Testing will have been done using the 'build-latest' workflow.
39 |     - name: Inject slug/short variables
40 |       uses: rlespinasse/github-slug-action@v3.x
41 |     - name: Login to DockerHub
42 |       uses: docker/login-action@v1
43 |       with:
44 |         username: ${{ secrets.DOCKERHUB_USERNAME }}
45 |         password: ${{ secrets.DOCKERHUB_TOKEN }}
46 |     - name: Build and push pipelines
47 |       uses: docker/build-push-action@v2
48 |       with:
49 |         push: true
50 |         file: Dockerfile-rdkit
51 |         tags: informaticsmatters/rdkit_pipelines:${{ env.GITHUB_REF_SLUG }}
52 |     # Build and publish the service-descriptor poster image.
53 |     # Dockerfile-sdposter copies its 'image_tag' build argument into the
54 |     # IMAGE_TAG environment variable (default 'latest'), so pass the Git tag
55 |     # to keep the poster in step with the pipelines image published above.
56 |     - name: Build and push pipelines sdposter
57 |       uses: docker/build-push-action@v2
58 |       with:
59 |         push: true
60 |         file: Dockerfile-sdposter
61 |         build-args: |
62 |           image_tag=${{ env.GITHUB_REF_SLUG }}
63 |         tags: squonk/rdkit-pipelines-sdposter:${{ env.GITHUB_REF_SLUG }}
64 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: build
 3 | 
 4 | # Actions that take place on branches
 5 | # or are driven by pull-requests to the main/master branch.
 6 | # Here we build container images but don't push them
 7 | # and therefore do not require docker credentials.
 8 | 
 9 | # -----------------
10 | # Control variables (GitHub Secrets)
11 | # -----------------
12 | #
13 | # (n/a)
14 | #
15 | # -----------
16 | # Environment (GitHub Environments)
17 | # -----------
18 | #
19 | # Environment         (n/a)
20 | 
21 | on:
22 |   push:
23 |     branches-ignore:
24 |     - 'master'
25 |   pull_request:
26 |     branches:
27 |     - 'master'
28 | 
29 | jobs:
30 |   build:
31 |     runs-on: ubuntu-latest
32 |     steps:
33 |     - uses: actions/checkout@v2
34 |     # Build the images
35 |     - name: Build pipelines
36 |       uses: docker/build-push-action@v2
37 |       with:
38 |         file: Dockerfile-rdkit
39 |         tags: informaticsmatters/rdkit_pipelines:latest
40 |     - name: Build pipelines sdposter
41 |       uses: docker/build-push-action@v2
42 |       with:
43 |         file: Dockerfile-sdposter
44 |         tags: squonk/rdkit-pipelines-sdposter:latest
45 |     # Test the images
46 |     # We need Java and Groovy
47 |     - name: Set up Java 11
48 |       uses: actions/setup-java@v1
49 |       with:
50 |         java-version: 11
51 |     - name: Run pipeline tester
52 |       uses: informaticsmatters/pipeline-tester-action@v1
53 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *~
 2 | .gradle
 3 | .idea
 4 | *.ipr
 5 | *.iws
 6 | build
 7 | dist
 8 | *.iml
 9 | *.pyc
10 | work
11 | .nextflow
12 | .nextflow.log*
13 | /tmp
14 | **/*.egg-info
15 | **/.DS_Store
16 | *.retry


--------------------------------------------------------------------------------
/Dockerfile-obabel:
--------------------------------------------------------------------------------
 1 | # We should create an Open Babel implementation of pipeline_utils that handles the basic I/O for
 2 | # structure files so that the dependency on RDKit can be removed.
 3 | # See https://github.com/InformaticsMatters/pipelines-obabel/issues/1
 4 | 
 5 | FROM informaticsmatters/obabel:latest
 6 | LABEL maintainer="Tim Dudgeon<tdudgeon@informaticsmatters.com>"
 7 | 
 8 | USER root
 9 | 
10 | # Copy the obabel pipeline implementation into the image
11 | COPY src/python /opt/python-obabel
12 | RUN apt-get update && \
13 |     apt-get install -y --no-install-recommends \
14 |         python-setuptools \
15 |         gzip \
16 |         python-pip && \
17 |     pip install -e /opt/python-obabel
18 | # And the project pip requirements
19 | COPY requirements-obabel.txt /root/
20 | RUN pip install -r /root/requirements-obabel.txt
21 | 
22 | # The CMD is simply to run 'execute' in the WORKDIR.
23 | # The user would normally mount a volume with their own execute
24 | # script in it and then set the WORKDIR to the directory it's in.
25 | # In its absence we just run the built-in 'execute',
26 | # which is expected to echo some descriptive/helpful text.
27 | #
28 | # The default 'execute' relies on an ENV to name the pipeline it's in,
29 | # which can be defined with the docker 'pipeline' build argument.
30 | ARG pipeline=informaticsmatters/pipelines-obabel:latest
31 | ENV PIPELINE=$pipeline
32 | WORKDIR /home/obabel
33 | COPY execute ./
34 | RUN chown obabel:0 ./execute && \
35 |     chmod +x ./execute
36 | CMD ["./execute"]
37 | 
38 | USER obabel
39 | 


--------------------------------------------------------------------------------
/Dockerfile-pli:
--------------------------------------------------------------------------------
 1 | FROM informaticsmatters/rdkit_pipelines:latest
 2 | LABEL maintainer="Tim Dudgeon<tdudgeon@informaticsmatters.com>"
 3 | 
 4 | 
 5 | USER root
 6 | RUN apt-get update -y && apt-get install zlib1g-dev make gcc git -y
 7 | RUN mkdir -p /usr/local/
 8 | WORKDIR /usr/local/
 9 | RUN git clone https://bitbucket.org/AstexUK/pli.git
10 | WORKDIR /usr/local/pli
11 | RUN make
12 | 
13 | RUN useradd -u 1001 -g 0 -m pli
14 | 
15 | WORKDIR /home/pli
16 | ENV PLI_DIR /usr/local/pli
17 | 
18 | # The CMD is simply to run 'execute' in the WORKDIR.
19 | # The user would normally mount a volume with their own execute
20 | # script in it and then set the WORKDIR to the directory it's in.
21 | # In its absence we just run the built-in 'execute',
22 | # which is expected to echo some descriptive/helpful text.
23 | #
24 | # The default 'execute' relies on an ENV to name the pipeline it's in,
25 | # which can be defined with the docker 'pipeline' build argument.
26 | ARG pipeline=informaticsmatters/pli:latest
27 | ENV PIPELINE=$pipeline
28 | COPY execute ./
29 | RUN chown 1001:0 ./execute && \
30 |     chmod +x ./execute
31 | CMD ["./execute"]
32 | 
33 | USER 1001
34 | 


--------------------------------------------------------------------------------
/Dockerfile-rdkit:
--------------------------------------------------------------------------------
 1 | FROM informaticsmatters/rdkit-python3-debian:Release_2020_09_1
 2 | LABEL maintainer="Tim Dudgeon<tdudgeon@informaticsmatters.com>"
 3 | 
 4 | USER root
 5 | 
 6 | # install required packages
 7 | RUN apt-get -y update && apt-get -y install zip unzip procps
 8 | 
 9 | # Copy the pipeline implementation into the image
10 | COPY src/python /opt/python
11 | COPY requirements-rdkit.txt /root/
12 | RUN pip install -e /opt/python
13 | # And the pip packages from the project requirements
14 | # NOTE: matplotlib is missing
15 | RUN pip install -r /root/requirements-rdkit.txt
16 | 
17 | # The CMD is simply to run 'execute' in the WORKDIR.
18 | # The user would normally mount a volume with their own execute
19 | # script in it and then set the WORKDIR to the directory it's in.
20 | # In its absence we just run the built-in 'execute',
21 | # which is expected to echo some descriptive/helpful text.
22 | #
23 | # The default 'execute' relies on an ENV to name the pipeline it's in,
24 | # which can be defined with the docker 'pipeline' build argument.
25 | ARG pipeline=informaticsmatters/rdkit_pipelines:latest
26 | ENV PIPELINE=$pipeline
27 | WORKDIR /home/rdkit
28 | COPY execute ./
29 | RUN chmod +x ./execute
30 | CMD ["./execute"]
31 | 


--------------------------------------------------------------------------------
/Dockerfile-rdkit-centos:
--------------------------------------------------------------------------------
 1 | FROM informaticsmatters/rdkit-python-centos:latest
 2 | LABEL maintainer="Tim Dudgeon<tdudgeon@informaticsmatters.com>"
 3 | 
 4 | USER root
 5 | 
 6 | # install required packages
 7 | RUN yum -y update && yum -y install zip unzip python-devel python2-pip python-setuptools python2-matplotlib
 8 | 
 9 | # Copy the pipeline implementation into the image
10 | COPY src/python /opt/python
11 | COPY requirements-rdkit.txt /root/
12 | RUN pip install -e /opt/python
13 | # And the pip packages from the project requirements
14 | # NOTE: matplotlib is missing
15 | RUN pip install -r /root/requirements-rdkit.txt
16 | 
17 | # The CMD is simply to run 'execute' in the WORKDIR.
18 | # The user would normally mount a volume with their own execute
19 | # script in it and then set the WORKDIR to the directory it's in.
20 | # In its absence we just run the built-in 'execute',
21 | # which is expected to echo some descriptive/helpful text.
22 | #
23 | # The default 'execute' relies on an ENV to name the pipeline it's in,
24 | # which can be defined with the docker 'pipeline' build argument.
25 | ARG pipeline=informaticsmatters/rdkit_pipelines:latest
26 | ENV PIPELINE=$pipeline
27 | WORKDIR /home/rdkit
28 | COPY execute ./
29 | RUN chmod +x ./execute
30 | CMD ["./execute"]
31 | 


--------------------------------------------------------------------------------
/Dockerfile-sdposter:
--------------------------------------------------------------------------------
 1 | FROM centos:7
 2 | LABEL maintainer="Tim Dudgeon<tdudgeon@informaticsmatters.com>"
 3 | 
 4 | # The image tag for the pipelines we're expected to post.
 5 | # By default this is 'latest' but the build environment can
 6 | # use a build-arg ('image_tag') to over-ride this.
 7 | # So, a poster container image built for Git tag '1.0.0' would be expected
 8 | # to have its IMAGE_TAG environment variable set to '1.0.0' and therefore
 9 | # running poster:1.0.0 would inject pipelines for container image '1.0.0'
10 | ARG image_tag=latest
11 | ENV IMAGE_TAG=$image_tag
12 | 
13 | # An image to populate the Core with the contents of the
14 | # Service Descriptors located in SD_SRC.
15 | 
16 | ENV SD_SRC=/sd-src
17 | WORKDIR ${SD_SRC}
18 | 
19 | # Copy all potential Service Descriptors into the image...
20 | COPY src/python/ ${SD_SRC}/src/python/
21 | COPY src/nextflow/ ${SD_SRC}/src/nextflow/
22 | COPY post-service-descriptors.sh ${SD_SRC}/
23 | RUN chmod 755 post-service-descriptors.sh
24 | 
25 | # On execution run the poster script (shell form, so it runs via /bin/sh -c)...
26 | CMD ./post-service-descriptors.sh
27 | 


--------------------------------------------------------------------------------
/Dockerfile-smog:
--------------------------------------------------------------------------------
 1 | FROM informaticsmatters/rdkit_pipelines:latest
 2 | LABEL maintainer="Tim Dudgeon<tdudgeon@informaticsmatters.com>"
 3 | 
 4 | 
 5 | USER root
 6 | 
 7 | RUN echo 'deb http://deb.debian.org/debian experimental main' >> /etc/apt/sources.list
 8 | 
 9 | RUN apt-get update -y && apt-get install -t experimental libopenbabel-dev g++ -y
10 | 
11 | WORKDIR /usr/local
12 | RUN wget https://sourceforge.net/projects/opengrowth/files/SMoG2016.tar.gz/download -O smog.tar.gz &&\
13 |   tar xfz smog.tar.gz &&\
14 |   rm smog.tar.gz &&\
15 |   g++ -O3 -Wall -std=c++11 -Wno-uninitialized -I/usr/include/openbabel-2.0 -lm -c /usr/local/SMoG2016/SMoG2016.cpp -o /usr/local/SMoG2016/SMoG2016.o &&\
16 |   g++ /usr/local/SMoG2016/SMoG2016.o -o /usr/local/SMoG2016/SMoG2016.exe -rdynamic /usr/lib/libopenbabel.so -Wl,-rpath,/usr/lib
17 | 
18 | ARG USERID=1001
19 | 
20 | RUN useradd -u $USERID -g 0 -m smog
21 | 
22 | # The CMD is simply to run 'execute' in the WORKDIR.
23 | # The user would normally mount a volume with their own execute
24 | # script in it and then set the WORKDIR to the directory it's in.
25 | # In its absence we just run the built-in 'execute',
26 | # which is expected to echo some descriptive/helpful text.
27 | #
28 | # The default 'execute' relies on an ENV to name the pipeline it's in,
29 | # which can be defined with the docker 'pipeline' build argument.
30 | ARG pipeline=informaticsmatters/smog:latest
31 | ENV PIPELINE=$pipeline
32 | WORKDIR /home/smog
33 | COPY execute ./
34 | RUN chown $USERID:0 ./execute && \
35 |     chmod +x ./execute
36 | CMD ["./execute"]
37 | 
38 | USER $USERID
39 | 


--------------------------------------------------------------------------------
/Jenkinsfile:
--------------------------------------------------------------------------------
 1 | #!groovy
 2 | 
 3 | // Part of the Squonk/OpenShift CI/CD Jenkins Pipeline.
 4 | //
 5 | // This is the primary CI/CD pipeline, which provides basic assembly,
 6 | // unit testing and Docker image construction. Other pipelines may offer
 7 | // static analysis and code coverage for example.
 8 | 
 9 | pipeline {
10 | 
11 |     // As we may need different flavours of agent,
12 |     // the agent definition is deferred to each stage.
13 |     agent none
14 | 
15 |     // Some environment variables for every stage...
16 |     environment {
17 | 
18 |         USER = 'jenkins'
19 |         REGISTRY = 'docker-registry.default:5000'
20 |         NAMESPACE = 'squonk-cicd'
21 | 
22 |         PIPELINES_IMAGE = 'rdkit_pipelines'
23 |         LOADER_IMAGE = "${PIPELINES_IMAGE}_loader"
24 |         TAG = 'latest'
25 | 
26 |         P_IMAGE = "${NAMESPACE}/${PIPELINES_IMAGE}:${TAG}"
27 |         L_IMAGE = "${NAMESPACE}/${LOADER_IMAGE}:${TAG}"
28 | 
29 |     }
30 | 
31 |     stages {
32 | 
33 |         // --------------------------------------------------------------------
34 |         // Deploy
35 |         // --------------------------------------------------------------------
36 | 
37 |         stage ('Deploy') {
38 | 
39 |             // Here we build and Deploy the docker images.
40 |             // We need a custom agent that's capable of building images.
41 |             agent {
42 |                 label 'buildah-slave'
43 |             }
44 | 
45 |             steps {
46 | 
47 |                 // Registry details (from the pipeline 'environment' block)...
48 |                 echo "Expecting registry at ${env.REGISTRY}"
49 |                 echo "Expecting registry user ${env.USER}"
50 |                 echo "Expecting registry project ${env.NAMESPACE}"
51 | 
52 |                 // Expose tool versions...
53 |                 sh 'buildah -v'
54 |                 sh 'podman -v'
55 |                 sh 'skopeo -v'
56 | 
57 |                 // Build...
58 |                 // (Small image first: the sdposter 'loader', then the pipelines image)
59 |                 sh "buildah bud --format docker -f Dockerfile-sdposter -t ${env.L_IMAGE} ."
60 |                 sh "buildah bud --format docker -f Dockerfile-rdkit -t ${env.P_IMAGE} ."
61 | 
62 |                 // Deploy...
63 |                 // Get the login token of the current 'oc' user
64 |                 script {
65 |                     TOKEN = sh(script: 'oc whoami -t', returnStdout: true).trim()
66 |                 }
67 |                 // Login to the target registry, push images (currently disabled) and logout
68 |                 sh "podman login --tls-verify=false --username ${env.USER} --password ${TOKEN} ${env.REGISTRY}"
69 | //                sh "buildah push --tls-verify=false ${env.P_IMAGE} docker://${env.REGISTRY}/${env.P_IMAGE}"
70 | //                sh "buildah push --tls-verify=false ${env.L_IMAGE} docker://${env.REGISTRY}/${env.L_IMAGE}"
71 |                 sh "podman logout ${env.REGISTRY}"
72 | 
73 |             }
74 | 
75 |         }
76 | 
77 |     }
78 | 
79 |     // End-of-pipeline post-processing actions...
80 |     post {
81 |         // On failure e-mail the maintainers ('to' is a comma-separated list).
82 |         failure {
83 |             mail to: 'achristie@informaticsmatters.com,tdudgeon@informaticsmatters.com',
84 |             subject: 'Failed Pipelines Job',
85 |             body: "Something is wrong with the Squonk CI/CD PIPELINES build ${env.BUILD_URL}"
86 |         }
87 | 
88 |     }
89 | 
90 | }
91 | 


--------------------------------------------------------------------------------
/data/DCP2_1.pdb.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/DCP2_1.pdb.gz


--------------------------------------------------------------------------------
/data/DCP2_1_confs.sdf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/DCP2_1_confs.sdf.gz


--------------------------------------------------------------------------------
/data/Kinase_inhibs.sdf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/Kinase_inhibs.sdf.gz


--------------------------------------------------------------------------------
/data/XChemReactionMaker1.sdf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/XChemReactionMaker1.sdf.gz


--------------------------------------------------------------------------------
/data/conformers_to_align.data.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/conformers_to_align.data.gz


--------------------------------------------------------------------------------
/data/dhfr_3d.data.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/dhfr_3d.data.gz


--------------------------------------------------------------------------------
/data/dhfr_3d.metadata:
--------------------------------------------------------------------------------
1 | {"type": "org.squonk.types.MoleculeObject"}


--------------------------------------------------------------------------------
/data/dhfr_3d.sdf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/dhfr_3d.sdf.gz


--------------------------------------------------------------------------------
/data/hivpr.config.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/hivpr.config.zip


--------------------------------------------------------------------------------
/data/hivpr_ligprep.sdf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/hivpr_ligprep.sdf.gz


--------------------------------------------------------------------------------
/data/hivpr_ligprep_100.sdf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/hivpr_ligprep_100.sdf.gz


--------------------------------------------------------------------------------
/data/hivpr_rdock.as:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/hivpr_rdock.as


--------------------------------------------------------------------------------
/data/hivpr_rdock.prm:
--------------------------------------------------------------------------------
 1 | RBT_PARAMETER_FILE_V1.00
 2 | TITLE hivpr_DUD
 3 | 
 4 | RECEPTOR_FILE hivpr_rdock.mol2
 5 | RECEPTOR_FLEX 3.0
 6 | 
 7 | ##################################################################
 8 | ### CAVITY DEFINITION: REFERENCE LIGAND METHOD
 9 | ##################################################################
10 | SECTION MAPPER
11 |     SITE_MAPPER RbtLigandSiteMapper
12 |     REF_MOL xtal-lig.sd
13 |     RADIUS 6.0
14 |     SMALL_SPHERE 1.0
15 |     MIN_VOLUME 100
16 |     MAX_CAVITIES 1
17 |     VOL_INCR 0.0
18 |    GRIDSTEP 0.5
19 | END_SECTION
20 | 
21 | #################################
22 | #CAVITY RESTRAINT PENALTY
23 | #################################
24 | SECTION CAVITY
25 |     SCORING_FUNCTION RbtCavityGridSF
26 |     WEIGHT 1.0
27 | END_SECTION
28 | 
29 | 


--------------------------------------------------------------------------------
/data/mpro/Mpro-x0387_0.mol:
--------------------------------------------------------------------------------
 1 | Mpro-x0387_0
 2 |      RDKit          3D
 3 | 
 4 |  13 14  0  0  0  0  0  0  0  0999 V2000
 5 |     9.0650   -4.7370   27.6980 O   0  0  0  0  0  0  0  0  0  0  0  0
 6 |     9.2630   -5.0400   26.3380 C   0  0  0  0  0  0  0  0  0  0  0  0
 7 |    10.5520   -4.5380   25.9490 C   0  0  0  0  0  0  0  0  0  0  0  0
 8 |    10.4810   -3.0380   25.6850 C   0  0  0  0  0  0  0  0  0  0  0  0
 9 |     9.7300   -2.7840   24.4990 N   0  0  0  0  0  0  0  0  0  0  0  0
10 |     9.6860   -1.6170   24.1270 C   0  0  0  0  0  0  0  0  0  0  0  0
11 |    11.0770   -1.2320   23.6120 C   0  0  0  0  0  0  0  0  0  0  0  0
12 |    11.9690   -0.1320   24.1710 C   0  0  0  0  0  0  0  0  0  0  0  0
13 |    13.2030   -0.0890   23.4410 C   0  0  0  0  0  0  0  0  0  0  0  0
14 |    13.2070   -1.2250   22.2850 S   0  0  0  0  0  0  0  0  0  0  0  0
15 |    11.7550   -1.8900   22.5360 C   0  0  0  0  0  0  0  0  0  0  0  0
16 |     8.5600   -3.6210   24.3460 C   0  0  0  0  0  0  0  0  0  0  0  0
17 |     8.1750   -4.4830   25.5640 C   0  0  0  0  0  0  0  0  0  0  0  0
18 |   2  1  1  0
19 |   2  3  1  0
20 |   2 13  1  0
21 |   3  4  1  0
22 |  13 12  1  0
23 |   4  5  1  0
24 |   5  6  1  0
25 |   5 12  1  0
26 |   6  7  1  0
27 |   7  8  1  0
28 |   7 11  2  0
29 |   8  9  2  0
30 |  11 10  1  0
31 |   9 10  1  0
32 | M  END
33 | 


--------------------------------------------------------------------------------
/data/mpro/Mpro-x0387_0.smi:
--------------------------------------------------------------------------------
 1 | c1cc(CN2CCCCC2)cs1
 2 | CN1CCC(O)CC1
 3 | c1cc(OC2CCN(Cc3ccsc3)CC2)ccn1
 4 | OCC1CN(Cc2ccsc2)CCC1O
 5 | OCC1(O)CCN(Cc2ccsc2)CC1
 6 | OC1CCN(Cc2cscc2Cl)CC1
 7 | OC1CCN(Cc2cscc2C(F)(F)F)CC1
 8 | OC1CCN(Cc2cscc2Br)CC1
 9 | OC1CCN(Cc2csc(Cl)c2)CC1
10 | OC1CCN(Cc2csc(C(F)(F)F)c2)CC1
11 | OC1CCN(Cc2csc(Br)c2)CC1
12 | OC1CCN(Cc2ccsc2Cl)CC1
13 | OC1CCN(Cc2ccsc2C(F)(F)F)CC1
14 | OC1CCN(Cc2ccsc2Br)CC1
15 | OC1CCN(Cc2ccsc2)CC1F
16 | OC1CCN(Cc2ccsc2)C(c2ccccc2)C1
17 | OC1(c2ccccn2)CCN(Cc2ccsc2)CC1
18 | OC1(C(F)(F)F)CCN(Cc2ccsc2)CC1
19 | O=Cc1cc(CN2CCC(O)CC2)cs1
20 | NCC1(O)CCN(Cc2ccsc2)CC1
21 | NC(=O)C1(O)CCN(Cc2ccsc2)CC1
22 | N#Cc1cc(CN2CCC(O)CC2)cs1
23 | N#CC1(O)CCN(Cc2ccsc2)CC1
24 | Cc1sccc1CN1CCC(O)CC1
25 | Cc1cscc1CN1CCC(O)CC1
26 | Cc1cc(CN2CCC(O)CC2)cs1
27 | COC1CN(Cc2ccsc2)CCC1O
28 | COC(=O)c1sccc1CN1CCC(O)CC1
29 | COC(=O)c1cc(CN2CCC(O)CC2)cs1
30 | COC(=O)C1(O)CCN(Cc2ccsc2)CC1
31 | CN(C)CC1(O)CCN(Cc2ccsc2)CC1
32 | CCC1CN(Cc2ccsc2)CCC1O
33 | CCC1(O)CCN(Cc2ccsc2)CC1
34 | CC1CN(Cc2ccsc2)CCC1O
35 | CC1CC(O)CCN1Cc1ccsc1
36 | C[C@@H]1C[C@H](O)CCN1Cc1ccsc1
37 | CC1(O)CCN(Cc2ccsc2)CC1
38 | C#CC1(O)CCN(Cc2ccsc2)CC1
39 | 


--------------------------------------------------------------------------------
/data/mpro/Mpro-x0678_0_016.mol:
--------------------------------------------------------------------------------
 1 | Mpro-x2193_0
 2 |      RDKit          2D
 3 | 
 4 |  12 13  0  0  0  0  0  0  0  0999 V2000
 5 |     5.0456   -2.1321    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
 6 |     5.0456    0.3429    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
 7 |     5.0456   -3.7821    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
 8 |     5.7600   -0.0696    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
 9 |     5.7600   -0.8946    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
10 |     5.0456   -1.3071    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
11 |     4.3311   -0.8946    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
12 |     4.3311   -0.0696    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
13 |     5.7600   -2.5446    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
14 |     5.7600   -3.3696    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
15 |     4.3311   -3.3696    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
16 |     4.3311   -2.5446    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
17 |   1  6  1  0
18 |   1  9  1  0
19 |   1 12  1  0
20 |   6  5  1  0
21 |   6  7  1  0
22 |   9 10  1  0
23 |  12 11  1  0
24 |   2  4  1  0
25 |   2  8  1  0
26 |   4  5  1  0
27 |   8  7  1  0
28 |   3 10  1  0
29 |   3 11  1  0
30 | M  END


--------------------------------------------------------------------------------
/data/mpro/docking-tethered.as:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/mpro/docking-tethered.as


--------------------------------------------------------------------------------
/data/mpro/docking-tethered.prm:
--------------------------------------------------------------------------------
 1 | RBT_PARAMETER_FILE_V1.00
 2 | TITLE Mpro virtual screening
 3 | 
 4 | RECEPTOR_FILE receptor.mol2
 5 | RECEPTOR_FLEX 3.0
 6 | 
 7 | ##################################################################
 8 | ### CAVITY DEFINITION: REFERENCE LIGAND METHOD
 9 | ##################################################################
10 | SECTION MAPPER
11 |     SITE_MAPPER RbtLigandSiteMapper
12 |     REF_MOL hits_frankenstein.sdf
13 |     RADIUS 3.0
14 |     SMALL_SPHERE 1.0
15 |     MIN_VOLUME 100
16 |     MAX_CAVITIES 1
17 |     VOL_INCR 0.0
18 |     GRIDSTEP 0.5
19 | END_SECTION
20 | 
21 | 
22 | #################################
23 | #CAVITY RESTRAINT PENALTY
24 | #################################
25 | SECTION CAVITY
26 |     SCORING_FUNCTION RbtCavityGridSF
27 |     WEIGHT 1.0
28 | END_SECTION
29 | 
30 | SECTION LIGAND
31 |     TRANS_MODE TETHERED
32 |     ROT_MODE TETHERED
33 |     DIHEDRAL_MODE TETHERED
34 |     MAX_TRANS 0.1
35 |     MAX_ROT 1
36 |     MAX_DIHEDRAL 1
37 | END_SECTION
38 | 
39 | 


--------------------------------------------------------------------------------
/data/mpro/featurestein.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/mpro/featurestein.p


--------------------------------------------------------------------------------
/data/mpro/hits-17.sdf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/mpro/hits-17.sdf.gz


--------------------------------------------------------------------------------
/data/mpro/hits-23.sdf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/mpro/hits-23.sdf.gz


--------------------------------------------------------------------------------
/data/mpro/hits-5.sdf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/mpro/hits-5.sdf.gz


--------------------------------------------------------------------------------
/data/mpro/poses.sdf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/mpro/poses.sdf.gz


--------------------------------------------------------------------------------
/data/nci10.smiles:
--------------------------------------------------------------------------------
 1 | CC1=CC(=O)C=CC1=O	1
 2 | S(SC1=NC2=CC=CC=C2S1)C3=NC4=C(S3)C=CC=C4	2
 3 | OC1=C(Cl)C=C(C=C1[N+]([O-])=O)[N+]([O-])=O	3
 4 | [O-][N+](=O)C1=CNC(=N)S1	4
 5 | NC1=CC2=C(C=C1)C(=O)C3=C(C=CC=C3)C2=O	5
 6 | OC(=O)C1=C(C=CC=C1)C2=C3C=CC(=O)C(=C3OC4=C2C=CC(=C4Br)O)Br	6
 7 | CN(C)C1=C(Cl)C(=O)C2=C(C=CC=C2)C1=O	7
 8 | CC1=C(C2=C(C=C1)C(=O)C3=CC=CC=C3C2=O)[N+]([O-])=O	8
 9 | CC(=NO)C(C)=NO	9
10 | C1=CC=C(C=C1)P(C2=CC=CC=C2)C3=CC=CC=C3	10
11 | 


--------------------------------------------------------------------------------
/data/nci100.data.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/nci100.data.gz


--------------------------------------------------------------------------------
/data/nudt7/ligands.data.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/nudt7/ligands.data.gz


--------------------------------------------------------------------------------
/data/nudt7/ligands.sdf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/nudt7/ligands.sdf.gz


--------------------------------------------------------------------------------
/data/nudt7/refmol.mol:
--------------------------------------------------------------------------------
 1 | 
 2 |  OpenBabel06051719483D
 3 | 
 4 |  18 19  0  0  0  0              2 V2000
 5 |    29.0700  -43.2240   73.7660 C   0  0  0  0  0
 6 |    36.2650  -44.8070   74.9140 C   0  0  0  0  0
 7 |    37.1260  -44.6280   73.8270 C   0  0  0  0  0
 8 |    38.5050  -44.6030   73.9960 C   0  0  0  0  0
 9 |    39.0530  -44.7650   75.2580 C   0  0  0  0  0
10 |    38.2200  -44.9420   76.3450 C   0  0  0  0  0
11 |    36.8400  -44.9680   76.1790 C   0  0  0  0  0
12 |    30.0630  -44.1750   73.1160 C   0  0  0  0  0
13 |    29.4310  -45.1480   72.1250 C   0  0  0  0  0
14 |    32.0050  -44.9160   74.3360 C   0  0  0  0  0
15 |    32.8370  -45.6680   73.5100 C   0  0  0  0  0
16 |    34.2080  -45.6230   73.7110 C   0  0  0  0  0
17 |    34.7860  -44.8390   74.7200 C   0  0  0  0  0
18 |    33.9230  -44.0930   75.5330 C   0  0  0  0  0
19 |    32.5490  -44.1280   75.3490 C   0  0  0  0  0
20 |    30.2050  -45.7640   71.3670 O   0  0  0  0  0
21 |    28.1910  -45.2760   72.1490 O   0  0  0  0  0
22 |    30.6380  -44.9430   74.1670 O   0  0  0  0  0
23 |   1  8  1  0  0  0
24 |   2  3  2  0  0  0
25 |   2  7  1  0  0  0
26 |   2 13  1  0  0  0
27 |   3  4  1  0  0  0
28 |   4  5  2  0  0  0
29 |   5  6  1  0  0  0
30 |   6  7  2  0  0  0
31 |   8  9  1  0  0  0
32 |   8 18  1  0  0  0
33 |   9 16  2  0  0  0
34 |   9 17  1  0  0  0
35 |  10 11  2  0  0  0
36 |  10 15  1  0  0  0
37 |  10 18  1  0  0  0
38 |  11 12  1  0  0  0
39 |  12 13  2  0  0  0
40 |  13 14  1  0  0  0
41 |  14 15  2  0  0  0
42 | M  CHG  1  17  -1
43 | M  END
44 | 


--------------------------------------------------------------------------------
/data/pyrimethamine.mol:
--------------------------------------------------------------------------------
 1 | 1-pyrimethamine
 2 |   Cerius2 12180216023D 1   1.00000                          
 3 |  Structure written by MMmdl.
 4 |  30 31  0  0  0  0  0  0  0  0999 V2000
 5 |    -2.8357    0.2028    0.4209 N   0  0  0  0  0  0
 6 |    -2.8255   -1.1104    0.1969 C   0  0  0  0  0  0
 7 |    -1.7271   -1.8218   -0.0537 N   0  0  0  0  0  0
 8 |    -0.5417   -1.1654   -0.0884 C   0  0  0  0  0  0
 9 |    -0.4439    0.2086    0.1358 C   0  0  0  0  0  0
10 |    -1.6550    0.8467    0.4052 C   0  0  0  0  0  0
11 |     0.8362    0.9302    0.0951 C   0  0  0  0  0  0
12 |     1.6327    1.0444    1.2466 C   0  0  0  0  0  0
13 |     2.8536    1.7244    1.2069 C   0  0  0  0  0  0
14 |     3.2885    2.2979    0.0146 C   0  0  0  0  0  0
15 |     2.5126    2.1980   -1.1375 C   0  0  0  0  0  0
16 |     1.2918    1.5178   -1.0966 C   0  0  0  0  0  0
17 |    -4.0141   -1.7715    0.2232 N   0  0  0  0  0  0
18 |    -1.7348    2.2137    0.6034 N   0  0  0  0  0  0
19 |     4.7918    3.1344   -0.0351 Cl  0  0  0  0  0  0
20 |     0.4261   -3.4744   -0.6318 C   0  0  0  0  0  0
21 |     0.6932   -1.9936   -0.3864 C   0  0  0  0  0  0
22 |     1.3065    0.6014    2.1872 H   0  0  0  0  0  0
23 |     3.4548    1.8000    2.1100 H   0  0  0  0  0  0
24 |     2.8462    2.6451   -2.0710 H   0  0  0  0  0  0
25 |     0.6948    1.4500   -2.0056 H   0  0  0  0  0  0
26 |    -4.0348   -2.7663    0.0556 H   0  0  0  0  0  0
27 |    -4.8657   -1.2631    0.4089 H   0  0  0  0  0  0
28 |    -0.9674    2.6820    1.0738 H   0  0  0  0  0  0
29 |    -2.6605    2.5780    0.8038 H   0  0  0  0  0  0
30 |     1.3655   -3.9964   -0.8406 H   0  0  0  0  0  0
31 |    -0.2391   -3.6219   -1.4893 H   0  0  0  0  0  0
32 |    -0.0337   -3.9462    0.2432 H   0  0  0  0  0  0
33 |     1.3880   -1.9081    0.4572 H   0  0  0  0  0  0
34 |     1.1868   -1.5858   -1.2763 H   0  0  0  0  0  0
35 |   1  2  2  0  0  0
36 |   1  6  1  0  0  0
37 |   2  3  1  0  0  0
38 |   2 13  1  0  0  0
39 |   3  4  2  0  0  0
40 |   4  5  1  0  0  0
41 |   4 17  1  0  0  0
42 |   5  6  2  0  0  0
43 |   5  7  1  0  0  0
44 |   6 14  1  0  0  0
45 |   7  8  2  0  0  0
46 |   7 12  1  0  0  0
47 |   8  9  1  0  0  0
48 |   8 18  1  0  0  0
49 |   9 10  2  0  0  0
50 |   9 19  1  0  0  0
51 |  10 11  1  0  0  0
52 |  10 15  1  0  0  0
53 |  11 12  2  0  0  0
54 |  11 20  1  0  0  0
55 |  12 21  1  0  0  0
56 |  13 22  1  0  0  0
57 |  13 23  1  0  0  0
58 |  14 24  1  0  0  0
59 |  14 25  1  0  0  0
60 |  16 17  1  0  0  0
61 |  16 26  1  0  0  0
62 |  16 27  1  0  0  0
63 |  16 28  1  0  0  0
64 |  17 29  1  0  0  0
65 |  17 30  1  0  0  0
66 | M  END
67 | 


--------------------------------------------------------------------------------
/data/ref_mol.sdf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/ref_mol.sdf.gz


--------------------------------------------------------------------------------
/data/sdf-aliphatic-primary-amines-175.data.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/sdf-aliphatic-primary-amines-175.data.gz


--------------------------------------------------------------------------------
/data/sdf-aliphatic-primary-amines-175.metadata:
--------------------------------------------------------------------------------
1 | {"type": "org.squonk.types.MoleculeObject"}


--------------------------------------------------------------------------------
/data/sdf-aliphatic-primary-amines-175.sdf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/sdf-aliphatic-primary-amines-175.sdf.gz


--------------------------------------------------------------------------------
/data/smog/confs.data.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/data/smog/confs.data.gz


--------------------------------------------------------------------------------
/data/smog/confs.metadata:
--------------------------------------------------------------------------------
1 | {"type": "org.squonk.types.MoleculeObject"}


--------------------------------------------------------------------------------
/data/sucos/4e3g_lig.mol:
--------------------------------------------------------------------------------
 1 | 
 2 |      RDKit          3D
 3 | 
 4 |  10 10  0  0  0  0  0  0  0  0999 V2000
 5 |    17.3410    1.4040   15.6300 O   0  0  0  0  0  0  0  0  0  0  0  0
 6 |    16.4400    2.1870   15.2350 C   0  0  0  0  0  0  0  0  0  0  0  0
 7 |    15.2530    1.8470   15.5410 O   0  0  0  0  0  0  0  0  0  0  0  0
 8 |    16.7060    3.4620   14.6760 C   0  0  0  0  0  0  0  0  0  0  0  0
 9 |    18.0480    3.9210   14.6600 C   0  0  0  0  0  0  0  0  0  0  0  0
10 |    18.4420    5.2100   14.1920 C   0  0  0  0  0  0  0  0  0  0  0  0
11 |    17.3440    5.9500   13.6530 C   0  0  0  0  0  0  0  0  0  0  0  0
12 |    17.5460    7.2150   13.0840 O   0  0  0  0  0  0  0  0  0  0  0  0
13 |    16.0150    5.4960   13.6580 C   0  0  0  0  0  0  0  0  0  0  0  0
14 |    15.6810    4.2780   14.2290 C   0  0  0  0  0  0  0  0  0  0  0  0
15 |   2  1  2  0
16 |   3  2  1  0
17 |   4  2  1  0
18 |   5  4  2  0
19 |   6  5  1  0
20 |   7  6  2  0
21 |   8  7  1  0
22 |   9  7  1  0
23 |  10  9  2  0
24 |  10  4  1  0
25 | M  END
26 | 


--------------------------------------------------------------------------------
/data/sucos/benzene.sdf:
--------------------------------------------------------------------------------
 1 | 
 2 |   PyMOL2.1          3D                             0
 3 | 
 4 |   6  6  0  0  0  0  0  0  0  0999 V2000
 5 |    16.7060    3.4620   14.6760 C   0  0  0  0  0  0  0  0  0  0  0  0
 6 |    18.0480    3.9210   14.6600 C   0  0  0  0  0  0  0  0  0  0  0  0
 7 |    18.4420    5.2100   14.1920 C   0  0  0  0  0  0  0  0  0  0  0  0
 8 |    17.3440    5.9500   13.6530 C   0  0  0  0  0  0  0  0  0  0  0  0
 9 |    16.0150    5.4960   13.6580 C   0  0  0  0  0  0  0  0  0  0  0  0
10 |    15.6810    4.2780   14.2290 C   0  0  0  0  0  0  0  0  0  0  0  0
11 |   1  2  2  0  0  0  0
12 |   1  6  1  0  0  0  0
13 |   2  3  1  0  0  0  0
14 |   3  4  2  0  0  0  0
15 |   4  5  1  0  0  0  0
16 |   5  6  2  0  0  0  0
17 | M  END
18 | $$$$
19 | 


--------------------------------------------------------------------------------
/data/sulfonyl_chloride.sdf:
--------------------------------------------------------------------------------
 1 | 
 2 |      RDKit          
 3 | 
 4 |  12 12  0  0  0  0  0  0  0  0999 V2000
 5 |     0.0000    0.0000    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
 6 |     0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
 7 |     0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
 8 |     0.0000    0.0000    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
 9 |     0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
10 |     0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
11 |     0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
12 |     0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
13 |     0.0000    0.0000    0.0000 S   0  0  0  0  0  0  0  0  0  0  0  0
14 |     0.0000    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
15 |     0.0000    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
16 |     0.0000    0.0000    0.0000 Cl  0  0  0  0  0  0  0  0  0  0  0  0
17 |   1  2  3  0
18 |   2  3  1  0
19 |   3  4  2  0
20 |   4  5  1  0
21 |   5  6  2  0
22 |   6  7  1  0
23 |   7  8  2  0
24 |   8  9  1  0
25 |   9 10  2  0
26 |   9 11  2  0
27 |   9 12  1  0
28 |   8  3  1  0
29 | M  END
30 | $$$$
31 | 


--------------------------------------------------------------------------------
/environment-rdkit-utils.yml:
--------------------------------------------------------------------------------
 1 | name: pipelines-utils
 2 | channels:
 3 |     - conda-forge
 4 | dependencies:
 5 |     - python=3.7.3
 6 |     - pip=20.0.2
 7 |     - rdkit=2020.09.1
 8 |     - pandas=1.0.1
 9 |     - scikit-learn=0.22.1
10 |     - pip:
11 |         - requests==2.24.0
12 |         - matplotlib==2.2.*
13 |         - molvs==0.1.1
14 |         - standardiser==0.1.9
15 |         - oddt==0.7
16 |         - im-pipelines-utils
17 |         - im-pipelines-utils-rdkit
18 | 


--------------------------------------------------------------------------------
/execute:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | #
 3 | # Built-in default 'execute' script for the pipelines container images.
 4 | # It only prints guidance on how to supply your own 'execute' script or how
 5 | # to open an interactive shell; it does no processing itself.
 6 | # $PIPELINE is expanded when the message is printed; it is presumably the
 7 | # image name set in the container environment (TODO confirm in the Dockerfiles).
 8 | 
 9 | echo
10 | echo "PIPELINES"
11 | echo "---------"
12 | echo "You have just run the built-in 'execute' command. Congratulations!"
13 | echo ""
14 | echo "In order to use this container productively you need to provide your own"
15 | echo "'execute' script or command and any corresponding data files."
16 | echo ""
17 | echo "You could mount a volume into this container that contains an executable"
18 | echo "file named 'execute', which contains the command you wish to run,"
19 | echo "and also include any additional data files. When you run the container,"
20 | echo "adjust the default container WORKDIR by using the docker '-w' option,"
21 | echo "specifying the mounted path of the volume as its value. As an example,"
22 | echo "if you've placed an 'execute' and its data in the current working"
23 | echo "directory you could run the container like this: -"
24 | echo ""
25 | echo "  $ docker run --rm \\"
26 | echo "      -v \$PWD:/squonk/work/docker \\"
27 | echo "      -w=\"/squonk/work/docker\" \\"
28 | echo "      $PIPELINE"
29 | echo ""
30 | echo "Alternatively you can run interactive commands by over-riding the"
31 | echo "built-in container 'CMD' (which is './execute'). To enter the"
32 | echo "container's shell (bash) using the default 'WORKDIR' you can run: -"
33 | echo ""
34 | echo "  $ docker run --rm \\"
35 | echo "      -it \\"
36 | echo "      $PIPELINE \\"
37 | echo "      bash"
38 | echo
39 | 


--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/gradle/wrapper/gradle-wrapper.jar


--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | distributionBase=GRADLE_USER_HOME
2 | distributionPath=wrapper/dists
3 | zipStoreBase=GRADLE_USER_HOME
4 | zipStorePath=wrapper/dists
5 | distributionUrl=https\://services.gradle.org/distributions/gradle-4.4.1-bin.zip
6 | 


--------------------------------------------------------------------------------
/gradlew.bat:
--------------------------------------------------------------------------------
 1 | @if "%DEBUG%" == "" @echo off
 2 | @rem ##########################################################################
 3 | @rem
 4 | @rem  Gradle startup script for Windows
 5 | @rem
 6 | @rem ##########################################################################
 7 | 
 8 | @rem Set local scope for the variables with windows NT shell
 9 | if "%OS%"=="Windows_NT" setlocal
10 | 
11 | set DIRNAME=%~dp0
12 | if "%DIRNAME%" == "" set DIRNAME=.
13 | set APP_BASE_NAME=%~n0
14 | set APP_HOME=%DIRNAME%
15 | 
16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17 | set DEFAULT_JVM_OPTS=
18 | 
19 | @rem Find java.exe
20 | if defined JAVA_HOME goto findJavaFromJavaHome
21 | 
22 | set JAVA_EXE=java.exe
23 | %JAVA_EXE% -version >NUL 2>&1
24 | if "%ERRORLEVEL%" == "0" goto init
25 | 
26 | echo.
27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28 | echo.
29 | echo Please set the JAVA_HOME variable in your environment to match the
30 | echo location of your Java installation.
31 | 
32 | goto fail
33 | 
34 | :findJavaFromJavaHome
35 | set JAVA_HOME=%JAVA_HOME:"=%
36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37 | 
38 | if exist "%JAVA_EXE%" goto init
39 | 
40 | echo.
41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42 | echo.
43 | echo Please set the JAVA_HOME variable in your environment to match the
44 | echo location of your Java installation.
45 | 
46 | goto fail
47 | 
48 | :init
49 | @rem Get command-line arguments, handling Windows variants
50 | 
51 | if not "%OS%" == "Windows_NT" goto win9xME_args
52 | 
53 | :win9xME_args
54 | @rem Slurp the command line arguments.
55 | set CMD_LINE_ARGS=
56 | set _SKIP=2
57 | 
58 | :win9xME_args_slurp
59 | if "x%~1" == "x" goto execute
60 | 
61 | set CMD_LINE_ARGS=%*
62 | 
63 | :execute
64 | @rem Setup the command line
65 | 
66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
67 | 
68 | @rem Execute Gradle
69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
70 | 
71 | :end
72 | @rem End local scope for the variables with windows NT shell
73 | if "%ERRORLEVEL%"=="0" goto mainEnd
74 | 
75 | :fail
76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
77 | rem the _cmd.exe /c_ return code!
78 | if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
79 | exit /b 1
80 | 
81 | :mainEnd
82 | if "%OS%"=="Windows_NT" endlocal
83 | 
84 | :omega
85 | 


--------------------------------------------------------------------------------
/post-service-descriptors.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Posts the pipeline service descriptors found in this repository to a
 3 | # services REST endpoint using curl.
 4 | #
 5 | # Usage: ./post-service-descriptors.sh [POST_URL]
 6 | #   POST_URL   target endpoint (defaults to the coreservices URL assigned to POST below)
 7 | #   IMAGE_TAG  optional environment variable, sent as the Image-Tag header
 8 | #
 9 | # run locally with something like this:
10 | # ./post-service-descriptors.sh http://localhost:8091/coreservices/rest/v1/services
11 | # or
12 | # docker run -it --rm -v $PWD:$PWD:Z -w $PWD --network deploy_squonk_back centos:7 ./post-service-descriptors.sh
13 | 
14 | set -e
15 | 
16 | POST=${1:-http://coreservices:8080/coreservices/rest/v1/services}
17 | BASE_D=docker://github.com/InformaticsMatters/pipelines
18 | BASE_N=nextflow://github.com/InformaticsMatters/pipelines
19 | IMAGE_TAG=${IMAGE_TAG:-}
20 | CT_DJ="application/x-squonk-service-descriptor-docker+json"
21 | CT_DY="application/x-squonk-service-descriptor-docker+yaml"
22 | CT_MM="multipart/mixed"
23 | 
24 | echo BASE_D="${BASE_D}"
25 | echo BASE_N="${BASE_N}"
26 | echo IMAGE_TAG="${IMAGE_TAG}"
27 | 
28 | # Docker service descriptors: each *.dsd.yml file is sent as the request body.
29 | for d in 'src/python/pipelines/dmpk' \
30 |          'src/python/pipelines/docking' \
31 |          'src/python/pipelines/rdkit' \
32 |          'src/python/pipelines/dimorphite'
33 | do
34 |     for file in "$d"/*.dsd.yml
35 |     do
36 |         # An unmatched glob stays a literal pattern; skip it instead of letting
37 |         # curl fail on a missing file and abort the whole run through 'set -e'.
38 |         [ -e "$file" ] || continue
39 |         echo "$file"
40 |         curl -X POST \
41 |              -T "$file" \
42 |              -H "Content-Type: $CT_DY" \
43 |              -H "Base-URL: $BASE_D" \
44 |              -H "Image-Tag: $IMAGE_TAG" \
45 |              "$POST"
46 |         echo ""
47 |     done
48 | done
49 | 
50 | # Nextflow service descriptors: the descriptor, workflow (.nf) and .config
51 | # files that share a base name are sent together as one multipart request.
52 | for d in 'src/nextflow/docking' \
53 |          'src/nextflow/rdkit'
54 | do
55 |     for file in "$d"/*.nsd.yml
56 |     do
57 |         [ -e "$file" ] || continue
58 |         # Drop the trailing '.yml' to get the name shared by the sibling files.
59 |         basename=${file%.yml}
60 |         echo "$basename"
61 |         curl -X POST \
62 |              -F "nextflow.nsd.yml=@${basename}.yml;type=application/x-squonk-service-descriptor-nextflow+yaml;filename=nextflow.nsd.yml" \
63 |              -F "nextflow.nf=@${basename}.nf;type=text/plain;filename=nextflow.nf" \
64 |              -F "nextflow.config=@${basename}.config;type=text/plain;filename=nextflow.config" \
65 |              -H "Content-Type: $CT_MM" \
66 |              -H "Base-URL: $BASE_N" \
67 |              "$POST"
68 |         echo ""
69 |     done
70 | done
71 | 


--------------------------------------------------------------------------------
/requirements-obabel.txt:
--------------------------------------------------------------------------------
1 | im-pipelines-utils==2.4.*
2 | im-pipelines-utils-rdkit==1.5.*
3 | 


--------------------------------------------------------------------------------
/requirements-rdkit.txt:
--------------------------------------------------------------------------------
 1 | im-pipelines-utils==2.4.*
 2 | im-pipelines-utils-rdkit==1.5.*
 3 | matplotlib==2.2.*
 4 | molvs==0.1.1
 5 | standardiser==0.1.9
 6 | numpy==1.19.1
 7 | pandas==1.0.1
 8 | scikit-learn==0.22.1
 9 | requests==2.24.0
10 | oddt==0.7
11 | 


--------------------------------------------------------------------------------
/src/nextflow/docking/.gitignore:
--------------------------------------------------------------------------------
1 | results


--------------------------------------------------------------------------------
/src/nextflow/docking/plip.config:
--------------------------------------------------------------------------------
1 | // Intentionally Empty


--------------------------------------------------------------------------------
/src/nextflow/docking/plip.nf:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env nextflow
 2 | 
 3 | /* Example Nextflow pipeline that runs PLI scoring
 4 | */
 5 | 
 6 | 
 7 | params.ligands = 'ligands.sdf.gz'
 8 | params.protein = 'protein.pdb'
 9 | params.chunk = 25
10 | params.score = null
11 | 
12 | 
13 | ligands = file(params.ligands)
14 | protein = file(params.protein)
15 | 
16 | /* Splits the input SD file into multiple files of ${params.chunk} records.
17 | * Each file is sent individually to the ligand_parts channel.
18 | * Parts are named so that they sort into the correct order.
19 | */
20 | process sdsplit {
21 | 
22 | 	input:
23 |     file ligands
24 | 
25 |     output:
26 |     file 'ligands_part*' into ligand_parts mode flatten
27 |     
28 |     
29 |     """
30 |     python -m pipelines_utils_rdkit.filter -i $ligands -c $params.chunk -d 5 -o ligands_part -of sdf
31 |     """
32 | }
33 | 
34 | /* Scores each file from the ligand_parts channel sending each resulting SD file to the results channel
35 | */
36 | process pli_scoring {
37 | 
38 | 	input:
39 |     file part from ligand_parts
40 | 	file protein
41 | 
42 |     output:
43 |     file 'scored_part*.sdf' into scored_parts
44 |     // 'ligands' becomes 'scored' in the part name and [0..-8] trims its last 7 characters (presumably a '.sdf.gz' suffix - confirm); stdout and stderr go to scored_out.log
45 |     """
46 | 	python -m pipelines.docking.plip -i $part -pdb $protein -o ${part.name.replace('ligands', 'scored')[0..-8]} -of sdf --no-gzip ${params.score ? ' -t ' + params.score : ''} --threads 1 &> scored_out.log
47 |     """
48 | }
49 | 
50 | /* Recombine and publish the results
51 | */
52 | process results {
53 | 
54 | 	
55 | 	input:
56 | 	file ligands
57 | 	file part from scored_parts.collect()
58 | 	
59 | 	output:
60 | 	file 'output.sdf.gz' into results
61 | 	
62 |  
63 | 	"""
64 | 	cat scored_part*.sdf | gzip > output.sdf.gz
65 | 	"""
66 | }
67 | 
68 | results.println { "Results: $it" }
69 |     
70 | 


--------------------------------------------------------------------------------
/src/nextflow/docking/plip.nsd.config:
--------------------------------------------------------------------------------
1 | // Intentionally Empty


--------------------------------------------------------------------------------
/src/nextflow/docking/plip.nsd.nf:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env nextflow
 2 | 
 3 | params.ligands = 'ligands.data.gz'   // ligands input read by the splitter
 4 | params.protein = 'protein.pdb.gz'    // protein file handed to the scorer via -pdb
 5 | params.chunk = 25                    // passed to the filter module as -c
 6 | params.score = null                  // optional threshold; when set it is passed to the scorer as -t
 7 | params.limit = 0                     // passed to the filter module as -l
 8 | params.digits = 4                    // passed to the filter module as -d
 9 | 
10 | ligands = file(params.ligands)       // turn the parameter values into file objects for staging
11 | protein = file(params.protein)
12 | 
13 | process splitter {
14 |     // Splits the ligands into gzipped SD parts and also captures the metrics file written by the filter run.
15 |     container 'informaticsmatters/pli:latest'
16 |     beforeScript 'chmod g+w .'   // make the task work directory group-writable before the script runs
17 | 
18 |     input:
19 |     file ligands
20 | 
21 |     output:
22 |     file 'ligand_part*.sdf.gz' into ligand_parts mode flatten   // flatten: every part is emitted as its own channel item
23 |     file 'ligand_part_metrics.txt' into splitter_metrics
24 | 
25 |     """
26 |     python -m pipelines_utils_rdkit.filter -i $ligands -c $params.chunk -l $params.limit -d $params.digits -o ligand_part -of sdf --meta
27 |     """
28 | }
29 | 
30 | 
31 | /* Scores each file from the ligand_parts channel sending each resulting SD file to the results channel
32 | */
33 | process pli_scoring {
34 |     // Scores one ligand part against the protein. Output base name: 'ligand' -> 'scored', minus the 7-char '.sdf.gz' suffix.
35 |     container 'informaticsmatters/pli:latest'
36 |     beforeScript 'chmod g+w .'   // make the task work directory group-writable before the script runs
37 | 
38 | 	input:
39 |     file part from ligand_parts
40 | 	file protein
41 | 
42 |     output:
43 |     file 'scored_part*.sdf' into scored_parts
44 |     // Explicit null test for -t: Groovy truth treats 0 as false, which would silently drop a threshold of 0.
45 |     """
46 | 	python -m pipelines.docking.plip -i $part -pdb $protein -o ${part.name.replace('ligand', 'scored')[0..-8]} -of sdf --no-gzip ${params.score == null ? '' : ' -t ' + params.score} --threads 1 &> scored_out.log
47 |     """
48 | }
49 | 
50 | process joiner {
51 |     // Merges the scored parts through the filter module and assembles the metrics and metadata files.
52 |     container 'informaticsmatters/pli:latest'
53 |     beforeScript 'chmod g+w .'   // make the task work directory group-writable before the script runs
54 |     publishDir "$baseDir/results", mode: 'move'   // declared outputs are moved into $baseDir/results
55 | 
56 |     input:
57 |     file parts from scored_parts.collect()   // collect(): receive every scored part in one task
58 |     file 'splitter_metrics.txt' from splitter_metrics   // staged under this fixed name for the greps below
59 |     // output_metrics.txt ends up with: splitter input count relabelled PLI, splitter input count, joiner output count.
60 |     output:
61 |     file 'output.data.gz'
62 |     file 'output_metrics.txt'
63 |     file 'output.metadata'
64 |     // The filter run's own output_metrics.txt is renamed first so the final file can be rebuilt under that name.
65 |     """
66 |     cat scored_part*.sdf | python -m pipelines_utils_rdkit.filter -if sdf -of json -o output --meta --thin
67 |     mv output_metrics.txt joiner_metrics.txt
68 |     grep '__InputCount__' splitter_metrics.txt | sed s/__InputCount__/PLI/ > output_metrics.txt
69 |     grep '__InputCount__' splitter_metrics.txt >> output_metrics.txt
70 |     grep '__OutputCount__' joiner_metrics.txt >> output_metrics.txt
71 |     echo '{"type":"org.squonk.types.BasicObject","valueClassMappings":{"pliff_cscore":"java.lang.Float","pliff_iscore":"java.lang.Float","pliff_tscore":"java.lang.Float","pliff_gscore":"java.lang.Float","pliff_score":"java.lang.Float","pliff_nb_score":"java.lang.Float"}}' > output.metadata
72 |     """
73 | }
74 | 


--------------------------------------------------------------------------------
/src/nextflow/docking/plip.nsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | !<org.squonk.core.NextflowServiceDescriptor>
 3 | serviceConfig:
 4 |   id: "pipelines.pli.v1"
 5 |   "name": "PLI docking scoring"
 6 |   "description": "Score docked ligands with PLI"
 7 |   tags:
 8 |   - "pli"
 9 |   - "docking"
10 |   - "scoring"
11 |   - "3d"
12 |   - "docker"
13 |   - "nextflow"
14 |   resourceUrl: null
15 |   icon: "icons/filter_molecules.png"
16 |   inputDescriptors:
17 |   - name: "ligands"
18 |     mediaType: "application/x-squonk-dataset-molecule+json"
19 |     primaryType: "org.squonk.dataset.Dataset"
20 |     secondaryType: "org.squonk.types.MoleculeObject"
21 |   - name: "protein"
22 |     mediaType: "chemical/x-pdb"
23 |     primaryType: "org.squonk.types.PDBFile"
24 |   outputDescriptors:
25 |   - name: "output"
26 |     mediaType: "application/x-squonk-dataset-molecule+json"
27 |     primaryType: "org.squonk.dataset.Dataset"
28 |     secondaryType: "org.squonk.types.MoleculeObject"
29 |   optionDescriptors:
30 |   - !<org.squonk.options.OptionDescriptor>
31 |     typeDescriptor: !<org.squonk.options.SimpleTypeDescriptor>
32 |       type: "java.lang.Float"
33 |     key: "arg.score"
34 |     label: "Score threshold"
35 |     description: "Keep only molecules with scores better than this value"
36 |     minValues: 0
37 |     maxValues: 1
38 |     visible: true
39 |     editable: true
40 |     modes:
41 |     - "User"
42 | 
43 |   executorClassName: "org.squonk.execution.steps.impl.ThinDatasetNextflowInDockerExecutorStep"
44 | thinDescriptors:
45 |   - input: ligands
46 |     output: output
47 | inputRoutes:
48 | - route: "FILE"
49 | - route: "FILE"
50 | outputRoutes:
51 | - route: "FILE"
52 | nextflowParams: |
53 |   ${binding.variables.containsKey('score') ? 'params.score = ' + score : ''}
54 | 


--------------------------------------------------------------------------------
/src/nextflow/docking/rdock-filter.nsd.config:
--------------------------------------------------------------------------------
1 | // Intentionally Empty


--------------------------------------------------------------------------------
/src/nextflow/docking/rdock-filter.nsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | !<org.squonk.core.NextflowServiceDescriptor>
 3 | serviceConfig:
 4 |   id: pipelines.docking.rdock.filter.v1
 5 |   "name": rDock filtering
 6 |   "description": Dock ligands with rDock filtering poses relative to reference ligand
 7 |   tags:
 8 |   - rdock
 9 |   - docking
10 |   - scoring
11 |   - 3d
12 |   - docker
13 |   - nextflow
14 |   resourceUrl: null
15 |   icon: icons/filter_molecules.png
16 |   inputDescriptors:
17 |   - name: receptor
18 |     mediaType: chemical/x-mol2
19 |     primaryType: org.squonk.types.Mol2File
20 |   - name: refmol
21 |     mediaType: chemical/x-mdl-molfile
22 |     primaryType: org.squonk.types.MolFile
23 |   - name: ligands
24 |     mediaType: application/x-squonk-dataset-molecule+json
25 |     primaryType: org.squonk.dataset.Dataset
26 |     secondaryType: org.squonk.types.MoleculeObject
27 |   outputDescriptors:
28 |   - name: output
29 |     mediaType: application/x-squonk-dataset-molecule+json
30 |     primaryType: org.squonk.dataset.Dataset
31 |     secondaryType: org.squonk.types.MoleculeObject
32 |   optionDescriptors:
33 |   - !<org.squonk.options.OptionDescriptor>
34 |     typeDescriptor: !<org.squonk.options.SimpleTypeDescriptor>
35 |       type: java.lang.Integer
36 |     key: arg.num_dockings
37 |     label: Number of dockings
38 |     description: Execute this many docking runs
39 |     defaultValue: 10
40 |     minValues: 1
41 |     maxValues: 1
42 |     visible: true
43 |     editable: true
44 |     modes:
45 |     - User
46 |   - !<org.squonk.options.OptionDescriptor>
47 |     typeDescriptor: !<org.squonk.options.SimpleTypeDescriptor>
48 |       type: java.lang.Integer
49 |     key: arg.top
50 |     label: Top scoring poses
51 |     description: Keep this many top scoring poses
52 |     defaultValue: 1
53 |     minValues: 1
54 |     maxValues: 1
55 |     visible: true
56 |     editable: true
57 |     modes:
58 |     - User
59 |   - !<org.squonk.options.OptionDescriptor>
60 |     typeDescriptor: !<org.squonk.options.SimpleTypeDescriptor>
61 |       type: java.lang.Float
62 |     key: arg.threshold
63 |     label: Score threshold
64 |     description: Filter out poses with relative scores above this value compared to reference
65 |     defaultValue: 0.0
66 |     minValues: 1
67 |     maxValues: 1
68 |     visible: true
69 |     editable: true
70 |     modes:
71 |     - User
72 |   executorClassName: org.squonk.execution.steps.impl.ThinDatasetNextflowInDockerExecutorStep
73 | thinDescriptors:
74 |   - input: ligands
75 | inputRoutes:
76 | - route: FILE
77 | - route: FILE
78 | - route: FILE
79 | outputRoutes:
80 | - route: FILE
81 | nextflowParams: |
82 |   params.receptor = 'receptor.mol2.gz'
83 |   params.refmol = 'refmol.mol'
84 |   params.ligands = 'ligands.data.gz'
85 |   ${binding.variables.containsKey('num_dockings') ? 'params.num_dockings = ' + num_dockings : ''}
86 |   ${binding.variables.containsKey('top') ? 'params.top = ' + top : ''}
87 |   ${binding.variables.containsKey('threshold') ? 'params.threshold = ' + threshold : ''}
88 | 


--------------------------------------------------------------------------------
/src/nextflow/docking/rdock.config:
--------------------------------------------------------------------------------
1 | // Intentionally Empty
2 | 


--------------------------------------------------------------------------------
/src/nextflow/docking/rdock.nf:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env nextflow
 2 | 
 3 | /* Example Nextflow pipeline that runs rDock docking
 4 | */
 5 | 
 6 | 
 7 | params.ligands = 'ligands.sdf.gz'   // ligands file that sdsplit divides into parts
 8 | params.prmfile = 'receptor.prm'     // passed to rbdock as -r
 9 | params.protein = 'receptor.mol2'    // staged for the rdock process
10 | params.asfile = 'receptor.as'       // staged for the rdock process
11 | params.chunk = 25                   // records per split file (filter -c)
12 | 
13 | params.num_dockings = 100           // passed to rbdock as -n
14 | params.top = 1                      // results keeps records with _COUNT <= this value
15 | params.score = null                 // optional: adds an sdfilter on SCORE <= value
16 | params.nscore = null                // optional: adds an sdfilter on SCORE.norm <= value
17 | params.limit = 0                    // not referenced by any process in this script
18 | params.digits = 4                   // passed to the filter module as -d
19 | 
20 | 
21 | ligands = file(params.ligands)      // turn the parameter values into file objects for staging
22 | protein = file(params.protein)
23 | prmfile = file(params.prmfile)
24 | asfile = file(params.asfile)
25 | 
26 | /* Splits the input SD file into multiple files of ${params.chunk} records.
27 | * Each file is sent individually to the ligand_parts channel.
28 | * Parts are named so that they sort in the correct order.
29 | */
30 | process sdsplit {
31 |     // Runs the filter module to write the ligands as uncompressed ligands_part*.sdf files of params.chunk records.
32 |     container 'informaticsmatters/rdkit_pipelines:latest'
33 | 
34 | 	input:
35 |     file ligands
36 | 
37 |     output:
38 |     file 'ligands_part*.sdf' into ligand_parts mode flatten   // flatten: every part is emitted as its own channel item
39 |     
40 |     
41 |     """
42 |     python -m pipelines_utils_rdkit.filter -i $ligands -c $params.chunk -d $params.digits -o ligands_part -of sdf --no-gzip
43 |     """
44 | }
45 | 
46 | /* Docks each file from the ligand_parts channel sending each resulting SD file to the results channel
47 | */
48 | process rdock {
49 |     // Runs rbdock on one ligand part. Output base name: 'ligands' -> 'docked', minus the 4-char '.sdf' suffix.
50 | 	input:
51 |     file part from ligand_parts
52 | 	file protein   // staged into the task; not named on the rbdock command line
53 | 	file prmfile
54 | 	file asfile    // staged into the task; not named on the rbdock command line
55 | 
56 |     output:
57 |     file 'docked_part*.sd' into docked_parts
58 | 
59 |     """
60 |     rbdock -r $prmfile -p dock.prm -n $params.num_dockings -i $part -o ${part.name.replace('ligands', 'docked')[0..-5]} > docked_out.log
61 |     """
62 | }
63 | 
64 | /* Filter, combine and publish the results
65 | */
66 | process results {
67 |     // Pipes all docked parts through sdsort, the optional SCORE / SCORE.norm filters, then the _COUNT <= params.top filter.
68 | 	input:
69 | 	file part from docked_parts.collect()   // collect(): receive every docked part in one task
70 | 
71 | 	output:
72 | 	file 'results.sdf' into results
73 |     // Each optional sdfilter stage is only added to the pipeline when its parameter is non-null.
74 | 	"""
75 | 	sdsort -n -s -fSCORE docked_part*.sd |${params.score == null ? '' : " sdfilter -f'\$SCORE <= $params.score' |"}${params.nscore == null ? '' : " sdfilter -f'\$SCORE.norm <= $params.nscore' |"} sdfilter -f'\$_COUNT <= ${params.top}' > results.sdf
76 | 	"""
77 | }
78 |     
79 | results.println { "Results: $it" }   // print each item emitted on the results channel
80 | 


--------------------------------------------------------------------------------
/src/nextflow/docking/rdock.nsd.config:
--------------------------------------------------------------------------------
1 | // Intentionally Empty


--------------------------------------------------------------------------------
/src/nextflow/docking/rdock.nsd.nf:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env nextflow
  2 | 
  3 | /* Squonk Nextflow pipeline that runs docking using rDock.
  4 | * The contents of the zip file specified by params.receptor must contain the following:
  5 | * 1. receptor.mol2 - the prepared protein in mol2 format
  6 | * 2. receptor.as - the receptor active site definition
  7 | * 3. receptor.prm - the rDock configuration file that refers to receptor.mol2 in its RECEPTOR_FILE property.
  8 | * This zip file is unzipped and the contents used by rDock.
  9 | * To test this manually run something like this:
 10 | * nextflow run src/nextflow/docking/rdock.nsd.nf --ligands data/dhfr_3d.data.gz --receptor data/hivpr.config.zip --num_dockings 5 -with-docker informaticsmatters/rdkit_pipelines
 11 | */
 12 | 
 13 | params.ligands = "$baseDir/ligands.data.gz"   // ligands input read by the splitter
 14 | params.receptor = "$baseDir/config.zip"       // zip archive unpacked by unzip_config
 15 | params.chunk = 25                             // passed to the filter module as -c
 16 | params.num_dockings = 100                     // passed to rbdock as -n
 17 | params.top = 1                                // results keeps records with _COUNT <= this value
 18 | params.score = null                           // optional: adds an sdfilter on SCORE <= value
 19 | params.nscore = null                          // optional: adds an sdfilter on SCORE.norm <= value
 20 | params.limit = 0                              // passed to the filter module as -l
 21 | params.digits = 4                             // passed to the filter module as -d
 22 | 
 23 | 
 24 | ligands = file(params.ligands)                // turn the parameter values into file objects for staging
 25 | receptorzip = file(params.receptor)
 26 | 
 27 | process unzip_config {
 28 |     // Unpacks the receptor zip and publishes its three expected members on separate channels.
 29 |     beforeScript 'chmod g+w .'   // make the task work directory group-writable before the script runs
 30 |     container 'informaticsmatters/rdkit_pipelines:latest'
 31 | 
 32 |     input:
 33 |     file receptorzip
 34 | 
 35 |     output:
 36 |     file 'receptor.prm' into prmfile    // consumed by the rdock process
 37 |     file 'receptor.mol2' into protein   // consumed by the rdock process
 38 |     file 'receptor.as' into asfile      // consumed by the rdock process
 39 | 
 40 |     """
 41 |     unzip $receptorzip
 42 |     """
 43 | 
 44 | }
 45 | 
 46 | /* Splits the input into multiple files of ${params.chunk} records.
 47 | */
 48 | process splitter {
 49 |     // Splits the ligands into uncompressed SD parts and also captures the metrics file written by the filter run.
 50 |     beforeScript 'chmod g+w .'   // make the task work directory group-writable before the script runs
 51 |     container 'informaticsmatters/rdkit_pipelines:latest'
 52 | 
 53 |     input:
 54 |     file ligands
 55 | 
 56 |     output:
 57 |     file 'ligands_part*.sdf' into ligands_parts mode flatten   // flatten: every part is emitted as its own channel item
 58 |     file 'ligands_part_metrics.txt' into splitter_metrics
 59 | 
 60 |     """
 61 |     python -m pipelines_utils_rdkit.filter -i $ligands -c $params.chunk -l $params.limit -d $params.digits -o ligands_part -of sdf --no-gzip --meta
 62 |     """
 63 | }
 64 | 
 65 | /* Docks each file from the ligands_parts channel, sending each resulting SD file to the docked_parts channel
 66 | */
 67 | process rdock {
 68 |     // Runs rbdock on one ligand part. Output base name: 'ligands' -> 'docked', minus the 4-char '.sdf' suffix.
 69 |     container 'informaticsmatters/rdock-mini:latest'
 70 |     // change permissions on the work dir so that the rdock user in the container
 71 |     // can write to the directory that is owned by root
 72 |     beforeScript 'chmod g+w .'
 73 | 
 74 | 	input:
 75 |     file part from ligands_parts
 76 |     file prmfile
 77 | 	file protein   // staged into the task; not named on the rbdock command line
 78 | 	file asfile    // staged into the task; not named on the rbdock command line
 79 | 
 80 |     output:
 81 |     file 'docked_part*.sd' into docked_parts
 82 | 
 83 |     """
 84 | 	rbdock -r $prmfile -p dock.prm -n $params.num_dockings -i $part -o ${part.name.replace('ligands', 'docked')[0..-5]} > docked_out.log
 85 |     """
 86 | }
 87 | 
 88 | /* Filter, combine and publish the results
 89 | */
 90 | process results {
 91 |     // Pipes all docked parts through sdsort, the optional SCORE / SCORE.norm filters, then the _COUNT <= params.top filter.
 92 | 	container 'informaticsmatters/rdock-mini'
 93 | 	// change permissions so the container user can write to the work dir (same reason as in the rdock process)
 94 | 	beforeScript 'chmod g+w .'
 95 | 
 96 | 	input:
 97 | 	file ligands   // staged into the task but not referenced by the script
 98 | 	file part from docked_parts.collect()   // collect(): receive every docked part in one task
 99 | 
100 | 	output:
101 | 	file 'results.sdf' into results
102 |     // Each optional sdfilter stage is only added to the pipeline when its parameter is non-null.
103 | 	"""
104 | 	sdsort -n -s -fSCORE docked_part*.sd |${params.score == null ? '' : " sdfilter -f'\$SCORE <= $params.score' |"}${params.nscore == null ? '' : " sdfilter -f'\$SCORE.norm <= $params.nscore' |"} sdfilter -f'\$_COUNT <= ${params.top}' > results.sdf
105 | 	"""
106 | }
107 | 
108 | process metrics {
109 |     // Converts results.sdf to the JSON dataset output and assembles the final metrics file.
110 |     beforeScript 'chmod g+w .'   // make the task work directory group-writable before the script runs
111 |     container 'informaticsmatters/rdkit_pipelines:latest'
112 | 
113 |     publishDir "$baseDir/results", mode: 'move'   // declared outputs are moved into $baseDir/results
114 | 
115 |     input:
116 |     file 'results.sdf' from results
117 |     file 'splitter_metrics.txt' from splitter_metrics   // staged under this fixed name for the greps below
118 |     // output_metrics.txt ends up with: splitter input count relabelled DockingRDock, splitter input count, filter output count.
119 |     output:
120 |     file 'output.data.gz'
121 |     file 'output.metadata'
122 |     file 'output_metrics.txt'
123 |     // The filter run's own output_metrics.txt is renamed first so the final file can be rebuilt under that name.
124 |     """
125 |     python -m pipelines_utils_rdkit.filter -i results.sdf -of json -o output --meta
126 |     mv output_metrics.txt old_metrics.txt
127 |     grep '__InputCount__' splitter_metrics.txt | sed s/__InputCount__/DockingRDock/ > output_metrics.txt
128 |     grep '__InputCount__' splitter_metrics.txt >> output_metrics.txt
129 |     grep '__OutputCount__' old_metrics.txt >> output_metrics.txt
130 |     """
131 | }
132 | 


--------------------------------------------------------------------------------
/src/nextflow/docking/rdock.nsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | !<org.squonk.core.NextflowServiceDescriptor>
 3 | serviceConfig:
 4 |   id: "pipelines.docking.rdock.basic.v2"
 5 |   "name": "rDock docking"
 6 |   "description": "Dock ligands with rDock"
 7 |   tags:
 8 |   - "rdock"
 9 |   - "docking"
10 |   - "scoring"
11 |   - "3d"
12 |   - "docker"
13 |   - "nextflow"
14 |   resourceUrl: null
15 |   icon: "icons/filter_molecules.png"
16 |   inputDescriptors:
17 |   - name: "ligands"
18 |     mediaType: "application/x-squonk-dataset-molecule+json"
19 |     primaryType: "org.squonk.dataset.Dataset"
20 |     secondaryType: "org.squonk.types.MoleculeObject"
21 |   - name: "config"
22 |     mediaType: "application/zip"
23 |     primaryType: "org.squonk.types.ZipFile"
24 |   outputDescriptors:
25 |   - name: "output"
26 |     mediaType: "application/x-squonk-dataset-molecule+json"
27 |     primaryType: "org.squonk.dataset.Dataset"
28 |     secondaryType: "org.squonk.types.MoleculeObject"
29 |   optionDescriptors:
30 |   - !<org.squonk.options.OptionDescriptor>
31 |     typeDescriptor: !<org.squonk.options.SimpleTypeDescriptor>
32 |       type: "java.lang.Integer"
33 |     key: "arg.num"
34 |     label: "Number of dockings"
35 |     description: "Execute this many docking runs"
36 |     defaultValue: 10
37 |     minValues: 1
38 |     maxValues: 1
39 |     visible: true
40 |     editable: true
41 |     modes:
42 |     - "User"
43 |   - !<org.squonk.options.OptionDescriptor>
44 |     typeDescriptor: !<org.squonk.options.SimpleTypeDescriptor>
45 |       type: "java.lang.Integer"
46 |     key: "arg.top"
47 |     label: "Top scoring poses"
48 |     description: "Keep this many top scoring poses"
49 |     defaultValue: 1
50 |     minValues: 1
51 |     maxValues: 1
52 |     visible: true
53 |     editable: true
54 |     modes:
55 |     - "User"
56 |   - !<org.squonk.options.OptionDescriptor>
57 |     typeDescriptor: !<org.squonk.options.SimpleTypeDescriptor>
58 |       type: "java.lang.Float"
59 |     key: "arg.score"
60 |     label: "Score filter"
61 |     description: "Filter out scores above this value"
62 |     minValues: 0
63 |     maxValues: 1
64 |     visible: true
65 |     editable: true
66 |     modes:
67 |     - "User"
68 |   - !<org.squonk.options.OptionDescriptor>
69 |     typeDescriptor: !<org.squonk.options.SimpleTypeDescriptor>
70 |       type: "java.lang.Float"
71 |     key: "arg.nscore"
72 |     label: "Normalised score filter"
73 |     description: "Filter out scores normalised by heavy atom count above this value"
74 |     minValues: 0
75 |     maxValues: 1
76 |     visible: true
77 |     editable: true
78 |     modes:
79 |     - "User"
80 | 
81 |   executorClassName: "org.squonk.execution.steps.impl.ThinDatasetNextflowInDockerExecutorStep"
82 | thinDescriptors:
83 |   - input: "ligands"
84 | inputRoutes:
85 | - route: "FILE"
86 | - route: "FILE"
87 | outputRoutes:
88 | - route: "FILE"
89 | nextflowParams: |
90 |   params.ligands = 'ligands.data.gz'
91 |   params.receptor = 'config.zip'
92 |   params.num_dockings = $num
93 |   params.top = $top
94 |   ${binding.variables.containsKey('score') ? 'params.score = ' + score : ''}
95 |   ${binding.variables.containsKey('nscore') ? 'params.nscore = ' + nscore : ''}
96 | 


--------------------------------------------------------------------------------
/src/nextflow/docking/smog.config:
--------------------------------------------------------------------------------
1 | // Intentionally Empty


--------------------------------------------------------------------------------
/src/nextflow/docking/smog.nf:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env nextflow
 2 | 
 3 | /* Example Nextflow pipeline that runs SMoG2016 scoring
 4 | */
 5 | 
 6 | 
 7 | params.ligands = 'ligands.sdf.gz'   // ligands file that sdsplit divides into parts
 8 | params.protein = 'protein.pdb'      // protein file handed to the scorer via -pdb
 9 | params.chunk = 25                   // records per split file (filter -c)
10 | params.score = null                 // optional threshold; when set it is passed to the scorer as -t
11 | 
12 | 
13 | ligands = file(params.ligands)      // turn the parameter values into file objects for staging
14 | protein = file(params.protein)
15 | 
16 | /* Splits the input SD file into multiple files of ${params.chunk} records.
17 | * Each file is sent individually to the ligand_parts channel.
18 | * Parts are renamed so that they sort in the correct order.
19 | */
20 | process sdsplit {
21 |     // Runs the filter module to write the ligands as files prefixed 'ligands_part', params.chunk records each.
22 | 	input:
23 |     file ligands
24 | 
25 |     output:
26 |     file 'ligands_part*' into ligand_parts mode flatten   // flatten: every part is emitted as its own channel item
27 |     
28 |     
29 |     """
30 |     python -m pipelines_utils_rdkit.filter -i $ligands -c $params.chunk -d 5 -o ligands_part -of sdf
31 |     """
32 | }
33 | 
34 | /* Scores each file from the ligand_parts channel sending each resulting SD file to the results channel
35 | */
36 | process smog_scoring {
37 |     // Scores one ligand part against the protein. Output base name: 'ligands' -> 'scored', minus the last 7 chars (presumably '.sdf.gz').
38 | 	input:
39 |     file part from ligand_parts
40 | 	file protein
41 | 
42 |     output:
43 |     file 'scored_part*.sdf' into scored_parts
44 |     // Explicit null test for -t: Groovy truth treats 0 as false, which would silently drop a threshold of 0.
45 |     """
46 | 	python -m pipelines.docking.smog2016 -i $part -pdb $protein -o ${part.name.replace('ligands', 'scored')[0..-8]} -of sdf --thin --no-gzip ${params.score == null ? '' : ' -t ' + params.score} --threads 1 &> scored_out.log
47 |     """
48 |  }
49 | 
50 | /* Recombine the results
51 | */
52 | process results {
53 |     // Concatenates all scored parts into a single gzipped SD file.
54 | 	
55 | 	input:
56 | 	file ligands   // staged into the task but not referenced by the script
57 | 	file part from scored_parts.collect()   // collect(): receive every scored part in one task
58 | 	
59 | 	output:
60 | 	file 'output.sdf.gz' into results
61 | 	
62 |  
63 | 	"""
64 | 	cat scored_part*.sdf | gzip > output.sdf.gz
65 | 	"""
66 | }
67 | 
68 | results.println { "Results: $it" }   // print each item emitted on the results channel


--------------------------------------------------------------------------------
/src/nextflow/docking/smog.nsd.config:
--------------------------------------------------------------------------------
1 | // Intentionally Empty


--------------------------------------------------------------------------------
/src/nextflow/docking/smog.nsd.nf:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env nextflow
 2 | 
 3 | params.ligands = 'ligands.data.gz'   // ligands input read by the splitter
 4 | params.protein = 'protein.pdb.gz'    // protein file handed to the scorer via -pdb
 5 | params.chunk = 25                    // passed to the filter module as -c
 6 | params.score = null                  // optional threshold; when set it is passed to the scorer as -t
 7 | params.limit = 0                     // passed to the filter module as -l
 8 | params.digits = 4                    // passed to the filter module as -d
 9 | 
10 | ligands = file(params.ligands)       // turn the parameter values into file objects for staging
11 | protein = file(params.protein)
12 | 
13 | process splitter {
14 |     // Splits the ligands into gzipped SD parts and also captures the metrics file written by the filter run.
15 |     container 'informaticsmatters/smog:latest'
16 |     beforeScript 'chmod g+w .'   // make the task work directory group-writable before the script runs
17 | 
18 |     input:
19 |     file ligands
20 | 
21 |     output:
22 |     file 'ligand_part*.sdf.gz' into ligand_parts mode flatten   // flatten: every part is emitted as its own channel item
23 |     file 'ligand_part_metrics.txt' into splitter_metrics
24 | 
25 |     """
26 |     python -m pipelines_utils_rdkit.filter -i $ligands -c $params.chunk -l $params.limit -d $params.digits -o ligand_part -of sdf --meta
27 |     """
28 | }
29 | 
30 | 
31 | /* Scores each file from the ligand_parts channel sending each resulting SD file to the results channel
32 | */
33 | process smog_scoring {
34 |     // Scores one ligand part against the protein. Output base name: 'ligand' -> 'scored', minus the 7-char '.sdf.gz' suffix.
35 |     container 'informaticsmatters/smog:latest'
36 |     beforeScript 'chmod g+w .'   // make the task work directory group-writable before the script runs
37 | 
38 | 	input:
39 |     file part from ligand_parts
40 | 	file protein
41 | 
42 |     output:
43 |     file 'scored_part*.sdf' into scored_parts
44 |     // Explicit null test for -t: Groovy truth treats 0 as false, which would silently drop a threshold of 0.
45 |     """
46 | 	python -m pipelines.docking.smog2016 -i $part -pdb $protein -o ${part.name.replace('ligand', 'scored')[0..-8]} -of sdf --no-gzip ${params.score == null ? '' : ' -t ' + params.score} --threads 1 &> scored_out.log
47 |     """
48 | }
49 | 
50 | process joiner {
51 |     // Merges the scored parts through the filter module and assembles the metrics and metadata files.
52 |     container 'informaticsmatters/smog:latest'
53 |     beforeScript 'chmod g+w .'   // make the task work directory group-writable before the script runs
54 |     publishDir "$baseDir/results", mode: 'move'   // declared outputs are moved into $baseDir/results
55 | 
56 |     input:
57 |     file parts from scored_parts.collect()   // collect(): receive every scored part in one task
58 |     file 'splitter_metrics.txt' from splitter_metrics   // staged under this fixed name for the greps below
59 |     // output_metrics.txt ends up with: splitter input count relabelled SMOG, splitter input count, joiner output count.
60 | 	output:
61 | 	file 'output_metrics.txt'
62 | 	file 'output.data.gz'
63 |     file 'output.metadata'
64 |     // The filter run's own output_metrics.txt is renamed first so the final file can be rebuilt under that name.
65 | 	"""
66 | 	cat scored_part*.sdf | python -m pipelines_utils_rdkit.filter -if sdf -of json -o output --meta --thin
67 | 	mv output_metrics.txt joiner_metrics.txt
68 |     grep '__InputCount__' splitter_metrics.txt | sed s/__InputCount__/SMOG/ > output_metrics.txt
69 |     grep '__InputCount__' splitter_metrics.txt >> output_metrics.txt
70 |     grep '__OutputCount__' joiner_metrics.txt >> output_metrics.txt
71 |     echo '{"type":"org.squonk.types.BasicObject","valueClassMappings":{"SMoG2016_SCORE":"java.lang.Float","EmbedRMS":"java.lang.Float"}}' > output.metadata
72 |   	"""
73 | }
74 | 


--------------------------------------------------------------------------------
/src/nextflow/docking/smog.nsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | !<org.squonk.core.NextflowServiceDescriptor>
 3 | serviceConfig:
 4 |   id: "pipelines.docking.smog2016.v1"
 5 |   "name": "SMoG2016 docking scoring"
 6 |   "description": "Score docked ligands with SMoG2016"
 7 |   tags:
 8 |   - "smog2016"
 9 |   - "docking"
10 |   - "scoring"
11 |   - "3d"
12 |   - "docker"
13 |   - "nextflow"
14 |   resourceUrl: null
15 |   icon: "icons/filter_molecules.png"
16 |   inputDescriptors:
17 |   - name: "ligands"
18 |     mediaType: "application/x-squonk-dataset-molecule+json"
19 |     primaryType: "org.squonk.dataset.Dataset"
20 |     secondaryType: "org.squonk.types.MoleculeObject"
21 |   - name: "protein"
22 |     mediaType: "chemical/x-pdb"
23 |     primaryType: "org.squonk.types.PDBFile"
24 |   outputDescriptors:
25 |   - name: "output"
26 |     mediaType: "application/x-squonk-dataset-molecule+json"
27 |     primaryType: "org.squonk.dataset.Dataset"
28 |     secondaryType: "org.squonk.types.MoleculeObject"
29 |   optionDescriptors:
30 |   - !<org.squonk.options.OptionDescriptor>
31 |     typeDescriptor: !<org.squonk.options.SimpleTypeDescriptor>
32 |       type: "java.lang.Float"
33 |     key: "arg.score"
34 |     label: "Score threshold"
35 |     description: "Keep only molecules with scores better than this value"
36 |     minValues: 0
37 |     maxValues: 1
38 |     visible: true
39 |     editable: true
40 |     modes:
41 |     - "User"
42 | 
43 |   executorClassName: "org.squonk.execution.steps.impl.ThinDatasetNextflowInDockerExecutorStep"
44 | thinDescriptors:
45 |   - input: ligands
46 |     output: output
47 | inputRoutes:
48 | - route: "FILE"
49 | - route: "FILE"
50 | outputRoutes:
51 | - route: "FILE"
52 | nextflowParams: |
53 |   ${binding.variables.containsKey('score') ? 'params.score = ' + score : ''}
54 | 


--------------------------------------------------------------------------------
/src/nextflow/nextflow-docker.config:
--------------------------------------------------------------------------------
1 | docker.enabled = true                        // run process tasks inside Docker containers
2 | docker.mountFlags = 'z'                      // flag added to the volume mounts (presumably the SELinux shared-label option; confirm)
3 | docker.runOptions = '-u $(id -u):$(id -g)'   // single-quoted, so $(...) is left for the shell: run as the invoking user's uid:gid
4 | process.container = 'busybox'                // container used by processes that do not declare their own
5 | 


--------------------------------------------------------------------------------
/src/nextflow/rdkit/screen+conformers.nf:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env nextflow
 2 | 
 3 | /* Example Nextflow pipeline that runs screen.py followed by conformers.py
 4 | */
 5 | 
 6 | params.qsmiles = 'OC(=O)C1=CC=C(NC2=NC3=C(CN=C(C4=CC(Cl)=CC=C34)C3=C(F)C=CC=C3F)C=N2)C=C1'   // query SMILES passed to screen as --qsmiles
 7 | params.target = 'data/Kinase_inhibs.sdf.gz'   // file that is screened (screen -i)
 8 | params.simmin = 0.7                           // passed to screen as --simmin
 9 | params.simmax = 1.0                           // passed to screen as --simmax
10 | params.descriptor = 'rdkit'                   // passed to screen as -d
11 | params.metric = 'tanimoto'                    // passed to screen as -m
12 | params.num = 1                                // passed to conformers as -n
13 | params.attempts = 0                           // passed to conformers as -a
14 | 
15 | target = file(params.target)                  // turn the parameter value into a file object for staging
16 | 
17 | process rdkitScreen {
18 |     // Runs the screen module on the target file; no -o is given and the process stdout is captured as the output.
19 | 	input:
20 |     file target
21 | 
22 |     output:
23 |     stdout screenOutput   // the command's standard output becomes the channel value
24 |     
25 |     
26 |     """
27 |     python -m pipelines.rdkit.screen --qsmiles '$params.qsmiles' --simmin $params.simmin --simmax $params.simmax -d $params.descriptor -m $params.metric -i $target
28 |     """
29 |     
30 | }
31 | 
32 | process rdkitConformer {
33 |     // Runs the conformers module on the screening output, read from stdin as SDF (-if sdf).
34 | 	input:
35 |     stdin screenOutput   // the value from rdkitScreen is fed to the command's standard input
36 | 
37 |     output:
38 |     file 'results.sdf.gz' into results 
39 |     
40 |     
41 |     """
42 |     python -m pipelines.rdkit.conformers -if sdf -n $params.num -a $params.attempts -o results
43 |     """
44 |     
45 | }
46 | 
47 | results.println { "Results: $it" }   // print each item emitted on the results channel
48 | 


--------------------------------------------------------------------------------
/src/nextflow/rdkit/screen-dataset.nsd.config:
--------------------------------------------------------------------------------
1 | // Intentionally Empty


--------------------------------------------------------------------------------
/src/nextflow/rdkit/screen-dataset.nsd.nf:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env nextflow
 2 | 
 3 | params.input = "$baseDir/input.data.gz"   // dataset read by the splitter
 4 | params.qsmiles                            // no default is assigned here; presumably the caller must supply the query SMILES
 5 | params.simmin = 0.7                       // passed to screen as --simmin
 6 | params.simmax = 1.0                       // passed to screen as --simmax
 7 | params.descriptor = 'rdkit'               // passed to screen as -d
 8 | params.metric = 'tanimoto'                // passed to screen as -m
 9 | params.chunk = 2500                       // passed to the filter module as -c
10 | params.limit = 0                          // passed to the filter module as -l
11 | params.digits = 4                         // passed to the filter module as -d
12 | 
13 | target = file(params.input)               // turn the parameter value into a file object for staging
14 | 
15 | process splitter {
16 |     // Splits the input into gzipped SD parts and also captures the metrics file written by the filter run.
17 |     container 'informaticsmatters/rdkit_pipelines:latest'
18 | 
19 |     input:
20 |     file target
21 | 
22 |     output:
23 |     file 'target_part*.sdf.gz' into target_parts mode flatten   // flatten: every part is emitted as its own channel item
24 |     file 'target_part_metrics.txt' into splitter_metrics
25 | 
26 |     """
27 |     python -m pipelines_utils_rdkit.filter -i $target -c $params.chunk -l $params.limit -d $params.digits -o target_part -of sdf --meta
28 |     """
29 | }
30 | 
31 | process rdkitScreen {
32 |     // Screens one part against the query SMILES. Output base name: 'target' -> 'screened', minus the 7-char '.sdf.gz' suffix.
33 |     container 'informaticsmatters/rdkit_pipelines'
34 | 
35 | 	input:
36 |     file part from target_parts
37 | 
38 |     output:
39 |     file 'screened_part*.sdf.gz' into screened_parts
40 | 
41 |     """
42 |     python -m pipelines.rdkit.screen --qsmiles '$params.qsmiles' --simmin $params.simmin --simmax $params.simmax -d $params.descriptor -m $params.metric -i $part -o ${part.name.replace('target', 'screened')[0..-8]} -of sdf
43 |     """
44 | }
45 | 
46 | process joiner {
47 |     // Merges the screened parts through the filter module and assembles the final metrics file.
48 |     container 'informaticsmatters/rdkit_pipelines:latest'
49 | 
50 |     publishDir "$baseDir/results", mode: 'move'   // declared outputs are moved into $baseDir/results
51 | 
52 |     input:
53 |     file 'splitter_metrics.txt' from splitter_metrics   // staged under this fixed name for the greps below
54 | 	file parts from screened_parts.collect()   // collect(): receive every screened part in one task
55 |     // output_metrics.txt ends up with: splitter input count relabelled RDKitScreen, splitter input count, joiner output count.
56 | 	output:
57 | 	file 'output_metrics.txt'
58 | 	file 'output.data.gz'
59 | 	file 'output.metadata'
60 |     // The filter run's own output_metrics.txt is renamed first so the final file can be rebuilt under that name.
61 | 	"""
62 | 	zcat $parts | python -m pipelines_utils_rdkit.filter -if sdf -of json -o output --meta
63 | 	mv output_metrics.txt joiner_metrics.txt
64 | 	grep '__InputCount__' splitter_metrics.txt | sed s/__InputCount__/RDKitScreen/ > output_metrics.txt
65 |     grep '__InputCount__' splitter_metrics.txt >> output_metrics.txt
66 |     grep '__OutputCount__' joiner_metrics.txt >> output_metrics.txt
67 | 	"""
68 | }
69 | 


--------------------------------------------------------------------------------
/src/nextflow/rdkit/screen-dataset.nsd.yml:
--------------------------------------------------------------------------------
  1 | ---
  2 | !<org.squonk.core.NextflowServiceDescriptor>
  3 | serviceConfig:
  4 |   id: "pipelines.rdkit.screen.basic"
  5 |   name: "RDKitSimilarityScreening"
  6 |   description: "RDKit Similarity Screening"
  7 |   tags:
  8 |   - "rdkit"
  9 |   - "screening"
 10 |   - "similarity"
 11 |   - "docker"
 12 |   - "nextflow"
 13 |   resourceUrl: null
 14 |   icon: "icons/filter_molecules.png"
 15 |   inputDescriptors:
 16 |   - name: "input"
 17 |     mediaType: "application/x-squonk-dataset-molecule+json"
 18 |     primaryType: "org.squonk.dataset.Dataset"
 19 |     secondaryType: "org.squonk.types.MoleculeObject"
 20 |   outputDescriptors:
 21 |   - name: "output"
 22 |     mediaType: "application/x-squonk-dataset-molecule+json"
 23 |     primaryType: "org.squonk.dataset.Dataset"
 24 |     secondaryType: "org.squonk.types.MoleculeObject"
 25 |   optionDescriptors:
 26 |   - !<org.squonk.options.OptionDescriptor>
 27 |     typeDescriptor: !<org.squonk.options.MoleculeTypeDescriptor>
 28 |       type: "org.squonk.options.types.Structure"
 29 |       formats: ["smiles"]
 30 |       molType: "DISCRETE"
 31 |     key: "arg.query"
 32 |     label: "Query molecule"
 33 |     description: "Query molecule as smiles"
 34 |     visible: true
 35 |     editable: true
 36 |     modes:
 37 |     - "User"
 38 |   - !<org.squonk.options.OptionDescriptor>
 39 |     typeDescriptor: !<org.squonk.options.SimpleTypeDescriptor>
 40 |       type: "org.squonk.types.NumberRange$Float"
 41 |     key: "arg.sim"
 42 |     label: "Similarity"
 43 |     description: "Similarity threshold (1.0 is identical)"
 44 |     defaultValue: !<org.squonk.types.NumberRange$Float>
 45 |       minValue: 0.7
 46 |       maxValue: 1.0
 47 |     visible: true
 48 |     editable: true
 49 |     modes:
 50 |     - "User"
 51 |   - !<org.squonk.options.OptionDescriptor>
 52 |     typeDescriptor: !<org.squonk.options.SimpleTypeDescriptor>
 53 |       type: "java.lang.String"
 54 |     key: "arg.descriptor"
 55 |     label: "Descriptor"
 56 |     description: "Descriptor/fingerprint to use"
 57 |     values:
 58 |     - "maccs"
 59 |     - "morgan2"
 60 |     - "morgan3"
 61 |     - "rdkit"
 62 |     defaultValue: "rdkit"
 63 |     visible: true
 64 |     editable: true
 65 |     modes:
 66 |     - "User"
 67 |   - !<org.squonk.options.OptionDescriptor>
 68 |     typeDescriptor: !<org.squonk.options.SimpleTypeDescriptor>
 69 |       type: "java.lang.String"
 70 |     key: "arg.metric"
 71 |     label: "Metric"
 72 |     description: "Similarity metric to use"
 73 |     values:
 74 |     - "asymmetric"
 75 |     - "braunblanquet"
 76 |     - "cosine"
 77 |     - "dice"
 78 |     - "kulczynski"
 79 |     - "mcconnaughey"
 80 |     - "rogotgoldberg"
 81 |     - "russel"
 82 |     - "sokal"
 83 |     - "tanimoto"
 84 |     defaultValue: "tanimoto"
 85 |     visible: true
 86 |     editable: true
 87 |     modes:
 88 |     - "User"
 89 |   executorClassName: "org.squonk.execution.steps.impl.ThinDatasetNextflowInDockerExecutorStep"
 90 | thinDescriptors:
 91 |   - input: "input"
 92 |     output: "output"
 93 |     filtering: true
 94 | inputRoutes:
 95 | - route: "FILE"
 96 | outputRoutes:
 97 | - route: "FILE"
 98 | nextflowParams: |
 99 |   params.qsmiles = '$query_source'
100 |   params.simmin = $sim.minValue
101 |   params.simmax = $sim.maxValue
102 |   params.descriptor = '$descriptor'
103 |   params.metric = '$metric'
104 | 


--------------------------------------------------------------------------------
/src/nextflow/rdkit/screen-multi-dataset.nsd.config:
--------------------------------------------------------------------------------
1 | // Intentionally Empty


--------------------------------------------------------------------------------
/src/nextflow/rdkit/screen-multi-dataset.nsd.nf:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env nextflow
 2 | 
 3 | params.target = "$baseDir/target.data.gz"
 4 | params.query = "$baseDir/query.data.gz"
 5 | params.simmin = 0.7
 6 | params.simmax = 1.0
 7 | params.descriptor = 'rdkit'
 8 | params.metric = 'tanimoto'
 9 | params.chunk = 500
10 | params.limit = 0
11 | params.digits = 4
12 | 
13 | target = file(params.target)
14 | query = file(params.query)
15 | 
16 | process splitter {
17 |     // Runs pipelines_utils_rdkit.filter on the target file to write SDF part files and a metrics file.
18 |     container 'informaticsmatters/rdkit_pipelines:latest'
19 | 
20 |     input:
21 |     file target
22 | 
23 |     output:
24 |     file 'target_part*.sdf.gz' into target_parts mode flatten  // each matching part file is emitted as a separate item
25 |     file 'target_part_metrics.txt' into splitter_metrics  // consumed by the joiner process
26 |     // NOTE(review): -c/-l/-d presumably set chunk size, record limit and file-number digits; confirm against pipelines_utils_rdkit.filter
27 |     """
28 |     python -m pipelines_utils_rdkit.filter -i $target -c $params.chunk -l $params.limit -d $params.digits -o target_part -of sdf --meta
29 |     """
30 | }
31 | 
32 | process rdkitScreen {
33 |     // Runs pipelines.rdkit.screen_multi on one target part, using query.data.gz as the --qjson input.
34 |     container 'informaticsmatters/rdkit_pipelines:latest'
35 | 
36 | 	input:
37 |     file part from target_parts
38 |     file 'query.data.gz' from query
39 | 
40 |     output:
41 |     file 'screened_part*.sdf.gz' into screened_parts
42 |     // The -o value below is the part name with 'target' replaced by 'screened' and its last 7 characters ('.sdf.gz') dropped.
43 |     """
44 |     python -m pipelines.rdkit.screen_multi -i $part --qjson query.data.gz --simmin $params.simmin --simmax $params.simmax -d $params.descriptor -m $params.metric -o ${part.name.replace('target', 'screened')[0..-8]} -of sdf
45 |     """
46 | }
47 | 
48 | process joiner {
49 |     // Pipes all screened parts through pipelines_utils_rdkit.filter (SDF in, JSON out) and builds output_metrics.txt.
50 |     container 'informaticsmatters/rdkit_pipelines:latest'
51 | 
52 |     publishDir "$baseDir/results", mode: 'move'  // declared outputs are moved into the results directory
53 | 
54 |     input:
55 |     file 'splitter_metrics.txt' from splitter_metrics
56 | 	file parts from screened_parts.collect()  // collect() gathers every screened part into a single list
57 | 
58 | 	output:
59 | 	file 'output_metrics.txt'
60 | 	file 'output.data.gz'
61 | 	file 'output.metadata'
62 |     // The script keeps the filter's own metrics as joiner_metrics.txt, then writes the splitter's __InputCount__ line (once renamed to RDKitScreen, once as-is) and the joiner's __OutputCount__ line.
63 | 	"""
64 | 	zcat $parts | python -m pipelines_utils_rdkit.filter -if sdf -of json -o output --meta
65 | 	mv output_metrics.txt joiner_metrics.txt
66 | 	grep '__InputCount__' splitter_metrics.txt | sed s/__InputCount__/RDKitScreen/ > output_metrics.txt
67 |     grep '__InputCount__' splitter_metrics.txt >> output_metrics.txt
68 |     grep '__OutputCount__' joiner_metrics.txt >> output_metrics.txt
69 | 	"""
70 | }
71 | 


--------------------------------------------------------------------------------
/src/nextflow/rdkit/screen-multi-dataset.nsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | !<org.squonk.core.NextflowServiceDescriptor>
 3 | serviceConfig:
 4 |   id: "pipelines.rdkit.screen.multi"
 5 |   name: "RDKitMultiSimilarityScreening"
 6 |   description: "RDKit Similarity Screening against multiple query structures"
 7 |   tags:
 8 |   - "rdkit"
 9 |   - "screening"
10 |   - "similarity"
11 |   - "docker"
12 |   - "nextflow"
13 |   resourceUrl: null
14 |   icon: "icons/filter_molecules.png"
15 |   inputDescriptors:
16 |   - name: "target"
17 |     mediaType: "application/x-squonk-dataset-molecule+json"
18 |     primaryType: "org.squonk.dataset.Dataset"
19 |     secondaryType: "org.squonk.types.MoleculeObject"
20 |   - name: "query"
21 |     mediaType: "application/x-squonk-dataset-molecule+json"
22 |     primaryType: "org.squonk.dataset.Dataset"
23 |     secondaryType: "org.squonk.types.MoleculeObject"
24 |   outputDescriptors:
25 |   - name: "output"
26 |     mediaType: "application/x-squonk-dataset-molecule+json"
27 |     primaryType: "org.squonk.dataset.Dataset"
28 |     secondaryType: "org.squonk.types.MoleculeObject"
29 |   optionDescriptors:
30 |   - !<org.squonk.options.OptionDescriptor>
31 |     typeDescriptor: !<org.squonk.options.SimpleTypeDescriptor>
32 |       type: "org.squonk.types.NumberRange$Float"
33 |     key: "arg.sim"
34 |     label: "Similarity"
35 |     description: "Similarity threshold (1.0 is identical)"
36 |     defaultValue: !<org.squonk.types.NumberRange$Float>
37 |       minValue: 0.7
38 |       maxValue: 1.0
39 |     visible: true
40 |     editable: true
41 |     modes:
42 |     - "User"
43 |   - !<org.squonk.options.OptionDescriptor>
44 |     typeDescriptor: !<org.squonk.options.SimpleTypeDescriptor>
45 |       type: "java.lang.String"
46 |     key: "arg.descriptor"
47 |     label: "Descriptor"
48 |     description: "Descriptor/fingerprint to use"
49 |     values:
50 |     - "maccs"
51 |     - "morgan2"
52 |     - "morgan3"
53 |     - "rdkit"
54 |     defaultValue: "rdkit"
55 |     visible: true
56 |     editable: true
57 |     modes:
58 |     - "User"
59 |   - !<org.squonk.options.OptionDescriptor>
60 |     typeDescriptor: !<org.squonk.options.SimpleTypeDescriptor>
61 |       type: "java.lang.String"
62 |     key: "arg.metric"
63 |     label: "Metric"
64 |     description: "Similarity metric to use"
65 |     values:
66 |     - "asymmetric"
67 |     - "braunblanquet"
68 |     - "cosine"
69 |     - "dice"
70 |     - "kulczynski"
71 |     - "mcconnaughey"
72 |     - "rogotgoldberg"
73 |     - "russel"
74 |     - "sokal"
75 |     - "tanimoto"
76 |     defaultValue: "tanimoto"
77 |     visible: true
78 |     editable: true
79 |     modes:
80 |     - "User"
81 |   executorClassName: "org.squonk.execution.steps.impl.ThinDatasetNextflowInDockerExecutorStep"
82 | thinDescriptors:
83 |   - input: "target"
84 |     output: "output"
85 |     filtering: true
86 |   - input: "query"
87 | inputRoutes:
88 | - route: "FILE"
89 | - route: "FILE"
90 | outputRoutes:
91 | - route: "FILE"
92 | nextflowParams: |
93 |   params.simmin = $sim.minValue
94 |   params.simmax = $sim.maxValue
95 |   params.descriptor = '$descriptor'
96 |   params.metric = '$metric'
97 | 


--------------------------------------------------------------------------------
/src/nextflow/rdkit/screen.config:
--------------------------------------------------------------------------------
1 | // Intentionally Empty


--------------------------------------------------------------------------------
/src/nextflow/rdkit/screen.nf:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env nextflow
 2 | 
 3 | params.qsmiles = 'OC(=O)C1=CC=C(NC2=NC3=C(CN=C(C4=CC(Cl)=CC=C34)C3=C(F)C=CC=C3F)C=N2)C=C1'
 4 | params.target = 'data/Kinase_inhibs.sdf.gz'
 5 | params.simmin = 0.7
 6 | params.simmax = 1.0
 7 | params.descriptor = 'rdkit'
 8 | params.metric = 'tanimoto'
 9 | 
10 | target = file(params.target)
11 | 
12 | process rdkitScreen {
13 |     // Screens the target file with pipelines.rdkit.screen, passing params.qsmiles as the query SMILES.
14 |      input:
15 |     file target
16 | 
17 |     output:
18 |     file 'results.sdf.gz' into results  // NOTE(review): presumably produced by '-o results'; confirm the module appends .sdf.gz
19 | 
20 |     """
21 |     python -m pipelines.rdkit.screen --qsmiles '$params.qsmiles' --simmin $params.simmin --simmax $params.simmax -d $params.descriptor -m $params.metric -i $target -o results
22 |     """
23 | 
24 | }
25 | 
26 | results.println { "Results: $it" }


--------------------------------------------------------------------------------
/src/nextflow/xchem/expand.nf:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env nextflow
 2 | 
 3 | // expand params
 4 | params.hits = 'data/mpro/hits-5.sdf.gz'
 5 | params.token = null
 6 | params.hac_min = 3
 7 | params.hac_max = 3
 8 | params.rac_min = 1
 9 | params.rac_max = 1
10 | params.hops = 1
11 | params.server = null
12 | 
13 | 
14 | // files
15 | hits = file(params.hits)
16 | 
17 | process fragnet_expand {
18 |     // Runs pipelines.xchem.fragnet_expand on the hits file; --token and --server are added only when those params are set.
19 |     container 'informaticsmatters/rdkit_pipelines:latest'
20 | 
21 |     publishDir ".", mode: 'copy'  // declared outputs are copied to '.'
22 | 
23 |     input:
24 |     file hits
25 | 
26 |     output:
27 |     file '*.smi' into smiles
28 |     file '*.mol' into mols
29 | 
30 |     """
31 |     python -m pipelines.xchem.fragnet_expand -i '$hits' ${params.token ? '--token ' + params.token : ''}\
32 |       --hops $params.hops\
33 |       --hac-min $params.hac_min\
34 |       --hac-max $params.hac_max\
35 |       --rac-min $params.rac_min\
36 |       --rac-max $params.rac_max\
37 |       ${params.server ? '--server ' + params.server : ''}
38 |     """
39 | }


--------------------------------------------------------------------------------
/src/nextflow/xchem/featurestein.nf:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env nextflow
 2 | 
 3 | params.inputs = "data/mpro/poses.sdf.gz"
 4 | params.fragments = "data/mpro/hits-17.sdf.gz"
 5 | params.chunk = 5000
 6 | params.limit = 0
 7 | params.digits = 4
 8 | 
 9 | inputs = file(params.inputs)
10 | fragments = file(params.fragments)
11 | 
12 | process generate_feat_maps {
13 |     // Runs pipelines.xchem.featurestein_generate on the fragments file and emits featurestein.p on the fmaps channel.
14 |     container 'informaticsmatters/rdkit_pipelines:latest'
15 | 
16 |     input:
17 |     file fragments
18 | 
19 |     output:
20 |     file 'featurestein.p' into fmaps
21 | 
22 |     """
23 |     python -m pipelines.xchem.featurestein_generate -i '$fragments' -f featurestein.p
24 |     """
25 | }
26 | 
27 | process splitter {
28 |     // Runs pipelines_utils_rdkit.filter on the inputs file to write 'inputs_part*.sdf.gz' part files.
29 |     container 'informaticsmatters/rdkit_pipelines:latest'
30 | 
31 |     input:
32 |     file inputs
33 | 
34 |     output:
35 |     file 'inputs_part*.sdf.gz' into inputs_parts mode flatten  // each matching part file is emitted as a separate item
36 |     // NOTE(review): -c/-l/-d presumably set chunk size, record limit and file-number digits; confirm against pipelines_utils_rdkit.filter
37 |     """
38 |     python -m pipelines_utils_rdkit.filter -i '$inputs' -c $params.chunk -l $params.limit -d $params.digits -o 'inputs_part_' -of sdf
39 |     """
40 | }
41 | 
42 | process score {
43 |     // Runs pipelines.xchem.featurestein_score on one part with the fmaps file, requesting SDF output with --no-gzip.
44 |     container 'informaticsmatters/rdkit_pipelines:latest'
45 | 
46 | 	input:
47 |     file part from inputs_parts
48 |     file fmaps
49 | 
50 |     output:
51 |     file 'scored_part*.sdf' into scored_parts
52 |     // The -o value below is the part name with 'inputs' replaced by 'scored' and its last 7 characters ('.sdf.gz') dropped.
53 |     """
54 |     python -m pipelines.xchem.featurestein_score -i '$part' -f '$fmaps' -o '${part.name.replace('inputs', 'scored')[0..-8]}' -of sdf --no-gzip
55 |     """
56 | }
57 | 
58 | process joiner {
59 |     // Concatenates all scored parts and gzips them into featurestein_scored.sdf.gz.
60 |     container 'informaticsmatters/rdkit_pipelines:latest'
61 | 
62 |     publishDir ".", mode: 'link'  // the declared output is published to '.' using 'link' mode
63 | 
64 |     input:
65 | 	file parts from scored_parts.collect()  // collect() gathers every scored part into a single list
66 | 
67 | 	output:
68 | 	file 'featurestein_scored.sdf.gz'
69 | 
70 | 	"""
71 | 	cat $parts | gzip > featurestein_scored.sdf.gz
72 | 	"""
73 | }
74 | 


--------------------------------------------------------------------------------
/src/nextflow/xchem/prepare-tether-featurestein.nf:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env nextflow
 2 | 
 3 | params.smiles = '*.smi'
 4 | params.molfiles = '*.mol'
 5 | params.fragments = "data/mpro/hits-17.sdf.gz"
 6 | params.chunk_tether = 250
 7 | params.chunk_score = 10000
 8 | params.limit = 0
 9 | params.digits = 4
10 | params.generate_filenames = false
11 | params.num_conformers = 10
12 | 
13 | // files
14 | smilesfiles = file(params.smiles)
15 | molfiles = file(params.molfiles)
16 | fragments = file(params.fragments)
17 | 
18 | process generate_feat_maps {
19 |     // Runs pipelines.xchem.featurestein_generate on the fragments file and emits featurestein.p on the fmaps channel.
20 |     container 'informaticsmatters/rdkit_pipelines:latest'
21 | 
22 |     input:
23 |     file fragments
24 | 
25 |     output:
26 |     file 'featurestein.p' into fmaps
27 | 
28 |     """
29 |     python -m pipelines.xchem.featurestein_generate -i '$fragments' -f featurestein.p
30 |     """
31 | }
32 | 
33 | process splitter {
34 |     // Splits one SMILES file into chunks of params.chunk_tether lines and copies the mol file once per chunk.
35 |     container 'informaticsmatters/rdkit_pipelines:latest'
36 | 
37 |     input:
38 |     file smiles from smilesfiles.flatten()
39 |     file mol from molfiles.flatten()
40 | 
41 |     output:
42 |     file '*.mol' into mols
43 |     file '*.smi' into smiles
44 |     // The script renames the original inputs to *.orig, which keeps them out of the '*.smi' and '*.mol' output globs.
45 |     """
46 |     stem=${smiles.name[0..-5]}
47 |     split -l $params.chunk_tether -d -a 3 --additional-suffix .smi $smiles \${stem}_
48 |     mv $smiles ${smiles}.orig
49 |     for f in *.smi
50 |     do
51 |       cp $mol \${f:0:-4}.mol
52 |     done
53 |     mv $mol ${mol}.orig
54 |     """
55 | }
56 | 
57 | process tether {
58 |     // Runs pipelines.xchem.prepare_tether for one SMILES chunk and one mol file; the output prefix is 'Tethered_' plus the chunk name without its last 4 characters.
59 |     container 'informaticsmatters/rdkit_pipelines:latest'
60 | 
61 |     input:
62 |     file smiles from smiles.flatten()
63 |     file mol from mols.flatten()  // NOTE(review): smiles and mol items are paired by arrival order; confirm the two channels stay aligned
64 | 
65 |     output:
66 |     file 'Tethered_*.sdf' into tethered_parts
67 | 
68 |     """
69 |     python -m pipelines.xchem.prepare_tether --smi '$smiles' --mol '$mol' --chunk-size $params.chunk_score --num-conformers $params.num_conformers -o 'Tethered_${smiles.name[0..-5]}'
70 |     """
71 | }
72 | 
73 | process score {
74 |     // Runs pipelines.xchem.featurestein_score on one tethered SDF with the fmaps file; the output prefix is 'Scored_' plus the part name without its last 4 characters.
75 |     container 'informaticsmatters/rdkit_pipelines:latest'
76 |     publishDir '.'
77 | 
78 | 	input:
79 |     file part from tethered_parts.flatten()
80 |     file fmaps
81 | 
82 |     output:
83 |     file 'Scored_*.sdf' into scored_parts
84 | 
85 |     """
86 |     python -m pipelines.xchem.featurestein_score -i '$part' -f '$fmaps' -o 'Scored_${part.name[0..-5]}' -of sdf --no-gzip
87 |     """
88 | }
89 | 


--------------------------------------------------------------------------------
/src/nextflow/xchem/prepare-tether.nf:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env nextflow
 2 | 
 3 | params.smiles = '*.smi'
 4 | params.molfiles = '*.mol'
 5 | params.chunk_tether = 250
 6 | params.chunk_score = 1000
 7 | params.limit = 0
 8 | params.num_conformers = 1
 9 | params.ph_min = null
10 | params.ph_max = null
11 | params.atom_compare = 'CompareElements'
12 | params.bond_compare = 'CompareOrder'
13 | params.complete_rings_only = true
14 | params.ring_matches_ring_only = true
15 | params.minimize = 4
16 | params.timeout_embed = null
17 | 
18 | 
19 | smilesfiles = file(params.smiles)
20 | molfiles = file(params.molfiles)
21 | 
22 | process splitter {
23 |     // Splits one SMILES file into chunks of params.chunk_tether lines and copies the mol file once per chunk.
24 |     container 'informaticsmatters/rdkit_pipelines:latest'
25 | 
26 |     input:
27 |     file smiles from smilesfiles.flatten()
28 |     file mol from molfiles.flatten()
29 | 
30 |     output:
31 |     file '*.mol' into mols
32 |     file '*.smi' into smiles
33 |     // The script renames the original inputs to *.orig, which keeps them out of the '*.smi' and '*.mol' output globs.
34 |     """
35 |     stem=${smiles.name[0..-5]}
36 |     split -l $params.chunk_tether -d -a 3 --additional-suffix .smi $smiles \${stem}_
37 |     mv $smiles ${smiles}.orig
38 |     for f in *.smi
39 |     do
40 |       cp $mol \${f:0:-4}.mol
41 |     done
42 |     mv $mol ${mol}.orig
43 |     """
44 | }
45 | 
46 | process tether {
47 |     // Runs pipelines.xchem.prepare_tether for one SMILES chunk and one mol file; optional flags are added only when the matching param is truthy or non-null.
48 |     container 'informaticsmatters/rdkit_pipelines:latest'
49 |     publishDir '.'
50 | 
51 |     input:
52 |     file smiles from smiles.flatten()
53 |     file mol from mols.flatten()  // NOTE(review): smiles and mol items are paired by arrival order; confirm the two channels stay aligned
54 | 
55 |     output:
56 |     file 'Tethered_*.sdf' into tethered_parts
57 | 
58 |     """
59 |     python -m pipelines.xchem.prepare_tether --smi '$smiles' --mol '$mol' --chunk-size $params.chunk_score\
60 |       -o 'Tethered_${smiles.name[0..-5]}'\
61 |       --num-conformers $params.num_conformers\
62 |       --atom-compare $params.atom_compare --bond-compare $params.bond_compare\
63 |       ${params.complete_rings_only ? '--complete-rings-only' : ''}\
64 |       ${params.ring_matches_ring_only ? '--ring-matches-ring-only' : ''}\
65 |       --minimize $params.minimize\
66 |       ${params.ph_min != null ? '--min-ph ' + params.ph_min : ''}\
67 |       ${params.ph_max != null ? '--max-ph ' + params.ph_max : ''}\
68 |       ${params.timeout_embed != null ? '--timeout-embed ' + params.timeout_embed : ''}
69 |     """
70 | }
71 | 


--------------------------------------------------------------------------------
/src/nextflow/xchem/xcos.nf:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env nextflow
 2 | 
 3 | params.inputs = 'data/mpro/poses.sdf.gz'
 4 | params.fragments = 'data/mpro/hits-17.sdf.gz'
 5 | params.threshold = 0.4 // XCos score threshold
 6 | params.chunk = 500     // chunk size to split input into
 7 | params.limit = 0       // max number of molecules to process
 8 | params.digits = 4      // number of digits for the split file name number
 9 | 
10 | inputs = file(params.inputs)
11 | fragments = file(params.fragments)
12 | 
13 | process splitter {
14 |     // Runs pipelines_utils_rdkit.filter on the inputs file, passing params.chunk, params.limit and params.digits, to write 'inputs_part*.sdf.gz' part files.
15 |     container 'informaticsmatters/rdkit_pipelines:latest'
16 | 
17 |     input:
18 |     file inputs
19 | 
20 |     output:
21 |     file 'inputs_part*.sdf.gz' into inputs_parts mode flatten  // each matching part file is emitted as a separate item
22 | 
23 |     """
24 |     python -m pipelines_utils_rdkit.filter -i '$inputs' -c $params.chunk -l $params.limit -d $params.digits -o 'inputs_part_' -of sdf
25 |     """
26 | }
27 | 
28 | process xcos {
29 |     // Runs pipelines.xchem.xcos on one part with the fragments file and params.threshold, requesting SDF output with --no-gzip.
30 |     container 'informaticsmatters/rdkit_pipelines:latest'
31 | 
32 | 	input:
33 |     file part from inputs_parts
34 |     file fragments
35 | 
36 |     output:
37 |     file 'scored_part*.sdf' into scored_parts
38 |     // The -o value below is the part name with 'inputs' replaced by 'scored' and its last 7 characters ('.sdf.gz') dropped.
39 |     """
40 |     python -m pipelines.xchem.xcos -i '$part' -f '$fragments' -t $params.threshold -o '${part.name.replace('inputs', 'scored')[0..-8]}' -of sdf --no-gzip
41 |     """
42 | }
43 | 
44 | process joiner {
45 |     // Concatenates all scored parts and gzips them into xcos_scored.sdf.gz.
46 |     container 'informaticsmatters/rdkit_pipelines:latest'
47 | 
48 |     publishDir ".", mode: 'link'  // the declared output is published to '.' using 'link' mode
49 | 
50 |     input:
51 | 	file parts from scored_parts.collect()  // collect() gathers every scored part into a single list
52 | 
53 | 	output:
54 | 	file 'xcos_scored.sdf.gz'
55 | 
56 | 	"""
57 | 	cat $parts | gzip > xcos_scored.sdf.gz
58 | 	"""
59 | }
60 | 


--------------------------------------------------------------------------------
/src/python/NNScore_pdbbind2016.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/NNScore_pdbbind2016.pickle


--------------------------------------------------------------------------------
/src/python/README.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/README.rst


--------------------------------------------------------------------------------
/src/python/RFScore_v1_pdbbind2016.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/RFScore_v1_pdbbind2016.pickle


--------------------------------------------------------------------------------
/src/python/RFScore_v2_pdbbind2016.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/RFScore_v2_pdbbind2016.pickle


--------------------------------------------------------------------------------
/src/python/RFScore_v3_pdbbind2016.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/RFScore_v3_pdbbind2016.pickle


--------------------------------------------------------------------------------
/src/python/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/__init__.py


--------------------------------------------------------------------------------
/src/python/notebooks/default.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": null,
 6 |    "metadata": {
 7 |     "collapsed": true
 8 |    },
 9 |    "outputs": [],
10 |    "source": [
11 |     "from collections import defaultdict\n",
12 |     "import numpy as np\n",
13 |     "from rdkit import Chem\n",
14 |     "from rdkit.Chem import AllChem,Draw\n",
15 |     "from rdkit.Chem.Draw import IPythonConsole\n",
16 |     "IPythonConsole.ipython_3d=True\n",
17 |     "%pylab inline\n",
18 |     "from pipelines.utils import load_data\n",
19 |     "\n",
20 |     "# Load_data loads in all the data placed in the normal places\n",
21 |     "input_data = load_data()"
22 |    ]
23 |   }
24 |  ],
25 |  "metadata": {
26 |   "kernelspec": {
27 |    "display_name": "Python 2",
28 |    "language": "python",
29 |    "name": "python2"
30 |   },
31 |   "language_info": {
32 |    "codemirror_mode": {
33 |     "name": "ipython",
34 |     "version": 2.0
35 |    },
36 |    "file_extension": ".py",
37 |    "mimetype": "text/x-python",
38 |    "name": "python",
39 |    "nbconvert_exporter": "python",
40 |    "pygments_lexer": "ipython2",
41 |    "version": "2.7.6"
42 |   }
43 |  },
44 |  "nbformat": 4,
45 |  "nbformat_minor": 0
46 | }


--------------------------------------------------------------------------------
/src/python/pipelines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/pipelines/__init__.py


--------------------------------------------------------------------------------
/src/python/pipelines/dimorphite/README.txt:
--------------------------------------------------------------------------------
1 | This package uses dimorphite-dl from the Durrant lab at the University of Pittsburgh.
2 | For details see:
3 | Ropp PJ, Kaminsky JC, Yablonski S, Durrant JD (2019) Dimorphite-DL: An open-source
4 | program for enumerating the ionization states of drug-like small molecules.
5 | J Cheminform 11:14. doi:10.1186/s13321-019-0336-9.
6 | 
7 | The original Dimorphite-dl code can be found here:
8 | https://git.durrantlab.pitt.edu/jdurrant/dimorphite_dl
9 | The two files dimorphite_dl.py and site_substructures.smarts are copied here.


--------------------------------------------------------------------------------
/src/python/pipelines/dimorphite/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/pipelines/dimorphite/__init__.py


--------------------------------------------------------------------------------
/src/python/pipelines/dimorphite/enumerate_charges.dsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | "@class": org.squonk.core.DockerServiceDescriptor
 3 | serviceConfig:
 4 |   id: pipelines.dimorphite.enumerate_charges.v1
 5 |   name: EnumerateCharges
 6 |   description: Generate charge forms using Dimorphite_DL
 7 |   tags:
 8 |   - rdkit
 9 |   - dimorphite
10 |   - charges
11 |   - enumerate
12 |   - docker
13 |   resourceUrl:
14 |   icon: icons/molecule_generator.png
15 |   inputDescriptors:
16 |   - primaryType: org.squonk.dataset.Dataset
17 |     secondaryType: org.squonk.types.MoleculeObject
18 |     mediaType: application/x-squonk-dataset-molecule+json
19 |     name: input
20 |   outputDescriptors:
21 |   - primaryType: org.squonk.dataset.Dataset
22 |     secondaryType: org.squonk.types.MoleculeObject
23 |     mediaType: application/x-squonk-dataset-molecule+json
24 |     name: output
25 |   optionDescriptors:
26 |   - "@class": org.squonk.options.OptionDescriptor
27 |     modes:
28 |     - User
29 |     typeDescriptor:
30 |       type: java.lang.Float
31 |       "@class": org.squonk.options.SimpleTypeDescriptor
32 |     editable: true
33 |     visible: true
34 |     description: Minimum pH
35 |     defaultValue: 5.0
36 |     label: Minimum pH value
37 |     key: arg.min_ph
38 |     minValues: 1
39 |     maxValues: 1
40 |   - "@class": org.squonk.options.OptionDescriptor
41 |     modes:
42 |       - User
43 |     typeDescriptor:
44 |       type: java.lang.Float
45 |       "@class": org.squonk.options.SimpleTypeDescriptor
46 |     editable: true
47 |     visible: true
48 |     description: Maximum pH
49 |     defaultValue: 9.0
50 |     label: Maximum pH value
51 |     key: arg.max_ph
52 |     minValues: 1
53 |     maxValues: 1
54 |   - modes:
55 |       - User
56 |     editable: true
57 |     "@class": org.squonk.options.OptionDescriptor
58 |     typeDescriptor:
59 |       type: java.lang.String
60 |       "@class": org.squonk.options.SimpleTypeDescriptor
61 |     key: arg.fragment_method
62 |     label: Fragment method
63 |     description: Approach to use for picking biggest molecular fragment
64 |     values:
65 |       - hac
66 |       - mw
67 |     defaultValue: hac
68 |     visible: true
69 |   executorClassName: org.squonk.execution.steps.impl.DefaultDockerExecutorStep
70 | #thinDescriptors:
71 | #- input: input
72 | inputRoutes:
73 | - route: FILE
74 | outputRoutes:
75 | - route: FILE
76 | imageName: informaticsmatters/rdkit_pipelines
77 | command: >-
78 |   python -m pipelines.dimorphite.enumerate_charges -i ${PIN}input.data.gz -if json -o ${POUT}output -of json --meta
79 |   ${binding.variables.containsKey('min_ph') ? ' --min-ph ' + min_ph : ''}
80 |   ${binding.variables.containsKey('max_ph') ? ' --max-ph ' + max_ph : ''}
81 |   ${binding.variables.containsKey('fragment_method') ? ' --fragment-method ' + fragment_method : ''}


--------------------------------------------------------------------------------
/src/python/pipelines/dimorphite/enumerate_charges.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // Testing enumerate_charges.py reading from STDIN and writing to STDOUT
 8 |     test_dimorphite_simple = [
 9 | 
10 |         command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz |
11 |                  python -m pipelines.dimorphite.enumerate_charges -if sdf''',
12 | 
13 |         stderr: [ 'No output format specified - using sdf',
14 |                   '36 1265 0']
15 | 
16 |     ]
17 | ]
18 | 


--------------------------------------------------------------------------------
/src/python/pipelines/dimorphite/run.py:
--------------------------------------------------------------------------------
 1 | from rdkit import Chem
 2 | import dimorphite_dl
 3 | # Ad-hoc script: feeds the molecules from Kinase_inhibs.sdf to dimorphite_dl.run_with_mol_list and prints the results.
 4 | #suppl = [Chem.MolFromSmiles(s) for s in ["C[C@](F)(Br)CC(O)=O", "CCCCCN"]]
 5 | suppl = Chem.SDMolSupplier('Kinase_inhibs.sdf')
 6 | #suppl = Chem.SDMolSupplier('dhfr_standardized.sdf')
 7 | 
 8 | print(suppl)
 9 | # Run over the pH range 5.0 to 9.0.
10 | protonated_mols = dimorphite_dl.run_with_mol_list(
11 |     suppl,
12 |     min_ph=5.0,
13 |     max_ph=9.0,
14 | )
15 | 
16 | 
17 | print("Charged mols ------------------------------------------------")
18 | # Print each truthy result as its SMILES followed by a comma-separated list of its property names.
19 | for m in protonated_mols:
20 |     if m:
21 |         print(Chem.MolToSmiles(m) + " " + ",".join(m.GetPropNames()))
22 | 


--------------------------------------------------------------------------------
/src/python/pipelines/dimorphite/site_substructures.smarts:
--------------------------------------------------------------------------------
 1 | *Azide	[N+0:1]=[N+:2]=[N+0:3]-[H]	2	4.65	0.07071067811865513
 2 | Nitro	[C,c,N,n,O,o:1]-[NX3:2](=[O:3])-[O:4]-[H]	3	-1000.0	0
 3 | AmidineGuanidine1	[N:1]-[C:2](-[N:3])=[NX2:4]-[H:5]	3	12.025333333333334	1.5941046150769165
 4 | AmidineGuanidine2	[C:1](-[N:2])=[NX2+0:3]	2	10.035538461538462	2.1312826469414716
 5 | Sulfate	[SX4:1](=[O:2])(=[O:3])([O:4]-[C,c,N,n:5])-[OX2:6]-[H]	5	-2.36	1.3048043093561141
 6 | Sulfonate	[SX4:1](=[O:2])(=[O:3])(-[C,c,N,n:4])-[OX2:5]-[H]	4	-1.8184615384615386	1.4086213481855594
 7 | Sulfinic_acid	[SX3:1](=[O:2])-[O:3]-[H]	2	1.7933333333333332	0.4372070447739835
 8 | Phenyl_carboxyl	[c,n,o:1]-[C:2](=[O:3])-[O:4]-[H]	3	3.463441968255319	1.2518054407928614
 9 | Carboxyl	[C:1](=[O:2])-[O:3]-[H]	2	3.456652971502591	1.2871420886834017
10 | Thioic_acid	[C,c,N,n:1](=[O,S:2])-[SX2,OX2:3]-[H]	2	0.678267	1.497048763660801
11 | Phenyl_Thiol	[c,n:1]-[SX2:2]-[H]	1	4.978235294117647	2.6137000480499806
12 | Thiol	[C,N:1]-[SX2:2]-[H]	1	9.12448275862069	1.3317968158171463
13 | Phosphate	[PX4:1](=[O:2])(-[OX2:3]-[H])(-[O+0:4])-[OX2:5]-[H]	2	2.4182608695652172	1.1091177991945305	5	6.5055	0.9512787792174668
14 | Phosphonate	[PX4:1](=[O:2])(-[OX2:3]-[H])(-[C,c,N,n:4])-[OX2:5]-[H]	2	1.8835714285714287	0.5925999820080644	5	7.247254901960784	0.8511476450801531
15 | Phenol	[c,n,o:1]-[O:2]-[H]	1	7.065359866910526	3.277356122295936
16 | Peroxide1	[O:1]([$(C=O),$(C[Cl]),$(CF),$(C[Br]),$(CC#N):2])-[O:3]-[H]	2	8.738888888888889	0.7562592839596507
17 | Peroxide2	[C:1]-[O:2]-[O:3]-[H]	2	11.978235294117647	0.8697645895163075
18 | O=C-C=C-OH	[O:1]=[C;R:2]-[C;R:3]=[C;R:4]-[O:5]-[H]	4	3.554	0.803339458581667
19 | Vinyl_alcohol	[C:1]=[C:2]-[O:3]-[H]	2	8.871850714285713	1.660200255394124
20 | Alcohol	[C:1]-[O:2]-[H]	1	14.780384615384616	2.546464970533435
21 | N-hydroxyamide	[C:1](=[O:2])-[N:3]-[O:4]-[H]	3	9.301904761904762	1.2181897185891002
22 | *Ringed_imide1	[O,S:1]=[C;R:2]([$([#8]),$([#7]),$([#16]),$([#6][Cl]),$([#6]F),$([#6][Br]):3])-[N;R:4]([C;R:5]=[O,S:6])-[H]	3	6.4525	0.5555627777308341
23 | *Ringed_imide2	[O,S:1]=[C;R:2]-[N;R:3]([C;R:4]=[O,S:5])-[H]	2	8.681666666666667	1.8657779975741713
24 | *Imide	[F,Cl,Br,S,s,P,p:1][#6:2][CX3:3](=[O,S:4])-[NX3+0:5]([CX3:6]=[O,S:7])-[H]	4	2.466666666666667	1.4843629385474877
25 | *Imide2	[O,S:1]=[CX3:2]-[NX3+0:3]([CX3:4]=[O,S:5])-[H]	2	10.23	1.1198214143335534
26 | *Amide_electronegative	[C:1](=[O:2])-[N:3](-[Br,Cl,I,F,S,O,N,P:4])-[H]	2	3.4896	2.688124315081677
27 | *Amide	[C:1](=[O:2])-[N:3]-[H]	2	12.00611111111111	4.512491341218857
28 | *Sulfonamide	[SX4:1](=[O:2])(=[O:3])-[NX3+0:4]-[H]	3	7.9160326086956525	1.9842121316708763
29 | Anilines_primary	[c:1]-[NX3+0:2]([H:3])[H:4]	1	3.899298673194805	2.068768503987161
30 | Anilines_secondary	[c:1]-[NX3+0:2]([H:3])[!H:4]	1	4.335408163265306	2.1768842022330843
31 | Anilines_tertiary	[c:1]-[NX3+0:2]([!H:3])[!H:4]	1	4.16690685045614	2.005865735782679
32 | Aromatic_nitrogen_unprotonated	[n+0&H0:1]	0	4.3535441240733945	2.0714072661859584
33 | Amines_primary_secondary_tertiary	[C:1]-[NX3+0:2]	1	8.159107682388349	2.5183597445318147
34 | Phosphinic_acid	[PX4:1](=[O:2])(-[C,c,N,n,F,Cl,Br,I:3])(-[C,c,N,n,F,Cl,Br,I:4])-[OX2:5]-[H]	4	2.9745	0.6867886750744557
35 | Phosphate_diester	[PX4:1](=[O:2])(-[OX2:3]-[C,c,N,n,F,Cl,Br,I:4])(-[O+0:5]-[C,c,N,n,F,Cl,Br,I:4])-[OX2:6]-[H]	6	2.7280434782608696	2.5437448856908316
36 | Phosphonate_ester	[PX4:1](=[O:2])(-[OX2:3]-[C,c,N,n,F,Cl,Br,I:4])(-[C,c,N,n,F,Cl,Br,I:5])-[OX2:6]-[H]	5	2.0868	0.4503028610465036
37 | Primary_hydroxyl_amine	[C,c:1]-[O:2]-[NH2:3]	2	4.035714285714286	0.8463816543155368
38 | *Indole_pyrrole	[c;R:1]1[c;R:2][c;R:3][c;R:4][n;R:5]1[H]	4	14.52875	4.06702491591416
39 | *Aromatic_nitrogen_protonated	[n:1]-[H]	0	7.17	2.94602395490212
40 | 


--------------------------------------------------------------------------------
/src/python/pipelines/dmpk/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/pipelines/dmpk/__init__.py


--------------------------------------------------------------------------------
/src/python/pipelines/dmpk/pk_tmax_cmax_sim.dsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | "@class": org.squonk.core.DockerServiceDescriptor
 3 | serviceConfig:
 4 |   id: pipelines.dmpk.sygnature.tmax_cmax_sim.1
 5 |   name: TmaxCmaxSimulation
 6 |   description: Simulation of Tmax and Cmax
 7 |   tags:
 8 |   - sygnature
 9 |   - dmpk
10 |   - pk
11 |   - cmax
12 |   - tmax
13 |   - simulation
14 |   - prediction
15 |   - docker
16 |   resourceUrl:
17 |   icon: icons/program.png
18 |   outputDescriptors:
19 |   - primaryType: org.squonk.types.PngImageFile
20 |     mediaType: image/png
21 |     name: output
22 |   optionDescriptors:
23 |   - modes:
24 |     - User
25 |     editable: true
26 |     visible: true
27 |     description: Half life(elim)(hr)
28 |     label: Half life(elim)(hr)
29 |     key: arg.halfLife
30 |     minValues: 1
31 |     maxValues: 1
32 |     typeDescriptor:
33 |       type: java.lang.Float
34 |       "@class": org.squonk.options.SimpleTypeDescriptor
35 |     "@class": org.squonk.options.OptionDescriptor
36 |   - modes:
37 |     - User
38 |     editable: true
39 |     visible: true
40 |     description: Half life(abs)(hr)
41 |     label: Half life(abs)(hr)
42 |     key: arg.absorption
43 |     minValues: 1
44 |     maxValues: 1
45 |     typeDescriptor:
46 |       type: java.lang.Float
47 |       "@class": org.squonk.options.SimpleTypeDescriptor
48 |     "@class": org.squonk.options.OptionDescriptor
49 |   - modes:
50 |     - User
51 |     editable: true
52 |     visible: true
53 |     description: Dose(mg)
54 |     label: Dose(mg)
55 |     key: arg.dose
56 |     minValues: 1
57 |     maxValues: 1
58 |     typeDescriptor:
59 |       type: java.lang.Float
60 |       "@class": org.squonk.options.SimpleTypeDescriptor
61 |     "@class": org.squonk.options.OptionDescriptor
62 |   - modes:
63 |     - User
64 |     editable: true
65 |     visible: true
66 |     description: AUC(mg/L.hr)
67 |     label: AUC(mg/L.hr)
68 |     key: arg.auc
69 |     minValues: 1
70 |     maxValues: 1
71 |     typeDescriptor:
72 |       type: java.lang.Float
73 |       "@class": org.squonk.options.SimpleTypeDescriptor
74 |     "@class": org.squonk.options.OptionDescriptor
75 |   - modes:
76 |     - User
77 |     editable: true
78 |     visible: true
79 |     description: Simulation Time Length(hr)
80 |     label: Simulation Time Length(hr)
81 |     key: arg.time
82 |     minValues: 1
83 |     maxValues: 1
84 |     typeDescriptor:
85 |       type: java.lang.Float
86 |       "@class": org.squonk.options.SimpleTypeDescriptor
87 |     "@class": org.squonk.options.OptionDescriptor
88 |   executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep
89 | outputRoutes:
90 | - route: FILE
91 | imageName: informaticsmatters/rdkit_pipelines
92 | command: >-
93 |   python -m pipelines.dmpk.pk_tmax_cmax_sim -o ${POUT}output --half-life $halfLife
94 |   --absorption $absorption --dose $dose --auc $auc --time $time
95 | 


--------------------------------------------------------------------------------
/src/python/pipelines/dmpk/pk_tmax_cmax_sim.test:
--------------------------------------------------------------------------------
 1 | // The `pk_tmax_cmax_sim` automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     setup_collection = [
 8 |         creates: [ 'output.png',
 9 |                    'output_metrics.txt']
10 |     ],
11 | 
12 |     test_absorption_0_5 = [
13 | 
14 |         params: [ halfLife: 0.79,
15 |                   absorption: 0.5,
16 |                   dose: 0.14,
17 |                   auc: 0.88,
18 |                   time: 8 ],
19 | 
20 |         stderr: [ 'kel  0.8774014943',
21 |                   'ka   1.3862943611',
22 |                   'Tmax 0.8988627605',
23 |                   'Cmax 0.3508882480',
24 |                   'V_F  0.1813205358' ],
25 | 
26 |         metrics: [ '__StatusMessage__': 'Tmax(hr): 0.899, Cmax(mg/L): 0.351, kel(hr-1): 0.877, ka(hr-1): 1.39, V/F(L): 0.181',
27 |                    'DMPK.Syg.TmaxCmax': '1' ]
28 | 
29 |     ],
30 | 
31 |     test_raw_command = [
32 | 
33 |         command: '''python -m pipelines.dmpk.pk_tmax_cmax_sim -o ${POUT}output
34 |                     --half-life 0.79 --absorption 0.5 --dose 0.14
35 |                     --auc 0.88 --time 8''',
36 | 
37 |         stderr: [ 'kel  0.8774014943',
38 |                   'ka   1.3862943611',
39 |                   'Tmax 0.8988627605',
40 |                   'Cmax 0.3508882480',
41 |                   'V_F  0.1813205358' ],
42 | 
43 |         metrics: [ '__StatusMessage__': 'Tmax(hr): 0.899, Cmax(mg/L): 0.351, kel(hr-1): 0.877, ka(hr-1): 1.39, V/F(L): 0.181',
44 |                    'DMPK.Syg.TmaxCmax': '1' ]
45 | 
46 |     ],
47 | 
48 | ]
49 | 


--------------------------------------------------------------------------------
/src/python/pipelines/docking/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/pipelines/docking/__init__.py


--------------------------------------------------------------------------------
/src/python/pipelines/docking/obabel_prepare_pdb.dsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | "@class": org.squonk.core.DockerServiceDescriptor
 3 | serviceConfig:
 4 |   id: pipelines.xchem.obabel.prepare.pdb.v1
 5 |   name: OBabelPreparePDB
 6 |   description: Prepare PDB file using Open Babel
 7 |   tags:
 8 |   - rdkit
 9 |   - xchem
10 |   - docker
11 |   - openbabel
12 |   - obabel
13 |   - prepare
14 |   - pdb
15 |   - convert
16 |   - protein
17 |   resourceUrl:
18 |   icon: icons/transform_molecule.png
19 |   inputDescriptors:
20 |   - primaryType: org.squonk.types.PDBFile
21 |     mediaType: chemical/x-pdb
22 |     name: pdb
23 |   outputDescriptors:
24 |   - primaryType: org.squonk.types.Mol2File
25 |     mediaType: chemical/x-mol2
26 |     name: mol2
27 |   optionDescriptors:
28 |   - modes:
29 |     - User
30 |     editable: true
31 |     "@class": org.squonk.options.OptionDescriptor
32 |     typeDescriptor:
33 |       type: java.lang.Float
34 |       "@class": org.squonk.options.SimpleTypeDescriptor
35 |     key: arg.protonate
36 |     label: Protonate at pH
37 |     description: Optionally protonate side chains at this pH
38 |     visible: true
39 |     minValues: 0
40 |     maxValues: 1
41 |   executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep
42 | inputRoutes:
43 | - route: FILE
44 | outputRoutes:
45 | - route: FILE
46 | imageName: informaticsmatters/pipelines-obabel:latest
47 | command: >-
48 |   python -m pipelines.docking.obabel_prepare_pdb -i ${PIN}pdb.pdb.gz -o ${POUT}mol2 -mol2
49 |   ${binding.variables.containsKey('protonate') ? '-prot ' + protonate : ''}
50 |   --meta


--------------------------------------------------------------------------------
/src/python/pipelines/docking/obabel_prepare_pdb.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Copyright 2018 Informatics Matters Ltd.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import argparse
18 | import sys, subprocess
19 | 
20 | from pipelines_utils import utils
21 | 
22 | 
23 | def execute(input, output, extension, format, ph, noGzip):
24 | 
25 |     # TODO - convert this to use the Python API rather than an external process
26 | 
27 |     filename = output + "." + extension
28 |     base_args = ["obabel", "-ipdb", input, format, "-O", filename]
29 |     if ph:
30 |         base_args.append("-p")
31 |         base_args.append(str(ph))
32 |     utils.log("Command: " + " ".join(base_args))
33 | 
34 |     subprocess.check_call(base_args, stdout=sys.stderr, stderr=sys.stderr)
35 | 
36 |     # NOTE the -z argument does not seem to work correctly with obabel (truncated files generated) so we
37 |     # fall back to good old gzip to handle the compression once the uncompressed file is created
38 |     if not noGzip:
39 |         subprocess.check_call(['gzip', filename], stdout=sys.stderr, stderr=sys.stderr)
40 | 
41 | def main():
42 |     global PDB_PATH,WRITER,THRESHOLD
43 |     parser = argparse.ArgumentParser(description='Open babel PDB prepare')
44 |     parser.add_argument('--no-gzip', action='store_true', help='Do not compress the output')
45 |     parser.add_argument('-i', '--input', help="PDB file for converting")
46 |     parser.add_argument('-o', '--output', help="Base name for output files (no extension).")
47 |     parser.add_argument('-mol2', '--mol2', action='store_true', help='Output as Mol2 format.')
48 |     parser.add_argument('-pdbqt', '--pdbqt', action='store_true', help='Output as pdbqt format.')
49 |     parser.add_argument('--meta', action='store_true', help='Write metrics files')
50 |     parser.add_argument('-prot', '--protonate', type=float, help="protonate at this pH (optional)")
51 | 
52 |     args = parser.parse_args()
53 | 
54 |     utils.log("Prepare Args: ", args)
55 | 
56 |     if not (args.mol2 or args.pdbqt):
57 |         raise ValueError("Must specify at least one output fromat: mol2 and/or pdbqt")
58 | 
59 | 
60 |     if args.pdbqt:
61 |         utils.log("Preparing as pdbqt")
62 |         execute(args.input, args.output, "pdbqt", "-opdbqt", args.protonate, args.no_gzip)
63 | 
64 |     if args.mol2:
65 |         utils.log("Preparing as mol2")
66 |         execute(args.input, args.output, "mol2", "-omol2", args.protonate, args.no_gzip)
67 | 
68 |     utils.log("Preparation complete")
69 | 
70 | 
71 | if __name__ == "__main__":
72 |     main()
73 | 


--------------------------------------------------------------------------------
/src/python/pipelines/docking/obabel_prepare_pdb.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // A basic start-up test for the module.
 8 |     // Simply makes sure it starts cleanly.
 9 |     ignore_test_help = [
10 | 
11 |         command: '''python -m pipelines.docking.obabel_prepare_pdb -h''',
12 | 
13 |         stdout: [ 'usage: obabel_prepare_pdb.py' ]
14 | 
15 |     ],
16 | 
17 | ]
18 | 


--------------------------------------------------------------------------------
/src/python/pipelines/docking/plip.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // A basic start-up test for the module.
 8 |     // Simply makes sure it starts cleanly.
 9 |     ignore_test_help = [
10 | 
11 |         command: '''python -m pipelines.docking.plip -h''',
12 | 
13 |         stdout: [ 'usage: plip.py' ]
14 | 
15 |     ],
16 | 
17 | ]
18 | 


--------------------------------------------------------------------------------
/src/python/pipelines/docking/smog2016.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // A basic start-up test for the module.
 8 |     // Simply makes sure it starts cleanly.
 9 |     ignore_test_help = [
10 | 
11 |         command: '''python -m pipelines.docking.smog2016 -h''',
12 | 
13 |         stdout: [ 'usage: smog2016.py' ]
14 | 
15 |     ],
16 | 
17 | ]
18 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/pipelines/rdkit/__init__.py


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/cluster_butina.dsd.yml:
--------------------------------------------------------------------------------
  1 | ---
  2 | "@class": org.squonk.core.DockerServiceDescriptor
  3 | serviceConfig:
  4 |   id: pipelines.rdkit.cluster.butina
  5 |   name: RDKitButinaClustering
  6 |   description: Clustering similar molecules using RDKit Butina clustering
  7 |   tags:
  8 |   - rdkit
  9 |   - clustering
 10 |   - similarity
 11 |   - butina
 12 |   - docker
 13 |   resourceUrl:
 14 |   icon: icons/clustering.png
 15 |   inputDescriptors:
 16 |   - primaryType: org.squonk.dataset.Dataset
 17 |     secondaryType: org.squonk.types.MoleculeObject
 18 |     mediaType: application/x-squonk-dataset-molecule+json
 19 |     name: input
 20 |   outputDescriptors:
 21 |   - primaryType: org.squonk.dataset.Dataset
 22 |     secondaryType: org.squonk.types.MoleculeObject
 23 |     mediaType: application/x-squonk-dataset-molecule+json
 24 |     name: output
 25 |   optionDescriptors:
 26 |   - modes:
 27 |     - User
 28 |     "@class": org.squonk.options.OptionDescriptor
 29 |     typeDescriptor:
 30 |       type: java.lang.Float
 31 |       "@class": org.squonk.options.SimpleTypeDescriptor
 32 |     key: arg.threshold
 33 |     label: Threshold
 34 |     description: Similarity threshold
 35 |     defaultValue:
 36 |     - java.lang.Float
 37 |     - 0.7
 38 |     visible: true
 39 |     editable: true
 40 |   - modes:
 41 |     - User
 42 |     editable: true
 43 |     "@class": org.squonk.options.OptionDescriptor
 44 |     typeDescriptor:
 45 |       type: java.lang.String
 46 |       "@class": org.squonk.options.SimpleTypeDescriptor
 47 |     key: arg.fragment_method
 48 |     label: Fragment method
 49 |     description: Approach to use for picking biggest molecular fragment
 50 |     values:
 51 |     - hac
 52 |     - mw
 53 |     defaultValue: hac
 54 |     visible: true
 55 |   - modes:
 56 |     - User
 57 |     editable: true
 58 |     "@class": org.squonk.options.OptionDescriptor
 59 |     typeDescriptor:
 60 |       type: java.lang.Boolean
 61 |       "@class": org.squonk.options.SimpleTypeDescriptor
 62 |     key: arg.output_fragment
 63 |     label: Output biggest fragment
 64 |     description: Output the biggest fragment rather than the whole molecule
 65 |     defaultValue: false
 66 |     visible: true
 67 |   - modes:
 68 |     - User
 69 |     editable: true
 70 |     "@class": org.squonk.options.OptionDescriptor
 71 |     typeDescriptor:
 72 |       type: java.lang.String
 73 |       "@class": org.squonk.options.SimpleTypeDescriptor
 74 |     key: arg.descriptor
 75 |     label: Descriptor
 76 |     description: Descriptor/fingerprint to use
 77 |     values:
 78 |     - maccs
 79 |     - morgan2
 80 |     - morgan3
 81 |     - rdkit
 82 |     defaultValue: rdkit
 83 |     visible: true
 84 |   - modes:
 85 |     - User
 86 |     editable: true
 87 |     "@class": org.squonk.options.OptionDescriptor
 88 |     typeDescriptor:
 89 |       type: java.lang.String
 90 |       "@class": org.squonk.options.SimpleTypeDescriptor
 91 |     key: arg.metric
 92 |     label: Metric
 93 |     description: Similarity metric to use
 94 |     values:
 95 |     - asymmetric
 96 |     - braunblanquet
 97 |     - cosine
 98 |     - dice
 99 |     - kulczynski
100 |     - mcconnaughey
101 |     - rogotgoldberg
102 |     - russel
103 |     - sokal
104 |     - tanimoto
105 |     defaultValue: tanimoto
106 |     visible: true
107 |   executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep
108 | thinDescriptors:
109 | - input: input
110 |   output: output
111 |   preserve: false
112 | inputRoutes:
113 | - route: FILE
114 | outputRoutes:
115 | - route: FILE
116 | imageName: informaticsmatters/rdkit_pipelines
117 | command: >-
118 |   python -m pipelines.rdkit.cluster_butina -i ${PIN}input.data.gz -if json
119 |   -o ${POUT}output -of json -t ${threshold} -d ${descriptor} -m ${metric}
120 |   ${binding.variables.containsKey('fragment_method') ? ' --fragment-method ' + fragment_method : ''}
121 |   ${binding.variables.containsKey('output_fragment') && output_fragment ? ' --output-fragment' : ''} --thin --meta
122 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/cluster_butina.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // Testing cluster_butina.py reading from STDIN and writing to STDOUT
 8 |     test_raw_cluster_butina_from_sdf_to_default = [
 9 | 
10 |         command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz |
11 |                  python -m pipelines.rdkit.cluster_butina -t 0.6 -if sdf''',
12 | 
13 |         stderr: [ 'No output format specified - using sdf',
14 |                   'Found [1-9]\\d+ clusters',
15 |                   'Final Clusters:'],
16 | 
17 |     ],
18 | 
19 |     // Testing cluster_butina.py reading from STDIN and writing to STDOUT
20 |     test_raw_cluster_butina_from_sdf_to_sdf = [
21 | 
22 |         command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz |
23 |                  python -m pipelines.rdkit.cluster_butina -t 0.6 -if sdf -of sdf''',
24 | 
25 |         stderr: [ "outformat='sdf'",
26 |                   'Found [1-9]\\d+ clusters',
27 |                   'Final Clusters:'],
28 | 
29 |     ],
30 | 
31 |     // Testing cluster_butina.py reading from STDIN and writing to STDOUT
32 |     test_raw_cluster_butina_from_sdf_to_json = [
33 | 
34 |         command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz |
35 |                  python -m pipelines.rdkit.cluster_butina -t 0.6 -if sdf -of json''',
36 | 
37 |         stderr: [ "outformat='json'",
38 |                   'Found [1-9]\\d+ clusters',
39 |                   'Final Clusters:'],
40 | 
41 |     ],
42 | 
43 |     // Testing cluster_butina.py reading from file
44 |     test_raw_cluster_butina_from_json_file_to_sdf = [
45 | 
46 |         command: '''python -m pipelines.rdkit.cluster_butina -t 0.6 -i ${PIN}nci100.data.gz -if json -of sdf''',
47 | 
48 |         stderr: [ "outformat='sdf'",
49 |                   'Found [1-9]\\d+ clusters',
50 |                   'Final Clusters:'],
51 | 
52 |     ],
53 | 
54 |     // Testing cluster_butina.py reading from file
55 |     test_raw_cluster_butina_from_json_file_to_json = [
56 | 
57 |         command: '''gunzip -c ${PIN}nci100.data.gz |
58 |                  python -m pipelines.rdkit.cluster_butina -t 0.6 -i ${PIN}nci100.data.gz -if json -of json''',
59 | 
60 |         stderr: [ "outformat='json'",
61 |                   'Found [1-9]\\d+ clusters',
62 |                   'Final Clusters:'],
63 | 
64 |     ],
65 | 
66 |     // Testing cluster_butina.py reading and writing from/to file
67 |     test_raw_cluster_butina_from_json_to_json_file = [
68 | 
69 |         command: '''gunzip -c ${PIN}nci100.data.gz |
70 |                  python -m pipelines.rdkit.cluster_butina -t 0.6 -i ${PIN}nci100.data.gz -if json -o ${POUT}output -of json''',
71 | 
72 |         stderr: [ "outformat='json'",
73 |                   'Found [1-9]\\d+ clusters',
74 |                   'Final Clusters:'],
75 | 
76 |         creates: [ 'output.data.gz' ],
77 | 
78 |     ],
79 | 
80 |     // Testing cluster_butina.py reading SDF from file and writing JSON to file
81 |     test_raw_cluster_butina_from_sdf_to_json_file = [
82 | 
83 |         command: '''gunzip -c ${PIN}nci100.data.gz |
84 |                  python -m pipelines.rdkit.cluster_butina -t 0.6 -i ${PIN}Kinase_inhibs.sdf.gz -if sdf -o ${POUT}output -of json''',
85 | 
86 |         stderr: [ "outformat='json'",
87 |                   'Found [1-9]\\d+ clusters',
88 |                   'Final Clusters:'],
89 | 
90 |         creates: [ 'output.data.gz' ],
91 | 
92 |     ],
93 | 
94 | ]
95 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/cluster_butina_matrix.dsd.yml:
--------------------------------------------------------------------------------
  1 | ---
  2 | "@class": org.squonk.core.DockerServiceDescriptor
  3 | serviceConfig:
  4 |   id: pipelines.rdkit.cluster.butina.matrix.v1
  5 |   name: RDKitClusterMatrix
  6 |   description: Generate similarity matrix using RDKit Butina clustering
  7 |   tags:
  8 |   - rdkit
  9 |   - clustering
 10 |   - similarity
 11 |   - butina
 12 |   - matrix
 13 |   - docker
 14 |   resourceUrl:
 15 |   icon: icons/clustering.png
 16 |   inputDescriptors:
 17 |   - primaryType: org.squonk.dataset.Dataset
 18 |     secondaryType: org.squonk.types.MoleculeObject
 19 |     mediaType: application/x-squonk-dataset-molecule+json
 20 |     name: input
 21 |   outputDescriptors:
 22 |   - primaryType: org.squonk.dataset.Dataset
 23 |     secondaryType: org.squonk.types.BasicObject
 24 |     mediaType: application/x-squonk-dataset-basic+json
 25 |     name: output
 26 |   optionDescriptors:
 27 |   - modes:
 28 |     - User
 29 |     "@class": org.squonk.options.OptionDescriptor
 30 |     typeDescriptor:
 31 |       type: java.lang.Float
 32 |       "@class": org.squonk.options.SimpleTypeDescriptor
 33 |     key: arg.threshold
 34 |     label: Threshold
 35 |     description: Similarity threshold for clustering
 36 |     defaultValue:
 37 |     - java.lang.Float
 38 |     - 0.7
 39 |     visible: true
 40 |     editable: true
 41 |   - modes:
 42 |     - User
 43 |     "@class": org.squonk.options.OptionDescriptor
 44 |     typeDescriptor:
 45 |       type: java.lang.Float
 46 |       "@class": org.squonk.options.SimpleTypeDescriptor
 47 |     key: arg.matrixThreshold
 48 |     label: Matrix Threshold
 49 |     description: Only output values above this similarity threshold
 50 |     defaultValue:
 51 |     - java.lang.Float
 52 |     - 0.5
 53 |     visible: true
 54 |     editable: true
 55 |   - modes:
 56 |     - User
 57 |     editable: true
 58 |     "@class": org.squonk.options.OptionDescriptor
 59 |     typeDescriptor:
 60 |       type: java.lang.String
 61 |       "@class": org.squonk.options.SimpleTypeDescriptor
 62 |     key: arg.descriptor
 63 |     label: Descriptor
 64 |     description: Descriptor/fingerprint to use
 65 |     values:
 66 |     - maccs
 67 |     - morgan2
 68 |     - morgan3
 69 |     - rdkit
 70 |     defaultValue: rdkit
 71 |     visible: true
 72 |   - modes:
 73 |     - User
 74 |     editable: true
 75 |     "@class": org.squonk.options.OptionDescriptor
 76 |     typeDescriptor:
 77 |       type: java.lang.String
 78 |       "@class": org.squonk.options.SimpleTypeDescriptor
 79 |     key: arg.metric
 80 |     label: Metric
 81 |     description: Similarity metric to use
 82 |     values:
 83 |     - asymmetric
 84 |     - braunblanquet
 85 |     - cosine
 86 |     - dice
 87 |     - kulczynski
 88 |     - mcconnaughey
 89 |     - rogotgoldberg
 90 |     - russel
 91 |     - sokal
 92 |     - tanimoto
 93 |     defaultValue: tanimoto
 94 |     visible: true
 95 |   executorClassName: org.squonk.execution.steps.impl.DefaultDockerExecutorStep
 96 | inputRoutes:
 97 | - route: FILE
 98 | outputRoutes:
 99 | - route: FILE
100 | imageName: informaticsmatters/rdkit_pipelines
101 | command: >-
102 |   python -m pipelines.rdkit.cluster_butina_matrix -i ${PIN}input.data.gz -if json -o ${POUT}output -of json
103 |   -t ${threshold} -mt ${matrixThreshold} -d ${descriptor}
104 |   -m ${metric} --meta
105 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/cluster_butina_matrix.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // Testing cluster_butina_matrix.py
 8 |     // reading from STDIN and writing TSV to file
 9 |     test_raw_cluster_butina_matirx_to_tsv = [
10 | 
11 |         command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz |
12 |                  python -m pipelines.rdkit.cluster_butina_matrix
13 |                  -t 0.6 -if sdf -of tsv -o ${POUT}output''',
14 | 
15 |         stderr: [ 'Found [1-9]\\d+ clusters',
16 |                   'Clusters:'],
17 | 
18 |         creates: [ 'output.tsv.gz' ],
19 | 
20 |     ],
21 | 
22 |     // Testing cluster_butina_matrix.py
23 |     // reading from SDF file and writing TSV to file
24 |     test_raw_cluster_butina_matirx_sdf_file_to_tsv = [
25 | 
26 |         command: '''python -m pipelines.rdkit.cluster_butina_matrix
27 |                  -t 0.6 -i ${PIN}Kinase_inhibs.sdf.gz -if sdf -of tsv -o ${POUT}output''',
28 | 
29 |         stderr: [ 'Found [1-9]\\d+ clusters',
30 |                   'Clusters:'],
31 | 
32 |         creates: [ 'output.tsv.gz' ],
33 | 
34 |     ],
35 | 
36 |     // Testing cluster_butina_matrix.py
37 |     // reading from JSON file and writing TSV to file
38 |     test_raw_cluster_butina_matirx_json_file_to_tsv = [
39 | 
40 |         command: '''python -m pipelines.rdkit.cluster_butina_matrix
41 |                  -t 0.6 -i ${PIN}nci100.data.gz -if json -of tsv -o ${POUT}output''',
42 | 
43 |         stderr: [ 'Found [1-9]\\d+ clusters',
44 |                   'Clusters:'],
45 | 
46 |         creates: [ 'output.tsv.gz' ],
47 | 
48 |     ],
49 | 
50 |     // Testing cluster_butina_matrix.py
51 |     // reading from STDIN and writing JSON to file
52 |     test_raw_cluster_butina_matirx_to_json = [
53 | 
54 |         command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz |
55 |                  python -m pipelines.rdkit.cluster_butina_matrix
56 |                  -t 0.6 -if sdf -of json -o ${POUT}output''',
57 | 
58 |         stderr: [ 'Found [1-9]\\d+ clusters',
59 |                   'Clusters:'],
60 | 
61 |         creates: [ 'output.data.gz',
62 |                    'output.metadata' ],
63 | 
64 |     ],
65 | 
66 |     // Testing cluster_butina_matrix.py
67 |     // reading from file (SDF) and writing JSON to file
68 |     test_raw_cluster_butina_matirx_sdf_file_to_json = [
69 | 
70 |         command: '''python -m pipelines.rdkit.cluster_butina_matrix
71 |                  -t 0.6 -i ${PIN}Kinase_inhibs.sdf.gz -if sdf -of json -o ${POUT}output''',
72 | 
73 |         stderr: [ 'Found [1-9]\\d+ clusters',
74 |                   'Clusters:'],
75 | 
76 |         creates: [ 'output.data.gz',
77 |                    'output.metadata' ],
78 | 
79 |     ],
80 | 
81 | ]
82 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/conformers.dsd.yml:
--------------------------------------------------------------------------------
  1 | ---
  2 | "@class": org.squonk.core.DockerServiceDescriptor
  3 | serviceConfig:
  4 |   id: pipelines.rdkit.conformer.basic
  5 |   name: RDKitConformers
  6 |   description: Generate 3D conformers using RDKit
  7 |   tags:
  8 |   - rdkit
  9 |   - conformer
 10 |   - 3d
 11 |   - docker
 12 |   resourceUrl:
 13 |   icon: icons/molecule_generator.png
 14 |   inputDescriptors:
 15 |   - primaryType: org.squonk.dataset.Dataset
 16 |     secondaryType: org.squonk.types.MoleculeObject
 17 |     mediaType: application/x-squonk-dataset-molecule+json
 18 |     name: input
 19 |   outputDescriptors:
 20 |   - primaryType: org.squonk.dataset.Dataset
 21 |     secondaryType: org.squonk.types.MoleculeObject
 22 |     mediaType: application/x-squonk-dataset-molecule+json
 23 |     name: output
 24 |   optionDescriptors:
 25 |   - "@class": org.squonk.options.OptionDescriptor
 26 |     modes:
 27 |     - User
 28 |     typeDescriptor:
 29 |       type: java.lang.Integer
 30 |       "@class": org.squonk.options.SimpleTypeDescriptor
 31 |     editable: true
 32 |     visible: true
 33 |     description: Number of conformers to aim to generate
 34 |     defaultValue: 1
 35 |     label: Number of conformers
 36 |     key: arg.num
 37 |     minValues: 1
 38 |     maxValues: 1
 39 |   - modes:
 40 |     - User
 41 |     editable: true
 42 |     visible: true
 43 |     description: Number of attempts to generate conformers
 44 |     label: Number of attempts
 45 |     key: arg.attempts
 46 |     minValues: 0
 47 |     maxValues: 1
 48 |     typeDescriptor:
 49 |       type: java.lang.Integer
 50 |       "@class": org.squonk.options.SimpleTypeDescriptor
 51 |     "@class": org.squonk.options.OptionDescriptor
 52 |   - modes:
 53 |     - User
 54 |     editable: true
 55 |     visible: true
 56 |     description: Prune RMSD threshold for removing similar conformers
 57 |     label: Prune RMSD threshold
 58 |     key: arg.prune
 59 |     minValues: 0
 60 |     maxValues: 1
 61 |     typeDescriptor:
 62 |       type: java.lang.Float
 63 |       "@class": org.squonk.options.SimpleTypeDescriptor
 64 |     "@class": org.squonk.options.OptionDescriptor
 65 |   - modes:
 66 |     - User
 67 |     editable: true
 68 |     visible: true
 69 |     description: Cluster method (RMSD or TFD)
 70 |     label: Cluster method
 71 |     key: arg.method
 72 |     values:
 73 |     - RMSD
 74 |     - TFD
 75 |     defaultValue: RMSD
 76 |     minValues: 1
 77 |     maxValues: 1
 78 |     typeDescriptor:
 79 |       type: java.lang.String
 80 |       "@class": org.squonk.options.SimpleTypeDescriptor
 81 |     "@class": org.squonk.options.OptionDescriptor
 82 |   - modes:
 83 |     - User
 84 |     editable: true
 85 |     visible: true
 86 |     description: Cluster threshold
 87 |     label: Cluster threshold
 88 |     key: arg.threshold
 89 |     minValues: 0
 90 |     maxValues: 1
 91 |     typeDescriptor:
 92 |       type: java.lang.Float
 93 |       "@class": org.squonk.options.SimpleTypeDescriptor
 94 |     "@class": org.squonk.options.OptionDescriptor
 95 |   - modes:
 96 |     - User
 97 |     editable: true
 98 |     visible: true
 99 |     description: Number of energy minimization iterations
100 |     defaultValue: 0
101 |     label: Energy minimization iterations
102 |     key: arg.minimize
103 |     minValues: 1
104 |     maxValues: 1
105 |     typeDescriptor:
106 |       type: java.lang.Integer
107 |       "@class": org.squonk.options.SimpleTypeDescriptor
108 |     "@class": org.squonk.options.OptionDescriptor
109 |   executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep
110 | thinDescriptors:
111 | - input: input
112 | inputRoutes:
113 | - route: FILE
114 | outputRoutes:
115 | - route: FILE
116 | imageName: informaticsmatters/rdkit_pipelines
117 | command: >-
118 |   python -m pipelines.rdkit.conformers -i ${PIN}input.data.gz -if json -o ${POUT}output -of json -n $num -c $method
119 |   ${binding.variables.containsKey('attempts') ? ' -a ' + attempts : ''}
120 |   ${binding.variables.containsKey('prune') ? ' -r ' + prune : ''}
121 |   ${binding.variables.containsKey('threshold') ? ' -t ' + threshold : ''}
122 |   ${binding.variables.containsKey('minimize') ? ' -e ' + minimize : ''} --meta


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/conformers.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // Testing conformers.py reading from STDIN and writing to STDOUT
 8 |     test_raw_conformers = [
 9 | 
10 |         command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz |
11 |                  python -m pipelines.rdkit.conformers -n 2 -if sdf''',
12 | 
13 |         stderr: [ 'No output format specified - using sdf',
14 |                   'Molecule 36 generated [1-9]\\d* conformers' ],
15 | 
16 |     ],
17 | 
18 |     test_raw_conformers_to_sdf = [
19 | 
20 |         command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz |
21 |                  python -m pipelines.rdkit.conformers -n 2 -if sdf -of sdf''',
22 | 
23 |         stderr: [ 'Molecule 36 generated [1-9]\\d* conformers' ],
24 | 
25 |     ],
26 | 
27 |     // Testing conformers.py with clustering
28 |     // reading from STDIN and writing to STDOUT
29 |     test_raw_conformers_with_rmsd_clustering = [
30 | 
31 |         command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz |
32 |                  python -m pipelines.rdkit.conformers -n 2 -c RMSD -if sdf''',
33 | 
34 |         stderr: [ "cluster='rmsd'",
35 |                   'Molecule 35 generated [1-9]\\d* conformers and [1-9]\\d* clusters' ],
36 | 
37 |     ],
38 | 
39 |     // Testing conformers.py with clustering
40 |     // reading from file and writing to STDOUT
41 |     test_raw_conformers_with_tfd_clustering_from_sdf_file = [
42 | 
43 |         command: '''python -m pipelines.rdkit.conformers -n 2 -c TFD
44 |             -i ${PIN}Kinase_inhibs.sdf.gz -if sdf''',
45 | 
46 |         stderr: [ "cluster='tfd'",
47 |                   'Molecule 35 generated [1-9]\\d* conformers and [1-9]\\d* clusters' ],
48 | 
49 |     ],
50 | 
51 |     // Testing conformers.py with clustering
52 |     // reading from file and writing to file
53 |     test_raw_conformers_with_tfd_clustering_from_sdf_file_to_json_file = [
54 | 
55 |         command: '''python -m pipelines.rdkit.conformers -n 2 -c TFD
56 |             -i ${PIN}Kinase_inhibs.sdf.gz -if sdf -o ${POUT}output -of json''',
57 | 
58 |         stderr: [ "cluster='tfd'",
59 |                   'Molecule 35 generated [1-9]\\d* conformers and [1-9]\\d* clusters' ],
60 | 
61 |         creates: [ 'output.data.gz',
62 |                    'output.metadata' ],
63 | 
64 |     ],
65 | 
66 | ]
67 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/constrained_conf_gen.dsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | "@class": org.squonk.core.DockerServiceDescriptor
 3 | serviceConfig:
 4 |   id: pipelines.rdkit.conformer.constrained
 5 |   name: RDKitConstrainedConformers
 6 |   description: Generate constrained 3D conformers using RDKit
 7 |   tags:
 8 |   - rdkit
 9 |   - conformer
10 |   - 3d
11 |   - docker
12 |   resourceUrl:
13 |   icon: icons/molecule_generator.png
14 |   inputDescriptors:
15 |   - primaryType: org.squonk.dataset.Dataset
16 |     secondaryType: org.squonk.types.MoleculeObject
17 |     mediaType: application/x-squonk-dataset-molecule+json
18 |     name: input
19 |   - primaryType: org.squonk.dataset.Dataset
20 |     secondaryType: org.squonk.types.MoleculeObject
21 |     mediaType: application/x-squonk-dataset-molecule+json
22 |     name: refMol
23 |   outputDescriptors:
24 |   - primaryType: org.squonk.dataset.Dataset
25 |     secondaryType: org.squonk.types.MoleculeObject
26 |     mediaType: application/x-squonk-dataset-molecule+json
27 |     name: output
28 |   optionDescriptors:
29 |   - "@class": org.squonk.options.OptionDescriptor
30 |     modes:
31 |     - User
32 |     typeDescriptor:
33 |       type: java.lang.Integer
34 |       "@class": org.squonk.options.SimpleTypeDescriptor
35 |     editable: true
36 |     visible: true
37 |     description: Number of conformers to generate
38 |     defaultValue: 1
39 |     label: Number of conformers
40 |     key: arg.num
41 |     minValues: 1
42 |     maxValues: 1
43 |   - modes:
44 |     - User
45 |     editable: true
46 |     visible: true
47 |     description: Reference molecule index
48 |     label: Reference mol index
49 |     key: arg.refmolidx
50 |     minValues: 1
51 |     maxValues: 1
52 |     defaultValue: 1
53 |     typeDescriptor:
54 |       type: java.lang.Integer
55 |       "@class": org.squonk.options.SimpleTypeDescriptor
56 |     "@class": org.squonk.options.OptionDescriptor
57 |   - modes:
58 |     - User
59 |     editable: true
60 |     visible: true
61 |     description: Core smiles to constrain (optional)
62 |     label: Core smiles
63 |     key: arg.coresmiles
64 |     minValues: 0
65 |     maxValues: 1
66 |     typeDescriptor:
67 |       type: java.lang.String
68 |       "@class": org.squonk.options.SimpleTypeDescriptor
69 |     "@class": org.squonk.options.OptionDescriptor
70 |   executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep
71 | thinDescriptors:
72 | - input: input
73 | inputRoutes:
74 | - route: FILE
75 | - route: FILE
76 | outputRoutes:
77 | - route: FILE
78 | imageName: informaticsmatters/rdkit_pipelines
79 | command: >-
80 |   python -m pipelines.rdkit.constrained_conf_gen -i ${PIN}input.data.gz -o ${POUT}output -of json -n $num
81 |   -r refMol.data.gz --refmolidx $refmolidx
82 |   ${binding.variables.containsKey('coresmiles') ? ' -c "' + coresmiles + '"' : ''} --meta


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/constrained_conf_gen.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // Testing constrained_conf_gen.py
 8 |     // reading from files and writing to a file
 9 |     test_raw_constrained_default_of = [
10 | 
11 |         command: '''python -m pipelines.rdkit.constrained_conf_gen
12 |                  -n 2 -i ${PIN}XChemReactionMaker1.sdf.gz
13 |                  -r ${PIN}ref_mol.sdf.gz
14 |                  -o ${POUT}constrained_conf_gen''',
15 | 
16 |         stderr: [ 'No output format specified - using sdf' ],
17 | 
18 |         creates: [ 'constrained_conf_gen.sdf.gz' ],
19 |     ],
20 | 
21 |     test_raw_constrained_sdf_of = [
22 | 
23 |         command: '''python -m pipelines.rdkit.constrained_conf_gen
24 |                  -n 2 -i ${PIN}XChemReactionMaker1.sdf.gz
25 |                  -r ${PIN}ref_mol.sdf.gz
26 |                  -o ${POUT}constrained_conf_gen -of sdf''',
27 | 
28 |         creates: [ 'constrained_conf_gen.sdf.gz' ],
29 |     ],
30 | 
31 |     test_raw_constrained_json_of = [
32 | 
33 |         command: '''python -m pipelines.rdkit.constrained_conf_gen
34 |                  -n 2 -i ${PIN}XChemReactionMaker1.sdf.gz
35 |                  -r ${PIN}ref_mol.sdf.gz
36 |                  -o ${POUT}constrained_conf_gen -of json''',
37 | 
38 |         creates: [ 'constrained_conf_gen.data.gz',
39 |                    'constrained_conf_gen.metadata'],
40 |     ],
41 | 
42 | ]
43 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/max_min_picker.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // A basic start-up test for the module.
 8 |     // Simply makes sure it starts cleanly.
 9 |     test_help = [
10 | 
11 |         command: '''python -m pipelines.rdkit.max_min_picker -h''',
12 | 
13 |         stdout: [ 'usage: max_min_picker.py' ],
14 | 
15 |     ],
16 | 
17 |     test_missing_threshold = [
18 | 
19 |         command: '''python -m pipelines.rdkit.max_min_picker''',
20 | 
21 |         exit_error: '--num or --threshold arguments must be specified, or both',
22 | 
23 |     ],
24 | 
25 |     test_missing_file = [
26 | 
27 |         command: '''python -m pipelines.rdkit.max_min_picker -t 1.0''',
28 | 
29 |         exit_error: 'Must specify either an input file name or an input format (or both)',
30 | 
31 |     ],
32 | 
33 |     test_missing_sdf_input = [
34 | 
35 |         command: '''python -m pipelines.rdkit.max_min_picker
36 |             -i ${PIN}Kinase_inhibs.sdf.gz -if sdf -t 1.0''',
37 | 
38 |         stderr: [ 'No output format specified - using sdf',
39 |                   'MaxMinPicking with descriptor morgan2 and threshold 1.0',
40 |                   'Output 1 molecules' ],
41 | 
42 |     ],
43 | 
44 |     test_missing_json_input_sdf_output = [
45 | 
46 |         command: '''python -m pipelines.rdkit.max_min_picker
47 |             -i ${PIN}nci100.data.gz -if json -t 1.0 -o ${POUT}output -of sdf''',
48 | 
49 |         stderr: [ "outformat='sdf'",
50 |                   'MaxMinPicking with descriptor morgan2 and threshold 1.0',
51 |                   'Output 1 molecules' ],
52 | 
53 |         creates: [ 'output.sdf.gz' ],
54 | 
55 |     ],
56 | 
57 | ]
58 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/max_min_picker_enrich.dsd.yml:
--------------------------------------------------------------------------------
  1 | ---
  2 | "@class": org.squonk.core.DockerServiceDescriptor
  3 | serviceConfig:
  4 |   id: pipelines.rdkit.maxminpicker.enrich.1
  5 |   name: RDKitMaxMinPickerEnrich
  6 |   description: RDKit MaxMin picker for enriching a dataset with a diverse selection
  7 |   tags:
  8 |   - rdkit
  9 |   - maxmin
 10 |   - picker
 11 |   - diverse
 12 |   - subset
 13 |   - enrich
 14 |   - docker
 15 |   resourceUrl:
 16 |   icon: icons/filter_molecules.png
 17 |   inputDescriptors:
 18 |   - primaryType: org.squonk.dataset.Dataset
 19 |     secondaryType: org.squonk.types.MoleculeObject
 20 |     mediaType: application/x-squonk-dataset-molecule+json
 21 |     name: input
 22 |   - primaryType: org.squonk.dataset.Dataset
 23 |     secondaryType: org.squonk.types.MoleculeObject
 24 |     mediaType: application/x-squonk-dataset-molecule+json
 25 |     name: seeds
 26 |   outputDescriptors:
 27 |   - primaryType: org.squonk.dataset.Dataset
 28 |     secondaryType: org.squonk.types.MoleculeObject
 29 |     mediaType: application/x-squonk-dataset-molecule+json
 30 |     name: output
 31 |   optionDescriptors:
 32 |   - "@class": org.squonk.options.OptionDescriptor
 33 |     modes:
 34 |     - User
 35 |     typeDescriptor:
 36 |       type: java.lang.Integer
 37 |       "@class": org.squonk.options.SimpleTypeDescriptor
 38 |     key: arg.num
 39 |     label: Number to pick
 40 |     description: Number of molecules to pick
 41 |     minValues: 0
 42 |     maxValues: 1
 43 |     visible: true
 44 |     editable: true
 45 |   - "@class": org.squonk.options.OptionDescriptor
 46 |     modes:
 47 |     - User
 48 |     typeDescriptor:
 49 |       type: java.lang.Float
 50 |       "@class": org.squonk.options.SimpleTypeDescriptor
 51 |     key: arg.threshold
 52 |     label: Threshold
 53 |     description: Dissimilarity threshold (0.0 is identical)
 54 |     minValues: 0
 55 |     maxValues: 1
 56 |     visible: true
 57 |     editable: true
 58 |   - modes:
 59 |     - User
 60 |     editable: true
 61 |     "@class": org.squonk.options.OptionDescriptor
 62 |     typeDescriptor:
 63 |       type: java.lang.String
 64 |       "@class": org.squonk.options.SimpleTypeDescriptor
 65 |     key: arg.fragment_method
 66 |     label: Fragment method
 67 |     description: Approach to use for picking biggest molecular fragment
 68 |     values:
 69 |     - hac
 70 |     - mw
 71 |     defaultValue: hac
 72 |     visible: true
 73 |   - modes:
 74 |     - User
 75 |     editable: true
 76 |     "@class": org.squonk.options.OptionDescriptor
 77 |     typeDescriptor:
 78 |       type: java.lang.Boolean
 79 |       "@class": org.squonk.options.SimpleTypeDescriptor
 80 |     key: arg.output_fragment
 81 |     label: Output biggest fragment
 82 |     description: Output the biggest fragment rather than the whole molecule
 83 |     defaultValue: false
 84 |     visible: true
 85 |   - modes:
 86 |     - User
 87 |     editable: true
 88 |     "@class": org.squonk.options.OptionDescriptor
 89 |     typeDescriptor:
 90 |       type: java.lang.String
 91 |       "@class": org.squonk.options.SimpleTypeDescriptor
 92 |     key: arg.descriptor
 93 |     label: Descriptor
 94 |     description: Descriptor/fingerprint to use
 95 |     values:
 96 |     - maccs
 97 |     - morgan2
 98 |     - morgan3
 99 |     defaultValue: morgan2
100 |     visible: true
101 |   executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep
102 | thinDescriptors:
103 | - input: input
104 |   output: output
105 |   filtering: true
106 |   preserve: false
107 | inputRoutes:
108 | - route: FILE
109 | - route: FILE
110 | outputRoutes:
111 | - route: FILE
112 | imageName: informaticsmatters/rdkit_pipelines
113 | command: >-
114 |   python -m pipelines.rdkit.max_min_picker -i ${PIN}input.data.gz -if json -s seeds.data.gz
115 |   -o ${POUT}output -of json
116 |   ${binding.variables.containsKey('num') ? ' --num ' + num : ''}
117 |   ${binding.variables.containsKey('threshold') ? '--threshold ' + threshold : ''}
118 |   --fragment-method ${fragment_method}
119 |   ${output_fragment ? '--output-fragment' : ''}
120 |   --descriptor ${descriptor}
121 |   --meta --quiet
122 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/max_min_picker_simple.dsd.yml:
--------------------------------------------------------------------------------
  1 | ---
  2 | "@class": org.squonk.core.DockerServiceDescriptor
  3 | serviceConfig:
  4 |   id: pipelines.rdkit.maxminpicker.simple.1
  5 |   name: RDKitMaxMinPickerSimple
  6 |   description: RDKit MaxMin picker for diverse subset selection
  7 |   tags:
  8 |   - rdkit
  9 |   - maxmin
 10 |   - picker
 11 |   - diverse
 12 |   - subset
 13 |   - docker
 14 |   resourceUrl:
 15 |   icon: icons/filter_molecules.png
 16 |   inputDescriptors:
 17 |   - primaryType: org.squonk.dataset.Dataset
 18 |     secondaryType: org.squonk.types.MoleculeObject
 19 |     mediaType: application/x-squonk-dataset-molecule+json
 20 |     name: input
 21 |   outputDescriptors:
 22 |   - primaryType: org.squonk.dataset.Dataset
 23 |     secondaryType: org.squonk.types.MoleculeObject
 24 |     mediaType: application/x-squonk-dataset-molecule+json
 25 |     name: output
 26 |   optionDescriptors:
 27 |   - "@class": org.squonk.options.OptionDescriptor
 28 |     modes:
 29 |     - User
 30 |     typeDescriptor:
 31 |       type: java.lang.Integer
 32 |       "@class": org.squonk.options.SimpleTypeDescriptor
 33 |     key: arg.num
 34 |     label: Number to pick
 35 |     description: Number of molecules to pick
 36 |     minValues: 0
 37 |     maxValues: 1
 38 |     visible: true
 39 |     editable: true
 40 |   - "@class": org.squonk.options.OptionDescriptor
 41 |     modes:
 42 |     - User
 43 |     typeDescriptor:
 44 |       type: java.lang.Float
 45 |       "@class": org.squonk.options.SimpleTypeDescriptor
 46 |     key: arg.threshold
 47 |     label: Threshold
 48 |     description: Dissimilarity threshold (0.0 is identical)
 49 |     minValues: 0
 50 |     maxValues: 1
 51 |     visible: true
 52 |     editable: true
 53 |   - modes:
 54 |     - User
 55 |     editable: true
 56 |     "@class": org.squonk.options.OptionDescriptor
 57 |     typeDescriptor:
 58 |       type: java.lang.String
 59 |       "@class": org.squonk.options.SimpleTypeDescriptor
 60 |     key: arg.fragment_method
 61 |     label: Fragment method
 62 |     description: Approach to use for picking biggest molecular fragment
 63 |     values:
 64 |     - hac
 65 |     - mw
 66 |     defaultValue: hac
 67 |     visible: true
 68 |   - modes:
 69 |     - User
 70 |     editable: true
 71 |     "@class": org.squonk.options.OptionDescriptor
 72 |     typeDescriptor:
 73 |       type: java.lang.Boolean
 74 |       "@class": org.squonk.options.SimpleTypeDescriptor
 75 |     key: arg.output_fragment
 76 |     label: Output biggest fragment
 77 |     description: Output the biggest fragment rather than the whole molecule
 78 |     defaultValue: false
 79 |     visible: true
 80 |   - modes:
 81 |     - User
 82 |     editable: true
 83 |     "@class": org.squonk.options.OptionDescriptor
 84 |     typeDescriptor:
 85 |       type: java.lang.String
 86 |       "@class": org.squonk.options.SimpleTypeDescriptor
 87 |     key: arg.descriptor
 88 |     label: Descriptor
 89 |     description: Descriptor/fingerprint to use
 90 |     values:
 91 |     - maccs
 92 |     - morgan2
 93 |     - morgan3
 94 |     defaultValue: morgan2
 95 |     visible: true
 96 |   executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep
 97 | thinDescriptors:
 98 | - input: input
 99 |   output: output
100 |   filtering: true
101 |   preserve: false
102 | inputRoutes:
103 | - route: FILE
104 | outputRoutes:
105 | - route: FILE
106 | imageName: informaticsmatters/rdkit_pipelines
107 | command: >-
108 |   python -m pipelines.rdkit.max_min_picker -i input.data.gz -if json -o output -of json
109 |   ${binding.variables.containsKey('num') ? '--num ' + num : ''}
110 |   ${binding.variables.containsKey('threshold') ? '--threshold ' + threshold : ''}
111 |   --fragment-method ${fragment_method}
112 |   ${output_fragment ? '--output-fragment' : ''}
113 |   --descriptor ${descriptor}
114 |   --meta --quiet
115 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/o3dAlign.dsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | "@class": org.squonk.core.DockerServiceDescriptor
 3 | serviceConfig:
 4 |   id: pipelines.rdkit.o3da.basic
 5 |   name: RDKitOpen3DAlign
 6 |   description: Generate 3D alignments using Open3DAlign in RDKit
 7 |   tags:
 8 |   - rdkit
 9 |   - conformer
10 |   - alignment
11 |   - open3dalign
12 |   - 3d
13 |   - docker
14 |   resourceUrl:
15 |   icon: icons/filter_molecules.png
16 |   inputDescriptors:
17 |   - primaryType: org.squonk.dataset.Dataset
18 |     secondaryType: org.squonk.types.MoleculeObject
19 |     mediaType: application/x-squonk-dataset-molecule+json
20 |     name: input
21 |   - primaryType: org.squonk.dataset.Dataset
22 |     secondaryType: org.squonk.types.MoleculeObject
23 |     mediaType: application/x-squonk-dataset-molecule+json
24 |     name: queryMol
25 |   outputDescriptors:
26 |   - primaryType: org.squonk.dataset.Dataset
27 |     secondaryType: org.squonk.types.MoleculeObject
28 |     mediaType: application/x-squonk-dataset-molecule+json
29 |     name: output
30 |   optionDescriptors:
31 |   - modes:
32 |     - User
33 |     editable: true
34 |     visible: true
35 |     description: Query molecule index
36 |     label: Query mol index
37 |     key: arg.qmolidx
38 |     minValues: 0
39 |     maxValues: 1
40 |     typeDescriptor:
41 |       type: java.lang.Integer
42 |       "@class": org.squonk.options.SimpleTypeDescriptor
43 |     "@class": org.squonk.options.OptionDescriptor
44 |   - modes:
45 |     - User
46 |     editable: true
47 |     visible: true
48 |     label: O3DAlign score threshold
49 |     description: Keep molecules with O3DAlign scores within this range of the score
50 |       for aligning the query to itself
51 |     key: arg.threshold
52 |     minValues: 0
53 |     maxValues: 1
54 |     typeDescriptor:
55 |       type: java.lang.Float
56 |       "@class": org.squonk.options.SimpleTypeDescriptor
57 |     "@class": org.squonk.options.OptionDescriptor
58 |   - modes:
59 |     - User
60 |     editable: true
61 |     visible: true
62 |     description: Use Crippen (logP) contributions
63 |     label: Use Crippen (logP) contributions
64 |     key: arg.crippen
65 |     minValues: 1
66 |     maxValues: 1
67 |     typeDescriptor:
68 |       type: java.lang.Boolean
69 |       "@class": org.squonk.options.SimpleTypeDescriptor
70 |     "@class": org.squonk.options.OptionDescriptor
71 |   - "@class": org.squonk.options.OptionDescriptor
72 |     modes:
73 |     - User
74 |     typeDescriptor:
75 |       type: java.lang.Integer
76 |       "@class": org.squonk.options.SimpleTypeDescriptor
77 |     editable: true
78 |     visible: true
79 |     description: Number of conformers to generate if not already 3D
80 |     label: Number of conformers
81 |     key: arg.num
82 |     minValues: 0
83 |     maxValues: 1
84 |   executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep
85 | thinDescriptors:
86 | - input: input
87 | inputRoutes:
88 | - route: FILE
89 | - route: FILE
90 | outputRoutes:
91 | - route: FILE
92 | imageName: informaticsmatters/rdkit_pipelines
93 | command: >-
94 |   python -m pipelines.rdkit.o3dAlign queryMol.data.gz -i ${PIN}input.data.gz -o ${POUT}output -of json
95 |   ${binding.variables.containsKey('qmolidx') ? '--qmolidx ' + qmolidx : ''}
96 |   ${binding.variables.containsKey('num') ? '--num ' + num : ''}
97 |   ${binding.variables.containsKey('threshold') ? '--threshold ' + threshold : ''}
98 |   ${crippen ? '--crippen' : ''}
99 |   --meta


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/o3dAlign.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // Testing o3dAlign.py reading from STDIN and writing to STDOUT
 8 |     test_raw_o3dalign = [
 9 | 
10 |         command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz |
11 |                  python -m pipelines.rdkit.o3dAlign
12 |                  ${PIN}pyrimethamine.mol -n 2 -t 10 -if sdf''',
13 | 
14 |         stderr: [ 'No output format specified - using sdf',
15 |                   'Molecule 36 generated [1-9]\\d* conformers' ],
16 | 
17 |     ],
18 | 
19 |     // Testing o3dAlign.py reading from file and writing to file
20 |     test_raw_o3dalign_sdf_to_sdf = [
21 | 
22 |         command: '''python -m pipelines.rdkit.o3dAlign
23 |                  ${PIN}pyrimethamine.mol -n 2 -t 10
24 |                  -i ${PIN}Kinase_inhibs.sdf.gz -if sdf
25 |                  -o ${POUT}output -of sdf''',
26 | 
27 |         stderr: [ 'Molecule 36 generated [1-9]\\d* conformers' ],
28 | 
29 |         creates: [ 'output.sdf.gz' ],
30 | 
31 |     ],
32 | 
33 |     // Testing o3dAlign.py reading from file and writing to file
34 |     test_raw_o3dalign_sdf_to_json = [
35 | 
36 |         command: '''python -m pipelines.rdkit.o3dAlign
37 |                  ${PIN}pyrimethamine.mol -n 2 -t 10
38 |                  -i ${PIN}Kinase_inhibs.sdf.gz -if sdf
39 |                  -o ${POUT}output -of json''',
40 | 
41 |         stderr: [ 'Molecule 36 generated [1-9]\\d* conformers' ],
42 | 
43 |         creates: [ 'output.data.gz',
44 |                    'output.metadata'],
45 | 
46 |     ],
47 | 
48 |     // Testing o3dAlign.py with Crippen (logP) contributions; the test name must be unique in this file
49 |     test_raw_o3dalign_sdf_to_json_crippen = [
50 | 
51 |         command: '''python -m pipelines.rdkit.o3dAlign --crippen
52 |                       ${PIN}pyrimethamine.mol -n 2 -t 10
53 |                       -i ${PIN}Kinase_inhibs.sdf.gz -if sdf
54 |                       -o ${POUT}output -of json''',
55 | 
56 |         stderr: [ 'Molecule 36 generated [1-9]\\d* conformers' ],
57 | 
58 |         creates: [ 'output.data.gz',
59 |                    'output.metadata'],
60 | 
61 |     ],
62 | 
63 |     // Uses an invalid file - but here just to improve coverage
64 |     // and trap any bad Python links.
65 |     test_raw_o3dalign_json_to_sdf = [
66 | 
67 |         command: '''python -m pipelines.rdkit.o3dAlign
68 |                  ${PIN}pyrimethamine.mol -n 2 -t 10
69 |                  -i ${PIN}nci100.data.gz -if json
70 |                  -o ${POUT}output -of sdf''',
71 | 
72 |         exit_error: 'missing MMFF94 parameters for probe molecule',
73 | 
74 |     ],
75 | 
76 | ]
77 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/pbf_ev.dsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | "@class": org.squonk.core.DockerServiceDescriptor
 3 | serviceConfig:
 4 |   id: pipelines.xchem.pbf_ev.v1
 5 |   name: PBF EV analysis
 6 |   description: 'PBF EV analysis '
 7 |   tags:
 8 |   - rdkit
 9 |   - xchem
10 |   - docker
11 |   - pbfev
12 |   - icr
13 |   - joshuameyers
14 |   resourceUrl:
15 |   icon: icons/transform_molecule.png
16 |   inputDescriptors:
17 |   - primaryType: org.squonk.dataset.Dataset
18 |     secondaryType: org.squonk.types.MoleculeObject
19 |     mediaType: application/x-squonk-dataset-molecule+json
20 |     name: input
21 |   outputDescriptors:
22 |   - primaryType: org.squonk.dataset.Dataset
23 |     secondaryType: org.squonk.types.MoleculeObject
24 |     mediaType: application/x-squonk-dataset-molecule+json
25 |     name: output
26 |   optionDescriptors: []
27 |   executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep
28 | inputRoutes:
29 | - route: FILE
30 | outputRoutes:
31 | - route: FILE
32 | imageName: informaticsmatters/rdkit_pipelines
33 | command: >-
34 |   python -m pipelines.rdkit.pbf_ev -i ${PIN}input.data.gz -if json -o ${POUT}output -of json --meta


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/pbf_ev.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // Testing pbf_ev.py reading from files
 8 |     test_pbf_ev_raw = [
 9 | 
10 |         command: '''python -m pipelines.rdkit.pbf_ev
11 |                  -i ${PIN}dhfr_3d.sdf -o ${POUT}output''',
12 | 
13 |         stderr: [ 'No output format specified - using sdf',
14 |                   'Handled [1-9]\\d+ molecules, resulting in' ],
15 | 
16 |         creates: [ 'output.sdf.gz' ],
17 | 
18 |     ],
19 | 
20 |     // Testing from file
21 |     test_pbf_ev_to_sdf = [
22 | 
23 |         command: '''python -m pipelines.rdkit.pbf_ev
24 |                  -i ${PIN}dhfr_3d.sdf -o ${POUT}output -of sdf''',
25 | 
26 |         stderr: [ 'Handled [1-9]\\d+ molecules, resulting in' ],
27 | 
28 |         creates: [ 'output.sdf.gz' ],
29 | 
30 |     ],
31 | 
32 |     // Testing pbf_ev.py reading from file and writing JSON output
33 |     test_pbf_ev_to_json = [
34 | 
35 |         command: '''python -m pipelines.rdkit.pbf_ev
36 |                  -i ${PIN}dhfr_3d.sdf -o ${POUT}output -of json''',
37 | 
38 |         stderr: [ 'Handled [1-9]\\d+ molecules, resulting in' ],
39 | 
40 |         creates: [ 'output.data.gz',
41 |                    'output.metadata'],
42 | 
43 |     ],
44 | 
45 | ]
46 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/rxn_maker.dsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | "@class": org.squonk.core.DockerServiceDescriptor
 3 | serviceConfig:
 4 |   id: pipelines.xchem.reaction.maker.v1
 5 |   name: XChemReactionMaker
 6 |   description: XChem react molecules and reactants
 7 |   tags:
 8 |   - rdkit
 9 |   - xchem
10 |   - enumeration
11 |   - reaction
12 |   - docker
13 |   resourceUrl:
14 |   icon: icons/chemreaction.png
15 |   inputDescriptors:
16 |   - primaryType: org.squonk.dataset.Dataset
17 |     secondaryType: org.squonk.types.MoleculeObject
18 |     mediaType: application/x-squonk-dataset-molecule+json
19 |     name: input
20 |   - primaryType: org.squonk.dataset.Dataset
21 |     secondaryType: org.squonk.types.MoleculeObject
22 |     mediaType: application/x-squonk-dataset-molecule+json
23 |     name: reactants
24 |   outputDescriptors:
25 |   - primaryType: org.squonk.dataset.Dataset
26 |     secondaryType: org.squonk.types.MoleculeObject
27 |     mediaType: application/x-squonk-dataset-molecule+json
28 |     name: output
29 |   optionDescriptors:
30 |   - modes:
31 |     - User
32 |     editable: true
33 |     "@class": org.squonk.options.OptionDescriptor
34 |     typeDescriptor:
35 |       type: java.lang.String
36 |       "@class": org.squonk.options.SimpleTypeDescriptor
37 |     key: arg.reaction
38 |     label: Reaction
39 |     description: Reaction to use
40 |     values:
41 |     - Amides
42 |     - Benzimidazole
43 |     - Benzoxazole
44 |     - Ester_Coupling
45 |     - Ether_Coupling
46 |     - Indole
47 |     - N-Alkylation
48 |     - Oxadiazole
49 |     - Reductive_Amination
50 |     - SNAr
51 |     - Sonogashira
52 |     - Sulfonamide
53 |     - Suzuki_Coupling
54 |     - Triazole
55 |     - Urea
56 |     visible: true
57 |     minValues: 1
58 |     maxValues: 1
59 |   executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep
60 | thinDescriptors:
61 | - input: input
62 | - input: reactants
63 | inputRoutes:
64 | - route: FILE
65 | - route: FILE
66 | outputRoutes:
67 | - route: FILE
68 | imageName: informaticsmatters/rdkit_pipelines
69 | command: >-
70 |   python -m pipelines.rdkit.rxn_maker -i ${PIN}input.data.gz --reagent_lib reactants.data.gz
71 |   -o ${POUT}output --reaction ${reaction} -of json --meta
72 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/rxn_maker.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Copyright 2017 Informatics Matters Ltd.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import argparse
18 | import os
19 | 
20 | from pipelines_utils import parameter_utils, utils
21 | from pipelines_utils_rdkit import rdkit_utils
22 | 
23 | 
24 | ### start main execution #########################################
25 | 
26 | def main():
27 |     ### command line args definitions #########################################
28 | 
29 |     ### Define the reactions available
30 |     poised_filter = True
31 |     if poised_filter == True:
32 |         from .poised_filter import Filter
33 |         filter_to_use = Filter()
34 | 
35 | 
36 |     parser = argparse.ArgumentParser(description='RDKit rxn process')
37 |     parameter_utils.add_default_io_args(parser)
38 |     parser.add_argument('-q', '--quiet', action='store_true', help='Quiet mode')
39 |     parser.add_argument('-m', '--multi', action='store_true', help='Output one file for each reaction')
40 |     parser.add_argument('-r', '--reaction', choices=filter_to_use.poised_reactions.keys(), help='Name of reaction to be run')
41 |     parser.add_argument('-rl', '--reagent_lib', help="Reagent file, if not defined the STDIN is used")
42 |     parser.add_argument('-rlf', '--reagent_lib_format', choices=['sdf', 'json'], help="Reagent file format. When using STDIN this must be specified.")
43 | 
44 | 
45 |     args = parser.parse_args()
46 |     utils.log("Screen Args: ", args)
47 | 
48 |     if not args.output and args.multi:
49 |         raise ValueError("Must specify output location when writing individual result files")
50 | 
51 |     input, suppl = rdkit_utils.default_open_input(args.input, args.informat)
52 |     output, writer, output_base = rdkit_utils.default_open_output(args.output, "rxn_maker", args.outformat)
53 | 
54 | 
55 |     i = 0
56 |     count = 0
57 | 
58 |     if args.multi:
59 |         dir_base = os.path.dirname(args.output)
60 |         writer_dict = filter_to_use.get_writers(dir_base)
61 |     else:
62 |         writer_dict = None
63 |         dir_base = None
64 | 
65 |     for mol in suppl:
66 |         i+=1
67 |         if mol is None: continue
68 |         reagent_input, reagent_suppl = rdkit_utils.default_open_input(args.reagent_lib, args.reagent_lib_format)
69 |         for r_mol in reagent_suppl:
70 |             if r_mol is None:
71 |                 continue
72 |             # Return a dict/class here - indicating which filters passed
73 |             count = filter_to_use.perform_reaction(mol,args.reaction,r_mol,writer,count)
74 | 
75 | 
76 |     utils.log("Created", count, "molecules from a total of ", i, "input molecules")
77 | 
78 |     writer.flush()
79 |     writer.close()
80 |     if input:
81 |         input.close()
82 |     if reagent_input:
83 |         reagent_input.close()
84 |     if output:
85 |         output.close()
86 |     # close the individual writers
87 |     if writer_dict:
88 |         for key in writer_dict:
89 |             writer_dict[key].close()
90 | 
91 |     if args.meta:
92 |         utils.write_metrics(output_base, {'__InputCount__': i, '__OutputCount__': count, 'RxnMaker': count})
93 | 
94 | 
95 | if __name__ == "__main__":
96 |     main()
97 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/rxn_maker.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // Testing rxn_maker.py reading from files
 8 |     test_rxn_maker_raw = [
 9 | 
10 |         command: '''python -m pipelines.rdkit.rxn_maker
11 |                  -i ${PIN}sulfonyl_chloride.sdf
12 |                  -r Sulfonamide
13 |                  -rl ${PIN}sdf-aliphatic-primary-amines-175.sdf.gz
14 |                  -o ${POUT}output''',
15 | 
16 |         stderr: [ 'No output format specified - using sdf',
17 |                   'Created [1-9]\\d+ molecules from a total of' ],
18 | 
19 |         creates: [ 'output.sdf.gz' ],
20 | 
21 |     ],
22 | 
23 |     test_rxn_maker_raw_to_sdf = [
24 | 
25 |         command: '''python -m pipelines.rdkit.rxn_maker
26 |                  -i ${PIN}sulfonyl_chloride.sdf
27 |                  -r Sulfonamide
28 |                  -rl ${PIN}sdf-aliphatic-primary-amines-175.sdf.gz
29 |                  -o ${POUT}output -of sdf''',
30 | 
31 |         stderr: [ 'Created [1-9]\\d+ molecules from a total of' ],
32 | 
33 |         creates: [ 'output.sdf.gz' ],
34 | 
35 |     ],
36 | 
37 |     test_rxn_maker_raw_to_json = [
38 | 
39 |         command: '''python -m pipelines.rdkit.rxn_maker
40 |                  -i ${PIN}sulfonyl_chloride.sdf
41 |                  -r Sulfonamide
42 |                  -rl ${PIN}sdf-aliphatic-primary-amines-175.sdf.gz
43 |                  -o ${POUT}output -of json''',
44 | 
45 |         stderr: [ 'Created [1-9]\\d+ molecules from a total of' ],
46 | 
47 |         creates: [ 'output.data.gz',
48 |                    'output.metadata'],
49 | 
50 |     ],
51 | 
52 | ]
53 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/rxn_selector.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import argparse
 4 | import os
 5 | 
 6 | from pipelines_utils import parameter_utils, utils
 7 | from pipelines_utils_rdkit import rdkit_utils
 8 | 
 9 | 
10 | ### start main execution #########################################
11 | 
def main():
    """Command-line entry point: run the selected reaction for each input molecule.

    Parses the command line, opens the input supplier, the reagent supplier and the
    output writer through ``rdkit_utils``, then calls ``Filter.perform_reaction``
    once per readable input molecule, handing it the reagent supplier and the
    running ``count``. Afterwards the totals are logged, every handle is closed and,
    when ``args.meta`` is set, a metrics file is written.

    :raises ValueError: if ``--multi`` is requested without an output location
    """
    ### command line args definitions #########################################

    ### Define the reactions available
    # Only the POISED filter exists, so it is loaded unconditionally.
    from .poised_filter import Filter
    filter_to_use = Filter()

    parser = argparse.ArgumentParser(description='RDKit rxn process')
    parameter_utils.add_default_io_args(parser)
    parser.add_argument('-q', '--quiet', action='store_true', help='Quiet mode')
    parser.add_argument('-m', '--multi', action='store_true', help='Output one file for each reaction')
    parser.add_argument('-r', '--reaction', choices=filter_to_use.poised_reactions.keys(), help='Name of reaction to be run')
    parser.add_argument('-rl', '--reagent_lib', help="Input SD file, if not defined the STDIN is used")
    parser.add_argument('-rlf', '--reagent_lib_format', choices=['sdf', 'json'], help="Input format. When using STDIN this must be specified.")

    args = parser.parse_args()
    utils.log("Screen Args: ", args)

    if not args.output and args.multi:
        raise ValueError("Must specify output location when writing individual result files")

    input, suppl = rdkit_utils.default_open_input(args.input, args.informat)
    reagent_input, reagent_suppl = rdkit_utils.default_open_input(args.reagent_lib, args.reagent_lib_format)
    output, writer, output_base = rdkit_utils.default_open_output(args.output, "rxn_maker", args.outformat)

    i = 0
    count = 0

    if args.multi:
        dir_base = os.path.dirname(args.output)
        writer_dict = filter_to_use.get_writers(dir_base)
    else:
        writer_dict = None

    for mol in suppl:
        i += 1
        if mol is None:
            continue
        # NOTE(review): the same reagent supplier object is handed over for every
        # input molecule; if it is a forward-only reader it may be exhausted after
        # the first call - confirm against Filter.perform_reaction.
        # Return a dict/class here - indicating which filters passed
        count = filter_to_use.perform_reaction(mol, args.reaction, reagent_suppl, writer, count)

    utils.log("Created", count, "molecules from a total of ", i, "input molecules")

    writer.flush()
    writer.close()
    if input:
        input.close()
    # The reagent handle was previously left open; close it like the other handles.
    if reagent_input:
        reagent_input.close()
    if output:
        output.close()
    # close the individual writers
    if writer_dict:
        for key in writer_dict:
            writer_dict[key].close()

    if args.meta:
        utils.write_metrics(output_base, {'__InputCount__': i, '__OutputCount__': count, 'RxnSmartsFilter': count})
71 | 
72 | 
73 | if __name__ == "__main__":
74 |     main()
75 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/rxn_selector.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // A basic start-up test for the module.
 8 |     // Simply makes sure it starts cleanly.
 9 |     test_help = [
10 | 
11 |         command: '''python -m pipelines.rdkit.rxn_selector -h''',
12 | 
13 |         stdout: [ 'usage: rxn_selector.py' ],
14 | 
15 |     ],
16 | 
17 |     test_basic = [
18 | 
19 |         command: '''python -m pipelines.rdkit.rxn_selector
20 |             -rl ${PIN}sdf-aliphatic-primary-amines-175.sdf.gz -rlf sdf''',
21 | 
22 |         exit_error: 'Must specify either an input file name or an input format (or both)',
23 | 
24 |     ],
25 | 
26 |     // Not sure this test makes any sense (abc)
27 |     // Added simply to try and improve coverage
28 |     test_basic_to_sdf = [
29 | 
30 |         command: '''python -m pipelines.rdkit.rxn_selector
31 |             -i ${PIN}sulfonyl_chloride.sdf -if sdf
32 |             -rl ${PIN}sdf-aliphatic-primary-amines-175.sdf.gz -rlf sdf
33 |             -o output -of sdf''',
34 | 
35 |         exit_error: 'KeyError: None',
36 | 
37 |     ],
38 | 
39 | ]
40 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/rxn_smarts_filter.dsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | "@class": org.squonk.core.DockerServiceDescriptor
 3 | serviceConfig:
 4 |   id: pipelines.xchem.smarts_filter.reaction
 5 |   name: XChemReactionSmartsFilter
 6 |   description: XChem Reaction Smarts Filter
 7 |   tags:
 8 |   - rdkit
 9 |   - xchem
10 |   - docker
11 |   - smarts
12 |   - reaction
13 |   - filter
14 |   - poised
15 |   resourceUrl:
16 |   icon: icons/filter_molecules.png
17 |   inputDescriptors:
18 |   - primaryType: org.squonk.dataset.Dataset
19 |     secondaryType: org.squonk.types.MoleculeObject
20 |     mediaType: application/x-squonk-dataset-molecule+json
21 |     name: input
22 |   outputDescriptors:
23 |   - primaryType: org.squonk.dataset.Dataset
24 |     secondaryType: org.squonk.types.MoleculeObject
25 |     mediaType: application/x-squonk-dataset-molecule+json
26 |     name: output
27 |   optionDescriptors: []
28 |   executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep
29 | inputRoutes:
30 | - route: FILE
31 | outputRoutes:
32 | - route: FILE
33 | imageName: informaticsmatters/rdkit_pipelines
34 | command: >-
35 |   python -m pipelines.rdkit.rxn_smarts_filter -i ${PIN}input.data.gz -if json
36 |   -o ${POUT}output -of json --thin --meta
37 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/rxn_smarts_filter.test:
--------------------------------------------------------------------------------
  1 | // Automated pipeline test specification.
  2 | 
  3 | [
  4 | 
  5 |     version = 1,
  6 | 
  7 |     // Testing rxn_smarts_filter.py reading from sd file and writing to multiple files
  8 |     test_rxn_smarts_filter_raw = [
  9 | 
 10 |         command: '''python -m pipelines.rdkit.rxn_smarts_filter
 11 |                  -i ${PIN}Kinase_inhibs.sdf.gz -o ${POUT}output --multi''',
 12 | 
 13 |         stderr: [ 'Using 15 reaction filters',
 14 |                   'No output format specified - using sdf',
 15 |                   'Matched [1-9]\\d+ molecules from a total of' ],
 16 | 
 17 |         creates: [ 'output.sdf.gz',
 18 |                    'Amides.sdf',
 19 |                    'Ether_Coupling.sdf',
 20 |                    'Reductive_Amination.sdf',
 21 |                    'Suzuki_Coupling.sdf',
 22 |                    'Benzimidazole.sdf',
 23 |                    'Indole.sdf',
 24 |                    'SNAr.sdf',
 25 |                    'Triazole.sdf',
 26 |                    'Benzoxazole.sdf',
 27 |                    'N-Alkylation.sdf',
 28 |                    'Sonogashira.sdf',
 29 |                    'Urea.sdf',
 30 |                    'Ester_Coupling.sdf',
 31 |                    'Oxadiazole.sdf',
 32 |                    'Sulfonamide.sdf',
 33 |                    'output.sdf.gz' ],
 34 | 
 35 |     ],
 36 | 
 37 |     // Testing rxn_smarts_filter.py reading from STDIN
 38 |     // and writing to files using SDF
 39 |     test_rxn_smarts_filter_raw_stdin_to_sdf = [
 40 | 
 41 |         command: '''gunzip -c ${PIN}Kinase_inhibs.sdf.gz |
 42 |                  python -m pipelines.rdkit.rxn_smarts_filter
 43 |                  -if sdf -o ${POUT}output''',
 44 | 
 45 |         stderr: [ 'Using 15 reaction filters',
 46 |                   'No output format specified - using sdf',
 47 |                   'Matched [1-9]\\d+ molecules from a total of' ],
 48 | 
 49 |         creates: [ 'output.sdf.gz' ],
 50 | 
 51 |     ],
 52 | 
 53 |     // Testing rxn_smarts_filter.py reading from file
 54 |     // and writing to files using SDF
 55 |     test_rxn_smarts_filter_raw_sdf_file_to_sdf = [
 56 | 
 57 |         command: '''python -m pipelines.rdkit.rxn_smarts_filter
 58 |                  -i ${PIN}Kinase_inhibs.sdf.gz -if sdf -o ${POUT}output -of sdf''',
 59 | 
 60 |         stderr: [ 'Using 15 reaction filters',
 61 |                   'Matched [1-9]\\d+ molecules from a total of' ],
 62 | 
 63 |         creates: [ 'output.sdf.gz' ],
 64 | 
 65 |     ],
 66 | 
 67 |     // Testing rxn_smarts_filter.py
 68 |     // reading from STDIN and writing to files using JSON
 69 |     test_rxn_smarts_filter_raw_stdin_to_json = [
 70 | 
 71 |         command: '''gunzip -c ${PIN}nci100.data.gz |
 72 |                  python -m pipelines.rdkit.rxn_smarts_filter
 73 |                  -if json -o ${POUT}output -of json --meta --thin''',
 74 | 
 75 |         stderr: [ 'Using 15 reaction filters',
 76 |                   'Matched [1-9]\\d+ molecules from a total of 100' ],
 77 | 
 78 |         creates: [ 'output.data.gz',
 79 |                    'output_metrics.txt',
 80 |                    'output.metadata' ],
 81 | 
 82 |         metrics: [ 'RxnSmartsFilter': '\\d\\d',
 83 |                    '__InputCount__': '100',
 84 |                    '__OutputCount__': '\\d\\d' ],
 85 | 
 86 | 
 87 |     ],
 88 | 
 89 |     // Testing rxn_smarts_filter.py
 90 |     // reading from file and writing to files using JSON
 91 |     test_rxn_smarts_filter_raw_file_to_json = [
 92 | 
 93 |         command: '''python -m pipelines.rdkit.rxn_smarts_filter
 94 |                  -i ${PIN}nci100.data.gz -if json
 95 |                  -o ${POUT}output -of json --meta --thin''',
 96 | 
 97 |         stderr: [ 'Using 15 reaction filters',
 98 |                   'Matched [1-9]\\d+ molecules from a total of 100' ],
 99 | 
100 |         creates: [ 'output.data.gz',
101 |                    'output_metrics.txt',
102 |                    'output.metadata' ],
103 | 
104 |         metrics: [ 'RxnSmartsFilter': '\\d\\d',
105 |                    '__InputCount__': '100',
106 |                    '__OutputCount__': '\\d\\d' ],
107 | 
108 | 
109 |     ],
110 | 
111 | ]
112 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/sanifier.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // A basic start-up test for the module.
 8 |     // Simply makes sure it starts cleanly.
 9 |     //
10 |     // For this to work you will need to make sure a number
11 |     // of other modules are installed in your conda environment.
12 |     // - molvs (pip install molvs)
13 |     // - standardiser
14 |     test_help = [
15 | 
16 |         command: '''python -m pipelines.rdkit.sanifier -h''',
17 | 
18 |         stdout: [ 'usage: sanifier.py' ]
19 | 
20 |     ],
21 | 
22 | ]
23 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/sanifier_enumerator.dsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | "@class": org.squonk.core.DockerServiceDescriptor
 3 | serviceConfig:
 4 |   id: pipelines.xchem.sanify.enumerate.v1
 5 |   name: MoleculeEnumerator
 6 |   description: Enumerate stereoisomers and tautomers
 7 |   tags:
 8 |   - rdkit
 9 |   - xchem
10 |   - docker
11 |   - sanify
12 |   - reaction
13 |   - molvs
14 |   - enumerate
15 |   - stereoisomer
16 |   - tautomer
17 |   resourceUrl:
18 |   icon: icons/molecule_generator.png
19 |   inputDescriptors:
20 |   - primaryType: org.squonk.dataset.Dataset
21 |     secondaryType: org.squonk.types.MoleculeObject
22 |     mediaType: application/x-squonk-dataset-molecule+json
23 |     name: input
24 |   outputDescriptors:
25 |   - primaryType: org.squonk.dataset.Dataset
26 |     secondaryType: org.squonk.types.MoleculeObject
27 |     mediaType: application/x-squonk-dataset-molecule+json
28 |     name: output
29 |   optionDescriptors:
30 |   - modes:
31 |     - User
32 |     editable: true
33 |     "@class": org.squonk.options.OptionDescriptor
34 |     typeDescriptor:
35 |       type: java.lang.Boolean
36 |       "@class": org.squonk.options.SimpleTypeDescriptor
37 |     key: arg.tautomers
38 |     label: Enumerate tautomers
39 |     description: Enumerate tautomers
40 |     defaultValue: true
41 |     visible: true
42 |   - modes:
43 |     - User
44 |     editable: true
45 |     "@class": org.squonk.options.OptionDescriptor
46 |     typeDescriptor:
47 |       type: java.lang.Boolean
48 |       "@class": org.squonk.options.SimpleTypeDescriptor
49 |     key: arg.stereoisomers
50 |     label: Enumerate stereoisomers
51 |     description: Enumerate stereoisomers
52 |     defaultValue: true
53 |     visible: true
54 |   - modes:
55 |     - User
56 |     editable: true
57 |     "@class": org.squonk.options.OptionDescriptor
58 |     typeDescriptor:
59 |       type: java.lang.String
60 |       "@class": org.squonk.options.SimpleTypeDescriptor
61 |     key: arg.molFormat
62 |     label: Molecule format
63 |     description: Output format for molecules
64 |     values:
65 |     - smiles
66 |     - mol_2d
67 |     - mol_3d
68 |     defaultValue: smiles
69 |     minValues: 1
70 |     maxValues: 1
71 |     visible: true
72 |   executorClassName: org.squonk.execution.steps.impl.DefaultDockerExecutorStep
73 | inputRoutes:
74 | - route: FILE
75 | outputRoutes:
76 | - route: FILE
77 | imageName: informaticsmatters/rdkit_pipelines
78 | command: >-
79 |   python -m pipelines.rdkit.sanifier -i ${PIN}input.data.gz -if json -o ${POUT}output -of json -mf $molFormat
80 |   ${tautomers ? '-et' : ''}
81 |   ${stereoisomers ? '-es' : ''}
82 |   --meta


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/sanifier_standardiser_flatkinson.dsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | "@class": org.squonk.core.DockerServiceDescriptor
 3 | serviceConfig:
 4 |   id: pipelines.xchem.sanify.standardise.flatkinson.v1
 5 |   name: FlatkinsonStandardiser
 6 |   description: Molecule standardiser from Francis Atkinson
 7 |   tags:
 8 |   - rdkit
 9 |   - xchem
10 |   - docker
11 |   - sanify
12 |   - reaction
13 |   - flatkinson
14 |   - standardiser
15 |   - standardizer
16 |   resourceUrl:
17 |   icon: icons/transform_molecule.png
18 |   inputDescriptors:
19 |   - primaryType: org.squonk.dataset.Dataset
20 |     secondaryType: org.squonk.types.MoleculeObject
21 |     mediaType: application/x-squonk-dataset-molecule+json
22 |     name: input
23 |   outputDescriptors:
24 |   - primaryType: org.squonk.dataset.Dataset
25 |     secondaryType: org.squonk.types.MoleculeObject
26 |     mediaType: application/x-squonk-dataset-molecule+json
27 |     name: output
28 |   optionDescriptors:
29 |   - modes:
30 |     - User
31 |     editable: true
32 |     "@class": org.squonk.options.OptionDescriptor
33 |     typeDescriptor:
34 |       type: java.lang.String
35 |       "@class": org.squonk.options.SimpleTypeDescriptor
36 |     key: dockerImageVersion
37 |     label: RDKit version
38 |     description: Version of the RDKit Docker image to execute
39 |     values:
40 |     - latest
41 |     - Release_2017_03_1
42 |     - Release_2016_09_2
43 |     - Release_2016_03_1
44 |     - Release_2015_09_2
45 |     - Release_2015_09_1
46 |     defaultValue: latest
47 |     visible: true
48 |   executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep
49 | thinDescriptors:
50 | - input: input
51 |   preserve: false
52 | inputRoutes:
53 | - route: FILE
54 | outputRoutes:
55 | - route: FILE
56 | imageName: informaticsmatters/rdkit_pipelines
57 | command: >-
58 |   python -m pipelines.rdkit.sanifier -i ${PIN}input.data.gz -if json -o ${POUT}output -of json -st -stm flatkinson --meta
59 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/sanifier_standardiser_molvs.dsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | "@class": org.squonk.core.DockerServiceDescriptor
 3 | serviceConfig:
 4 |   id: pipelines.xchem.sanify.standardise.molvs.v1
 5 |   name: MolVSStandardiser
 6 |   description: Molecule standardiser using MolVS
 7 |   tags:
 8 |   - rdkit
 9 |   - xchem
10 |   - docker
11 |   - sanify
12 |   - reaction
13 |   - molvs
14 |   - standardiser
15 |   - standardizer
16 |   resourceUrl:
17 |   icon: icons/transform_molecule.png
18 |   inputDescriptors:
19 |   - primaryType: org.squonk.dataset.Dataset
20 |     secondaryType: org.squonk.types.MoleculeObject
21 |     mediaType: application/x-squonk-dataset-molecule+json
22 |     name: input
23 |   outputDescriptors:
24 |   - primaryType: org.squonk.dataset.Dataset
25 |     secondaryType: org.squonk.types.MoleculeObject
26 |     mediaType: application/x-squonk-dataset-molecule+json
27 |     name: output
28 |   optionDescriptors:
29 |   - modes:
30 |     - User
31 |     editable: true
32 |     "@class": org.squonk.options.OptionDescriptor
33 |     typeDescriptor:
34 |       type: java.lang.String
35 |       "@class": org.squonk.options.SimpleTypeDescriptor
36 |     key: dockerImageVersion
37 |     label: RDKit version
38 |     description: Version of the RDKit Docker image to execute
39 |     values:
40 |     - latest
41 |     - Release_2017_03_1
42 |     - Release_2016_09_2
43 |     - Release_2016_03_1
44 |     - Release_2015_09_2
45 |     - Release_2015_09_1
46 |     defaultValue: latest
47 |     visible: true
48 |   executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep
49 | thinDescriptors:
50 | - input: input
51 |   preserve: false
52 | inputRoutes:
53 | - route: FILE
54 | outputRoutes:
55 | - route: FILE
56 | imageName: informaticsmatters/rdkit_pipelines
57 | command: >-
58 |   python -m pipelines.rdkit.sanifier -i ${PIN}input.data.gz -if json -o ${POUT}output -of json -st -stm molvs --meta
59 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/sanify_utils.py:
--------------------------------------------------------------------------------
 1 | from rdkit import Chem
 2 | from copy import copy
 3 | 
 4 | from pipelines_utils import utils
 5 | 
 6 | from molvs import enumerate_tautomers_smiles,canonicalize_tautomer_smiles,Standardizer
 7 | from molvs.charge import Uncharger,Reionizer
 8 | from standardiser import standardise
 9 | 
10 | standardizer = Standardizer()
11 | 
12 | def _spam(n):
13 |     out=[]
14 |     for perm in _getPerms(n):
15 |         elem = [ int(i) for i in list(perm) ]
16 |         out.append(elem)
17 |     return out
18 | 
19 | def _getPerms(n):
20 |     from itertools import permutations
21 |     for i in _getCandidates(n):
22 |         for perm in set(permutations(i)):
23 |             yield ''.join(perm)
24 | 
25 | def _getCandidates(n):
26 |     for i in range(0, n+1):
27 |         res = "1" * i + "0" * (n - i)
28 |         yield res
29 | 
def enumerateTautomers(mol):
    """
    Enumerate the tautomers of a molecule via its isomeric SMILES
    :param mol: the input molecule
    :return: a list holding one molecule parsed from each enumerated tautomer SMILES
    """
    ##TODO Append Parent molecule name
    parent_smiles = Chem.MolToSmiles(mol, isomericSmiles=True)
    tautomers = []
    for taut_smiles in enumerate_tautomers_smiles(parent_smiles):
        tautomers.append(Chem.MolFromSmiles(taut_smiles))
    return tautomers
40 | 
def getCanonTautomer(mol):
    """
    Get the canonical tautomer of a molecule via its isomeric SMILES
    :param mol: the input molecule
    :return: a single molecule parsed from the canonical tautomer SMILES
    """
    canonical_smiles = canonicalize_tautomer_smiles(
        Chem.MolToSmiles(mol, isomericSmiles=True)
    )
    return Chem.MolFromSmiles(canonical_smiles)
50 | 
51 | 
def enumerateStereoIsomers(mol):
    """
    Enumerate stereoisomers by applying every CW/CCW combination to the chiral centres
    :param mol: the input molecule; it is no longer modified by this function
    :return: a list with one copied molecule per combination of chiral tags, or
             ``[mol]`` (the input object itself) when no chiral centre is found
    """
    chiralCentres = Chem.FindMolChiralCenters(mol, includeUnassigned=True)
    #return the molecule object when no chiral centres were identified
    if not chiralCentres:
        return [mol]

    # Flag value -> chiral tag, matching the original 0 => CW / 1 => CCW mapping
    tagForFlag = {
        0: Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW,
        1: Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW,
    }

    out = []
    #All bit permutations with number of bits equals number of chiralCentres
    for element in _spam(len(chiralCentres)):
        # Copy first and tag the copy, so the caller's molecule is not left
        # carrying the tags of whichever combination was generated last.
        outmol = copy(mol)
        for centre, flag in zip(chiralCentres, element):
            atomId = centre[0]
            outmol.GetAtomWithIdx(atomId).SetChiralTag(tagForFlag[flag])
        utils.log("Enumerated ", Chem.MolToSmiles(outmol, isomericSmiles=True))
        out.append(outmol)
    return out
73 | 
74 | 
def molVsStandardizer(mol):
    """
    Standardise a molecule with the module-level MolVS Standardizer instance
    :param mol: the input molecule
    :return: whatever ``standardizer.standardize`` returns for the molecule
    """
    standardised = standardizer.standardize(mol)
    return standardised
77 | 
def flatkinsonStandardizer(mol):
    """
    Standardise a molecule with the ``standardiser`` package
    :param mol: the input molecule
    :return: whatever ``standardise.run`` returns for the molecule
    """
    standardised = standardise.run(mol)
    return standardised
80 | 
# Lookup table from standardisation method name to the wrapper function implementing it.
STANDARD_MOL_METHODS = {"molvs": molVsStandardizer, "flatkinson": flatkinsonStandardizer}
82 | 
def getNeutralMolecule(mol):
    """
    Run a freshly created MolVS Uncharger over a molecule
    :param mol: the input molecule
    :return: whatever ``Uncharger.uncharge`` returns for the molecule
    """
    return Uncharger().uncharge(mol)
86 | 
def getReionisedMolecule(mol):
    """
    Run a freshly created MolVS Reionizer over a molecule
    :param mol: the input molecule
    :return: whatever ``Reionizer.reionize`` returns for the molecule
    """
    return Reionizer().reionize(mol)


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/screen_multi.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // Testing screen_multi.py reading target from sdf file,
 8 |     // query as json file and writing to STDOUT
 9 |     test_multi = [
10 | 
11 |         command: '''gunzip -c ${PIN}dhfr_3d.sdf.gz |
12 |                  python -m pipelines.rdkit.screen_multi
13 |                  -if sdf --qjson ${PIN}nci100.data.gz --simmin 0.55''',
14 | 
15 |         stderr: [ 'Found [1-9]\\d* similar molecules' ],
16 | 
17 |     ],
18 | 
19 |     // Testing screen_multi.py reading target from sdf file,
20 |     // query as json file and writing to STDOUT
21 |     test_multi_from_file = [
22 | 
23 |         command: '''gunzip -c ${PIN}dhfr_3d.sdf.gz |
24 |                  python -m pipelines.rdkit.screen_multi
25 |                  -i ${PIN}dhfr_3d.sdf.gz -if sdf
26 |                  --qjson ${PIN}nci100.data.gz --simmin 0.55''',
27 | 
28 |         stderr: [ 'Found [1-9]\\d* similar molecules' ],
29 | 
30 |     ],
31 | 
32 |     // Testing screen_multi.py reading target from sdf file,
33 |     // query as json file and writing to file
34 |     test_multi_from_file_to_sdf = [
35 | 
36 |         command: '''gunzip -c ${PIN}dhfr_3d.sdf.gz |
37 |                  python -m pipelines.rdkit.screen_multi
38 |                  -i ${PIN}dhfr_3d.sdf.gz -if sdf
39 |                  --qjson ${PIN}nci100.data.gz --simmin 0.55
40 |                  -o ${POUT}output -of sdf''',
41 | 
42 |         stderr: [ 'Found [1-9]\\d* similar molecules' ],
43 | 
44 |         creates: [ 'output.sdf.gz' ],
45 |     ],
46 | 
47 |     // Testing screen_multi.py reading target from sdf file,
48 |     // query as json file and writing to file
49 |     test_multi_from_file_to_json = [
50 | 
51 |         command: '''gunzip -c ${PIN}dhfr_3d.sdf.gz |
52 |                  python -m pipelines.rdkit.screen_multi
53 |                  -i ${PIN}dhfr_3d.sdf.gz -if sdf
54 |                  --qjson ${PIN}nci100.data.gz --simmin 0.55
55 |                  -o ${POUT}output -of json''',
56 | 
57 |         stderr: [ 'Found [1-9]\\d* similar molecules' ],
58 | 
59 |         creates: [ 'output.data.gz',
60 |                    'output.metadata' ],
61 |     ],
62 | 
63 | ]
64 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/standardize.dsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | "@class": org.squonk.core.DockerServiceDescriptor
 3 | serviceConfig:
 4 |   id: pipelines.rdkit.standardizer.v1
 5 |   name: RDKitStandardizer
 6 |   description: Standardize molecules
 7 |   tags:
 8 |   - rdkit
 9 |   - docker
10 |   - standardise
11 |   - standardize
12 |   resourceUrl:
13 |   icon: icons/molecule_generator.png
14 |   inputDescriptors:
15 |   - primaryType: org.squonk.dataset.Dataset
16 |     secondaryType: org.squonk.types.MoleculeObject
17 |     mediaType: application/x-squonk-dataset-molecule+json
18 |     name: input
19 |   outputDescriptors:
20 |   - primaryType: org.squonk.dataset.Dataset
21 |     secondaryType: org.squonk.types.MoleculeObject
22 |     mediaType: application/x-squonk-dataset-molecule+json
23 |     name: output
24 |   optionDescriptors:
25 |   - modes:
26 |     - User
27 |     editable: true
28 |     "@class": org.squonk.options.OptionDescriptor
29 |     typeDescriptor:
30 |       type: java.lang.String
31 |       "@class": org.squonk.options.SimpleTypeDescriptor
32 |     key: arg.fragment_method
33 |     label: Fragment method
34 |     description: Approach to use for picking biggest molecular fragment
35 |     values:
36 |     - hac
37 |     - mw
38 |     defaultValue: hac
39 |     visible: true
40 |   - modes:
41 |     - User
42 |     editable: true
43 |     "@class": org.squonk.options.OptionDescriptor
44 |     typeDescriptor:
45 |       type: java.lang.Boolean
46 |       "@class": org.squonk.options.SimpleTypeDescriptor
47 |     key: arg.neutralize
48 |     label: Neutralize molecules
49 |     description: Convert charged groups to neutral form where possible
50 |     defaultValue: true
51 |     visible: true
52 |   executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep
53 | thinDescriptors:
54 | - input: input
55 | inputRoutes:
56 | - route: FILE
57 | outputRoutes:
58 | - route: FILE
59 | imageName: informaticsmatters/rdkit_pipelines
60 | command: >-
61 |   python -m pipelines.rdkit.standardize -i ${PIN}input.data.gz -if json -o ${POUT}output -of json
62 |   --fragment-method $fragment_method
63 |   ${neutralize ? '--neutralize' : ''}
64 |   --meta


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/standardize.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # Copyright 2018 Informatics Matters Ltd.
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | # http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | import argparse
 18 | 
 19 | from rdkit import DataStructs, rdBase
 20 | from rdkit.Chem.MolStandardize import rdMolStandardize
 21 | 
 22 | from pipelines_utils import parameter_utils, utils
 23 | from pipelines_utils_rdkit import rdkit_utils, mol_utils
 24 | 
 25 | 
 26 | ### functions #########################################
 27 | # Module-level Uncharger instance; standardize() calls its uncharge() method when neutralizing.
 28 | #lfc = rdMolStandardize.LargestFragmentChooser()
 29 | uncharger = rdMolStandardize.Uncharger()
 30 | 
 31 | 
 32 | def standardize(mol, neutralize, fragment):
 33 |     """
 34 | 
 35 |     :param mol: The molecule to standardize
 36 |     :param neutralize: Boolean for whether to neutralize the molecule
 37 |     :param fragment: The approach for choosing the largest fragment. Either 'hac' or 'mw'. If not specified the whole
 38 |     molecule is used.
 39 |     :return: The standardized molecule
 40 |     """
 41 |     mol = rdMolStandardize.Cleanup(mol)
 42 |     #mol = lfc.choose(mol)
 43 |     # We use our own largest fragment picker as the RDKit one behaves slightly differently
 44 |     if fragment:
 45 |         mol = mol_utils.fragment(mol, fragment)
 46 |     if neutralize:
 47 |         mol = uncharger.uncharge(mol)
 48 |     return mol
 49 | 
 50 | 
 51 | ### start main execution #########################################
 52 | 
 53 | def main():
 54 |     """Command-line entry point: parse arguments, standardize every readable molecule and write the results."""
 55 |     ### command line args definitions #########################################
 56 | 
 57 |     parser = argparse.ArgumentParser(description='RDKit Standardize')
 58 |     parser.add_argument('--fragment-method', choices=['hac', 'mw'], help='Approach to find biggest fragment if more than one (hac = biggest by heavy atom count, mw = biggest by mol weight)')
 59 |     parser.add_argument('--neutralize', action='store_true', help='Neutralize the molecule')
 60 | 
 61 |     parameter_utils.add_default_io_args(parser)
 62 |     parser.add_argument('-q', '--quiet', action='store_true', help='Quiet mode')
 63 |     parser.add_argument('--thin', action='store_true', help='Thin output mode')
 64 |     # NOTE(review): --quiet and --thin are accepted but never read below (thinOutput is fixed to False) - confirm intent
 65 |     args = parser.parse_args()
 66 |     utils.log("Standardize Args: ", args)
 67 | 
 68 |     # handle metadata
 69 |     source = "standardize.py"
 70 |     datasetMetaProps = {"source":source, "description": "Standardize using RDKit " + rdBase.rdkitVersion}
 71 |     clsMappings = {}
 72 |     fieldMetaProps = []
 73 | 
 74 |     # NOTE(review): args.input/informat/output/outformat/meta are presumably defined by add_default_io_args - confirm
 75 |     input,output,suppl,writer,output_base = rdkit_utils.\
 76 |         default_open_input_output(args.input, args.informat, args.output,
 77 |                                   'standardize', args.outformat,
 78 |                                   thinOutput=False, valueClassMappings=clsMappings,
 79 |                                   datasetMetaProps=datasetMetaProps,
 80 |                                   fieldMetaProps=fieldMetaProps)
 81 |     count = 0  # records read from suppl
 82 |     total = 0  # molecules standardized and written
 83 |     errors = 0  # records the supplier returned as None
 84 |     for mol in suppl:
 85 |         count += 1
 86 |         if mol is None:
 87 |             errors += 1
 88 |             continue
 89 |         m = standardize(mol, args.neutralize, args.fragment_method)
 90 |         writer.write(m)
 91 |         total += 1
 92 | 
 93 |     input.close()
 94 |     writer.flush()
 95 |     writer.close()
 96 |     output.close()
 97 |     # Metrics are only written when args.meta is set.
 98 |     if args.meta:
 99 |         utils.write_metrics(output_base, {'__InputCount__':count, '__OutputCount__':total, '__ErrorCount__':errors, 'RDKitStandardize':total})
100 | 
101 | if __name__ == "__main__":
102 |     main()
103 | 
104 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/standardize.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // A basic start-up test for the module.
 8 |     // Simply makes sure it starts cleanly.
 9 |     //
10 |     test_help = [
11 | 
12 |         command: '''python -m pipelines.rdkit.standardize -h''',
13 | 
14 |         stdout: [ 'usage: standardize.py' ]
15 | 
16 |     ],
17 | 
18 | ]
19 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/sucos-max.dsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | "@class": org.squonk.core.DockerServiceDescriptor
 3 | serviceConfig:
 4 |   # Was pipelines.rdkit.sucos.basic, the id that sucos.dsd.yml already uses.
 5 |   id: pipelines.rdkit.sucos.max
 6 |   name: RDKitSuCOSMax
 7 |   description: Generate 3D overlays using SuCOSMax in RDKit
 8 |   tags:
 9 |   - rdkit
10 |   - alignment
11 |   - sucos
12 |   - 3d
13 |   - docker
14 |   resourceUrl:
15 |   icon: icons/filter_molecules.png
16 |   inputDescriptors:
17 |   - primaryType: org.squonk.dataset.Dataset
18 |     secondaryType: org.squonk.types.MoleculeObject
19 |     mediaType: application/x-squonk-dataset-molecule+json
20 |     name: input
21 |   - primaryType: org.squonk.dataset.Dataset
22 |     secondaryType: org.squonk.types.MoleculeObject
23 |     mediaType: application/x-squonk-dataset-molecule+json
24 |     name: target
25 |   outputDescriptors:
26 |   - primaryType: org.squonk.dataset.Dataset
27 |     secondaryType: org.squonk.types.MoleculeObject
28 |     mediaType: application/x-squonk-dataset-molecule+json
29 |     name: output
30 |   optionDescriptors:
31 |   - modes:
32 |     - User
33 |     editable: true
34 |     visible: true
35 |     description: Target molecule index (default is the first)
36 |     label: Target mol index
37 |     key: arg.targetidx
38 |     minValues: 0
39 |     maxValues: 1
40 |     typeDescriptor:
41 |       type: java.lang.Integer
42 |       "@class": org.squonk.options.SimpleTypeDescriptor
43 |     "@class": org.squonk.options.OptionDescriptor
44 |   - modes:
45 |     - User
46 |     editable: true
47 |     visible: true
48 |     description: Name field in targets
49 |     label: Target name field
50 |     key: arg.name
51 |     minValues: 0
52 |     maxValues: 1
53 |     typeDescriptor:
54 |       type: java.lang.String
55 |       "@class": org.squonk.options.SimpleTypeDescriptor
56 |     "@class": org.squonk.options.OptionDescriptor
57 |   executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep
58 | thinDescriptors:
59 | - input: input
60 | inputRoutes:
61 | - route: FILE
62 | - route: FILE
63 | outputRoutes:
64 | - route: FILE
65 | imageName: informaticsmatters/rdkit_pipelines
66 | # Module and target option follow sucos-max.test; the command used to run pipelines.rdkit.sucos
67 | # with --target-molecule, i.e. the plain SuCOS module.
68 | # NOTE(review): --target-index and --name-field are not covered by that test - confirm sucos_max accepts them.
69 | command: >-
70 |   python -m pipelines.rdkit.sucos_max --target-molecules ${PIN}target.data.gz -i ${PIN}input.data.gz -o ${POUT}output -of json
71 |   ${binding.variables.containsKey('targetidx') ? '--target-index ' + targetidx : ''}
72 |   ${binding.variables.containsKey('name') ? '--name-field ' + name : ''}
73 |   --meta


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/sucos-max.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // Testing sucos_max.py reading from file and writing to file
 8 |     test_sucos_max = [
 9 | 
10 |         command: '''python -m pipelines.rdkit.sucos_max
11 |                  --target-molecules ${PIN}/sucos/hits.sdf
12 |                  -i ${PIN}/sucos/poses.sdf -if sdf
13 |                  -o ${POUT}output -of sdf''',
14 | 
15 |         stderr: [ 'Completed 305 comparisons' ],
16 | 
17 |         creates: [ 'output.sdf.gz' ]
18 | 
19 |     ]
20 | ]
21 | 


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/sucos.dsd.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | "@class": org.squonk.core.DockerServiceDescriptor
 3 | serviceConfig:
 4 |   id: pipelines.rdkit.sucos.basic
 5 |   name: RDKitSuCOS
 6 |   description: Generate 3D overlay using SuCOS in RDKit
 7 |   tags:
 8 |   - rdkit
 9 |   - alignment
10 |   - sucos
11 |   - 3d
12 |   - docker
13 |   resourceUrl:
14 |   icon: icons/filter_molecules.png
15 |   inputDescriptors:
16 |   - primaryType: org.squonk.dataset.Dataset
17 |     secondaryType: org.squonk.types.MoleculeObject
18 |     mediaType: application/x-squonk-dataset-molecule+json
19 |     name: input
20 |   - primaryType: org.squonk.dataset.Dataset
21 |     secondaryType: org.squonk.types.MoleculeObject
22 |     mediaType: application/x-squonk-dataset-molecule+json
23 |     name: target
24 |   outputDescriptors:
25 |   - primaryType: org.squonk.dataset.Dataset
26 |     secondaryType: org.squonk.types.MoleculeObject
27 |     mediaType: application/x-squonk-dataset-molecule+json
28 |     name: output
29 |   optionDescriptors:
30 |   - modes:
31 |     - User
32 |     editable: true
33 |     visible: true
34 |     description: Target molecule index (default is the first)
35 |     label: Target mol index
36 |     key: arg.targetidx
37 |     minValues: 0
38 |     maxValues: 1
39 |     typeDescriptor:
40 |       type: java.lang.Integer
41 |       "@class": org.squonk.options.SimpleTypeDescriptor
42 |     "@class": org.squonk.options.OptionDescriptor
43 |   executorClassName: org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep
44 | thinDescriptors:
45 | - input: input
46 | inputRoutes:
47 | - route: FILE
48 | - route: FILE
49 | outputRoutes:
50 | - route: FILE
51 | imageName: informaticsmatters/rdkit_pipelines
52 | # Option names follow sucos.test (--target / --targetidx); the command used --target-molecule and
53 | # --target-index, which that test spec never passes.
54 | # NOTE(review): confirm against the argparse definitions in sucos.py.
55 | command: >-
56 |   python -m pipelines.rdkit.sucos --target ${PIN}target.data.gz -i ${PIN}input.data.gz -o ${POUT}output -of json
57 |   ${binding.variables.containsKey('targetidx') ? '--targetidx ' + targetidx : ''}
58 |   --meta


--------------------------------------------------------------------------------
/src/python/pipelines/rdkit/sucos.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // Testing sucos.py reading from file and writing to file
 8 |     test_raw_sucos_sdf_to_mol = [
 9 | 
10 |         command: '''python -m pipelines.rdkit.sucos
11 |                  --target ${PIN}/sucos/4e3g_lig.mol
12 |                  -i ${PIN}/sucos/benzene.sdf -if sdf
13 |                  -o ${POUT}output -of sdf''',
14 | 
15 |         stderr: [ 'Scores: 0.8' ],
16 | 
17 |         creates: [ 'output.sdf.gz' ],
18 | 
19 |     ],
20 | 
21 |     test_raw_sucos_sdf_pick_target_1 = [
22 | 
23 |         command: '''python -m pipelines.rdkit.sucos
24 |             --target ${PIN}/sucos/mols.sdf
25 |             --targetidx 1
26 |             -i ${PIN}/sucos/mols.sdf -if sdf
27 |             -o ${POUT}output -of sdf''',
28 | 
29 |         stderr: [ 'Scores: 0.8' ],
30 | 
31 |         creates: [ 'output.sdf.gz' ],
32 | 
33 |     ],
34 | 
35 |     test_raw_sucos_sdf_pick_target_6 = [
36 | 
37 |             command: '''python -m pipelines.rdkit.sucos
38 |                      --target ${PIN}/sucos/mols.sdf
39 |                      --targetidx 6
40 |                      -i ${PIN}/sucos/mols.sdf -if sdf
41 |                      -o ${POUT}output -of sdf''',
42 | 
43 |             stderr: [ 'Scores: 1.0 1.0 1.0' ],
44 | 
45 |             creates: [ 'output.sdf.gz' ],
46 | 
47 |     ]
48 | 
49 | ]
50 | 


--------------------------------------------------------------------------------
/src/python/pipelines/xchem/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InformaticsMatters/pipelines/b0830631bc77745ee5c71df2ea2c624124594802/src/python/pipelines/xchem/__init__.py


--------------------------------------------------------------------------------
/src/python/pipelines/xchem/build_oddt_models.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Copyright 2020 Informatics Matters Ltd.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | """
18 | Run this to generate the RFScore and NNScore models.
19 | The files RFScore_v1_pdbbind2016.pickle, RFScore_v2_pdbbind2016.pickle, RFScore_v3_pdbbind2016.pickle and
20 | NNScore_pdbbind2016.pickle are generated.
21 | If you want them to be re-generated they must first be deleted.
22 | """
23 | 
24 | 
25 | from oddt.virtualscreening import virtualscreening as vs
26 | 
27 | ligands = '../../data/mpro/hits-17.sdf.gz'
28 | protein = '../../data/mpro/Mpro-x0387_0.pdb'
29 | pipeline=vs()
30 | print('Loading')
31 | pipeline.load_ligands('sdf', ligands)
32 | print('Scoring with rfscore')
33 | pipeline.score(function='rfscore_v1', protein=protein)
34 | pipeline.score(function='rfscore_v2', protein=protein)
35 | pipeline.score(function='rfscore_v3', protein=protein)
36 | print('Scoring with nnscore')
37 | pipeline.score(function='nnscore', protein=protein)
38 | # print('Scoring with plecscore')
39 | # pipeline.score(function='pleclinear', protein=protein)
40 | # pipeline.score(function='plecnn', protein=protein)
41 | # pipeline.score(function='plecrf', protein=protein)
42 | print('Writing')
43 | pipeline.write('sdf', 'scored.sdf')
44 | print('Done')


--------------------------------------------------------------------------------
/src/python/pipelines/xchem/featurestein_generate.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // Testing featurestein_generate.py
 8 |     // reading molecules from a file and writing the feature map pickle to a file
 9 |     test_featurestein_generate = [
10 | 
11 |         command: '''python -m pipelines.xchem.featurestein_generate
12 |                  -i ${PIN}/mpro/hits-17.sdf.gz
13 |                  -f ${POUT}featurestein.p''',
14 | 
15 |         stderr: [ 'Wrote merged feature map with 69 features as pickle to' ],
16 | 
17 |         creates: [ 'featurestein.p' ]
18 |     ]
19 | 
20 | ]


--------------------------------------------------------------------------------
/src/python/pipelines/xchem/featurestein_generate_and_score.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // Testing featurestein_generate_and_score.py
 8 |     // reading from files and writing to a file
 9 |     test_featurestein_generate_and_score = [
10 | 
11 |         command: '''python -m pipelines.xchem.featurestein_generate_and_score
12 |                  -f ${PIN}/mpro/hits-17.sdf.gz
13 |                  -i ${PIN}/mpro/poses.sdf.gz
14 |                  -o ${POUT}FSOUT''',
15 | 
16 |         stderr: [ 'Scored 14 molecules. 0 errors.' ],
17 | 
18 |         creates: [ 'FSOUT.sdf.gz' ]
19 |     ]
20 | 
21 | ]


--------------------------------------------------------------------------------
/src/python/pipelines/xchem/featurestein_score.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // Testing featurestein_score.py
 8 |     // reading from files and writing to a file
 9 |     test_featurestein_score = [
10 | 
11 |         command: '''python -m pipelines.xchem.featurestein_score
12 |                  -i ${PIN}/mpro/poses.sdf.gz
13 |                  -f ${PIN}/mpro/featurestein.p
14 |                  -o ${POUT}fstein''',
15 | 
16 |         stderr: [ 'FeatureMap has 69 features' ],
17 | 
18 |         creates: [ 'fstein.sdf.gz' ]
19 |     ]
20 | 
21 | ]


--------------------------------------------------------------------------------
/src/python/pipelines/xchem/rdkit_align.py:
--------------------------------------------------------------------------------
 1 | from rdkit import Chem
 2 | from rdkit.Chem import AllChem
 3 | 
 4 | def find_calphas(protein):
 5 |     calphas = {}
 6 |     for atom in protein.GetAtoms():
 7 |         resinfo = atom.GetPDBResidueInfo()
 8 |         moninfo = atom.GetMonomerInfo()
 9 |         resnum = resinfo.GetResidueNumber()
10 |         atomname = moninfo.GetName().strip()
11 |         if 'CA' == atomname:
12 |             calphas[resnum] = atom.GetIdx()
13 |     return calphas
14 | 
15 | def align_calphas(probe, reference):
16 | 
17 |     ref_calphas = find_calphas(reference)
18 |     print('Found', len(ref_calphas), 'CAs')
19 |     prb_calphas = find_calphas(probe)
20 |     print('Found', len(prb_calphas), 'CAs')
21 |     atom_map = []
22 |     for resnum, idx in prb_calphas.items():
23 |         if resnum in ref_calphas:
24 |             atom_map.append((idx, ref_calphas[resnum]))
25 |         else:
26 |             print('WARNING: residue', resnum, 'not found in reference')
27 | 
28 |     print('Mapped', len(atom_map), 'atoms')
29 |     rmsd = AllChem.AlignMol(probe, reference, atomMap=atom_map)
30 | 
31 |     print('RMSD:', rmsd)
32 | 
33 | def extract_ligand(protein, resname):
34 |     mol = Chem.RWMol(protein)
35 |     atoms_to_delete = []
36 |     for atom in mol.GetAtoms():
37 |         resinfo = atom.GetPDBResidueInfo()
38 |         if resinfo.GetResidueName().strip() != resname:
39 |             atoms_to_delete.append(atom.GetIdx())
40 |     print('Deleting', len(atoms_to_delete), 'atoms')
41 |     for idx in reversed(atoms_to_delete):
42 |         mol.RemoveAtom(idx)
43 |     return mol
44 | 
45 | def main():
46 |     reference = Chem.MolFromPDBFile('hits23_complex_init_0.pdb')
47 |     probe = Chem.MolFromPDBFile('hits23_complex_mini_0.pdb')
48 |     align_calphas(probe, reference)
49 |     Chem.MolToPDBFile(probe, 'hits23_complex_algn_0.pdb')
50 | 
51 | if __name__ == "__main__":
52 |     main()
53 | 


--------------------------------------------------------------------------------
/src/python/pipelines/xchem/split_fragnet_candidates.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # Copyright 2020 Informatics Matters Ltd.
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | # http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | import argparse, os, sys, json, traceback
 18 | from pipelines_utils import utils
 19 | from pipelines_utils import utils
 20 | 
 21 | def gen_filename(id, generate_filenames):
 22 |     if generate_filenames:
 23 |         return str(count)
 24 |     else:
 25 |         return id
 26 | 
 27 | def execute(candidates_json, generate_filenames):
 28 | 
 29 |     with open(candidates_json, 'r') as f:
 30 |         candidates = json.load(f)
 31 |         queries = candidates['queries']['molecules']
 32 |         results = candidates['results']
 33 |         hitCounts = candidates['hitCounts']
 34 |         utils.log('Processing', len(queries), 'queries and', len(results), 'results')
 35 | 
 36 |         num_mols = 0
 37 |         num_hits = 0
 38 | 
 39 |         count = 0
 40 |         ids2Filenames = {}
 41 |         for query in queries:
 42 |             id = query['id']
 43 |             if id in hitCounts:
 44 |                 molfile = query['originalMol']
 45 |                 if generate_filenames:
 46 |                     fname = str(count).zfil(3)
 47 |                 else:
 48 |                     fname = id
 49 |                 utils.log('Using file name of', fname)
 50 | 
 51 |                 with open(fname + '.mol', 'w') as f:
 52 |                     f.write(molfile)
 53 |                     num_hits += 1
 54 |                 ids2Filenames[id] = fname
 55 |                 count += 1
 56 | 
 57 |         writers = {}
 58 | 
 59 |         for result in results:
 60 | 
 61 |             num_mols += 1
 62 | 
 63 |             for id in result['sourceMols']:
 64 | 
 65 |                 if id in writers:
 66 |                     writer =  writers[id]
 67 |                 else:
 68 |                     fname = ids2Filenames[id]
 69 |                     writer = open(fname + '.smi', 'w')
 70 |                     writers[id] = writer
 71 | 
 72 |                 smiles = result['smiles']
 73 |                 #utils.log('Processing', smiles)
 74 | 
 75 |                 writer.write(smiles + '\n')
 76 | 
 77 |         for w in writers.values():
 78 |             w.close()
 79 | 
 80 |         utils.log('Totals - hits:', num_hits, 'outputs:', num_mols)
 81 | 
 82 | def main():
 83 |     """
 84 |     Example usage:
 85 |     python -m pipelines.xchem.split-fragnet-candidates -i ../../data/mpro/expanded-17.json
 86 | 
 87 |     :return:
 88 |     """
 89 | 
 90 |     parser = argparse.ArgumentParser(description='Split fragnet candidates - Split fragment network expansion into individual sets')
 91 | 
 92 |     parser.add_argument('-i', '--input', help='JSON containing the expanded candidates)')
 93 |     parser.add_argument('-g', '--generate-filenames', action='store_true', help='Use automatically generated file names instead of the title field)')
 94 | 
 95 |     args = parser.parse_args()
 96 |     utils.log("Split fragnet candidates args: ", args)
 97 | 
 98 |     infile = args.input
 99 | 
100 |     execute(infile, args.generate_filenames)
101 | 
102 | if __name__ == "__main__":
103 |     main()
104 | 


--------------------------------------------------------------------------------
/src/python/pipelines/xchem/xcos.test:
--------------------------------------------------------------------------------
 1 | // Automated pipeline test specification.
 2 | 
 3 | [
 4 | 
 5 |     version = 1,
 6 | 
 7 |     // Testing xcos.py
 8 |     // reading from files and writing to a file
 9 |     test_xcos = [
10 | 
11 |         command: '''python -m pipelines.xchem.xcos
12 |                  -i ${PIN}/mpro/poses.sdf.gz
13 |                  -f ${PIN}/mpro/hits-17.sdf.gz
14 |                  -o ${POUT}xcos''',
15 | 
16 |         stderr: [ 'No output format specified - using sdf' ],
17 | 
18 |         creates: [ 'xcos.sdf.gz' ]
19 |     ]
20 | 
21 | ]


--------------------------------------------------------------------------------
/src/python/simple_inters.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import oddt
 3 | from oddt import interactions
 4 | 
 5 | 
 6 | if len(sys.argv) != 3:
 7 |     print("Usage: simple_inters.py protein.pdb ligand.mol")
 8 |     exit(1)
 9 | 
10 | def get_canonical_hbond(atom):
11 |     # print('classifying', atom['atomtype'], atom['isbackbone'], atom['isacceptor'], atom['isdonor'], atom['isdonorh'])
12 |     res = atom['resname'] + str(atom['resnum'])
13 |     if atom['isbackbone']:
14 |         if atom['atomtype'] == 'N.am' or atom['atomtype'] == 'N.3':
15 |             return res + 'BN'
16 |         elif atom['atomtype'] == 'O.2':
17 |             return res + 'BO'
18 |         else:
19 |             print('Unexpected H-bond atom', res, atom['atomtype'])
20 |     else:
21 |         return res + 'SC'
22 | 
23 | protein_pdbfile = sys.argv[1]
24 | ligand_molfile = sys.argv[2]
25 | # exact_ligand is passed as mol2_exact / cation_exact to some of the interaction searches below.
26 | exact_ligand = True
27 | 
28 | ligand = next(oddt.toolkit.readfile('sdf', ligand_molfile))  # NOTE(review): always read as 'sdf' although the usage text says ligand.mol - confirm
29 | protein = next(oddt.toolkit.readfile('pdb', protein_pdbfile))
30 | protein.protein = True
31 | 
32 | print('Protein:', protein_pdbfile)
33 | print('Ligand:' + ligand_molfile)
34 | print('Num protein/ligand atoms:', len(protein.atoms), len(ligand.atoms))
35 | print('Exact ligand =', exact_ligand)
36 | # Each section below runs one oddt interaction search, prints every hit and then the total.
37 | protein_atoms, ligand_atoms, strict = interactions.hbonds(protein, ligand, mol1_exact=False, mol2_exact=exact_ligand)
38 | count = 0
39 | for p, l, s in zip(protein_atoms, ligand_atoms, strict):
40 |     count += 1
41 |     print('  H-bond', get_canonical_hbond(p), '-', l['atomtype'], l['id'].item(), s)
42 | print('Found', count, 'H-bond interactions')
43 | 
44 | protein_atoms, ligand_atoms = interactions.salt_bridges(protein, ligand, mol2_exact=exact_ligand)
45 | count = 0
46 | for p, l in zip(protein_atoms, ligand_atoms):
47 |     count += 1
48 |     print('  SaltBr', p['resname'] + str(p['resnum']), '-', l['atomtype'], l['id'].item())
49 | print('Found', count, 'SaltBr interactions')
50 | 
51 | protein_atoms, ligand_atoms = oddt.interactions.hydrophobic_contacts(protein, ligand)
52 | count = 0
53 | for p, l in zip(protein_atoms, ligand_atoms):
54 |     count += 1
55 |     print('  Hphobe', p['resname'] + str(p['resnum']), '-', l['atomtype'], l['id'].item())
56 | print('Found', count, 'Hphobe interactions')
57 | 
58 | protein_atoms, ligand_atoms, strict_parallel, strict_perpendicular = oddt.interactions.pi_stacking(protein, ligand)
59 | count = 0
60 | for p, l, s1, s2 in zip(protein_atoms, ligand_atoms, strict_parallel, strict_perpendicular):
61 |     count += 1
62 |     print('  PiStack', p['resname'] + str(p['resnum']), '-', s1, s2)
63 | print('Found', count, 'pistack interactions')
64 | # Pi-cation is searched twice with the molecule order swapped; both passes share one count.
65 | count = 0
66 | rings, cation, strict = oddt.interactions.pi_cation(protein, ligand, cation_exact=exact_ligand)
67 | for ring, cat, s in zip(rings, cation, strict):
68 |     count += 1
69 |     print('  PiCation', ring['resname'] + str(ring['resnum']), 'protein-ligand -', s)
70 | rings, cation, strict = oddt.interactions.pi_cation(ligand, protein, cation_exact=False)
71 | for ring, cat, s in zip(rings, cation, strict):
72 |     count += 1
73 |     print('  PiCation', cat['resname'] + str(cat['resnum']), 'ligand-protein -', s)
74 | print('Found', count, 'pication interactions')
75 | 
76 | protein_atoms, ligand_atoms, strict = oddt.interactions.halogenbonds(protein, ligand)
77 | count = 0
78 | for p, l, s in zip(protein_atoms, ligand_atoms, strict):
79 |     count += 1
80 |     print('  Halogen', p['resname'] + str(p['resnum']), '-', l['atomtype'], l['id'].item(), s)
81 | print('Found', count, 'halogen interactions')
82 | 


--------------------------------------------------------------------------------
/test-nextflow.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Simple manual tests for Nextflow workflows
 3 | # Before running make sure you have the latest images by running `./gradlew buildDockerImages`
 4 | 
 5 | set -e
 6 | 
 7 | # The squonk mode tests cd into tmp; create it up front because a failing cd aborts the run under set -e.
 8 | mkdir -p tmp
 9 | 
10 | echo 'Running screen+conformers in basic mode'
11 | nextflow run src/nextflow/rdkit/screen+conformers.nf -c  src/nextflow/rdkit/screen.config -with-docker
12 | 
13 | 
14 | # NOTE(review): in the next two commands the image name is a trailing positional argument rather than
15 | # the value of -with-docker - confirm that this is intended.
16 | echo 'Running SMoG2016 in basic mode'
17 | nextflow run src/nextflow/docking/smog.nf -c src/nextflow/docking/smog.config -with-docker --ligands data/smog/confs.sdf --protein data/smog/DCP2_1.pdb informaticsmatters/smog:latest
18 | 
19 | echo 'Running PLI in basic mode'
20 | nextflow run src/nextflow/docking/plip.nf -c src/nextflow/docking/plip.config -with-docker --ligands data/smog/confs.sdf --protein data/smog/DCP2_1.pdb informaticsmatters/pli:latest
21 | 
22 | echo 'Running rDock in basic mode'
23 | nextflow run src/nextflow/docking/rdock.nf -c src/nextflow/docking/rdock.config -with-docker\
24 |   --ligands data/hivpr_ligprep_100.sdf.gz\
25 |   --protein data/hivpr_rdock.mol2\
26 |   --asfile data/hivpr_rdock.as\
27 |   --prmfile data/hivpr_rdock.prm\
28 |   --num_dockings 2
29 | 
30 | # Each squonk mode test empties tmp, hard-links the workflow and its inputs into it and runs
31 | # Nextflow inside the nextflow-docker image. $PWD is quoted so paths containing spaces work.
32 | echo 'Running SMoG2016 in squonk mode'
33 | sudo rm -rf tmp/*
34 | cd tmp
35 | ln ../src/nextflow/docking/smog.nsd.nf nextflow.nf
36 | ln ../src/nextflow/docking/smog.nsd.config nextflow.config
37 | gzip -c ../data/smog/DCP2_1.pdb > protein.pdb.gz
38 | ln ../data/smog/confs.data.gz ligands.data.gz
39 | ln ../data/smog/confs.metadata ligands.metadata
40 | docker run -it --rm -v "$PWD:$PWD:z" -w "$PWD" -v /var/run/docker.sock:/var/run/docker.sock informaticsmatters/nextflow-docker:0.30.2 sh -c 'nextflow run nextflow.nf -c nextflow.config --score 100.0 -with-docker'
41 | cd ..
42 | 
43 | echo 'Running PLI in squonk mode'
44 | sudo rm -rf tmp/*
45 | cd tmp
46 | ln ../src/nextflow/docking/plip.nsd.nf nextflow.nf
47 | ln ../src/nextflow/docking/plip.nsd.config nextflow.config
48 | gzip -c ../data/smog/DCP2_1.pdb > protein.pdb.gz
49 | ln ../data/smog/confs.data.gz ligands.data.gz
50 | ln ../data/smog/confs.metadata ligands.metadata
51 | docker run -it --rm -v "$PWD:$PWD:z" -w "$PWD" -v /var/run/docker.sock:/var/run/docker.sock informaticsmatters/nextflow-docker:0.30.2 sh -c 'nextflow run nextflow.nf -c nextflow.config --score 100.0 -with-docker'
52 | cd ..
53 | 
54 | echo 'Running rDock in squonk mode'
55 | sudo rm -rf tmp/*
56 | cd tmp
57 | ln ../src/nextflow/docking/rdock.nsd.nf nextflow.nf
58 | ln ../src/nextflow/docking/rdock.nsd.config nextflow.config
59 | ln ../data/hivpr.config.zip config.zip
60 | ln ../data/dhfr_3d.data.gz ligands.data.gz
61 | ln ../data/dhfr_3d.metadata ligands.metadata
62 | docker run -it --rm -v "$PWD:$PWD:z" -w "$PWD" -v /var/run/docker.sock:/var/run/docker.sock informaticsmatters/nextflow-docker:0.30.2 sh -c 'nextflow run nextflow.nf -c nextflow.config --num_dockings 1 --limit 40 --chunk 5 -with-docker'
63 | cd ..
64 | 
65 | echo 'Running screen in squonk mode'
66 | sudo rm -rf tmp/*
67 | cd tmp
68 | ln ../src/nextflow/rdkit/screen-dataset.nsd.nf nextflow.nf
69 | ln ../src/nextflow/rdkit/screen-dataset.nsd.config nextflow.config
70 | ln ../data/dhfr_3d.data.gz input.data.gz
71 | # NOTE(review): the data file is linked as input.data.gz but the metadata as ligands.metadata - confirm the expected name.
72 | ln ../data/dhfr_3d.metadata ligands.metadata
73 | docker run -it --rm -v "$PWD:$PWD:z" -w "$PWD" -v /var/run/docker.sock:/var/run/docker.sock informaticsmatters/nextflow-docker:0.30.2\
74 |   sh -c 'nextflow run nextflow.nf -c nextflow.config -with-docker --chunk 100 --simmin 0.5 --qsmiles "OC(=O)C1=CC=C(NC2=NC3=C(CN=C(C4=CC(Cl)=CC=C34)C3=C(F)C=CC=C3F)C=N2)C=C1"'
75 | cd ..
76 | 
77 | echo 'Running screen-multi in squonk mode'
78 | sudo rm -rf tmp/*
79 | cd tmp
80 | ln ../src/nextflow/rdkit/screen-multi-dataset.nsd.nf nextflow.nf
81 | ln ../src/nextflow/rdkit/screen-multi-dataset.nsd.config nextflow.config
82 | ln ../data/dhfr_3d.data.gz target.data.gz
83 | ln ../data/nci100.data.gz query.data.gz
84 | docker run -it --rm -v "$PWD:$PWD:z" -w "$PWD" -v /var/run/docker.sock:/var/run/docker.sock informaticsmatters/nextflow-docker:0.30.2\
85 |   sh -c 'nextflow run nextflow.nf -c nextflow.config -with-docker --chunk 100 --simmin 0.55'
86 | cd ..
87 | 
88 | sudo rm -rf tmp/*


--------------------------------------------------------------------------------