├── model
└── .gitkeep
├── Img2Mol.png
├── benchmark_data
├── STAKER_map.pkl
├── Img2Mol_map.pkl
└── README.md
├── examples
├── digital_example1.png
├── digital_example2.png
├── handwritten_example1.png
└── handwritten_example2.jpg
├── environment.yml
├── environment.local-cddd.yml
├── download_model.sh
├── img2mol
├── README.md
├── cddd_server.py
├── model.py
└── inference.py
├── setup.py
├── .gitignore
├── README.md
├── LICENSE
└── example_inference.ipynb
/model/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Img2Mol.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayer-science-for-a-better-life/Img2Mol/HEAD/Img2Mol.png
--------------------------------------------------------------------------------
/benchmark_data/STAKER_map.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayer-science-for-a-better-life/Img2Mol/HEAD/benchmark_data/STAKER_map.pkl
--------------------------------------------------------------------------------
/examples/digital_example1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayer-science-for-a-better-life/Img2Mol/HEAD/examples/digital_example1.png
--------------------------------------------------------------------------------
/examples/digital_example2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayer-science-for-a-better-life/Img2Mol/HEAD/examples/digital_example2.png
--------------------------------------------------------------------------------
/benchmark_data/Img2Mol_map.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayer-science-for-a-better-life/Img2Mol/HEAD/benchmark_data/Img2Mol_map.pkl
--------------------------------------------------------------------------------
/examples/handwritten_example1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayer-science-for-a-better-life/Img2Mol/HEAD/examples/handwritten_example1.png
--------------------------------------------------------------------------------
/examples/handwritten_example2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bayer-science-for-a-better-life/Img2Mol/HEAD/examples/handwritten_example2.jpg
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: img2mol
2 |
3 | channels:
4 | - rdkit
5 | - pytorch
6 | - anaconda
7 | - conda-forge
8 | - defaults
9 | dependencies:
10 | - python=3.8.5
11 | - pip=20.2.4
12 | - notebook=6.4.2
13 | - pillow=8.0.1
14 | - numpy=1.19.2
15 | - rdkit=2020.03.1
16 | - cudatoolkit=11.0
17 | - torchvision=0.8.0
18 | - torchaudio=0.7.0
19 | - pytorch=1.7.0
20 | - pytorch-lightning=1.0.8
--------------------------------------------------------------------------------
/environment.local-cddd.yml:
--------------------------------------------------------------------------------
1 | name: img2mol
2 |
3 | channels:
4 | - rdkit
5 | - pytorch
6 | - anaconda
7 | - conda-forge
8 | - defaults
9 | dependencies:
10 | - python=3.6
11 | - pip=20.2.4
12 | - pandas<=1.0.3
13 | - notebook=6.4.2
14 | - pillow=8.0.1
15 | - scikit-learn
16 | - rdkit=2020.03.1
17 | - cudatoolkit=11.0
18 | - torchvision=0.8.0
19 | - torchaudio=0.7.0
20 | - pytorch=1.7.0
21 | - pytorch-lightning=1.0.8
22 | - pip:
23 | - https://github.com/jrwnter/cddd/archive/refs/tags/1.0.tar.gz
24 | - tensorflow==1.10.1
25 | - tensorboard==1.15
26 | - numpy==1.19.2
27 | - .
28 |
--------------------------------------------------------------------------------
/download_model.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
#
# Download the pretrained Img2Mol checkpoint from Google Drive and store it as
# `model/model.ckpt` inside the directory that contains this script.
#
# Usage: bash download_model.sh
#
# Exit status: 0 if the checkpoint already exists or was downloaded, 1 otherwise.

set -u

# Human-facing share link; only used in status messages.
gURL='https://drive.google.com/file/d/1pk21r4Zzb9ZJkszJwP9SObTlfTaRMMtF/view?usp=sharing'
# Google Drive file id of the checkpoint (the id embedded in gURL).
ggID='1pk21r4Zzb9ZJkszJwP9SObTlfTaRMMtF'
ggURL='https://drive.google.com/uc?export=download'

# Resolve the target relative to this script so that the existence check and
# the download destination always refer to the same file. (The previous check
# tested the absolute path /model/model.ckpt and therefore never matched.)
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
MODEL_DIR="${SCRIPT_DIR}/model"
FILE="${MODEL_DIR}/model.ckpt"

if test -f "$FILE"; then
    echo "$FILE exists."
    exit 0
fi

echo "$FILE does not exist."
printf 'Downloading from %s...\n\n' "$gURL"

mkdir -p "$MODEL_DIR"

# Use a private cookie jar instead of a fixed path in /tmp; remove it on exit.
cookie_jar="$(mktemp)" || exit 1
trap 'rm -f "$cookie_jar"' EXIT

# The first request stores Google Drive's cookies; the confirmation token is
# the last field of the cookie line containing "_warning_".
curl -sc "$cookie_jar" "${ggURL}&id=${ggID}" >/dev/null
getcode="$(awk '/_warning_/ {print $NF}' "$cookie_jar")"

# Download into a .part file (resumable through -C -) and rename it only after
# curl reports success, so an interrupted run never leaves a truncated
# model.ckpt behind. TLS verification stays enabled (no --insecure).
if curl -f -L -C - -b "$cookie_jar" -o "${FILE}.part" \
        "${ggURL}&confirm=${getcode}&id=${ggID}"; then
    mv "${FILE}.part" "$FILE"
else
    echo "Download failed. Please download the file manually from $gURL" >&2
    exit 1
fi
--------------------------------------------------------------------------------
/img2mol/README.md:
--------------------------------------------------------------------------------
1 | # `img2mol` module structure
2 | This directory consists of the necessary python scripts to perform inference tasks with the `img2mol` model.
3 | The list below summarizes each module:
4 |
5 |
6 | * `cddd_server.py`
7 | * class for utilizing the CDDD encoder-decoder described by [Winter et al. (2019)](https://pubs.rsc.org/en/content/articlelanding/2019/sc/c8sc04175j#!divAbstract)
8 | * note that the implemented model class is licensed under the CC BY-NC 4.0 license and only applicable in non-commercial setting
9 | * `model.py`
10 | * Model implementation of the `img2mol` as described in our paper. We use Pytorch Lightning for model training, but essentially, only using PyTorch is also possible
11 | * `inference.py`
12 | * inference class that can be used for predicting the SMILES representation based on an image representation. By default, the model weights are randomly initialized; when instantiating the inference class, a model checkpoint can be passed to load the trained weights.
13 | * The provided model weights are licensed under CC BY-NC 4.0 and only applicable for non-commercial usage
14 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Machine Learning Research @ Bayer AG
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Install script for setuptools."""
16 |
17 | from setuptools import setup
18 |
19 |
# Distribution metadata for the `img2mol` package, gathered in one mapping and
# handed to setuptools in a single call.
_PACKAGE_METADATA = {
    "name": 'img2mol',
    "version": '0.1',
    "description": 'Inferring molecules from images',
    "url": 'https://github.com/bayer-science-for-a-better-life/Img2Mol',
    "author": 'Djork-Arné Clevert, Tuan Le, Robin Winter and Floriane Montanari',
    "author_email": 'djork-arne.clevert@bayer.com',
    "license": 'Apache License, Version 2.0',
    "packages": ['img2mol'],
}

setup(**_PACKAGE_METADATA)
30 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | model/*
2 | !model/.gitkeep
3 |
4 | # Editors
5 | .vscode/
6 | .idea/
7 |
8 | # Vagrant
9 | .vagrant/
10 |
11 | # Mac/OSX
12 | .DS_Store
13 |
14 | # Windows
15 | Thumbs.db
16 |
17 | # Source for the following rules: https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore
18 | # Byte-compiled / optimized / DLL files
19 | __pycache__/
20 | *.py[cod]
21 | *$py.class
22 |
23 | # C extensions
24 | *.so
25 |
26 | # Distribution / packaging
27 | .Python
28 | build/
29 | develop-eggs/
30 | dist/
31 | downloads/
32 | eggs/
33 | .eggs/
34 | lib/
35 | lib64/
36 | parts/
37 | sdist/
38 | var/
39 | wheels/
40 | *.egg-info/
41 | .installed.cfg
42 | *.egg
43 | MANIFEST
44 |
45 | # PyInstaller
46 | # Usually these files are written by a python script from a template
47 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
48 | *.manifest
49 | *.spec
50 |
51 | # Installer logs
52 | pip-log.txt
53 | pip-delete-this-directory.txt
54 |
55 | # Unit test / coverage reports
56 | htmlcov/
57 | .tox/
58 | .nox/
59 | .coverage
60 | .coverage.*
61 | .cache
62 | nosetests.xml
63 | coverage.xml
64 | *.cover
65 | .hypothesis/
66 | .pytest_cache/
67 |
68 | # Translations
69 | *.mo
70 | *.pot
71 |
72 | # Django stuff:
73 | *.log
74 | local_settings.py
75 | db.sqlite3
76 |
77 | # Flask stuff:
78 | instance/
79 | .webassets-cache
80 |
81 | # Scrapy stuff:
82 | .scrapy
83 |
84 | # Sphinx documentation
85 | docs/_build/
86 |
87 | # PyBuilder
88 | target/
89 |
90 | # Jupyter Notebook
91 | .ipynb_checkpoints
92 |
93 | # IPython
94 | profile_default/
95 | ipython_config.py
96 |
97 | # pyenv
98 | .python-version
99 |
100 | # celery beat schedule file
101 | celerybeat-schedule
102 |
103 | # SageMath parsed files
104 | *.sage.py
105 |
106 | # Environments
107 | .env
108 | .venv
109 | env/
110 | venv/
111 | ENV/
112 | env.bak/
113 | venv.bak/
114 |
115 | # Spyder project settings
116 | .spyderproject
117 | .spyproject
118 |
119 | # Rope project settings
120 | .ropeproject
121 |
122 | # mkdocs documentation
123 | /site
124 |
125 | # mypy
126 | .mypy_cache/
127 | .dmypy.json
128 | dmypy.json
129 |
--------------------------------------------------------------------------------
/img2mol/cddd_server.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Machine Learning Research @ Bayer AG
2 | #
3 | # Licensed for non-commercial use only, under the terms of the
4 | # Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license.
5 | # You can find details at: https://creativecommons.org/licenses/by-nc/4.0/legalcode
6 |
7 |
8 | import json
9 | import requests
10 | requests.packages.urllib3.disable_warnings()
11 |
12 | """
13 | CDDD Server to encode SMILES string to molecular embeddings and decode the molecular embeddings to SMILES string.
14 |
15 | For further details, please refer to:
16 | [1] R. Winter, F. Montanari, F. Noe and D. Clevert, Chem. Sci, 2019,
17 | https://pubs.rsc.org/en/content/articlelanding/2019/sc/c8sc04175j#!divAbstract
18 |
19 | and: https://github.com/jrwnter/cddd
20 | """
21 |
22 | # Note that the DEFAULT_HOST is accessing the AWS instance deployed by Machine Learning Research Group of Bayer.
23 | DEFAULT_HOST = "http://ec2-18-157-240-87.eu-central-1.compute.amazonaws.com"
24 |
25 | """
26 | The CDDD server is applicable for non-commercial use only, under the terms of the
27 | Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license.
28 | You can find details at: https://creativecommons.org/licenses/by-nc/4.0/legalcode
29 | """
30 |
31 |
class CDDDRequest:
    """
    Minimal HTTP client for a CDDD web service.

    Every call sends a JSON body via POST to ``<host>:<port>/<endpoint>/`` and
    returns the JSON-decoded response body.

    :param host: base URL of the service including the scheme. Defaults to `DEFAULT_HOST`.
    :param port: TCP port of the service. Defaults to 8892.
    """

    def __init__(self, host=DEFAULT_HOST, port=8892):
        self.headers = {'content-type': 'application/json'}
        self.host = host
        self.port = port

    def _post(self, url_template, payload):
        """
        POST `payload` as JSON to the endpoint described by `url_template` and decode the reply.

        :param url_template: format string with two placeholders, filled with host and port.
        :param payload: JSON-serializable request body.
        :return: the response body parsed as JSON.
        """
        target = url_template.format(self.host, self.port)
        body = json.dumps(payload)
        # verify=False: certificate verification is switched off for this request.
        reply = requests.post(target, data=body, headers=self.headers, verify=False)
        text = reply.content.decode("utf-8")
        return json.loads(text)

    def smiles_to_cddd(self, smiles):
        """Send `smiles` to the `smiles_to_cddd` endpoint and return the decoded response."""
        return self._post("{}:{}/smiles_to_cddd/", {"smiles": smiles})

    def seq_to_emb(self, smiles):
        """Alias of `smiles_to_cddd`."""
        return self.smiles_to_cddd(smiles)

    def cddd_to_smiles(self, embedding):
        """Send `embedding` to the `cddd_to_smiles` endpoint and return the decoded response."""
        return self._post("{}:{}/cddd_to_smiles/", {"cddd": embedding})

    def emb_to_seq(self, embedding):
        """Alias of `cddd_to_smiles`."""
        return self.cddd_to_smiles(embedding)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Img2Mol: inferring molecules from pictures
2 | ==========================================
3 | 
4 | Welcome to Img2Mol! :wave:.
5 |
6 | :point_right: For the Img2Mol web app switch to the "deployment-example" branch.
7 |
8 | ## Overview
9 | Here we provide the implementation of the `img2mol` model using [PyTorch](https://github.com/pytorch/pytorch) and [PyTorch Lightning](https://github.com/PyTorchLightning/pytorch-lightning) for training and inference, along with an exemplary jupyter notebook.
10 |
11 | This repository is organized as follows:
12 | * `examples/`: contains example images to apply our proposed model on
13 | * `img2mol/`: contains necessary python modules for our proposed model
14 | * `model/`: stores the trained model weights as pickled files. See the "Download Model Weights" section below for the download link
15 |
16 | ## Installation
17 | #### Requirements
18 | ```
19 | python=3.8.5
20 | pip=20.2.4
21 | notebook=6.4.2
22 | pillow=8.0.1
23 | numpy=1.19.2
24 | rdkit=2020.03.1
25 | cudatoolkit=11.0
26 | torchvision=0.8.0
27 | torchaudio=0.7.0
28 | pytorch=1.7.0
29 | pytorch-lightning=1.0.8
30 | ```
31 |
32 | #### Environment
33 | Create a new environment:
34 | ```bash
35 | git clone git@github.com:bayer-science-for-a-better-life/Img2Mol.git
36 | cd Img2Mol
37 | conda env create -f environment.yml
38 | conda activate img2mol
39 | pip install .
40 | ```
41 | *If you want to run Img2Mol as a standalone version with a locally loaded CDDD model instead of sending requests to our CDDD server, install the environment from `environment.local-cddd.yml` instead of `environment.yml`*
42 | ## Download Model Weights
43 | You can download the trained parameters for the default model (~2.4GB) as described in our paper using the following link:
44 | https://drive.google.com/file/d/1pk21r4Zzb9ZJkszJwP9SObTlfTaRMMtF/view .
45 | Please move the downloaded file `model.ckpt` into the `model/` directory.
46 |
47 | If you are working with the local CDDD installation, please [download and unzip the CDDD model](https://drive.google.com/u/0/uc?id=1oyknOulq_j0w9kzOKKIHdTLo5HphT99h&export=download) and move the directory *default_model* to `path/to/anaconda3/envs/img2mol/lib/python3.6/site-packages/cddd/data/`
48 |
49 | Alternatively, we provide a bash script that will download and move the file automatically.
50 | ```bash
51 | bash download_model.sh
52 | ```
53 | If you have problems downloading the file using the bash script, please manually download the file using the browser.
54 |
55 | ## Examples
56 | Check the example notebook `example_inference.ipynb` to see how the inference class can be used. The usage with the local CDDD model is demonstrated in `example_inference_local_cddd.ipynb`.
57 |
58 | ## Reference
59 | Please cite our manuscript if you use our model in your work.
60 |
61 | D.-A. Clevert, T. Le, R. Winter, F. Montanari, Chem. Sci., 2021, [DOI: 10.1039/D1SC01839F](https://doi.org/10.1039/D1SC01839F)
62 |
63 | ## Img2Mol Code License
64 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at https://www.apache.org/licenses/LICENSE-2.0.
65 |
66 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
67 |
68 | ## Model Parameters License
69 | The Img2Mol parameters are made available for non-commercial use only, under the terms of the Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license. You can find details at: https://creativecommons.org/licenses/by-nc/4.0/legalcode
70 |
--------------------------------------------------------------------------------
/img2mol/model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Machine Learning Research @ Bayer AG
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import torch
16 | from torch import nn
17 | import torch.nn.functional as F
18 | import pytorch_lightning as pl
19 | from typing import Union, List, Optional
20 |
21 |
# Layer specification consumed by `make_layers`, one entry per layer in order:
#   * a list `[out_channels, kernel_size, stride, padding]` describes a Conv2d layer
#   * the string 'M' inserts a 2x2 max-pooling layer
MODEL_CONFIGS: List = [
    [128, 7, 3, 4],
    [256, 5, 1, 1],
    [384, 5, 1, 1],
    'M',
    [384, 3, 1, 1],
    [384, 3, 1, 1],
    'M',
    [512, 3, 1, 1],
    [512, 3, 1, 1],
    [512, 3, 1, 1],
    'M',
]
33 |
34 |
def make_layers(cfg: Optional[List[Union[str, int]]] = None,
                batch_norm: bool = False) -> nn.Sequential:
    """
    Build the convolutional feature extractor of the Img2Mol model.

    The layers are created in the order given by `cfg`; the first convolution
    takes a single input channel and every later convolution takes the output
    channels of the convolution before it.

    :param cfg: layer specification. Each entry is either 'A' (2x2 average pooling),
                'M' (2x2 max pooling), or a sequence
                `[out_channels, kernel_size, stride, padding]` describing a Conv2d layer.
                Defaults to the `MODEL_CONFIGS` list.
    :param batch_norm: if True, a BatchNorm2d layer is placed between each Conv2d and its ReLU.
                       Defaults to False
    :return: torch.nn.Sequential module as feature-extractor
    """
    spec = MODEL_CONFIGS if cfg is None else cfg

    modules: List[nn.Module] = []
    channels_in = 1
    for entry in spec:
        if entry == 'A':
            modules.append(nn.AvgPool2d(kernel_size=2, stride=2))
            continue
        if entry == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        # Anything else is a convolution spec that unpacks into four values.
        channels_out, kernel, step, pad = entry
        modules.append(nn.Conv2d(channels_in, channels_out,
                                 kernel_size=kernel, stride=step, padding=pad))
        if batch_norm:
            modules.append(nn.BatchNorm2d(channels_out))
        modules.append(nn.ReLU(inplace=True))
        channels_in = channels_out

    return nn.Sequential(*modules)
69 |
70 |
class Img2MolPlModel(pl.LightningModule):
    """
    PyTorch Lightning wrapper of the Img2Mol network.

    The network is a convolutional feature extractor (`self.features`) followed
    by a three-layer fully-connected head (`self.classifier`) that ends in a
    Tanh and outputs 512 values per input. All train/validation/test steps use
    the mean-squared error between that output and the target in the batch.

    :param learning_rate: learning rate passed to the AdamW optimizer.
    :param batch_norm: forwarded to `make_layers`; adds BatchNorm2d after every convolution.
    """

    def __init__(self, learning_rate: float = 1e-4, batch_norm: bool = False):
        super().__init__()
        self.learning_rate = learning_rate

        # convolutional feature extractor
        self.features = make_layers(cfg=MODEL_CONFIGS, batch_norm=batch_norm)

        # fully-connected head; the first layer expects 512 * 9 * 9 flattened features
        head = [
            nn.Linear(512 * 9 * 9, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.0),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.0),
            nn.Linear(4096, 512),
            nn.Tanh(),
        ]
        self.classifier = nn.Sequential(*head)

        self._initialize_weights()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the feature extractor, flatten everything but the batch dimension, apply the head."""
        feature_maps = self.features(x)
        flat = torch.flatten(feature_maps, 1)
        return self.classifier(flat)

    def _initialize_weights(self) -> None:
        """
        Initialise all sub-modules in place:
        Conv2d   -> Kaiming-normal weights (fan_out, relu), zero bias
        BatchNorm2d -> weight 1, bias 0
        Linear   -> weights ~ N(0, 0.01), zero bias
        """
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)
            elif isinstance(layer, nn.BatchNorm2d):
                nn.init.ones_(layer.weight)
                nn.init.zeros_(layer.bias)
            elif isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.zeros_(layer.bias)

    def _mse(self, batch) -> torch.Tensor:
        """Unpack a `(input, target)` batch and return the MSE between prediction and target."""
        inputs, target = batch
        prediction = self(inputs)
        return F.mse_loss(prediction, target)

    def training_step(self, batch, batch_idx):
        loss = self._mse(batch)
        self.log('train_loss', loss, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        self.log('valid_loss', self._mse(batch), on_epoch=True, prog_bar=True, logger=True)

    def test_step(self, batch, batch_idx):
        self.log('test_loss', self._mse(batch))

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.learning_rate)
        return optimizer
135 |
136 |
if __name__ == "__main__":
    # Smoke check: build the model with default arguments and print its layer structure.
    print(Img2MolPlModel())
140 |
--------------------------------------------------------------------------------
/benchmark_data/README.md:
--------------------------------------------------------------------------------
1 | Here we provide the benchmark datasets that were used to evaluate the performance of Img2Mol and compare it with that of state-of-the-art molecular recognition methods. The following benchmark datasets (all 8-bit grayscale images) were used.
2 | For the smaller benchmark datasets (USPTO, UoB, CLEF and JPO), we applied a slight input perturbation by adding rotation (randomly drawn from [−5°, 5°]) and shearing (xy-shearing factor randomly drawn from [−0.1, 0.1]). Every input image of those benchmarks is perturbed five times randomly. This is done in order to detect potential overfitting of the baseline methods to those small, well known datasets.
3 |
4 | #### Img2Mol
5 | Test set collection of 25,000 images and molecule descriptions. Images were generated as described in subsection 3.3 of the paper. The resolution of the images is 224 × 224 px. Only half of our original test set is used due to the computational time of the baseline methods. The data set consists of typical small molecules with an average size of 25 atoms, ranging between 6 and 44 atoms. Please load the pickled dataframe object to get the mapping images<>smiles.
6 |
7 | You can download the tgz-file (~114MB) of the images here:
8 | https://drive.google.com/file/d/1FZxjcncEQ-aK4Gl5obepNxAJCFOcEc8W/view
10 |
11 | #### STAKER
12 | The validation set collection of 30,000 images and molecule descriptions provided by Staker et al. The images are based on US Patent Office (USPTO) data. The image resolution is 256 × 256 px. Molecules are composed of 24 atoms on average, ranging from 7 at the minimum to 51 at the maximum. Please load the pickled dataframe object to get the mapping images<>smiles.
13 |
14 | You can download the tgz-file (~110MB) of the images here:
15 | https://drive.google.com/file/d/1rYPMSF6C7AbHubll8BZZJF2zvd7UYzp6/view.
16 | #### USPTO
17 | A collection of 4852 images and molecule descriptions based on US Patent Office (USPTO) data, obtained from Rajan et al. The average resolution of the images is 649 × 417 px. The dataset consists of molecules with an average size of 28 atoms, ranging between 10 and 96 atoms.
18 |
19 | You can download the tgz-file (~12MB) of the images here:
20 | https://drive.google.com/file/d/15h1c50AmcJ3jCuQOdLjkVhcFkqe7slLn/view.
22 | #### UoB
23 | 5716 images and molecule descriptions of chemical structures developed by the University of Birmingham, obtained from Rajan et al. The average resolution of the images is 762 × 412 px. The molecules in this data set are quite small, consisting on average of only 13 atoms, ranging between 4 and 34 atoms.
24 |
25 | You can download the tgz-file of the images (~124MB) here:
26 | https://drive.google.com/file/d/13Ul94f6hUEpDbUKLUP_e7xEfSRZIqFuy/view.
28 | #### CLEF
29 | A collection of 711 images and molecule descriptions based on the Conference and Labs of the Evaluation Forum (CLEF) test set, obtained from Rajan et al. The average resolution of the images is 1243 × 392 px. The dataset consists of molecules with an average size of 26 atoms, ranging between 4 and 42 atoms.
30 |
31 | You can download the tgz-file (~12MB) of the images here:
32 | https://drive.google.com/file/d/1fqMg0N582ti9ij71Pbntbq6vMw8z1BJI/view.
34 | #### JPO
35 | A collection of 365 images and molecule descriptions based on Japanese Patent Office (JPO) data, obtained from Rajan et al. Note that this data set contains many textual labels, including Japanese characters, and irregular features, including line thickness variations. In addition, some images are characterised by poor quality. The average resolution of the images is 607 × 373 px. Molecules are composed of 20 atoms on average, ranging from 5 at the minimum to 43 at the maximum.
36 |
37 | You can download the tgz-file (~12MB) of the images here:
38 | https://drive.google.com/file/d/11GxOLvQn_TanDAW8u7oCvSA_FJ-SXU4F/view.
40 |
41 |
42 | #### Influence of the depiction library
43 |
44 | To investigate how the rendering library (RDKit, OEChem TK, or Indigo) used to create input images affects the performance of chemical structure recognition models, we compiled the following benchmark dataset. A subset of 5000 compounds from the Img2Mol test set depicted each five times by each of the three libraries. Please use the Img2Mol mapping information to link images to the smiles.
45 |
46 | You can download the tgz-file (~1GB) of the images here:
47 | https://drive.google.com/file/d/1ixGj51F5NnhRfHFydpuCBYvYKexfaX3E/view.
49 |
50 |
51 | #### Influence of the image resolution
52 |
53 | To investigate how the image resolution used to create input images affects the performance of chemical structure recognition models, we compiled the following benchmark dataset. A subset of 5000 compounds from the Img2Mol test set depicted each five times with 256, 512, 1024 and 2048 px resolution. Please use the Img2Mol mapping information to link images to the smiles.
54 |
55 | You can download the tgz-file (~2.5GB) of the images here:
56 | https://drive.google.com/file/d/1uMZ2FGNON4k6vxrldkEJZPRbyrISNR6K/view.
58 |
59 |
60 |
--------------------------------------------------------------------------------
/img2mol/inference.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Machine Learning Research @ Bayer AG
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import torch
16 | from torchvision import transforms
17 |
18 | from typing import Optional
19 | import random
20 | import numpy as np
21 | from PIL import Image, ImageOps, ImageEnhance
22 |
23 | from img2mol.model import Img2MolPlModel
24 | from img2mol.cddd_server import CDDDRequest
25 |
26 | from rdkit import Chem
27 |
28 | import warnings
29 | # CDDD import only works if the suitable environment has been installed
30 | try:
31 | with warnings.catch_warnings():
32 | warnings.simplefilter("ignore", FutureWarning)
33 | from cddd.inference import InferenceModel as CDDDInferenceModel
34 | except ImportError:
35 | print("Local CDDD installation has not been found.")
36 |
37 |
38 | """
39 | Inference Class for Img2Mol Model.
40 | By default, the class instantiation will not use any model checkpoint.
41 | The Img2Mol model parameters are made available for non-commercial use only, under the terms of the
42 | Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license.
43 | You can find details at: https://creativecommons.org/licenses/by-nc/4.0/legalcode
44 | """
45 |
46 |
47 | class Img2MolInference(object):
48 | """
49 | Inference Class
50 | """
51 | def __init__(
52 | self,
53 | model_ckpt: Optional[str] = None,
54 | device: str = "cuda:0" if torch.cuda.is_available() else "cpu",
55 | local_cddd: bool = None
56 | ):
57 | super(Img2MolInference, self).__init__()
58 | if local_cddd:
59 | self.cddd_inference_model = CDDDInferenceModel()
60 | else:
61 | self.cddd_inference_model = None
62 | self.device = device
63 | print("Initializing Img2Mol Model with random weights.")
64 | self.model = Img2MolPlModel()
65 | if model_ckpt is not None:
66 | print(f"Loading checkpoint: {model_ckpt}")
67 | self.model = self.model.load_from_checkpoint(model_ckpt)
68 |
69 | print("Setting to `self.eval()`-mode.")
70 | self.model.eval()
71 | print(f"Sending model to `{self.device}` device.")
72 | self.model.to(self.device)
73 | print("Succesfully created Img2Mol Inference class.")
74 |
75 | """
76 | Class methods for image preprocessing
77 | """
78 | @classmethod
79 | def read_imagefile(cls, filepath: str) -> Image.Image:
80 | img = Image.open(filepath, "r")
81 |
82 | if img.mode == "RGBA":
83 | bg = Image.new('RGB', img.size, (255, 255, 255))
84 | # Paste image to background image
85 | bg.paste(img, (0, 0), img)
86 | return bg.convert('L')
87 | else:
88 | return img.convert('L')
89 |
90 | @classmethod
91 | def fit_image(cls, img: Image):
92 | old_size = img.size
93 | desired_size = 224
94 | ratio = float(desired_size) / max(old_size)
95 | new_size = tuple([int(x * ratio) for x in old_size])
96 | img = img.resize(new_size, Image.BICUBIC)
97 | new_img = Image.new("L", (desired_size, desired_size), "white")
98 | new_img.paste(img, ((desired_size - new_size[0]) // 2,
99 | (desired_size - new_size[1]) // 2))
100 |
101 | new_img = ImageOps.expand(new_img, int(np.random.randint(5, 25, size=1)), "white")
102 | return new_img
103 |
104 | @classmethod
105 | def transform_image(cls, image: Image):
106 | image = cls.fit_image(image)
107 | img_PIL = transforms.RandomRotation((-15, 15), resample=3, expand=True, center=None, fill=255)(image)
108 | img_PIL = transforms.ColorJitter(brightness=[0.75, 2.0], contrast=0, saturation=0, hue=0)(img_PIL)
109 | shear_value = np.random.uniform(0.1, 7.0)
110 | shear = random.choice([[0, 0, -shear_value, shear_value], [-shear_value, shear_value, 0, 0],
111 | [-shear_value, shear_value, -shear_value, shear_value]])
112 | img_PIL = transforms.RandomAffine(0, translate=None, scale=None,
113 | shear=shear, resample=3, fillcolor=255)(img_PIL)
114 | img_PIL = ImageEnhance.Contrast(ImageOps.autocontrast(img_PIL)).enhance(2.0)
115 | img_PIL = transforms.Resize((224, 224), interpolation=3)(img_PIL)
116 | img_PIL = ImageOps.autocontrast(img_PIL)
117 | img_PIL = transforms.ToTensor()(img_PIL)
118 | return img_PIL
119 |
120 | def read_image_to_tensor(self, filepath: str,
121 | repeats: int = 50):
122 | extension = filepath.split(".")[-1] in ("jpg", "jpeg", "png")
123 | if not extension:
124 | return "Image must be jpg or png format!"
125 | image = self.read_imagefile(filepath)
126 | images = torch.cat([torch.unsqueeze(self.transform_image(image), 0)
127 | for _ in range(repeats)], dim=0)
128 | images = images.to(self.device)
129 | return images
130 |
def __call__(self,
             filepath: str,
             cddd_server: CDDDRequest = None,
             return_cddd: bool = False,
             ) -> dict:
    """Predict a molecule from an image file.

    Fifty augmented copies of the image are run through ``self.model``; the
    element-wise median of the predicted CDDD embeddings is decoded to a
    SMILES string, either with ``self.cddd_inference_model`` when it is set
    or otherwise through ``cddd_server``. The SMILES is then parsed and
    canonicalised with RDKit.

    Args:
        filepath: path of the image to translate.
        cddd_server: client used to decode the embedding when no local
            CDDD inference model is configured on this instance.
        return_cddd: if True, include the median embedding in the result.

    Returns:
        A dict with the keys ``"filepath"``, ``"cddd"`` (the median
        embedding, or None unless ``return_cddd`` is True), ``"smiles"``
        (canonical SMILES, or None if RDKit could not parse the decoded
        string) and ``"mol"`` (RDKit molecule, or None).

    Raises:
        ValueError: if neither a local CDDD model nor ``cddd_server`` is
            available, or if the image could not be turned into a tensor.
    """
    # Fail fast, before any forward pass: without a decoder the embedding
    # cannot be turned into SMILES and we would crash on ``None`` later.
    if not self.cddd_inference_model and cddd_server is None:
        raise ValueError("No CDDD decoder available: pass a `cddd_server` "
                         "or configure a local CDDD inference model.")

    images = self.read_image_to_tensor(filepath, repeats=50)
    # Defensive: never feed a non-tensor (e.g. an error-message string coming
    # back from the reader) into the model.
    if isinstance(images, str):
        raise ValueError(images)

    with torch.no_grad():
        cddd = self.model(images).detach().cpu().numpy()

    # take the median cddd prediction out of `repeats` predictions
    cddd = np.median(cddd, axis=0)

    if self.cddd_inference_model:
        smiles = self.cddd_inference_model.emb_to_seq(cddd)
    else:
        smiles = cddd_server.cddd_to_smiles(cddd.tolist())

    mol = Chem.MolFromSmiles(smiles, sanitize=True)
    # if the molecule is valid, i.e. can be parsed with the rdkit
    if mol:
        can_smiles = Chem.MolToSmiles(mol)
        can_mol = Chem.MolFromSmiles(can_smiles)
    else:
        print("Image translation failed.")
        can_smiles = None
        can_mol = None

    if not return_cddd:
        cddd = None

    return {"filepath": filepath,
            "cddd": cddd, "smiles": can_smiles, "mol": can_mol
            }
163 |
def predict(self, filepath: str,
            cddd_server: CDDDRequest = None,
            return_cddd: bool = False) -> dict:
    """Named alias for calling the instance directly.

    Args:
        filepath: path of the image to translate.
        cddd_server: client used to decode the embedding; optional here,
            exactly as in ``__call__``, so both entry points accept the
            same arguments.
        return_cddd: if True, include the median embedding in the result.

    Returns:
        The dict produced by ``__call__`` for the same arguments.
    """
    return self.__call__(filepath, cddd_server, return_cddd)
168 |
169 |
if __name__ == "__main__":
    # Minimal smoke run: build the inference object, translate one of the
    # bundled example images and print the predicted SMILES.

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    img2mol = Img2MolInference(model_ckpt=None,
                               device=device)
    cddd_server = CDDDRequest(host="http://ec2-18-157-240-87.eu-central-1.compute.amazonaws.com")

    # The examples folder ships digital_example1.png; "examples/example1.png"
    # does not exist there, so the old path could never be opened.
    example = "examples/digital_example1.png"

    res = img2mol(filepath=example, cddd_server=cddd_server)
    # Show the outcome; without this the script finishes silently.
    print(res["smiles"])
180 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright 2021 Machine Learning Research @ Bayer AG
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/example_inference.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Copyright 2021 Machine Learning Research @ Bayer AG\n",
8 | "\n",
9 | "Licensed for non-commercial use only, under the terms of the\n",
10 | "Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license.\n",
11 | "\n",
12 | "You can find details at: https://creativecommons.org/licenses/by-nc/4.0/legalcode"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 1,
18 | "id": "b9b48452",
19 | "metadata": {},
20 | "outputs": [],
21 | "source": [
22 | "import torch\n",
23 | "from img2mol.inference import *"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 2,
29 | "id": "dca01497",
30 | "metadata": {},
31 | "outputs": [],
32 | "source": [
33 | "from IPython.display import display"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 3,
39 | "id": "30a2191a",
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "from PIL import Image"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": 4,
49 | "id": "30d0b65b",
50 | "metadata": {},
51 | "outputs": [],
52 | "source": [
53 | "import os"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 5,
59 | "id": "2d9605c0",
60 | "metadata": {},
61 | "outputs": [
62 | {
63 | "data": {
64 | "text/plain": [
65 | "['.gitkeep', 'model.ckpt']"
66 | ]
67 | },
68 | "execution_count": 5,
69 | "metadata": {},
70 | "output_type": "execute_result"
71 | }
72 | ],
73 | "source": [
74 | "os.listdir(\"model/\")"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 6,
80 | "id": "99236cf4",
81 | "metadata": {},
82 | "outputs": [
83 | {
84 | "name": "stdout",
85 | "output_type": "stream",
86 | "text": [
87 | "Initializing Img2Mol Model with random weights.\n",
88 | "Loading checkpoint: model/model.ckpt\n",
89 | "Setting to `self.eval()`-mode.\n",
90 | "Sending model to `cuda:0` device.\n",
91 | "Succesfully created Img2Mol Inference class.\n"
92 | ]
93 | }
94 | ],
95 | "source": [
96 | "device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n",
97 | "img2mol = Img2MolInference(model_ckpt=\"model/model.ckpt\",\n",
98 | " device=device)\n",
99 | "cddd_server = CDDDRequest()"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": 7,
105 | "id": "24a20cfa",
106 | "metadata": {},
107 | "outputs": [
108 | {
109 | "data": {
110 | "text/plain": [
111 | "['digital_example1.png',\n",
112 | " 'digital_example2.png',\n",
113 | " 'handwritten_example1.png',\n",
114 | " 'handwritten_example2.jpg']"
115 | ]
116 | },
117 | "execution_count": 7,
118 | "metadata": {},
119 | "output_type": "execute_result"
120 | }
121 | ],
122 | "source": [
123 | "os.listdir(\"examples/\")"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": 8,
129 | "id": "902c1057",
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "res = img2mol(filepath=\"examples/digital_example1.png\", cddd_server=cddd_server)"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": 9,
139 | "id": "bd748334",
140 | "metadata": {},
141 | "outputs": [
142 | {
143 | "data": {
144 | "text/plain": [
145 | "{'filepath': 'examples/digital_example1.png',\n",
146 | " 'cddd': None,\n",
147 | " 'smiles': 'Cn1c(=O)c2c(nc(Sc3ccccc3)n2C)n(C)c1=O',\n",
148 | " 'mol': }"
149 | ]
150 | },
151 | "execution_count": 9,
152 | "metadata": {},
153 | "output_type": "execute_result"
154 | }
155 | ],
156 | "source": [
157 | "res"
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": 10,
163 | "id": "af273a49",
164 | "metadata": {
165 | "scrolled": false
166 | },
167 | "outputs": [],
168 | "source": [
169 | "input_img = Image.open(res[\"filepath\"], \"r\")"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 11,
175 | "id": "2657ad09",
176 | "metadata": {
177 | "scrolled": false
178 | },
179 | "outputs": [
180 | {
181 | "data": {
182 | "image/png": "\n",
183 | "text/plain": [
184 | ""
185 | ]
186 | },
187 | "metadata": {},
188 | "output_type": "display_data"
189 | }
190 | ],
191 | "source": [
192 | "display(input_img)"
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "id": "80a91bd6",
198 | "metadata": {},
199 | "source": [
200 | "# show prediction"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": 12,
206 | "id": "ebf67ab2",
207 | "metadata": {},
208 | "outputs": [
209 | {
210 | "name": "stdout",
211 | "output_type": "stream",
212 | "text": [
213 | "Cn1c(=O)c2c(nc(Sc3ccccc3)n2C)n(C)c1=O\n",
214 | "\n"
215 | ]
216 | },
217 | {
218 | "data": {
219 | "image/png": "\n",
220 | "text/plain": [
221 | ""
222 | ]
223 | },
224 | "execution_count": 12,
225 | "metadata": {},
226 | "output_type": "execute_result"
227 | }
228 | ],
229 | "source": [
230 | "print(res[\"smiles\"])\n",
231 | "print()\n",
232 | "res[\"mol\"]"
233 | ]
234 | },
235 | {
236 | "cell_type": "markdown",
237 | "id": "e68f0281",
238 | "metadata": {},
239 | "source": [
240 | "# Different Example"
241 | ]
242 | },
243 | {
244 | "cell_type": "code",
245 | "execution_count": 13,
246 | "id": "9655bd18",
247 | "metadata": {},
248 | "outputs": [],
249 | "source": [
250 | "example = \"examples/example2.png\"\n",
251 | "res = img2mol(filepath=\"examples/digital_example2.png\", cddd_server=cddd_server)\n",
252 | "input_img = Image.open(res[\"filepath\"], \"r\")"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 14,
258 | "id": "4b294b23",
259 | "metadata": {},
260 | "outputs": [
261 | {
262 | "data": {
263 | "image/png": "\n",
264 | "text/plain": [
265 | ""
266 | ]
267 | },
268 | "metadata": {},
269 | "output_type": "display_data"
270 | }
271 | ],
272 | "source": [
273 | "display(input_img)"
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": 15,
279 | "id": "9afc6ad8",
280 | "metadata": {},
281 | "outputs": [
282 | {
283 | "name": "stdout",
284 | "output_type": "stream",
285 | "text": [
286 | "CN(C)C(=O)COC(=O)Cc1ccc(OC(=O)c2ccc(N=C(N)N)cc2)cc1\n",
287 | "\n"
288 | ]
289 | },
290 | {
291 | "data": {
292 | "image/png": "\n",
293 | "text/plain": [
294 | ""
295 | ]
296 | },
297 | "execution_count": 15,
298 | "metadata": {},
299 | "output_type": "execute_result"
300 | }
301 | ],
302 | "source": [
303 | "print(res[\"smiles\"])\n",
304 | "print()\n",
305 | "res[\"mol\"]"
306 | ]
307 | },
308 | {
309 | "cell_type": "markdown",
310 | "id": "f2274b9f",
311 | "metadata": {},
312 | "source": [
313 | "# Another example on a handwritten image"
314 | ]
315 | },
316 | {
317 | "cell_type": "code",
318 | "execution_count": 16,
319 | "id": "2d63aae9",
320 | "metadata": {},
321 | "outputs": [],
322 | "source": [
323 | "res = img2mol(filepath=\"examples/handwritten_example1.png\", cddd_server=cddd_server)\n",
324 | "input_img = Image.open(res[\"filepath\"], \"r\")"
325 | ]
326 | },
327 | {
328 | "cell_type": "code",
329 | "execution_count": 17,
330 | "id": "c5a2704f",
331 | "metadata": {},
332 | "outputs": [
333 | {
334 | "data": {
335 | "image/png": "\n",
336 | "text/plain": [
337 | ""
338 | ]
339 | },
340 | "metadata": {},
341 | "output_type": "display_data"
342 | }
343 | ],
344 | "source": [
345 | "display(input_img)"
346 | ]
347 | },
348 | {
349 | "cell_type": "code",
350 | "execution_count": 18,
351 | "id": "76ec658c",
352 | "metadata": {},
353 | "outputs": [
354 | {
355 | "name": "stdout",
356 | "output_type": "stream",
357 | "text": [
358 | "CCOC(=O)c1[nH]c2c(S(=O)(=O)N3CCN(C)CC3)ccc(OCC)c2c1C1CCNCC1\n",
359 | "\n"
360 | ]
361 | },
362 | {
363 | "data": {
364 | "image/png": "\n",
365 | "text/plain": [
366 | ""
367 | ]
368 | },
369 | "execution_count": 18,
370 | "metadata": {},
371 | "output_type": "execute_result"
372 | }
373 | ],
374 | "source": [
375 | "print(res[\"smiles\"])\n",
376 | "print()\n",
377 | "res[\"mol\"]"
378 | ]
379 | },
380 | {
381 | "cell_type": "markdown",
382 | "metadata": {},
383 | "source": [
384 | "# Next example: a second handwritten image"
385 | ]
386 | },
387 | {
388 | "cell_type": "code",
389 | "execution_count": 20,
390 | "metadata": {},
391 | "outputs": [],
392 | "source": [
393 | "res = img2mol(filepath=\"examples/handwritten_example2.jpg\", cddd_server=cddd_server)\n",
394 | "input_img = Image.open(res[\"filepath\"], \"r\")"
395 | ]
396 | },
397 | {
398 | "cell_type": "code",
399 | "execution_count": 21,
400 | "metadata": {},
401 | "outputs": [
402 | {
403 | "data": {
404 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAANwAAAB7CAIAAACQIXmAAAABfGlDQ1BJQ0MgUHJvZmlsZQAAeJx1kbtLA0EQh78kikEjEbSwsAgSrRLRCKKNYIKoECTECL6ay5mHkMdxlyDBVrANKIg2vgr9C7QVrAVBUQSxE6wVbVTOOROIiJlldr797c6wOwv2WEbNGg39kM0V9OhE0DM3v+BpesJJI60006aohjYWiYSpa++32Kx47bdq1T/3r7UsJwwVbE7hUVXTC8KTwuHVgmbxlnCHmlaWhU+EfbpcUPjG0uMVfrY4VeFPi/VYNAT2NmFP6hfHf7Ga1rPC8nK82UxRrd7HeokrkZudkdgt3oVBlAmCeJhinBBDDDAi8xB+AvTJijr5/T/50+QlV5VZo4TOCinSFPCJWpTqCYlJ0RMyMpSs/v/tq5EcDFSqu4LQ+Giarz3QtAlfZdP8ODDNr0NwPMB5rpaf34fhN9HLNc27B+51OL2oafFtONuAzntN0ZUfySFuTybh5Rha56H9CpoXKz2r7nN0B7E1+apL2NmFXjnvXvoG5/ZnrNP2ONsAAACYZVhJZk1NACoAAAAIAAUBGgAFAAAAAQAAAEoBGwAFAAAAAQAAAFIBKAADAAAAAQACAAABMgACAAAAFAAAAFqHaQAEAAAAAQAAAG4AAAAAAAAASAAAAAEAAABIAAAAATIwMjE6MDg6MDUgMTc6MDg6MDcAAAOgAQADAAAAAQABAACgAgADAAAAAQDcAACgAwADAAAAAQB7AAAAAAAATjksWAAAI95JREFUeJztnWlAE1f38M9kEQIYCDsooiCIVZEKWjfcigpCte5VxOKCG1Zra627lLYidcMFa0Wr1oVa6waiUlrFCohUrSjWCgQFZBcCCUvMMvN+uI/zTpMQE8j2l/w+ZZbMOTNz5i7n3nsORhAEGDFiSND0rYARI7IYjdKIwWE0SiMGh9EojRgcRqM0YnAYjdKIwWE0SiMGh9EojRgcHdQoVRwyMI4s6AXM+NyNGBodtKREGD9Iw6RDGyUAREVFeXh4BAcHX7hwwWijBkLHrb7Lysq6du1K3RMZGbl//34AIAgCwzA96WUEGPpWQD/gOE5apIuLS3BwsIWFxY4dOwBg//79RovULx3UKC9evIhh2MiRI2/cuNHc3Eyn0zt16tTS0hIfH7969eru3bvrW8EOTQdtU37//fddu3a9du0aAJiZmZmYmGAYNm/ePAC4c+eOvrXr6HRQoxSJRN27dzcxMaHu7NSpEwA0NTXpSSkj/6ODGqWLi8vz589lOnmnTp0CgMDAQLTZYbuAeqeDGuXIkSNLS0vXrl2LNgmC+OGHH2JjY0NDQ52dndFOvXR3CIK4efPm7du35ffX1NToXh/9QHRIcBxftmwZAIwaNWrOnDm9evUCgJEjRzY3N+tRJfRj2LBh48ePlzmanp4OAA8ePNC5Xnqgg5aUGIbt3bs3Ojq6rq4uIyPD0dHxzJkzf/zxB4vF0qNK6AeO4ziOk/sJggCAvn37AsDFixf1oZqu6aAuIaFQKJFINm3atGnTJgDAcZxGU/B9EgbgRUcK2NjY2NvbV1RU6FcZ3dBBS0oOh9O5c2f0u7Gx0cPDY/78+fKn6atZKb8Tx3Emk3nt2rUePXoUFBToXitd8vaXlPKlnVAoFAqF5OapU6eKioqKiop+/PFHnWsHIKehSCQiCCIzMzM/P7+wsLCgoKCwsLCwsFAgEKATzM3NDaEI1yJ6bM9qFbLfIM+FCxcAYMKECWhzzJgxANClSxddqfYf5PV0cnIi3w6dTu/Zs2dQUNAnn3zCYDCWLVtWU1OjFz11yVtrlEqYMWOGnZ0dKpCqqqpoNNqcOXNu3bolf6YSy9YeHA4Hw7CrV68WFBQgJQmCaG5uBoCtW7fqXh/d8/ZX3zI0NjYmJyfPnz+fyWQCwIULF3AcX7NmTb9+/fSt2v+QSCQAMH78eGoF3dDQAABW
Vlb60kqXdLiOTlpaWktLy9SpU9HmuXPnPD09kcNFHt2024j/9mwkEgmGYTKi+Xw+AFhZWREdYJypwxllUlKSlZXV8OHDAaCuri49PX3q1Kn67TQg6aS1iUQi+XN4PB4AWFpayqv69plpxzJKqVSakpISFBSE6u7Lly+LxeLJkyfrWy+A/zrP3d3dZY7W19dDK9X329cN71hGeefOnZqamokTJ6LS5fz58y4uLn5+fvrW6/+DFJs5c6bM/vz8fABgs9l60EnnvLUdHULOk0cQRHJyMoPBCAwMxDCssbExNTV18eLFBlXSYBiWkJDw/vvvU3e2tLRs2LABAFAf/P8o8m+kNbRbUhYXF0ulUq2KaA35+8cwLCkpacSIEagSvHr1qlAoJHs8hsOCBQssLS1RHxyRn58vEAgwDKuurtajYoimpqZhw4ZZWFhwuVzqGMQbUePj1563CU3h9vb2fvbsmfakqE5hYSEA7N69G21+9NFHdnZ2EolEr0opAHl/gDIn6Ny5cwCQlJREui21SmveWRzHjxw5QjUeOp3ep0+fzZs3q/iKVfT7atEok5KS0PfRuXPn/fv3U18/mgijPdGEovvfvXs3ABQWFhIE0dLSYmFhERERoVUd2kZeXh565T179kRW+MsvvwBAXl6eHrUqKSkJCgoCAH9//127dhUWFq5cuXLTpk1jxoxBRWBAQMBff/2lEVnaMsr6+vqAgIB79+5xudyAgAAAGDRo0N27d7UkjgTZokKLHzVqVJ8+fdDvy5cvA8DVq1e1rY8qyGibnZ0NAOHh4RiGHT9+nCCIS5cuAYCmXrm6SKXShIQENpttZma2b98+qVQqc0JxcfFXX33l6OhIp9NXrFjB5/PbKVErRkl62k6fPk0QBI7jJ0+etLOzA4AxY8YkJSXJ35hGUFL61tbW0un0devWoc3w8HBLS8tXr15pQ412kpycDADZ2dm9evUaPHgwQRA3btwAgOvXr+tYExzH//jjj3fffRcARo4ciSqZ1qivr4+MjMQwzNXV9c6dO+2Rq3mjFIlEU6ZMAQAHBwccx8lam8fjxcbGotXWHh4e8fHxupnmnZ+fv2XLFuT3yc7OJghCKBRaWlp+/PHHOpDeBo4ePQoARUVFcXFxAPD333///fffAHD+/HldqvH48ePg4GAA6Nat26lTp1A5guP4gQMHli9fnpiYKBQK5f+VmZnp6urKZDIVlqkqomGjJC1yz549BEFIJBJ7e/uZM2eSRbpIJEpMTBw0aBAA2Nvbb926tb6+XlPSyZKyrKzs9OnTixcvpnqhXV1d0WNCtaGB1N3ybN++HQD4fH5tba2pqenixYufPXsGAEeOHNGNAhUVFYsXL6bRaJaWlrGxsdSyo6Ghwc7ODs3P79Kly549exobG2X+XldXh6wZvfc2dB40aZQtLS2TJk0CgLi4OHLnzp07aTRar169Hj16RD35zz//ROsG2Wz2unXrBAJBO6U/f/78+PHj8+fPJw2RzWYHBwfv379/4cKF06dP/+2339CZM2fOtLGx0U1PVl1wHP/yyy87deqEvp/w8HALC4uSkhIA2LlzJ0EQX3/9tb29/dy5c3/55RcNfs+Ipqamr7/+2sLCgsFgfPLJJwqnyeE4LpVKU1NT/f39AcDS0nLFihX//PMP1fikUmlMTIzC964K7TVKUhWBQIBcvvv375c5Jz093dHRkcVi/fTTTzKH7t+/P2PGDAzDPDw8VOkGyX92JSUl27ZtI2dUWFtbT5o0adeuXXfv3lXo7iktLWUymcuXL1f1DnXO/PnznZ2d0W/U6UFl5/bt2wmCePz4cWhoqLW1NQAwGIwxY8bs2rUrPz+/nUIlEsnRo0fRSs7Jkyc/ffpUlX9lZGTMnj0brZcfNWrUmTNnqM30GzduoPd+4sQJQtG7a60QVdsoFV6otrZ2yJAhNBrt2LFjCv9VUVExatQoVKS/ePFC5mh6enrXrl2ZTOZ3333XWkNERi6Pxzt8+PCo
UaOQP2Lo0KFxcXG5ublvbMfk5+f36NHD1dWVx+MpP1NfTJw40dvbG/3GcdzHx8fNzY00SvQcJBJJRkbGl19+2adPH/Q1enp6rl27tqioSC1Z6GppaWn9+/dHHpKMjAx1Fa6qqtq2bVv37t0xDOvSpculS5fIl1VeXj5y5EgAiIiIaGlpUfGCGqi+q6ur+/fvz2Qyz507p+Q0sVgcHR1tYmJibm7+3XffUT8pHMfr6upQYzQgIKCyspJ6iHoRHMevX78+bdo0FNzCw8MjOjqa7BWq2HxJSUkBgF27dqlxk7oCx3F/f//Ro0eTew4fPozMDhmlPEVFRfv27Rs3bhydTqfRaMHBwVeuXJFKpao8jUePHiHvY/fu3RMTE9vjPJZIJCkpKci4w8LCyG9eLBavW7cOAHx8fLhcriqXaq9RNjY2Dhw40NTU9Nq1a0puifSWc7ncDz74AAC8vLx+//13mXMOHTpkamrq5OSUmZkpc4W6urrTp0+jTrStre0nn3ySk5PT5oc4fPhwJyen9jdktcGQIUMCAgLIzVevXtnY2MgbpfwARGlp6aZNmxwcHADAzc1t+/bttbW1rUl59uxZeHg4jUazsrLasWOHwq50ayh57K9evdqyZQudTvf19aU6LJOTkzkcjrW1dXp6+huv3y6jFIvFISEhNBrt4sWLrZ2jsCWRnJyMIpsFBQUhNw3JgwcP3N3dGQzGzp07pVLp6tWrAwICvL29UTXt4eGRkJCgekXQmiaZmZkAEBUV1YbraImGhobs7Gw0RW3QoEEEReFdu3YBwIIFC1S5zqtXrxITE9GEUVNT03nz5v3777/UEyoqKpYvX85kMk1MTD777LOXL18SimokmR9qkZSURKfTAwICqPVhQUGBl5cXg8FISEhQ/ve2GyWO44sWLQKAAwcOKFRd+f00Nzdv3boVNdgDAwNv375NHuLxeB9++CEA9OjRA9VcY8eOjY6OvnnzpgaHqqdNm2Zubl5eXq6pC7aHFy9e9OvXj81moylq7777LvXpvXr1auHChffv31frmrm5uYsXLzY3N2ez2ampqVKp9O7duwsWLGCxWHQ6ffHixaWlpfL/ak8NTv0vWhr63nvvPXnyhNzJ4/HGjx8PAF999ZWS67TdKL/99lsAIMdI2gafz4+JibG1tUWmWVxcjPbjOI7aUkwmc8WKFe0R0RoFBQVMJnP69Ol6WR1GpaioqEePHhYWFmlpaTiO7927t6CgQFMXLy0t9fb2RqsiUW0ze/ZsdH1t3/jPP//M4XBMTU337NlDyhKLxeHh4QAQHR3d2h/baJQ///wzAMyZM0cjN8bn87dt28Zms11cXKjejbq6OisrK+25b2JiYgDghx9+0NL1VeHp06ddu3blcDg5OTlaEtHQ0ECGkvv555+1JEUh5eXlyJFONRWJRBIWFgYAycnJCv/VFqO8c+eOiYmJv7+/Wq1jhVBt+v79+7a2to6OjtTpMG5ubrNnz26nlNYk4jg+btw4Fov18OFDDYpQnby8PAcHBzs7u9zcXK0KEovFyCjDwsK0KkgG1BubO3cuhmFUV79QKPT29nZ0dFTYfFLbKFtaWjw9PV1dXbWxKv7x48dOTk42NjYNDQ1oz6BBg8aNG6dxQSSVlZWOjo69e/eWHy4jtFzBFRYW2tvbOzs7U1td2gO5fjTYMFCd+vr606dPyzzMBw8eoO9E3geitlFOnz4dAL799tt2qakIpHRhYeEXX3xBmsiECRN8fX01LovK77//jmHY/PnztSpFhurqag8PDw6H888//+hGYlBQkJ+fH7n56NEj5J3R/cRWksGDByucJ6q2Ufbu3RsA5P2IWiIsLMzV1VXbUtAKmJMnT2pbEKKoqKhPnz6mpqY6e4wEQUyePJkcKCIIAsX30tIcwtYQiUTr16+nToGrq6uTP01toxQKhceOHSNrAQ1+ZwovtWrVKhTPSauIxWJ/f38Wi4XcLlotPK5fv25jY8PhcGTGDrQEeS+h
oaFubm7kflSb60ABKg8fPgSAJUuWKD9N7YVjAoEgPDwczdyGdiw6JuSW0FPX5JNHbW1tm5qaXr161TYpqsgFADqdfvbsWVtb20mTJlVVVWlvfeOBAwfGjh1rb2+fk5Mjs2RRiXrtgbwXc3Nzao4BNG0ARXLTuNDW6Nevn6enJ9mabA21jRKF40btPADg8/n+/v5tiJjY2otH+8mjaL56VVWVutdXSy6GYQ4ODklJSS9fvpwyZYoGvwESsVi8dOnSyMjIwMDA7Ozsnj17ov0yBqG978HCwqKxsZHctLa2JggCDSDpcpHxuHHjHj9+rPwzUNsos7KymEymn58fupOSkpKMjIzc3Ny2q6kUNJKrm6Wl/fv3P378eFZW1tKlS8mn1p5ShPyvVCqdOXPmwYMH16xZc+nSJWpMAZ0ZhLm5OZquizYtLS3hddwsXeLl5SUQCJSHJFbDKNH9ZGVlDRgwQCY2uGafLNUO7O3tQaMlpRIwDJs+ffrmzZuPHj2KBiFqa2vz8/OLi4vfGAVAoe2SDZKVK1deuHAhLi4uNjaWTqdrRfs3wWazCYIgI6+iD0MvRgkA//77L7T+wSuLkEEQBIZhxOvABhiGiUSinJycpUuXyp+pGZXlQBFE0SitwthOGi9poqKisrKyVq1atWrVKup+MzMzDofTSQ4TE5MuXbp4enp6enr27t0bPXSqVjExMSi73sqVKzWrqlqgz7u6uhqZo75KSg8PDwAoLCwk1+bKo8woZZp3APDkyROhUIhW2GgPqsRu3brZ2Njk5OQsWbJE+ZkalB4fH4+SmOzevdvOzk4oFNbU1NTU1NTX14vFYpFIJBKJxGKxUCgUiUQ8Hi83Nxet9gIALy+vGTNmTJs2DQW8jI2N3bBhQ2hoaGxsrMZVVQvUEKqqqkLNWTRdnBqHA3SSeMDMzAxeR5ZrTZx6sYTKy8sBwNXVVRPqvQGkMYZhgwcPlkl2pO1n5+npeeTIkQULFlRVVX366aeq/IXP5xcWFubk5ERFRUVHR0dHR+fm5l69enXt2rWzZs06duwYyj6BqhS9RC9CRllZWYk2UVYUmZwYOlasLSWlPOiWHB0dNaDRmyA1Hjp0aEpKSl1dHZrnBjp5dvPmzfvrr7+2bdvWp0+fOXPmvPF8Nps9YMCAAQMGTJw4cdWqVQwGA83Bnjp16k8//cRg/O85UzXXQbFEhSwpkVyFRqkDVGnpqacTMkrUOtE2pPbo7T59+rS1E7QBhmGenp7Ozs4LFy5Ei4pU/KOzs/OZM2fmzp2LLlJeXk51xMiI0Ji6KmBnZ4dhGOmFRUap+zJbFYmqlpTo86qsrGSz2ebm5uqKaQPkZVHrh5qES6tyEc+ePfvss8/Qby8vL2tra2tra0tLSw6HY2VlZW5ubmZmZmZmxmKxzM3NWSyW2WucnJx69uw5dOjQmzdvlpaWzps3b8qUKdeuXUN3AfpLGMVgMGxtbcnqG31mui8pVUFVo0TPsbKykppQA6G9EotsVsJ/m+Tke9XeC0bhxVxdXUePHm1lZVVfX8/j8Xg8XlFRUX19fXNzc3Nzc0tLi8y/kLOC3ES5ecLCwhYsWHD06FFUiesxHKaDgwPpXEMdDsPMI612mxI1TUiQrWjW90Z1QgEAWmD65MkTtFgTKO9VSy84Ozs7NjZ22rRpZ8+eVXIajuNCoZA00Obm5qamphcvXhQUFHz//fchISFOTk5z5sx5/vz5pk2bnjx58tNPP73zzjvaUFhFkP8c/daXSwh9DGi6msY6OijcEQmqVRXGjm8zpK5I7x49etDp9OLiYg2KkIH6gGpra2fOnNm1a9dDhw4p/xeNRkNVtvwhlPIRsXHjxl69ei1btszX1zcuLm7RokX6Kizr6urQej2CIPRolJaWljIjOjIGql6ToqKiQqbr7e3t7eLioqVhQKRoU1OTVCq1sLDQhgiqIABA06QrKyvPnj3L4XBAboJI25g+ffqjR4/8/f2XLFny4YcfPn78uP06
twGBQIBmrGEYpq8RHQDo0qVLeXk51QplvlI1jLKpqUkgEMgYJYPB4HK5y5cvb6eiSkhNTQWAYcOGyR/SeHM2JibmypUru3btIqPzy48gtA1HR8dr165t3779999/79u376RJk+7fv99eddXE0tISTekHADqdbm1tXVpaqmMdAMDFxQUNM7aK6pPhuFwuABw9elT1v2iEM2fOAIAOltFcuXIFw7DQ0FDVo960gZcvX27ZssXa2prBYFCX+emA4cOHjxgxgtycNm2as7Ozjuf5EgSxc+dOAFASYUYNo0QL+HUfQe/KlSsAQF0Yrg0KCgqsrKz69+/f1NREaH+RAI/HmzhxIgCEhYXJBN7WntCIiAgOh0OKOH78OADoILyyDCj4PDU0nwwqVd8EQcBrz7lM71sHUAfoNIXM1ZqamlDesfPnzyvsuGhWOgBYWVlduHBhy5YtJ06ciI6OJvdrtQ/k6+vL4/FQtEsAQGG///zzT+1JVIi7uzuK9tvaCSoZJXpS9vb2vr6+aBYMFQ2+sLq6uiNHjlAbOsTrBK6a9TrJDPdFREQ8evQoMTERuZ9Azji0YSs0Gi0qKio8PDw6OhrF3JJB418Caijfu3cPbTo7O7u7u2dkZGhDlnKcnJxQuBjFopUUszJVycaNGwEgMDCwfYW34usXFRWhBeoAsG/fPuppK1euNDExQfGD2hAfprUzyd/x8fGgnfWZqtDc3Ozj48PhcJ4/f65tWQKBwMPDgxoCIDk5OSkpSXsSMzMzV69efenSJZn9ISEh7777Lrkp8wbVaFOSo89tiM2qxG4kEklcXJyZmZmFhcXHH388ZMgQNGmgoaFh69atwcHBAwcOZDKZXl5eCQkJylvlbWiQ5eXlmZqaBgYG6r69j8BxvKCgoHPnzu+9915riQFaiz6l/LLUzfr6+u+++w75g7KystqjsIq0tLSQE1KpvSscxxsbG4OCgvr379/af9Vbz8blch0cHFxdXSsqKtqsLpUHDx689957ADBhwgQy3pJUKj18+DCa9mFraxsaGurh4YEmhw4fPrwNS/dbe4stLS3e3t52dnbUiJh6AQ0dBQYGKowY3ebeD5fLnTVrVkhICPLy+vj4mJmZmZqabt68mQz3oCmoSubm5np7ewPAsmXLysrK0PgNShLSrVs3DMN69+69cuXK1i6l9iLLu3fvmpmZDRw4EPVS26x3Y2PjF198QaPR7OzsTp48SR56+vQpavoMHTqUGl4Hx/Fjx45xOBw6nT537lwV4x8rZOnSpXv37iUIAn3KZHWm30hXCQkJDAajV69eZJSvN9KawiizxOjRo0kna2hoKFo9XFZWNmPGDPS179mzR+NJW2pqapYtW4ZeK7Wd8PDhQxQj3dfXd+PGjdXV1UpEt2Xl78WLFzEMmzp1apurvJSUFDTetXDhQmpgz5SUFDabbWtre+rUKYVPvLKy8rPPPmOxWDQaLTQ09N69e0p8iqdOnYLXDk5yJzkFAfnkIyMj9R51jXitXnp6upWVlbu7e1lZGaHmR1JbW/vrr78uXbrU09MT3eA777wTERERFBQk39zKyckZPXo0ALi5uZ09e1Yj4f9EIlFcXJyVlRWdTo+MjET9GIIgGhoaVq1aRafTbWxs3tgAQ7RxOToK4xkREaFuwMiysjIU+KV3795//vknuZ/P569duxbDsAEDBsg3+WWeWmVl5erVq5Hvxs3Nbc2aNQqj+qKgajY2NlQlydlGPj4+ffr0aVv8Ve1x+/btzp079+rVS0mMbZKmpqbU1NQ1a9aQi0vNzc0nTJiA0tQpF4Tj+NWrV1HI9GHDhsmErlWXK1euILfM2LFj0TeAQlslJiY6OTlhGLZo0SLSTMk70nAgfhzH169fDwCzZs1SJfcHjuMSieTAgQNsNtvExOSbb74hS2+xWPz999+jFuS8efPIVsEbW/cvX75MSEgYP348mhKGfEbUVjxK3QUAGzZsoP7Rw8PD3Nw8JydHeVNSXyXorVu3
zMzM+vXr11pGksLCwq1bt44aNQrN0WQymSNGjIiKisrIyFA9Dwu6O7FYfOjQIeR7/uijj9qQ2vXJkyco2EbPnj2TkpLIh/bs2TPkB/X19VU3AVm7Ands27YNACZOnEgtb+TfZWNj48mTJ9Fys4CAAGrIl+TkZBScyN/fv22501AQ/2PHjo0bNw4AYmJiyEMojEffvn0xDKNG7Rk6dCiKdG8IFbdC0tLSGAzG2LFjRSJRS0tLZWVlUVFRWlrawoULXVxcyJL+888/v3r1qsJ4cSqCngCfz9+4cSOLxerUqdP69etVjPBYV1f36aefMhgMNpu9Y8cOahvx0KFDFhYWnTt3jo+Pb0Pw5TYapYyT7/3331cY1L6kpCQ0NBTVs66urtQOzb1791CzxsPD48KFC222DyV/TEtLA4DLly9369bNy8uL/HImTJhAjT9mCMjHGJdJYkz13js5OWkqWzX16ZWWlqLVSP3795eJhCbzkIVCYVxcnK2tLYZhERERyIWHePHiBQrQOmbMmDZ7XjUQ4uj48eN0On3IkCFklgryHn788UdHR8dFixbdunWLbOGWlJQgP7mNjc3evXu1l/nr1q1bGIalpqaiPg1ZiU+ZMoVMZ2vIJCYmrlmzZuvWrfHx8du2bXN3dx87dqy2A0xeunTJzs7OxMQkLi5OvlMilUpPnDiBOqmjR4+mxgPDcfzEiRNWVlZksts2z2vRTNytc+fOMZlMHx8f6kcjD5/P37Bhg6mpqYmJyZo1azSexU0GlK4LOSbmzJljamqKCpiAgIAhQ4ZoVXQ7Wbdu3ZQpU7QqojX7wHG8srISxYQeO3asTCaub775BrUcUlNTqaV7VVXV5MmTkSOv/enPNBYM7tq1aywWy8vLq7q6Wv4o1R8+a9YsTdU+ykGJQVFTtbS0lMViffTRRwRB+Pn5BQUF6UABdSFfM7xe7oPg8/nx8fG6CfhLFnsHDx5EQUF++eUX8mhBQUFycjK1BJVKpYcOHbKxsTExMdm+fbtG0ndoMkJheno6i8UaMGCAzGhBTk4O6uUMGzasnZmg1WLfvn0AUFFRgR705s2bAeD27dto6YzO1FAXNAFl5MiRaBPHcZROQcm4XPtBHhyZnU+fPkXjbTNmzFAYTTw7OxuNdPj7+8sH5FUoRRVlNBw2MyUlhcFgjBgxAgX44nK5CxcuxDDM0dHxxIkTynXSeF94/fr1dDqd/KzRtHlTU1MAOHz4sJaEth805JiWloY2eTwei8UKDg5+8OCBbhSgPhOxWPzNN98wmUx3d3fqtNyqqqr58+ejXpf8SEc7n6oGjFJGg1OnTmEYhjJeAQCDwfj888+Vj7RqKqGQDPLToFCmFVR8tlmitpkyZYqDgwNZDx48eBAA5BOa6DJWeVZWFofDcXJySk9PF4vFe/futbS0ZDAYX3zxBTXXnabQSoDhI0eOoGRhLi4uCtNa6YbY2Nhhw4bJ7Lx8+fKePXvQbwMsJl++fMlkMqmTFfz8/Ly9vRVWr7okLy+vW7duaNAcw7CAgADttXF1HfXaiHL2798PAGRNnZeXBwB79+7FX6NH3fh8PgB4e3ufO3dOq5q8zUYplUpnz56tMJOX3l9wa/j5+fn4+JCbKNeggSSQ1BmGGEpGUzQ2Np4+ffrWrVvyh8hoMAZFXl7e3bt3UV8bkZycPHDgQPlQOW83b7NRykP8dzlIZGRkZGSkvpSR5/jx4wwGY/bs2Wizurr6zp07ISEh+tVK93Qso5QpHS9fvszj8fSljAwSieTEiRMhISEoIQa8Xs38wQcf6Fcx3aN5oyR0uy6uNQiCQJPZZMK1kb8bGhpKSkpQEGhDIDU1taqqiqy7CYJITk52dnb28fHRp1qvISguNm3L0rxRGkhbDcMwlMKCy+WeOXMmLy9PKBRSdcvPz8cwTL9h0KgcOHDAzs5uwoQJaFMkEv32228hISGG8zxlfmgP9aKu/d+CRqOxWKwbN26Q8dPs7Oy6devm4uLi4uKC
5v/pPraCQjIzM69cuRITE8NkMtGerKwsgUBA2miH4q01SoIgMAwzMzN7//33f/3117y8PC6XW1paWlpaWlBQcP36deR1u3XrFsqlqjOVFB5CvZkVK1aQe/744w8Gg6EwVd5bz1trlOj1i0QiU1NTb29vtOKTyldffRUVFYUmXOlMpdbsEqWjQ7Oh0TlcLtfV1VWrARANlre5900QRHNzs8LYQAAgkUhoNBoZp0U3KGmQUSPGEATx/PlzNJe2A/LWlpQAIBKJpFKpTNoAktraWg6HYyDdCD6fT7YmAQDDMD8/P8PphOmYt9koUTJaNFdNnrq6OhsbG30ZpUw9jgKqUEGTQTsmmIG4FbUBn89Hkb35fL78W09OTq6pqUGTAvVLUVGRQCDw9vY2kGJb/+h8tF13iMXiHj16BAUFKVmPomOVZLh586a7uzv1dQwePBitxiLRu5K6520uKQ0KQq7fnZubO2LECAcHB5SKFO10dHQUCASPHj1CE1I7Jm9z79ugkK+alyxZYmFhERcXV19fv2DBgqamJrFYnJ6e3tTUpDCGagdC30V1B+XBgwcAEBcXh+P4w4cPqXW0i4vLjBkzOmCtTWIsKfXD2bNnGQxGWFgYhmH9+vWjlqPDhw+nzgElOl77ymiU+uHZs2cuLi4of5QMbm5ulZWVaBEmGMwEF11iNEqdQhZ7L1686NKli4zBoaNlZWVOTk4MBqMDmiPCaJQ6hbQzc3NzgUAA/62d0dEXL1507dpVL+oZCEaj1A99+/Z98uSJRCKRLw6Li4tdXFw6YFOSxGiUOkLGyAYPHiwSiRITE2XOuXPnTkFBgb+/f4etu+HtHmY0ZHAcHzp06PPnzzMzM8lBHYFAMGbMGC6XW1xcLD8u2nEwGqXe+Pvvv0eMGCESiZYsWdK3b9+mpqaDBw8WFhaeP38epW3suOjLQWqEIIjy8vJ58+aRNfWAAQOoaT46LMaSUv80NzfX1dVJJJIOO6tXBqNRGjE4jL1vnWIsAlTBaJQ6BTUfjaapHKNR6oGO7INUBaNRGjE4jEZpxOD4f38s0n0vIqZhAAAAAElFTkSuQmCC\n",
405 | "text/plain": [
406 | ""
407 | ]
408 | },
409 | "metadata": {},
410 | "output_type": "display_data"
411 | }
412 | ],
413 | "source": [
414 | "display(input_img)"
415 | ]
416 | },
417 | {
418 | "cell_type": "code",
419 | "execution_count": 22,
420 | "metadata": {},
421 | "outputs": [
422 | {
423 | "name": "stdout",
424 | "output_type": "stream",
425 | "text": [
426 | "O=C1C(=C2Nc3ccccc3C2=O)Nc2ccccc21\n",
427 | "\n"
428 | ]
429 | },
430 | {
431 | "data": {
432 | "image/png": "\n",
433 | "text/plain": [
434 | ""
435 | ]
436 | },
437 | "execution_count": 22,
438 | "metadata": {},
439 | "output_type": "execute_result"
440 | }
441 | ],
442 | "source": [
443 | "print(res[\"smiles\"])\n",
444 | "print()\n",
445 | "res[\"mol\"]"
446 | ]
447 | },
448 | {
449 | "cell_type": "code",
450 | "execution_count": null,
451 | "metadata": {},
452 | "outputs": [],
453 | "source": []
454 | }
455 | ],
456 | "metadata": {
457 | "kernelspec": {
458 | "display_name": "Python 3",
459 | "language": "python",
460 | "name": "python3"
461 | },
462 | "language_info": {
463 | "codemirror_mode": {
464 | "name": "ipython",
465 | "version": 3
466 | },
467 | "file_extension": ".py",
468 | "mimetype": "text/x-python",
469 | "name": "python",
470 | "nbconvert_exporter": "python",
471 | "pygments_lexer": "ipython3",
472 | "version": "3.8.5"
473 | }
474 | },
475 | "nbformat": 4,
476 | "nbformat_minor": 5
477 | }
478 |
--------------------------------------------------------------------------------