├── .github ├── ISSUE_TEMPLATE.md └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── Dockerfile ├── README.md ├── cheminfo ├── __init__.py ├── descriptors.py └── metrics.py ├── data ├── catalyst │ └── journal_data.csv ├── chembl │ └── CHEMBL26-chembl_26_activity-H1-receptor.csv ├── delaney-solubility │ └── delaney-processed.csv └── zinc │ └── ion_channel-fda.csv ├── environment.yml ├── models ├── 9.3_rdkit_pls.joblib ├── morgan_svm.joblib ├── rdfrags_svm.joblib └── rdkit_svm.joblib ├── notebooks ├── 1.2-eda-boston-data.ipynb ├── 1.3-tsne-tanimoto-distance.ipynb ├── 1.4-fragment-visualization.ipynb ├── 4.2-doe_orthogonal.ipynb ├── 4.4-deap_d_optimal_design.ipynb ├── 8.1.2-structure-decomposition.ipynb ├── 8.1.2-structure-generation-brics.ipynb ├── 8.2-bayes-optimization.ipynb ├── 9.2-catalyst-exhaustive.html ├── 9.2-catalyst-exhaustive.ipynb ├── 9.3-decsriptors.ipynb └── 9.4-histamine-antagonist-screening.ipynb └── src ├── data └── 9.3-brics.py ├── features ├── 5.2-3-fragmentor.py ├── 5.4-rdkit_desc.py └── 5.5-run_mordred.py ├── from_root.py ├── from_src.py ├── models └── 9.3-screening.py ├── parallel.py └── parallel_wo_with.py /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **開発環境:** 2 | - RDKitバージョン: 3 | - OSバージョン: 4 | - Pythonバージョン: 5 | - 使っている開発環境(Anaconda,Miniconda,Homebrew etc.): 6 | 7 | **説明:** 8 | 9 | ```python 10 | >>> Code example 11 | ``` 12 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **関連するissue:** 2 | 3 | **どういう変更ですか?** 4 | 5 | **その他** 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 
| # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM frolvlad/alpine-glibc:alpine-3.8_glibc-2.28 2 | 3 | LABEL maintainer 'sshojiro' 4 | 5 | RUN apk update && \ 6 | apk --no-cache add bash ca-certificates wget libxext libxrender libstdc++ && \ 7 | update-ca-certificates && \ 8 | apk --update add tzdata && \ 9 | cp /usr/share/zoneinfo/Asia/Tokyo /etc/localtime && \ 10 | apk del tzdata 11 | 12 | RUN echo 'export PATH=/opt/anaconda/bin:$PATH' > /etc/profile.d/anaconda.sh && \ 13 | wget --quiet https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/anaconda.sh && \ 14 | /bin/bash ~/anaconda.sh -b -p /opt/anaconda && \ 15 | rm ~/anaconda.sh 16 | 17 | ENV PATH /opt/anaconda/bin:$PATH 18 | 19 | RUN conda install joblib=0.14.1 networkx=2.4 numpy=1.18.1 \ 20 | pandas=0.25.3 python=3.7.0 scikit-learn=0.22.1 \ 21 | scipy=1.4.1 && \ 22 | conda install -c rdkit rdkit=2019.09.3.0 && \ 23 | conda install -c conda-forge pymatgen=2019.12.3 && \ 24 | pip install --upgrade pip && \ 25 | pip install jupyter==1.0.0 matplotlib==3.1.3 tqdm==4.42.1 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # サポートページ 2 | 3 | [『実践 
マテリアルズインフォマティクス』](https://amzn.to/3dSszJL)のサポートページです。 4 | 5 | ## プロジェクトの始め方 6 | 7 | 本リポジトリを使用する際には 8 | 9 | 1. このリポジトリをクローンする(手元へダウンロードする) 10 | 1. 必要なライブラリを揃える 11 | 12 | という2つのステップを踏む必要があります。 13 | 14 | ### このリポジトリをクローンする 15 | 16 | Git BashやMac OS Xのターミナルを開いて、以下のコマンドを実行してください。 17 | support-pageというフォルダが作成されます。 18 | 19 | ```bash 20 | $ git clone https://github.com/funatsu-lab/support-page.git 21 | ``` 22 | 23 | ### 必要なライブラリを揃える 24 | 25 | #### それぞれのライブラリをインストールする場合 26 | 27 | ```bash 28 | $ conda create -n regz python==3.7 numpy scipy pandas scikit-learn 29 | $ conda install -c rdkit rdkit 30 | $ pip install jupyter 31 | ``` 32 | 33 | #### `environment.yml`ファイルを使ってインストールする場合 34 | 35 | Anaconda (Miniconda)の`conda env create`コマンドを使って
36 | (Pythonの)仮想環境を構築します。本フォルダには既に
37 | `environment.yml`という必要なライブラリのリストが
38 | 掲載されているテキストファイルがありますので利用してください。 39 | 40 | ```bash 41 | $ conda env create -n [Python仮想環境名] -f environment.yml 42 | ``` 43 | 44 | ## プロジェクトの構成 45 | 46 | 以下のフォルダ構成は[Cookiecutter Data Science](https://github.com/drivendata/cookiecutter-data-science)が
47 | テンプレートとして作成するフォルダ構成を参考に作っています。
48 | 他の人も利用可能な状態にフォルダを維持しておくのは
49 | プログラミングの生産性の観点からも重要です。
50 | 可能な限り以下のフォルダ構成を維持するようにしてください。
51 | もちろん、必要に応じてフォルダが増減しても構いません。 52 | 53 | ``` 54 | support-page 55 | |-- README.md # プロジェクト概要を示すテキストファイル 56 | |-- environment.yml # Anacondaにインストールしたライブラリの情報を書き出す 57 | |-- setup.py # cheminfo ライブラリをコンパイルする場合は作成する 58 | |-- bin # シェルスクリプトを書いたら保存しておくフォルダ 59 | | `-- compile_package.sh 60 | |-- cheminfo # 自作ライブラリを保存するフォルダで、今回は`cheminfo`とした 61 | | |-- __init__.py # __init__.pyを必ず含む 62 | | |-- descriptors.py 63 | | `-- metrics.py 64 | |-- data # データを保存するフォルダ。データセットが多様ならデータソースごとに 65 | | |-- catalyst # サブフォルダを作っておくと良い 66 | | |-- chembl 67 | | |-- delaney-solubility 68 | | `-- zinc 69 | |-- models # 作った機械学習モデルを保存するフォルダ 70 | | |-- 9.3_rdkit_pls.joblib 71 | | |-- morgan_svm.joblib 72 | | |-- rdfrags_svm.joblib 73 | | `-- rdkit_svm.joblib 74 | |-- notebooks # 探索的な解析,可視化をする場合はJupyter Notebookを用いた解析をする 75 | | |-- 1.2-eda-boston-data.ipynb 76 | | |-- 1.3-tsne-tanimoto-distance.ipynb 77 | | |-- 1.4-fragment-visualization.ipynb 78 | | |-- 4.2-doe_orthogonal.ipynb 79 | | |-- 4.4-deap_d_optimal_design.ipynb 80 | | |-- 8.1.2-structure-decomposition.ipynb 81 | | |-- 8.1.2-structure-generation-brics.ipynb 82 | | |-- 8.2-bayes-optimization.ipynb 83 | | |-- 9.2-catalyst-exhaustive.ipynb 84 | | |-- 9.3-decsriptors.ipynb 85 | | `-- 9.4-histamine-antagonist-screening.ipynb 86 | |-- references # 文献を保存しておくフォルダ 87 | |-- results # 解析結果の図を保存しておくフォルダ。必要に応じてサブフォルダを作る 88 | `-- src # Jupyter Notebookで実行すべきではない重い処理や 89 | | # 何度も実行する処理をスクリプトにまとめて保存するフォルダ 90 | |-- from_root.py 91 | |-- from_src.py 92 | |-- parallel.py 93 | |-- parallel_wo_with.py 94 | |-- data 95 | | `-- 9.3-brics.py 96 | |-- features 97 | | |-- 5.2-3-fragmentor.py 98 | | |-- 5.4-rdkit_desc.py 99 | | `-- 5.5-run_mordred.py 100 | `-- models 101 | `-- 9.3-screening.py 102 | ``` 103 | 104 | ## コンテンツ 105 | 106 | 0. Pythonの基礎(テキスト本編にはありません) [gist](https://nbviewer.jupyter.org/gist/sshojiro/e437645bb071bcb6c072f9cc6dbb11fa) 107 | 1. 
CoLabでの演習 108 | - 1.2 Bostonデータ可視化 [gist](https://nbviewer.jupyter.org/gist/sshojiro/d614503df0db630ac8194e381a7e5588) 109 | - 1.3 tSNEでのタニモト距離基準の可視化 [gist](https://nbviewer.jupyter.org/gist/sshojiro/01579415335916620109f5c45e69826e) 110 | - 1.4 フラグメント可視化 [gist](https://nbviewer.jupyter.org/gist/sshojiro/946737ed021eae99b08e6b2cd0b4cc12/1-4-fragment-visualization.ipynb) 111 | 2. (環境構築,テキスト参照のこと) 112 | 3. (マテリアルズインフォマティクス概論,テキスト参照のこと) 113 | 4. 実験計画法 114 | - 4.2 直交計画法 [gist](https://nbviewer.jupyter.org/gist/sshojiro/975bd4c31e32fde35ddae14987510fa5/4-2-doe_orthogonal.ipynb) 115 | - 4.4 D最適化計画 [gist](https://nbviewer.jupyter.org/gist/sshojiro/1806ea69ce0b190a38a516bc050d36a9) 116 | 5. 記述子計算(スクリプト実行) 117 | - 5.2 フラグメントカウントの実装 118 | - 5.3 RDKit組み込みのフラグメントカウント 119 | - 5.4 RDKit記述子 120 | - 5.5 Mordred 121 | - 5.6 Pymatgen 122 | - 5.6.1 [元素物性 gist](https://nbviewer.jupyter.org/gist/sshojiro/decde333f82748c7df668374b571e75c) 123 | - 5.6.2 [組成比 gist](https://nbviewer.jupyter.org/gist/sshojiro/2868de251878d3f52a2e6521b430a968) 124 | 6. (機械学習,テキスト参照のこと) 125 | 7. (機械学習モデルの解釈,テキスト参照のこと) 126 | 8. 追加検討 127 | - 8.1.1 自作構造生成 128 | - 8.1.2 ReCAP,BRICSによる構造生成 129 | - 8.2 ベイズ最適化 130 | 9. 解析例 131 | - 9.2 触媒データを使った解析 132 | - 9.3 水溶解度データを使った構造生成、スクリーニング 133 | - 9.4 ChEMBLデータを用いた分類。カーネル法と分類問題 134 | 10. (Bashによるデータ加工,テキスト参照のこと) 135 | 136 | ## Dockerによる起動 137 | 138 | 本項はDockerの操作を分かっている方がご利用ください。 139 | 140 | **ローカルでイメージをビルドする場合** 141 | 142 | ```bash 143 | $ docker build -t example/matcheminfo . # Dockerfileからビルド 144 | $ docker run -i -v /$(pwd):/workspace -p 9999:8888 -t example/matcheminfo # 現在のフォルダとDockerコンテナ内を-vオプションで紐付ける(マウントする) 145 | ``` 146 | 147 | **イメージを取得する場合** 148 | 149 | ```bash 150 | $ docker pull sshojiro/alpine:latest 151 | $ docker run -i -v /$(pwd):/workspace -p 9999:8888 -t sshojiro/alpine:latest # 現在のフォルダとDockerコンテナ内を-vオプションで紐付ける(マウントする) 152 | ``` 153 | 154 | ## 誤字脱字等の報告 155 | 156 | 本書あるいは本リポジトリに誤りが見つかった場合は
157 | [Issueを立ててください](https://github.com/funatsu-lab/support-page/issues/new/choose)。 158 | -------------------------------------------------------------------------------- /cheminfo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/funatsu-lab/support-page/cb0adbbba5fad51af2e0c689b51d86f4b77559fa/cheminfo/__init__.py -------------------------------------------------------------------------------- /cheminfo/descriptors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.sparse import lil_matrix 3 | from sklearn.base import TransformerMixin, BaseEstimator 4 | from rdkit.Chem import Descriptors, RDKFingerprint 5 | from rdkit.Chem.AllChem import GetHashedMorganFingerprint, GetMorganFingerprintAsBitVect 6 | __all__ = ['RDKitDescriptor', 'HashedMorgan', 'BinaryHashedMorgan', 'RDKitFingerprint'] 7 | 8 | 9 | class RDKitDescriptor(TransformerMixin, BaseEstimator): 10 | """ 11 | Transformer that computes RDKit descriptors. 12 | """ 13 | def __init__(self): 14 | pass 15 | def fit(self, x, y=None): 16 | return self 17 | def transform(self, data): 18 | """Apply every function in `Descriptors.descList` to each element of `data` and return the values as an `np.matrix`, one row per element. NOTE(review): elements are passed directly to the descriptor functions, so they are presumably RDKit Mol objects rather than SMILES strings - confirm.""" 19 | return np.matrix(list(map(lambda m: 20 | list(map( 21 | lambda f:f(m), dict(Descriptors.descList).values() 22 | )) 23 | , data))) 24 | 25 | 26 | class HashedMorgan(TransformerMixin, BaseEstimator): 27 | """ 28 | Transformer that extracts hashed Morgan fingerprints (counts). 29 | """ 30 | def __init__(self, n_bits=512, radius=3): 31 | self.n_bits = n_bits 32 | self.radius = radius 33 | def fit(self, x, y=None): 34 | return self 35 | def transform(self, data): 36 | """Return an (n_samples, n_bits) array filled from the nonzero elements of `GetHashedMorganFingerprint` for each element of `data`. NOTE(review): elements are passed as `mol=`, so they are presumably RDKit Mol objects rather than SMILES strings - confirm.""" 37 | n_samples = len(data) 38 | D = lil_matrix((n_samples, self.n_bits)) 39 | for ix, mol in enumerate(data): 40 | morgan = GetHashedMorganFingerprint(mol=mol, 41 | radius=int(self.radius), 42 | nBits=int(self.n_bits)).GetNonzeroElements() 43 | for key, val in morgan.items(): 44 | D[ix, key]=val 45 | 
return D.toarray() 46 | 47 | 48 | class BinaryHashedMorgan(TransformerMixin, BaseEstimator): 49 | """ 50 | Transformer that extracts Morgan fingerprints (presence/absence). 51 | """ 52 | def __init__(self, n_bits=512, radius=3): 53 | self.n_bits = n_bits 54 | self.radius = radius 55 | def fit(self, x, y=None): 56 | return self 57 | def transform(self, data): 58 | """Return an (n_samples, n_bits) array whose rows are the `GetMorganFingerprintAsBitVect` result for each element of `data`. NOTE(review): elements are passed as `mol=`, so they are presumably RDKit Mol objects rather than SMILES strings - confirm.""" 59 | n_samples = len(data) 60 | D = lil_matrix((n_samples, self.n_bits)) 61 | for ix, mol in enumerate(data): 62 | D[ix, :] = GetMorganFingerprintAsBitVect(mol=mol, 63 | radius=int(self.radius), 64 | nBits=int(self.n_bits)) 65 | return D.toarray() 66 | 67 | 68 | class RDKitFingerprint(TransformerMixin, BaseEstimator): 69 | """ 70 | Transformer that extracts RDKit fingerprints (presence/absence). 71 | doc: http://rdkit.org/docs/source/rdkit.Chem.Fingerprints.FingerprintMols.html 72 | """ 73 | def __init__(self, n_bits=256, fraglen=7): 74 | self.n_bits = n_bits 75 | self.fraglen = fraglen 76 | def fit(self, x, y=None): 77 | return self 78 | def transform(self, data): 79 | """Return an (n_samples, n_bits) array whose rows are the `RDKFingerprint` result (fpSize=n_bits, minPath=1, maxPath=fraglen) for each element of `data`. NOTE(review): elements are passed as `mol=`, so they are presumably RDKit Mol objects rather than SMILES strings - confirm.""" 80 | n_samples = len(data) 81 | D = lil_matrix((n_samples, self.n_bits)) 82 | for ix, mol in enumerate(data): 83 | D[ix, :] = RDKFingerprint(mol=mol, # radius=int(self.radius), 84 | fpSize=int(self.n_bits), 85 | minPath=1, maxPath=int(self.fraglen)) 86 | return D.toarray() 87 | -------------------------------------------------------------------------------- /cheminfo/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | __all__ = ['t2_score', 'q_value'] 5 | 6 | def t2_score(data, model): 7 | "Compute T2 values: row-wise sum of squared `model.transform(data)` scores, each divided by the standard deviation of `model.best_estimator_.x_scores_`." 8 | assert type(data)==pd.DataFrame or type(data)==np.ndarray, "input must be pandas.DataFrame or np.array" 9 | explained_std_ = np.sqrt(model.best_estimator_.x_scores_.var(axis=0)) 10 | scores_whiten = model.transform(data) / explained_std_ 11 | return (scores_whiten ** 2.).sum(axis=1) 12 | 13 | def q_value(data, model): 14 | 
"Compute Q values: row-wise sum of squared differences between `data` and its reconstruction from `model.transform(data)`, the x loadings, `x_std_` and `x_mean_` of `model.best_estimator_`." 15 | assert type(data)==pd.DataFrame or type(data)==np.ndarray, "input must be pandas.DataFrame or np.array" 16 | x_reproduced_ = model.transform(data) \ 17 | @ model.best_estimator_.x_loadings_.T \ 18 | * model.best_estimator_.x_std_ + model.best_estimator_.x_mean_ 19 | return ((data - x_reproduced_)**2.).sum(axis=1) 20 | -------------------------------------------------------------------------------- /data/catalyst/journal_data.csv: -------------------------------------------------------------------------------- 1 | label,Ag,K,Mo,W,Re,Au,Cl,Mg,Fe,Temp,P,GHSV,C3,O2,NOx,EtCl,CO2,pNOx,pEtCl,pCO2,Conv,Sel,Yield 2 | 1,40,2,0,4.7,0,0,0,0,0,258,40,1200,10,5,0.02,0.005,10,0,0,0,12,48,5.8 3 | 2,40,2,0,4.7,0,0,0,0,0,258,40,1200,10,5,0.02,0.005,5,0,0,0,13,44,5.7 4 | 3,50,2,0.5,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,1,13,44,5.7 5 | 4,50,2,0.5,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,0,13,42,5.5 6 | 5,40,2,0,4.7,0,0,0,0,0,258,40,1200,10,5,0.02,0.005,25,0,0,0,10,54,5.4 7 | 6,51,2.1,0.51,0,0,0,0,0,0,250,30,1200,4,8,0,0,0,0,1,1,10,54,5.4 8 | 7,40,2,0,4.7,0,0,0,0,0,258,40,1200,10,5,0.02,0.005,0,0,0,1,13,41,5.3 9 | 8,51,1.6,1.8,0,0,0,0,0,0,250,30,1200,10,5,0.02,0.005,0,0,0,0,13,40,5.2 10 | 9,45,1.3,0,0,0.37,0,0,0,0,250,30,1200,10,5,0.02,0.005,0,0,0,0,14,37,5.2 11 | 10,45,1.3,0,0,0.37,0,0,0,0,250,30,1200,10,5,0.02,0.005,11.2,0,0,0,10,51,5.1 12 | 11,38,2,0,5,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,25,0,0,0,8.8,53,4.7 13 | 12,38,2,0,5,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,0,13.3,35,4.7 14 | 13,44,1.7,0.44,0,0,0,0.05,0,0,232,100,2400,4.15,8.15,0,0,0,0,1,0,9.1,51,4.6 15 | 14,50,2,0,0,0.5,0,0.6,0,0,250,30,1200,10,5,0.02,0,0,0,0,0,10.9,42.4,4.6 16 | 15,40,2,0,0,0,0,0,0.24,0.027,250,30,1200,10,5,0.02,0.005,0,0,0,0,12,38,4.6 17 | 16,40,2,0,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,1,13,35,4.6 18 | 17,40,2,0,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,25,0,0,0,8.7,52,4.5 19 | 18,44,1.7,0.44,0,0,0,0.05,0,0,250,30,1200,10.7,5.2,0.02,0.05,0,0,0,0,10.6,42.6,4.5 20 | 
19,52,1.8,0.55,0,0,0,0.05,0,0,250,30,1200,4,8,0,0,0,1,1,0,9,50,4.5 21 | 20,40,2,0,0.5,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,1,12,37,4.4 22 | 21,54,2,0.5,0,0,0,0,0,0,250,30,1200,10,5,0.02,0.005,0,0,0,0,11.2,39.5,4.4 23 | 22,40,2,0,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,0,13,34,4.4 24 | 23,39,2.3,0,0.6,0,0,0,0,0,232,100,2400,4,8,0,0,15,0,1,0,8,55,4.4 25 | 24,40,2,0,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,1,12.4,35,4.3 26 | 25,40,2,0,4.7,0,0,0,0,0,258,40,1200,10,5,0.02,0.005,0,0,0,0,12,36,4.3 27 | 26,37,2,0,0,0,0,0,0.79,0.067,250,30,1200,10,5,0.02,0.005,0,0,0,0,11,38,4.2 28 | 27,40,2,0,0.5,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,0,11,37,4.1 29 | 28,40,2,0,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,0,12.6,32,4 30 | 29,40,2,0,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,0,12.6,32,4 31 | 30,40,2,0,0,0,0,0,0.26,0.038,250,30,1200,10,5,0.02,0.005,0,0,0,0,11.5,35,4 32 | 31,52,1.8,0.55,0,0,0,0.05,0,0,240,30,1200,4,8,0,0,0,1,1,0,8,50,4 33 | 32,45,1.3,0,0,0.37,0,0,0,0,250,30,1200,10,5,0.02,0.005,52.1,0,0,0,7,57,4 34 | 33,50,2,0.5,0,0,0,0.6,0,0,250,30,1200,10,6,0,0,0,0,0,0,11,36,4 35 | 34,40,2,0,4.7,0,0,0,0,0,258,40,1200,10,5,0.02,0.005,50,0,0,0,7,56,3.9 36 | 35,37,2,0,0,0,0,0,0.41,0.023,250,30,1200,10,5,0.02,0.005,0,0,0,0,10,38,3.8 37 | 36,38,2,0,5,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,50,0,0,0,6.8,55,3.7 38 | 37,43,1.7,0,0,0,0,0,0,0,250,30,1200,10,5,0.02,0.005,0,0,0,0,11,33,3.6 39 | 38,51,2.1,0.51,0,0,0,0,0,0,232,30,1200,4,8,0,0,0,0,1,1,7.5,48,3.6 40 | 39,47,1.7,0,0,0,0,0,0,0,250,30,1200,10,5,0.02,0.005,0,0,0,0,11,32,3.5 41 | 40,39,2,0,0,0,0,0,0.3,0.0006,250,30,1200,10,5,0.02,0.005,0,0,0,0,10,35,3.5 42 | 41,40,2,0,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,50,0,0,0,6.3,54,3.4 43 | 42,44,1.7,0.44,0,0,0,0.05,0,0,250,100,1200,8.15,6.35,0,0,14.8,0,1,0,6.5,52,3.4 44 | 43,47,1.7,0,0,0,0,0,0,0,250,30,1200,10,5,0.02,0.005,11.8,0,0,0,8,42,3.4 45 | 44,50,2,0,0,0,0,0.6,0,0,250,30,1200,10,5,0.02,0,0,0,0,0,11,30,3.3 46 | 
45,50,2,0.5,0,0,0,0.6,0,0,250,30,1200,10,6,0,0,0,0,1,0,9,35,3.2 47 | 46,54,2,0.5,0,0,0,0,0,0,240,30,1200,5,5,0.0075,0.02,20,0,0,0,4.5,61,2.7 48 | 47,38,2,0,0,0,0,0,0.001,0.07,250,30,1200,10,5,0.02,0.005,0,0,0,0,9,30,2.7 49 | 48,52,2.1,0,0,0,0,0,0,0,245,30,1200,10,5,0.0075,0.02,10,0,0,0,5.1,52,2.7 50 | 49,25,1,0,0,0,4.6,0,0,0,250,30,1200,10,5,0.02,0,0,0,0,0,6,44,2.6 51 | 50,47,1.7,0,0,0,0,0,0,0,250,30,1200,10,5,0.02,0.005,51.4,0,0,0,5,52,2.6 52 | 51,44,1.7,0.44,0,0,0,0.05,0,0,232,100,2400,4.09,7.97,0,0,14.48,0,1,0,3.9,64,2.5 53 | 52,50,2,0.5,0,0,0,0.6,0,0,250,30,1200,10,6,0,0,12,0,1,0,5,49,2.5 54 | 53,38,1.9,0,0,0,0,0,0.001,0.0006,250,30,1200,10,5,0.02,0.005,0,0,0,0,7,35,2.5 55 | 54,50,2,0.5,0,0.5,0,0,0,0,250,30,1200,10,5,0.02,0,0,0,1,0,5.4,44.3,2.4 56 | 55,50,2,0.5,0,0,0,0.6,0,0,250,30,1200,10,6,0,0,16,0,0,0,5,47,2.4 57 | 56,48,1.8,0.59,0,0,0,0.05,0,0,216,300,4800,4.6,7.6,0,0,0,0,1,0,3.8,59,2.2 58 | 57,50,2,0.5,0,0.5,0,0,0,0,250,30,1200,10,5,0.02,0,0,0,1,0,5,41.8,2.1 59 | 58,49,0.57,0.49,0,0,0,0.7,0,0,250,30,1200,8,8,0.0005,0.0005,10,0,0,0,4,52,2.1 60 | 59,49,0.57,0.49,0,0,0,0.7,0,0,250,30,2400,8,8,0,0,0,0,0,0,6,34,2 61 | 60,49,0.57,0.49,0,0,0,0.7,0,0,250,30,1200,8,8,0,0,10,0,0,0,3.5,58,2 62 | 61,49,0.57,0.49,0,0,0,0.7,0,0,250,30,1200,8,8,0,0.0005,10,0,0,0,3.5,58,2 63 | 62,44,1.7,0.44,0,0,0,0.05,0,0,232,100,2400,4.1,8.12,0,0,14.43,0,1,0,3.2,60,1.9 64 | 63,54,2,0.5,0,0,0,0,0,0,245,30,1200,10,5,0.0075,0.02,10,0,0,0,3.2,59,1.9 65 | 64,53,1.1,0.54,0,0,0,0,0,0,245,30,1200,10,5,0,0.02,10,0,0,0,2.8,58,1.6 66 | 65,36,2,0,0,0,0,0,0.0005,0.0005,250,30,1200,10,5,0.02,0.005,0,0,0,0,6,27,1.6 67 | 66,49,0.57,0.49,0,0,0,0.7,0,0,240,30,2400,8,8,0.0005,0.0005,10,0,0,0,2.5,60,1.5 68 | 67,44,1.7,0.44,0,0,0,0.05,0,0,232,100,2840,9.3,6.4,0.038,0,13.8,0,1,0,2.8,53.2,1.5 69 | 68,50,2,0.5,0,0.5,0,0,0,0,250,30,1200,10,5,0.02,0.005,9.6,0,1,0,2.2,63.8,1.4 70 | 69,52,1.8,0.55,0,0,0,0.05,0,0,250,30,1200,4,8,0,0,15,1,1,0,2,70,1.4 71 | 
70,50,2,0.5,0,0.5,0,0,0,0,250,30,1200,10,5,0.02,0,9.6,0,1,0,1.6,63.2,1 72 | 71,25,1,0.5,0,0,4.6,0,0,0,250,30,1200,10,5,0,0,0,0,0,0,2,50,1 73 | 72,49,0.57,0.49,0,0,0,0.7,0,0,250,30,2400,8,8,0.0005,0.0005,10,0,0,0,1.5,64,1 74 | 73,49,0.57,0.49,0,0,0,0.7,0,0,250,30,2400,8,8,0,0,10,0,0,0,1.5,58,0.9 75 | 74,25,2,0,0,0,4.6,0,0,0,250,30,1200,10,5,0,0,0,0,0,0,2,40,0.8 76 | 75,25,2,0,0,0,4.6,0,0,0,250,30,1200,10,5,0.02,0.005,0,0,0,0,2,40,0.8 77 | -------------------------------------------------------------------------------- /data/zinc/ion_channel-fda.csv: -------------------------------------------------------------------------------- 1 | zinc_id,smiles 2 | ZINC000003807804,Clc1ccccc1C(c1ccccc1)(c1ccccc1)n1ccnc1 3 | ZINC000095616601,COc1cc2nc(N3CCN(C(=O)c4ccco4)CC3)nc(N)c2cc1OC 4 | ZINC000003831405,C=C[C@H]1CN2CC[C@H]1C[C@@H]2[C@@H](O)c1ccnc2ccc(OC)cc12 5 | ZINC000085205448,COC(=O)C1=C(C)NC(C)=C(C(=O)OC)C1c1ccccc1[N+](=O)[O-] 6 | ZINC000000607939,COC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)C)[C@H]1c1cccc2nonc12 7 | ZINC000000607986,O[C@@H](CNC[C@@H](O)[C@H]1CCc2cc(F)ccc2O1)[C@H]1CCc2cc(F)ccc2O1 8 | ZINC000245204924,Cl[C@H]1[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H]1Cl 9 | ZINC000003806063,C[C@H](Cc1cc2c(c(C(N)=O)c1)N(CCCO)CC2)NCCOc1ccccc1OCC(F)(F)F 10 | ZINC000000004448,Cc1nccn1C[C@H]1CCc2c(c3ccccc3n2C)C1=O 11 | ZINC000019632718,COCCOC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)C)[C@H]1c1cccc([N+](=O)[O-])c1 12 | ZINC000000897251,COc1cc2c(cc1OC)C(=O)[C@H](CC1CCN(Cc3ccccc3)CC1)C2 13 | ZINC000019632713,COCCOC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)C)[C@@H]1c1cccc([N+](=O)[O-])c1 14 | ZINC000238809662,COc1ccc(C[C@H]2c3cc(OC)c(OC)cc3CC[N@@+]2(C)CCC(=O)OCCCCCOC(=O)CC[N@@+]2(C)CCc3cc(OC)c(OC)cc3[C@H]2Cc2ccc(OC)c(OC)c2)cc1OC 15 | ZINC000003794601,CN(C)CCC[C@]1(c2ccc(F)cc2)OCc2cc(C#N)ccc21 16 | ZINC000005844788,O[C@@H](CNC[C@@H](O)[C@H]1CCc2cc(F)ccc2O1)[C@@H]1CCc2cc(F)ccc2O1 17 | ZINC000001530575,COc1cc(CNC(=O)CCCC/C=C/C(C)C)ccc1O 18 | 
ZINC000004474460,C=C1CC[C@H](O)C/C1=C/C=C1\CCC[C@@]2(C)[C@H]1CC[C@@H]2[C@H](C)CCCC(C)C 19 | ZINC000003800706,CN(C)CCC[C@@]1(c2ccc(F)cc2)OCc2cc(C#N)ccc21 20 | ZINC000000001084,Cn1c(=O)c2c(ncn2C)n(C)c1=O 21 | ZINC000034676245,N[C@@H]1CON=C1O 22 | ZINC000000596731,CN(CCOc1ccc(NS(C)(=O)=O)cc1)CCc1ccc(NS(C)(=O)=O)cc1 23 | ZINC000084400879,COC(=O)C1=C(C)NC(C)=C(C(=O)OCCN(C)Cc2ccccc2)[C@H]1c1cccc([N+](=O)[O-])c1 24 | ZINC000019796087,COC(=O)C1=C(C)NC(C)=C(C(=O)OCCN(C)Cc2ccccc2)[C@@H]1c1cccc([N+](=O)[O-])c1 25 | ZINC000000596881,Clc1ccc(CO[C@H](Cn2ccnc2)c2ccc(Cl)cc2Cl)cc1 26 | ZINC000000003876,Cc1ccc(-c2nc3ccc(C)cn3c2CC(=O)N(C)C)cc1 27 | ZINC000003079342,C[N+](C)(C)CCOC(N)=O 28 | ZINC000000020228,C[C@H](NC(C)(C)C)C(=O)c1cccc(Cl)c1 29 | ZINC000001550477,CS(=O)(=O)CCNCc1ccc(-c2ccc3ncnc(Nc4ccc(OCc5cccc(F)c5)c(Cl)c4)c3c2)o1 30 | ZINC000000968310,CN1CCN2c3ncccc3Cc3ccccc3[C@H]2C1 31 | ZINC000006745272,CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)c(F)c2)ccn1 32 | ZINC000238809663,COc1ccc(C[C@@H]2c3cc(OC)c(OC)cc3CC[N@@+]2(C)CCC(=O)OCCCCCOC(=O)CC[N@@+]2(C)CCc3cc(OC)c(OC)cc3[C@H]2Cc2ccc(OC)c(OC)c2)cc1OC 33 | ZINC000019632706,COC(=O)C1=C(C)NC(C)=C(C(=O)OCC(C)C)[C@@H]1c1ccccc1[N+](=O)[O-] 34 | ZINC000000591993,COC(=O)C1=C(C)NC(C)=C(C(=O)OCC(C)C)[C@H]1c1ccccc1[N+](=O)[O-] 35 | ZINC000019796168,CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12 36 | ZINC000000075126,Cc1nccn1C[C@@H]1CCc2c(c3ccccc3n2C)C1=O 37 | ZINC000001530886,CCCc1nc2c(C)cc(-c3nc4ccccc4n3C)cc2n1Cc1ccc(-c2ccccc2C(=O)O)cc1 38 | ZINC000238809665,COc1ccc(C[C@H]2c3cc(OC)c(OC)cc3CC[N@+]2(C)CCC(=O)OCCCCCOC(=O)CC[N@@+]2(C)CCc3cc(OC)c(OC)cc3[C@H]2Cc2ccc(OC)c(OC)c2)cc1OC 39 | ZINC000000537822,O=C(CCCN1CCC(O)(c2ccc(Cl)cc2)CC1)c1ccc(F)cc1 40 | ZINC000001530611,CNCCCN1c2ccccc2CCc2ccccc21 41 | ZINC000238809664,COc1ccc(C[C@@H]2c3cc(OC)c(OC)cc3CC[N@+]2(C)CCC(=O)OCCCCCOC(=O)CC[N@@+]2(C)CCc3cc(OC)c(OC)cc3[C@H]2Cc2ccc(OC)c(OC)c2)cc1OC 42 | ZINC000000016154,CCOC(=O)Nc1ccc(NCc2ccc(F)cc2)cc1N 43 | 
ZINC000000006481,Nc1nc2ccc(OC(F)(F)F)cc2s1 44 | ZINC000011679756,CC1=NN(c2ccc(C)c(C)c2)C(=O)/C1=N\Nc1cccc(-c2cccc(C(=O)O)c2)c1O 45 | ZINC000001999441,O[C@@H](CNC[C@@H](O)[C@@H]1CCc2cc(F)ccc2O1)[C@H]1CCc2cc(F)ccc2O1 46 | ZINC000100018854,CN1[C@H]2CCC[C@@H]1C[C@H](NC(=O)c1nn(C)c3ccccc13)C2 47 | ZINC000103105084,O=C(O[C@@H]1C[C@@H]2C[C@H]3C[C@H](C1)N2CC3=O)c1c[nH]c2ccccc12 48 | ZINC000003922770,C[C@@H](O)[C@H]1C(=O)N2C(C(=O)O)=C(S[C@@H]3CN[C@H](CNS(N)(=O)=O)C3)[C@H](C)[C@H]12 49 | ZINC000003830842,O=C(NC[C@@H]1CCCCN1)c1cc(OCC(F)(F)F)ccc1OCC(F)(F)F 50 | ZINC000004658552,NCC(=O)O 51 | ZINC000000002212,Cc1nnc2n1-c1ccc(Cl)cc1C(c1ccccc1Cl)=NC2 52 | ZINC000003951740,Cc1cccc(C)c1OCC(=O)N[C@@H](Cc1ccccc1)[C@@H](O)C[C@H](Cc1ccccc1)NC(=O)[C@H](C(C)C)N1CCCNC1=O 53 | ZINC000036294079,CN[C@@]1(c2ccccc2Cl)CCCCC1=O 54 | ZINC000000000565,Cc1ccccc1[C@H](OCCN(C)C)c1ccccc1 55 | ZINC000001530697,CSc1ccc2c(c1)N(CC[C@H]1CCCCN1C)c1ccccc1S2 56 | ZINC000001530707,CCC(=O)C(C[C@H](C)N(C)C)(c1ccccc1)c1ccccc1 57 | ZINC000019796155,CN1CCN(C2=Nc3cc(Cl)ccc3Nc3ccccc32)CC1 58 | ZINC000019796080,O=C(CCCN1CC=C(n2c(=O)[nH]c3ccccc32)CC1)c1ccc(F)cc1 59 | ZINC000019632834,CN1CCN(C(=O)O[C@H]2c3nccnc3C(=O)N2c2ccc(Cl)cn2)CC1 60 | ZINC000019796018,CN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1 61 | ZINC000035999642,CN[C@]1(c2ccccc2Cl)CCCCC1=O 62 | ZINC000013537284,Cc1nc[nH]c1CN1CCc2c(c3ccccc3n2C)C1=O 63 | ZINC000019418959,CN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1 64 | ZINC000001530706,CCC(=O)C(C[C@@H](C)N(C)C)(c1ccccc1)c1ccccc1 65 | ZINC000001481956,Cc1nc2n(c(=O)c1CCN1CCC(c3noc4cc(F)ccc34)CC1)CCC[C@H]2O 66 | ZINC000001536779,CNCC[C@H](Oc1cccc2ccccc12)c1cccs1 67 | ZINC000001530760,CCCNC[C@@H](O)COc1ccccc1C(=O)CCc1ccccc1 68 | ZINC000000006427,CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21 69 | ZINC000000007673,COC[C@@H](NC(C)=O)C(=O)NCc1ccccc1 70 | ZINC000000538312,Cc1nc2n(c(=O)c1CCN1CCC(c3noc4cc(F)ccc34)CC1)CCCC2 71 | ZINC000000013156,Nc1nnc(-c2cccc(Cl)c2Cl)c(N)n1 72 | ZINC000000001464,CCOC(=O)c1ncn2c1CN(C)C(=O)c1cc(F)ccc1-2 73 
| ZINC000006716957,Cc1cn(-c2cc(NC(=O)c3ccc(C)c(Nc4nccc(-c5cccnc5)n4)c3)cc(C(F)(F)F)c2)cn1 74 | ZINC000004475353,CC1=C(CC(=O)O)c2cc(F)ccc2/C1=C\c1ccc([S@](C)=O)cc1 75 | ZINC000001530759,CCCNC[C@H](O)COc1ccccc1C(=O)CCc1ccccc1 76 | ZINC000001530811,CCCCNc1ccc(C(=O)OCCN(C)C)cc1 77 | ZINC000002522669,CCC(=O)N(c1ccccc1)C1CCN(CCc2ccccc2)CC1 78 | ZINC000001530571,O=C1CC2(CCCC2)CC(=O)N1CCCCN1CCN(c2ncccn2)CC1 79 | ZINC000000538550,O=C1Cc2cc(CCN3CCN(c4nsc5ccccc45)CC3)c(Cl)cc2N1 80 | ZINC000000000096,CN(C)CC[C@@H](c1ccc(Br)cc1)c1ccccn1 81 | ZINC000004175630,Oc1nc2ccccc2n1C1CCN(CCCC(c2ccc(F)cc2)c2ccc(F)cc2)CC1 82 | ZINC000004213946,O[C@@H](CNC[C@@H](O)[C@@H]1CCc2cc(F)ccc2O1)[C@@H]1CCc2cc(F)ccc2O1 83 | ZINC000001996117,NC(=O)C(c1ccccc1)(c1ccccc1)[C@@H]1CCN(CCc2ccc3c(c2)CCO3)C1 84 | ZINC000000057206,C[C@@H](NC(C)(C)C)C(=O)c1cccc(Cl)c1 85 | ZINC000004214700,Cc1nc2n(c(=O)c1CCN1CCC(c3noc4cc(F)ccc34)CC1)CCC[C@@H]2O 86 | ZINC000004340269,NC(N)=NC(=O)c1nc(Cl)c(N)nc1N 87 | ZINC000000527386,Fc1ccc([C@@H]2CCNC[C@H]2COc2ccc3c(c2)OCO3)cc1 88 | ZINC000000009073,CN1CCc2cccc3c2[C@H]1Cc1ccc(O)c(O)c1-3 89 | ZINC000001482113,N[C@@H](CCC(=O)O)C(=O)O 90 | ZINC000000020244,CN(C)CCOC(c1ccccc1)c1ccccc1 91 | ZINC000000391812,CN1CCC[C@H]1c1cccnc1 92 | ZINC000000896543,O=C(NC[C@H]1CCCCN1)c1cc(OCC(F)(F)F)ccc1OCC(F)(F)F 93 | ZINC000000896557,CN1[C@H]2CCC[C@H]1CC(NC(=O)c1nn(C)c3ccccc13)C2 94 | ZINC000001530579,COc1ccccc1OCCNC[C@H](O)COc1cccc2[nH]c3ccccc3c12 95 | ZINC000001530580,COc1ccccc1OCCNC[C@@H](O)COc1cccc2[nH]c3ccccc3c12 96 | ZINC000001530637,CNCC[C@H](Oc1ccc(C(F)(F)F)cc1)c1ccccc1 97 | ZINC000001530689,CC/C(=C(\c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1 98 | ZINC000000897085,O[C@@H](c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)[C@H]1CCCCN1 99 | ZINC000000402830,CN1CCC[C@@H]1CCO[C@](C)(c1ccccc1)c1ccc(Cl)cc1 100 | ZINC000052509463,CC(C)(C)c1cc(C(C)(C)C)c(NC(=O)c2c[nH]c3ccccc3c2=O)cc1O 101 | ZINC000052957434,Cc1cc2c(s1)Nc1ccccc1N=C2N1CCN(C)CC1 102 | ZINC000034051848,Cc1ccc(Sc2ccccc2N2CCNCC2)c(C)c1 103 | 
ZINC000118912393,C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2O 104 | ZINC000100001964,CCOC(=O)C1=C(COCCN)NC(C)=C(C(=O)OC)[C@@H]1c1ccccc1Cl 105 | ZINC000094566093,COc1cc2nc(N3CCN(C(=O)[C@H]4COc5ccccc5O4)CC3)nc(N)c2cc1OC 106 | ZINC000003830716,CCOC(=O)C1(c2ccccc2)CCN(CCC(C#N)(c2ccccc2)c2ccccc2)CC1 107 | ZINC000094566092,COc1cc2nc(N3CCN(C(=O)[C@@H]4COc5ccccc5O4)CC3)nc(N)c2cc1OC 108 | ZINC000100001918,COC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)C)[C@@H]1c1cccc2nonc12 109 | ZINC000100001965,CCOC(=O)C1=C(COCCN)NC(C)=C(C(=O)OC)[C@H]1c1ccccc1Cl 110 | ZINC000030691763,CCCC(=O)OCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)[C@H]1c1cccc(Cl)c1Cl 111 | ZINC000030691760,CCCC(=O)OCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)[C@@H]1c1cccc(Cl)c1Cl 112 | ZINC000000897089,O[C@H](c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)[C@@H]1CCCCN1 113 | ZINC000000968336,Cc1ccc(O)c([C@H](CCN(C(C)C)C(C)C)c2ccccc2)c1 114 | ZINC000000001148,CN(C)CC/C=C1/c2ccccc2COc2ccccc21 115 | ZINC000000897240,CN1CCC[C@H](n2nc(Cc3ccc(Cl)cc3)c3ccccc3c2=O)CC1 116 | ZINC000000537931,CCOC(=O)N1CCC(=C2c3ccc(Cl)cc3CCc3cccnc32)CC1 117 | ZINC000001842633,CNCC[C@@H](Oc1ccccc1C)c1ccccc1 118 | ZINC000003936683,O=C(O[C@H]1CN2CCC1CC2)N1CCc2ccccc2[C@@H]1c1ccccc1 119 | ZINC000012402836,CCN(CC)CCOc1ccc(/C(=C(\Cl)c2ccccc2)c2ccccc2)cc1 120 | ZINC000000896455,CN(C)CC[C@H](c1ccc(Br)cc1)c1ccccn1 121 | ZINC000003079336,CC(=O)OCC[N+](C)(C)C 122 | ZINC000000643055,Clc1ccc(CO[C@@H](Cn2ccnc2)c2ccc(Cl)cc2Cl)cc1 123 | ZINC000000044027,CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc21 124 | ZINC000003830212,CCCCc1oc2ccccc2c1C(=O)c1cc(I)c(OCCN(CC)CC)c(I)c1 125 | ZINC000000597013,COc1cc2c(cc1OC)C(=O)[C@@H](CC1CCN(Cc3ccccc3)CC1)C2 126 | ZINC000000020237,CCN(CC)CC(=O)Nc1c(C)cccc1C 127 | ZINC000000020240,CC(C)NC[C@@H](O)COc1cccc2ccccc12 128 | ZINC000000000509,CN1CCN2c3ncccc3Cc3ccccc3[C@@H]2C1 129 | ZINC000019228902,OCCN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1 130 | ZINC000019203912,OCCN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1 131 | ZINC000008220878,CCCCCCCN(CC)CCC[C@H](O)c1ccc(NS(C)(=O)=O)cc1 132 | 
ZINC000019144226,CCN(CC)CCC[C@@H](C)Nc1ccnc2cc(Cl)ccc12 133 | ZINC000008214402,CCCCCCCN(CC)CCC[C@@H](O)c1ccc(NS(C)(=O)=O)cc1 134 | ZINC000019144216,COc1ccc(CN(CCN(C)C)c2ccccn2)cc1 135 | ZINC000000014360,CNS(=O)(=O)Cc1ccc2[nH]cc(CCN(C)C)c2c1 136 | ZINC000000020257,Cc1cccc(C)c1OC[C@@H](C)N 137 | ZINC000000601229,CN1CCC[C@@H](n2nc(Cc3ccc(Cl)cc3)c3ccccc3c2=O)CC1 138 | ZINC000001530638,CNCC[C@@H](Oc1ccc(C(F)(F)F)cc1)c1ccccc1 139 | ZINC000001530695,CSc1ccc2c(c1)N(CC[C@@H]1CCCCN1C)c1ccccc1S2 140 | ZINC000001530769,CCC(=O)O[C@](Cc1ccccc1)(c1ccccc1)[C@H](C)CN(C)C 141 | ZINC000043763856,CN[C@@]1(C)[C@@H]2CC[C@@H](C2)C1(C)C 142 | ZINC000000001681,CCOC(=O)C1(c2ccccc2)CCN(C)CC1 143 | ZINC000003812888,COc1ccc(CCN(C)CCC[C@](C#N)(c2ccc(OC)c(OC)c2)C(C)C)cc1OC 144 | ZINC000003786192,CC1=C(CC(=O)O)c2cc(F)ccc2/C1=C\c1ccc([S@@](C)=O)cc1 145 | ZINC000001853550,CN[C@H]1CC[C@@H](c2ccc(Cl)c(Cl)c2)c2ccccc21 146 | ZINC000003784182,COc1ccc(-c2ccc3cc(C(=O)O)ccc3c2)cc1C12CC3CC(CC(C3)C1)C2 147 | ZINC000003812933,C[C@@]12CC3CC(N)(C1)C[C@](C)(C3)C2 148 | ZINC000000056556,CC(C)NC[C@H](O)COc1cccc2ccccc12 149 | ZINC000003871832,COc1ccc(CCN(C)CCC[C@@](C#N)(c2ccc(OC)c(OC)c2)C(C)C)cc1OC 150 | ZINC000003872605,COCCCC/C(=N\OCCN)c1ccc(C(F)(F)F)cc1 151 | ZINC000003805768,COc1cc2c(cc1OC)CC(=O)N(CCCN(C)C[C@H]1Cc3cc(OC)c(OC)cc31)CC2 152 | ZINC000000968257,CN(C)CCC=C1c2ccccc2CCc2ccccc21 153 | ZINC000003932831,C[C@]12CC[C@H]3[C@@H](CC[C@H]4NC(=O)C=C[C@]34C)[C@@H]1CC[C@@H]2C(=O)Nc1cc(C(F)(F)F)ccc1C(F)(F)F 154 | ZINC000003964325,CCN(CC)CCNC(=O)c1c(C)[nH]c(/C=C2\C(=O)Nc3ccc(F)cc32)c1C 155 | ZINC000000057062,Cc1cccc(C)c1OC[C@H](C)N 156 | ZINC000000113404,CN(C)CC[C@@H](c1ccc(Cl)cc1)c1ccccn1 157 | ZINC000000113410,CN(C)CC[C@H](c1ccc(Cl)cc1)c1ccccn1 158 | ZINC000001851149,O=C1CCc2ccc(OCCCCN3CCN(c4cccc(Cl)c4Cl)CC3)cc2N1 159 | ZINC000001530625,CCCCCc1cc(O)c2c(c1)OC(C)(C)[C@@H]1CCC(C)=C[C@@H]21 160 | ZINC000000020245,CN(C)CCCN1c2ccccc2CCc2ccccc21 161 | 
ZINC000000643153,CC(=O)N1CCN(c2ccc(OC[C@@H]3CO[C@@](Cn4ccnc4)(c4ccc(Cl)cc4Cl)O3)cc2)CC1 162 | ZINC000000155269,Cc1ccccc1[C@@H](OCCN(C)C)c1ccccc1 163 | ZINC000000000903,Cc1nnc2n1-c1ccc(Cl)cc1C(c1ccccc1)=NC2 164 | ZINC000001550499,C[C@@H](NCCCc1cccc(C(F)(F)F)c1)c1cccc2ccccc12 165 | ZINC000019144231,CCN(CC)CCC[C@H](C)Nc1ccnc2cc(Cl)ccc12 166 | ZINC000000621893,COc1ccc([C@@H]2Sc3ccccc3N(CCN(C)C)C(=O)[C@@H]2OC(C)=O)cc1 167 | ZINC000003795819,O=C1c2cccc3c2[C@H](CCC3)CN1[C@@H]1CN2CCC1CC2 168 | ZINC000019632628,OCCOCCN1CCN(C2=Nc3ccccc3Sc3ccccc32)CC1 169 | ZINC000049933061,CCCCc1oc2ccc(NS(C)(=O)=O)cc2c1C(=O)c1ccc(OCCCN(CCCC)CCCC)cc1 170 | ZINC000085534336,CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@H](N(C)C)[C@H]2O)[C@](C)(O)C[C@@H](C)C(=O)[C@H](C)[C@@H](O)[C@]1(C)O 171 | ZINC000096272772,Cc1cc(Nc2ncc(Cl)c(Nc3ccccc3S(=O)(=O)C(C)C)n2)c(OC(C)C)cc1C1CCNCC1 172 | ZINC000001571009,CC(C)(C)CC(C)(C)c1ccc(OCCOCC[N+](C)(C)Cc2ccccc2)cc1 173 | ZINC000003874185,O[C@@H](c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)[C@@H]1CCCCN1 174 | ZINC000030691754,COc1ccc(NC(=O)c2ccc(C(=N)N(C)C)cc2)c(C(=O)Nc2ccc(Cl)cn2)c1 175 | ZINC000038140873,CN[C@]1(C)[C@@H]2CC[C@@H](C2)C1(C)C 176 | ZINC000003794794,O=C1c2c(O)ccc(O)c2C(=O)c2c(NCCNCCO)ccc(NCCNCCO)c21 177 | ZINC000003802417,C[C@H]1CN(C[C@H](Cc2ccccc2)C(=O)NCC(=O)O)CC[C@@]1(C)c1cccc(O)c1 178 | ZINC000000537964,O[C@H](c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)[C@H]1CCCCN1 179 | ZINC000000537964,O[C@H](c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)[C@H]1CCCCN1 180 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - rdkit 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - joblib=0.14.1 7 | - networkx=2.4 8 | - numpy=1.18.1 9 | - pandas=0.25.3 10 | - pymatgen=2019.12.3 11 | - python=3.7.0 12 | - rdkit=2019.09.3.0 13 | - scikit-learn=0.22.1 14 | - scipy=1.4.1 15 | - pip: 
16 | - jupyter==1.0.0 17 | - matplotlib==3.1.3 18 | - tqdm==4.42.1 19 | -------------------------------------------------------------------------------- /models/9.3_rdkit_pls.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/funatsu-lab/support-page/cb0adbbba5fad51af2e0c689b51d86f4b77559fa/models/9.3_rdkit_pls.joblib -------------------------------------------------------------------------------- /models/morgan_svm.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/funatsu-lab/support-page/cb0adbbba5fad51af2e0c689b51d86f4b77559fa/models/morgan_svm.joblib -------------------------------------------------------------------------------- /models/rdfrags_svm.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/funatsu-lab/support-page/cb0adbbba5fad51af2e0c689b51d86f4b77559fa/models/rdfrags_svm.joblib -------------------------------------------------------------------------------- /models/rdkit_svm.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/funatsu-lab/support-page/cb0adbbba5fad51af2e0c689b51d86f4b77559fa/models/rdkit_svm.joblib -------------------------------------------------------------------------------- /notebooks/1.4-fragment-visualization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# RDKitを使った分子構造可視化" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stderr", 17 | "output_type": "stream", 18 | "text": [ 19 | "RDKit WARNING: [14:04:23] Enabling RDKit 2019.09.3 jupyter extensions\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "from rdkit import Chem\n", 25 
| "from rdkit.Chem import rdMolDescriptors\n", 26 | "from rdkit.Chem.Draw import IPythonConsole\n", 27 | "from rdkit.Chem import Draw" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | "'2019.09.3'" 39 | ] 40 | }, 41 | "execution_count": 2, 42 | "metadata": {}, 43 | "output_type": "execute_result" 44 | } 45 | ], 46 | "source": [ 47 | "import rdkit \n", 48 | "rdkit.__version__" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "## IbuprofenからMorgan fingerprintを取り出す" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "[1, 79, 80, 283, 310, 389, 650, 807, 854, 857]\n" 68 | ] 69 | }, 70 | { 71 | "data": { 72 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAfu0lEQVR4nO3de1yMefsH8GtKp8k2M4oVm5VkeTrJuWKXSg45szxrVyHsCmG12CQph5Aocs6W0y724MGzaxUhqZBqS3hQWIdSmpmOU83h98e9O9uPmampmfs7h+v98urV1Lfp02vbq+ue+3tgSCQSQAgh1FoGpAMghJB2a0c6AEKoZRiMv97BK0gNg2UUIY1HFVBp9XzrISINL+oR0gZNiyYWUA2DZRQhzcZgyKibEsk/1/iINCyjCCHUJlhGEUKoTbCMIoRQm2AZRQihNsEyipBmk3k36d37TpWVsHAh5ObSlgtJYRlFSBs0raQy79FHR8O+fTB3LjQ20hYKUbCMIqTxJJK/elLqH/XwLatXg7095ORAVBSJiHqNgVuTIKQjbtyAYcPA0BCyssDVlXQaPYLdKEK6wt0dAgOhsREv7WmGZRQhbVNYKPdTW7aAnR3k5sK2bTQG0ndYRhHSKnPngpMTXL8u+7NMJhw8CAwGRERAQQG9yfQXllGEtEq3biAWg78/1NTIHjBiBHz5JdTXg78/XtrTA8uoGqWnpwsEAtIpkG4JDYV+/aCoCNatkzsmOhp69IA7d2DHDhqT6S+8U68u5eXltra2FhYW+fn5HTp0IB0H6ZDcXBg0CEQiuHoVhg6VPebyZfD2BmNjyM4GBwd68+kd7EbVZfv27dXV1f369cMailSsb19YuRLEYpg3D+rqZI/x9IR586C+HgICQCSiN5/ewW5ULd68eWNra1tVVZWVlTVo0CDScZDOaWiAAQMgPx9WroQtW2SPqawEJyd49gyio2HFCnrz6RfsRtUiOjq6qqrK19cXayhSC2NjSEoCIyOIjob0dNljLCwgIQEYDFizBu7dozeffsFuVPWwFUU0+fZbiIqC3r0hJwdMTWWPmTsXvvsO3NwgLQ0MDenNpy+wG1W97du3YyuK6BAeDg4OcP8+RETIHbNjB9jYQEYG7N5NYzL9gt2oimErimiVlQUeHsBgwI0bMHCg
7DG//gq+vsBkQm4u2NvTm08vYDeqYjExMVVVVWPHjsUaiugweDB8/TUIheDvD/ImKY8dC35+UFsLs2eDWExvPr2A3agqVVRU2NraVlZWZmZmDh48mHQcpB/q66FfPygshLVr5V7dV1SAoyNUVOTv2+c0ezat8fQAdqOqFBMTU1lZOWbMGKyhiD4mJpCQAIaGsHkzZGfLHtOhQ+3Bg5NsbNyXLCkuLqY3n+7DMqoyPB4vPj4eANasWUM6C9IzQ4ZAUBAIhTB3LjQ0yBzC9PU1HzSouro6ICAAr0FVC8uoymzfvp3H440ePdrDw4N0FqR/NmwAe3v44w/YtEnekLi4uM6dO6empu7fv5/OaDoPXxtVDR6PZ2try+Px0tLShspb5oyQWmVkwNChYGAAWVnQr5/MIWfPnp04caK5ufkff/zRo0cPmgPqKuxGVSMmJobH440aNQprKCLGzQ0WL/7r0l7OFnkTJkyYMWNGTU3N/PnzsYVSFexGVYDP53fv3h1bUUReba140KDfrazyRo5cLec1+vLyckdHx9LS0gMHDsyfP5/mgDoJu1EVoFpRHx8frKGIMCbzRny877Vr6yIiCuTsfm9lZRUXFwcAwcHBz549ozefbsIy2lZ8Pp/6pQwNDSWdBSEY+sknCxcubGho8PPza5RzaT99+vRp06ZVVlbiXXuVwDLaVlQrOnLkyGHDhpHOghAAwNatW+3s7HJycqKjo+WN2bt3b6dOnVJSUhITE2mMppvwtdE24fP5tra2XC732rVrWEaR5khNTfXy8jI2Ns7OznaQs/v9999/P3PmTBaLlZ+fb2NjQ3NCXYLdaJvs2LGDy+ViK4o0zYgRIxYsWFBfXx8QECCSs/v9Z599NmXKFD6f/9VXX9EcT8dgN9p62IoiTVZTU+Ps7FxUVLR169ZvvvlG5piSkhIHB4eKioqkpCQ/Pz+aE+oM7EZbb+fOnVwu19vbG2so0kDm5uYHDhxgMBhr164tLCyUOaZz5847d+4EgKCgoOfPn9MbUHdgGW0lPp8fGxsLAGvXriWdBSHZvLy85s6dq/jSftasWZMmTeLz+QsXLqQ5HgAAg/HXP3mfbeEHicIy2kpUK+rl5fXxxx+TzoKQXDExMTY2NpmZmdRffZn27NnD4XDOnz9//Phx+pJR1VMi+eufgmKq8fC10daorKzs3r07l8u9evUqllGk4VJSUnx8fExNTfPy8uzl7H6fmJg4Z84cS0vLgoKCzp070xGLqqHKfkTeB4nCbrQ1qFbU09MTayjSfN7e3v7+/nV1df7+/vIu7WfPnj1mzJg3b94sXrxYvWkEAigtlV0KqZ5UC2E3qrTKykpbW9uKioorV6588sknpOMg1Dw+n+/o6Pj8+fPY2NigoCCZY54/fz558uSYmBjlbpnW1QGXC1wuCAT/vC/vn0AAXO5fXyiz8rxVXrWkG8UyqrTIyMiwsLARI0ZcvnyZdBaEWuq///3vuHHjmExmXl5ez549ZY6RSCS8v/H5fAVvuVyuoLr6RUUFCIXK5TAyAjYbyspaWkblBFXum6oZllHlYCuKtNesWbOOHTvm4eFx7do1A4O/XtB7+fLlmDFjpCVSqScUm5szRCIwMwNTU+BwmvlHDbO2/ufm0ruwG9UHGzZsWLt27fDhw1NTU0lnQUg5PB7P0dHxxYsXu3fvXrRoEQAkJydPmTKltrZW/PeJoSwWi81mN3377kPpB63YbIaRUWuiYBnVW9XV1ba2tuXl5ampqcOHDycdByGlnTt3bsKECebm5nl5eXZ2dh9//HFaWtqyZcuCgoLYbDaHw6Evig7dqccyqoSNGzeGhoZ6eHhcv36ddBaEWmnmzJmnT58+dOhQly5dfHx8LC0ti4uL33vvPbpzUK97SuvPWw+lH8Qyqkuqq6t79OhRVlaGrSjSauXl5U+fPu3fvz/VikZFRa1atYpYGulNpJZc4yv4IFHtSAfQGnFxcWVlZR4eHlhDkVazsrKysrJK
SUlJS0uztLQMDAwkmUZxQZT5WQ2roYBltIWqq6upHRwiIyNJZ0EAzU3N4fF4HA4nNDSU1hf7tAr1mxwcHEzgcl7n4EV9i2zevDkkJARfFVWruro6rnwCgaDpgLKyMqHCGYscDqexsfHSpUuDBg2i7UfQFpcuXfL29ib2qqjOwW60eTU1NTt27ACAiIgI0lm0Rm1tbdPeUNoqcrlcmf1jbW2tst9C5nQc6dsLFy789NNP/v7+OTk5pqam6vgZtRf1m7xixQqsoSqB3WjzqFbU3d09PT2ddBZNJBKJNm3adO3ataYVU95havIYGhpS5Y/D4SgojtK3zV6t19fX9+/f/+7duyEhIRs3bmzDz6drLl++7OXlha2oCmEZbUZNTY2trW1ZWVlKSoqXlxfpOBpHKBS6uroKBIJHjx699SlTU1OOQmZmZtIxnTt3lq6rUZWsrCwPDw8Gg3Hjxo2BAweq9sm11/Dhw69evbpx48aQkBDSWXSFBCm0efNmAHB3dycdREMlJCQAgI2NzYULF7Kysh48eFBSUiIQCEjn+ktwcDAA9OnTp66ujnQWjUBtBNGhQ4fKykrSWXQHllFFqqurO3XqBADJycmks2gioVBI7V95/Phx0llkEwgE//rXvwBg7dq1pLNoBGojiA0bNpAOolOwjCoSFRUFAG5ubqSDaKjDhw8DgL29vVAoJJ1FroyMDENDw3bt2t2+fZt0FsKojSA6dOjA5/NJZ9EpWEblkraiFy9eJJ1FEwmFwl69egHAsWPHSGdpxvLlywHA2dm5vr6edBaSqJUj2IqqHN5igpqaGpmzuH///fcrV664ubnduHGDdEZNRB07YW9vX1hY2K6dRs+cq62t7du378OHD8PDw9etW0c6Dhnp6elDhw5ls9nFxcVsNpt0HJ2i0b/9rfPWLO63pm2/paKior6+Xt5TderUycXFhc7w2kIkElE338LCwjS8hgIAk8lMTEwcNmzYhg0bxo8f369fP9KJCKCOsF2xYgXWUJXT9G5UKBQ23XD73enc764CVPZbmJmZvbu1IpvNLi0tTUxMZDKZubm58g4C01tJSUmzZ8/u2bPnvXv3NL+MUoKCgnbt2uXi4nLr1i2j1u2SqbWwFVUrTfkfIDMz88iRI++WyJqaGmWfysLCQvG+sxwOp+lDY2NjeU8lkUioepGWlqbyWY3aS2NbUYFAcPHixQkTJsj8bFRU1G+//ZaXl7dly5bQ0FCas5EVFhYGAF9//TXWUHXQlG706NGjfn5+Mj8lcxZ305nbTXXs2FGFjYb0ILC4uLglS5ao6mm13ZEjR/z9/TWtFW1sbBwwYEBBQUFqaqq8E1uvXLni6elpZGR0+/ZtJycnmhOSQrWiLBbryZMnWEbVguwdLqkHDx7s3r372LFj586dS0tLy8/Pf/bsmSbMED5//jwAMJnMhw8fks6iEYRC4UcffQQASUlJpLO8LTw8HABsbW2rqqrkjaH2hXN1dW1oaKAzG0Genp4AsH79etJBdJamlFFN9sUXXwDA8OHDxWIx6SzkHTlyBAB69uzZ2Ngoc0BycvInn3ySlpZGczCJRNLY2Ni/f38AWLZsmbwx1dXVdnZ2ALB582Y6s5FCbQTBYrG4XC7pLDoLy2jzuFxu165dASA+Pp50FsJa0opSp5xHRUXRGUwqNzfXyMjIwMDg2rVr8sZcunSJwWCYmJgUFBTQmY0IaiOI8PBw0kF0GZbRFjl79iwAmJubP3r0iHQWko4ePQoAdnZ2ClpRALC0tCT4ggx1O6VXr161tbXyxnz55ZcAMHjwYE1ef9V20la0oqKCdBZdhmW0pf79738DgKenp95e2guFwt69ewNAYmKivDHUvR1SrSilsbHR1dUVAIKDg+WN4fP53bp1A4CtW7fSmY1m3t7eALBu3TrSQXQcltGWKi8vf//99wFg//79pLOQcezYMcWtaEpKCvFWlJKTk0Nd2it4iTYlJYW6tL979y6d2WhDrb7DVpQG
WEaVcObMGQCwsLB4+vQp6Sx0k7ai3333nbwxVCuqIbduqM00P/roIwWX9gEBAQAwZMgQnby0HzlyJLai9MAyqpxPP/0UALy8vPTt0l6LWlGKQCBwdHQEgFWrVskbw+fzbWxsACAmJobObDSgWlELCwtsRWmAZVQ5ZWVl1LZPhw4dIp2FPtJW9PDhw/LGUK3opk2b6Aym2J07d4yMjAwNDbOysuSNSU5OZjAYTCbzf//7H53Z1M3HxwcAwsLCSAfRC1hGlfb9999Tf+efPXtGOgtNjh8/DgA9evSQ14peunRJo1pRqVWrVgFA7969Fex+P3v2bABwc3MTiUR0ZlOfjIwM6lf0zZs3pLPoBSyjrTF16lQA8Pb21odLe5FIRF0dJyQkyBtD7am+ceNGOoO1hEAgcHBwAICQkBB5Y3g83gcffAAAsbGxdGZTn1GjRgFu+E8jLKOt8fr1644dOyq+36IzTpw4AQAffvihvD2PNfx4n8zMTGr3+5s3b8obo0tLfrEVpR+W0VaibrmwWKw///yTdBY1kraiCl4L1vzjfVasWAEAffr0UXDWns4s+R09ejQAhIaGkg6iR7CMtt7kyZMBwNfXl3QQNaJeCFbQiqalpQEAm83W5CXb0oPtFNxykS753b17N53ZVOv27dsMBqN9+/ZlZWWks+gRLKOt9/Llyw4dOgDA0aNHSWdRC5FIRO0md/DgQXljqON9IiMj6QzWCjdu3Gj2YDsdWPI7ZswYAFizZg3pIPoFy2ibJCYmUr3Y8+fPSWdRvR9++AEAunXrJq8VvX79uua3olLLli0DABcXFwUH23322WcAMGLECG28tJe2oq9fvyadRb9gGW2riRMnAsD48eNJB1ExaSt64MABeWNGjBihFa0opaamhjoMRsHOm9Ilv/v27aMzm0pQraiCOQlITbCMttWLFy84HA4AnDhxgnQWVTp58qQutaKU9PR0AwODdu3aZWdnyxtDLfk1Nzd//PgxndnaCFtRgrCMqkBCQgI1+bykpIR0FtUQi8XOzs6KW1FqT/WIiAg6g7UddRhM3759Fex+Ty351a7dvMaOHYutKClYRlWDup6aOHEi6SCqcerUqZa0otq4p3pNTU3Pnj0Vz9CSLvlVcG9No1CtqLm5ObaiRGAZVY3nz59Th4WdOnWKdJa2kraiCrYE1OrjfVJTUxkMhrGxcX5+vrwx1O01bVny6+vrCwDffvst6SB6Csuoyhw4cAAArKysSktLSWdpk9OnTytuRXXgeJ+FCxc2e7Cdtiz5zc7OxlaULCyjKiMWi6m1zDNmzCCdpfXEYrGLi4viW9U6cLyP9GA7BRv1a8uS33HjxgHA6tWrSQfRX1hGVenJkyfvvfceAPz444+ks7QS1Yra2NjIWzepM8f7tORgO81f8ittRbX9GkirYRlVsT179gBAx44dtfEKS9qK7t27V94YXTreZ8GCBc0ebDdlyhSNWvLb0NBQVlb28OHD27dvJycnu7m5Kd6aGtGAIZFIAKmORCIZNWpUcnLy559/TvUyWuSnn36aNm2ajY3Nw4cPTUxM3h2QkZHh7u7OYrGKi4up2bJarbKy0snJ6dmzZ9u2bQsODpY5pqSkxMHBoaKi4siRI7NmzVJ5hrq6OoFAUFdXx1VIOqa0tFQsFku/3NLSsq6uLj8/v0ePHirPhloIy6jqPXnyxNnZuaqq6ueff6a2L9EKEonE1dU1Ly9vz5491B2Yd/n4+CQnJ69bty48PJzedOpy6dKlkSNHGhsb37lzh9q+5F1Hjhzx9/dns9kFBQXU9iUKVFZW8ng8Pp/P4/Gk7zR9y+Vymz5saGhQKrChoSGLxWKz2RwOh8Vi3bt379WrVwsWLNi/f79Sz4NUCMuoWuzevXvJkiXW1tYFBQXU9iWa7+eff546dar+tKJSAQEBiYmJsbGxixcvljdm8uTJZ86ccXNzmz9/vrwSyeVyeTyesv9DmZiYsNlsNptNFUcWi8XhcJo+fOst
9eK71P37911dXevr63/77TfqDieiH5ZRtRCLxZ6enlevXvXz80tKSiIdp3kSiWTgwIHZ2dnx8fGBgYEyx4waNerixYthYWHr16+nOZ5a8fn8/Pz8oUOHKhjz4sULe3t7Npv96tUrxc9mamrK4XA4HI6ZmZn0fXnMzMza/gcpKirq22+//fDDD/Pz898qsogeWEbVpbi42NnZubq6+pdffpk0aRLpOM345ZdfpkyZYm1tXVRUZGpq+u6AzMxMNzc3CwuL4uJibemvVSg9PX3o0KFmZmZTpkyxtLSU9obSd6hLbDabbWhoSHM2oVDo7u5+69atwMDA+Ph4mr87AgC8U69GO3bsAABra2sNnxskFosHDBgACncs1vPjfTT8zPfCwkJTU1MGg3Hx4kXSWfQRllE1EolEw4YNA4A5c+aQzqLIL7/8QpX72tpamQP0/Hgf6sx3DZ8qu2HDBgDo3r17VVUV6Sx6B8uoej148MDMzAwAfv31V9JZZJO2ort27ZI35unTpwEBAVq6gr7ttOLM98bGRuq/45IlS0hn0TtYRtUuOjoaALp06aJpK9B5PN7Tp0937typuBXVc1rUiefl5RkbGxsYGFy5coV0Fv3Sjv5XY/XN8uXLz5w5c/369ZUrV1Lbl6iJvCncMmd3l5eXNzY2Ul9obm4+a9YsqmtGb6FmyC5dulTzb6w5OzuHhISEh4fPmTPnjz/+aN++PelE+gLv1NPhwYMHrq6uAoGg5ZP7BALBWzMTpd6d0c3n86uqqpRN1b59ezabLRQKS0pKnJ2db926ZWxsrPwPp8u0bn6CUCgcMmRIdnb28uXLY2JiSMfRF1hGabJly5bVq1d/8MEHCQkJjY2Nile28Pl8gUCg1PMzGAz23+TN3JY5Naeurs7FxeXhw4e6tDZJVUaPHv3777+vXbs2IiKCdJaWysvLGzhwoEgkunLlCnWHE6kbllGaiEQid3f34uLisrKyFn6J4snbb83u7tSpU7t2rXyJJiMjY9iwYQwGIysrq1+/fq17Et2jda2oVFhYWGRkZK9evXJzc/G1GhpgGaXPn3/+eefOna1btzbtDeW1kEwmk85sS5cujYuLc3FxuXXrlpGREZ3fWmONGTPmwoULoaGhkZGRpLMop6GhYcCAAfn5+cHBwdu2bSMdR/dhGUUAALW1tS4uLo8ePYqMjAwNDSUdh7zs7OyBAweam5sXFxdbWVmRjqO03NzcQYMGiUSiq1evKl7nitrOgHQApBGYTGZiYqKBgUFkZGR+fj7pOORR67WWLl2qjTUUAPr27fvNN9+IxeJ58+Yp+zo7UhZ2o+gfixcvjo+Pd3V1zcrK0udLe2krWlRURJ0joo3q6+sHDBhQUFCwevXqzZs3k46jy7AbRf/YsmWLnZ1dTk4OtWRAb1ELloKCgrS3hgKAiYlJUlKSkZHRtm3bbt68STqOLsNuFP0/qampXl5exsbGt2/fdnR0JB2HAKoVZTKZxcXFWl1GKatWrdq6dWvv3r1zcnJk7t2F2g67UfT/jBgxYsGCBfX19QEBASKRiHQcAqhtnLS9FZWKiIhwcHC4f/8+tXcJUgfsRtHbampqnJ2di4qKtmzZsnLlStJxaHXnzp0BAwboTCtKycrK8vDwYDAYGRkZ1PYlSLWwG0VvMzc3P3jwIIPBCAsLKywsJB2HVlQrumTJEp2poQAwePDgZcuWCYXCgIAAZY9+Qi2B3SiSbf78+YcOHRoyZMj169fp39GdCGkrWlRU1KlTJ9JxVKm+vt7V1fXevXu6dwaMJsBuFMm2fft2GxubzMzM2NhY0lloEh4eLpFIFi9erGM1FABMTEwSEhIMDQ03bdqUnZ1NOo6uwW4UyZWSkuLj42NiYnLnzp0+ffqQjqNeOTk5/fv318lWVGrZsmWxsbG45FflsBtFcnl7e/v7+wsEAn24a0+1oosWLdLVGgoAmzZtsre3z8vLw9n4qoXdKFKEz+c7Ojo+f/58586dS5cuJR1H
XfShFaXcuHFj2LBhBgYGN2/edHV1JR1HR2A3ihRhsVj79u0DgJCQkEePHpGOoy760IpS3N3dAwMDhULh3LlzpccfoDbCbhQ1z8/P7+jRox4eHteuXTMw0LU/vfrTilKku3lt3LgxJCSEdBxdoGv/SyB1iIuL69q1a3p6+t69e0lnUT3qxNPAwEB9qKEAwGQyqXnBERERBQUFpOPoAuxGUYucP39+/Pjx5ubmeXl5dnZ2pOOoTG5ubr9+/czMzIqLi/WkjFICAwP37t2Lu3mpBHajqEXGjRs3c+bMmpqa+fPn69KfXn1rRaW2bdvWo0ePnJwcPPmu7bAbRS315s0bBweH0tLSvXv3fvXVV6TjqEBBQYGLi4uxsXFRUZG1tTXpOHS7fPmyt7e3sbFxdna2g4MD6ThaDLtR1FKWlpYHDhwAgODg4KKiItJxVCAsLEwsFi9atEgPaygAeHp6zps3T59381IV7EaRcmbMmHHq1ClPT8+UlBQGg0E6TutJW9HHjx936dKFdBwyKisrnZycnj17Fh0dvWLFCtJxtBV2o0g58fHx77///uXLlxMSEkhnaZN169aJxeLAwEC9raEAYGFhcfjwYQaDsWbNGn3bzUuFsBtFSjt16tSMGTMsLCzy8/O7detGOk5r3L1719nZWc9bUamAgIDDhw+7ubmlpaXpyW5eqoXdKFLa9OnTp02bVllZGRAQoAl/hmtra1+9enXv3r3MzMwLFy4IhcJmv4RqRRcuXIg1FABiYmJsbGwyMjJ27dpFOotWwm4UtUZ5ebmDg8Pr168PHz48Z84c1T55XV0d928CgaDpw3dVVFTU19c3/fLS0lLFs5ewFX3Xr7/+6uvry2Qyc3Nz7e3tScfRMlhGUSudOHHi888/Z7FYBQUFH3zwQbPji4uLi4uLeTwej8fj8/ky31KU/Z00NTVlsVhsNpt6m5SU1LlzZwXjP/300x9//HHZsmU7duxQ6hvpNn9//yNHjri7u6elpenekl+1wjKKWm/q1Kk///yzr6/v+fPnmx08b968Ft6VMjU15XA4HA7HzMxM+r48ZmZmHA6n5ZmlreijR4+6du3a8i/UedLdvOLi4pYsWUI6jjbBMopar6SkxMHBoaKiIikpyc/PT/HgmJiYc+fOSXtG6m3ThxwOh3pHrXc5pk+ffvr06aVLl+7cuVN930VLnT17duLEiX379s3OzsaGtOWwjKI2OXr0qJ+fH4vFunv3ruY3d9iKNuvYsWNTpkxhMpmkg2gT/IOD2mTWrFmTJk3i8/lasTx0/fr1YrH4yy+/xBoqzxdffIE1VFnYjaK2evXqlYODA5fLPX78+MyZM0nHkauwsNDJyQlbUaRy2I2itrK2tt6+fTsABAUFlZSUkI4jF9WKLliwAGsoUi3sRpFqjB079rfffpswYcJ//vMf0llkoFpRIyOjx48fYxlFqoVlFKnGy5cvHR0duVzuDz/8MGPGDNq+b0NDw1vTTmVOTb1//35ZWdnixYtxoQ5SOSyjSGUOHTo0f/58KyurgoKC999/v3VPQq1ZanbxknSNU8un68fGxk6dOhVbUaRyWEaRykgkkrFjx164cGH69OknT56kPtjssiU+n8/lcqUfacmK+KbatWsnnXzK4XDkTUpls9m9e/c2MTFRw8+N9B2WUaRKT58+dXJyqq6utra2rq2t5fF4yj6DmZnZW+Wv6cz8d4ujubm5On4QhFoOyyhSsZMnT96+fTs6Opp62HQ1Z7OLOzt06GBqako2P0LKwjKKVI964ZK6xMY1hUjnYRlFCKE2wU4BIYTaBMsoQgi1CZZRhBBqEyyjCCHUJv8H2j/8/zN0Q8YAAAAASUVORK5CYII=\n", 73 | "text/plain": [ 74 | "" 75 | ] 76 | }, 77 | "execution_count": 3, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | 
} 81 | ], 82 | "source": [ 83 | "ibuprofen = Chem.MolFromSmiles('CC(C)CC1=CC=C(C=C1)C(C)C(=O)O')\n", 84 | "\n", 85 | "bi = {}\n", 86 | "fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(ibuprofen,\n", 87 | " radius=2,\n", 88 | " bitInfo=bi)\n", 89 | "print(list(fp.GetOnBits())[:10])\n", 90 | "ibuprofen" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "## Morgan fingerprintを色付けして表示\n", 98 | "\n", 99 | "RDKit 2019.09 以上のバージョンでは、`Draw.DrawMorganBit`を利用しましょう\n", 100 | "\n", 101 | "ID: 854の以下の例では、ibuprofenの左端が取得されています。" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 4, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAJYAAACWCAIAAACzY+a1AAAOZElEQVR4nO2deWwTVx7Hf+OxYzsen4nNJlCS0k0aSIGqDdLSqmK7FarCamHZqG0ghLSACohDS4T4p9qGKMduhXpu0WpTzipsVK5CqxZadlXUwnZBLO0WQjkaIARoYic+4tueY/+YdDL1kTiO4/Fz3keR5fHzzPzsb76/d8y8Z4LjOMCgjEzqADDjBUuIPFhC5MESIg+WEHmwhMiDJUQeLCHyYAmRB0uIPFhC5MESIg+WEHmwhMiDJUQeLCHyYAmRB0uIPFhC5MkqCY8fP+52u6WOIt0QWXP70/Xr12fNmmWxWLq6utRqtdThpI/scWFzczPDMIsXL55U+kHWuPCHH36YOXMmQRDXrl178MEHpQ4nrWSJC5uammiafumllyabfpAdLpzMFoTscOFktiBkgQu7urrKysomrQUhC1zIW/DFF1+cnPoB6i7EFgTUXdjc3EzTdF1d3aTVD5B2oWDBq1evzpgxQ+pwJANhF7a0tNA0vXLlysmsH6Drwps3b5aVlbEs+/3335eUlEgdjpSg6sKWlpZwOFxXVzfJ9QNEXdjd3V1aWsowDLYgIOrCpqamUCi0cuVKrB+g6ELBgleuXCktLZU6HOlBz4XNzc2hUKi2thbrx4OYC+/cuVNSUoItKAYxF/IWXLFiBdZPACUXChbs7Ox8+OGHpQ4nU0DJhbwFa2pqsH5ikHEhtmA8kHFhS0sLtmBM0HAhtuAIoOHC1tbWUCi0fPlyrF80CLiwp6enpKSEpmlswZgg4MLW1tZgMLhs2TKsX0wy3YWCBS9fvlxWViZ1OJmIXOoAhmAYrqsrcOtWwGoN+f0sAOTmyiyWnP37m4LBYE1NzWTTr7GxsaGhIZF3ZkQivX078MEHttOnnd3dAV4/APD52G++uXnw4H6ZjFy9epu0EaaNxsbGUV+JQHoJL1xwf/65w+NhootOnnyXpkMVFYtv3DBfvOhJf2zpp6GhQaxZIl6UuC789lvP+fOxJ3W6XH2vvPIrmg6/+uo/CwvLAGD+fN3s2Zr0BigNgoqJ5FIpXdjfH75wIe6k3E8/fSccDlZULOb1A4Bz5wbtdjpd0UmG4LwIR8ZDSgnPnXOzbOwil6vv7NkOgpAtWrRZeJFl4fz5wTQFJxERmTMRFSWT0OWi790Lxis9ceKdcDhQUfE7wYIAQBDQ0xOMWWtmDdGZc9RcKpmEPT1x9XO5+s6c6SAIWWXlZvHrH
AccB3fvxt0RdWh6uJpIsEcBEvYLHY7hcINBr8836PcP/X3xxd5wOFBRsXjq1JnRO2Zrdchx3MDAgEwmoyhqTMsFTIiEfr/f8XMCgUDEi7du2ex2h8/n8nrtNB0W706SpFKZO3/+8zEPHgrFqT8zHpqm7XY7RVG5ubnRpV6vl2VZlmWdTqfP58vLy0vwsKNLGA6HnU6ny+VyiuA3hRfFmy6Xa2yfDCAnR61W63JzdWq1Tq3WWa23bLbbJ0++W17+a4KITPVyOTHW42cIHo+HYRiXy+X1enU6nVKpFIo4jvN6vcKmuGhUYkh45cqVNWvWCJL4fL6xxqrX6w0GwwiPRqOxry/n1i05r5lcrhDv7vcPNjY+fePGf06f3vf006siDq7TZcqg4JjgOC4YHKrFaZqO6I7zFuSfy2QyjWYM3d8YXwfHcV9//bX4FZVKZYyFWq2OLjKbzQqFIvqwEVit4WPH+mMWqdW65cv/snNn3dGjLeXlv7FYisWlhYU5iX+8zIEgCIvF4vP5PB4PSZIqlUooirCgRqMhiDFkmhijM36//+LFi4JpKIoaZ/TxOHjQ5nTGbZvs2bPx3LmjpaVP1NcfEj5SXp6iqip/guJJDxzHMQwjlw+bx+PxCOuOyWQyi8UyJgljdCrUavWTTz5ZXl4+bdq0idMPACoqtAAQL9oXXmjS6SzXr//7yy/fF+0ygfGkB4IgxPqN04Ig7ejMjBmqhx5Sxxuj1WiMtbU7AODIkSab7TYAlJaqi4pUsd+NLOOpBYf2SnVIY2PBAv3UqUqI48U5cxbOm7ckGPS1t2974IGcp57Spzu+CWb8FgTJJZTLicpK45w5cf/1li1r1enMV6+euXPnCEmi152gabq/vz8QCMQsjbBgzP7iqGTKjRcOB/3dd97btwPB4HDPXaWSFRer7t79V01NlU6nu3Tp0vTp0yUMMgmcTqff7wcAhUKh0+lycoab0xzHWa1WQUKtVptcyyNTJOThOBgcpL1eFgA0GplOJ+fzynPPPXf48OFnnnnm1KlTSaQaqaBp2mazCZtGo1Hcl4hoiJrNZpksmaSYWRLGo7+/v7y83Gq17t69e9WqyM5+xiJYEADkcrnZbBaKUmVBkLwuTJD8/Py3334bAOrr63t6eqQOJyFomhb0AwCtVisuFdeCBEEkVwvyoCEhAFRXV1dVVblcrnXr1kkdS0KwLEuSJP9cLpePMBxDUVRyKZQHjUTKY7PZysvLbTbbvn376urqpA4nIfx+v8fj0Wq18WpBfuBtskgIAO3t7bW1tXq9/vLly9OmTZM6nGRIYS3Ig0wi5VmxYsXSpUsRSqfRpLAW5EFMQgDYuXOnyWT65JNP2tvbpY5lzKS2FuRBT8KCgoI33ngDADZt2nTv3j2pw4mEjXdTHgBMgAUBRQkBoK6ubsmSJU6nc/369VLHEsng4KDVavV6vdGNjOgR0fFbEJBrzgjcv3//kUcecTgcHR0d1dXVUoczhHg4hiTJvLw8oV8BqW6ICiDpQgAoLCzcsWMHAGzcuLGvr0/qcIbweIYnfhAEIdZvgiwI6EoIAKtXr66srBwYGFi7dq3UsQAAMAwjHo6J6Cr4fD5xLZjEdcF4ICwhALz33nsGg+H48eOHDh2SOhYQ/2ibXC4X3wvKcZzYoCm0IKBbFwq0tbWtXbs2Pz+/s7PTYrFIGEkgEPB4POFwGAA0GptMdp5hrAAgk+Wz7BM+39B9oSmsBYcOiLqEHMdVVlZ+9tln1dXVHR0dUgcTdLlO+f00QCuA0LtQAuwCMPAbFEVFDHmPE+QlBIDu7u7Zs2e73e7Dhw9XVVVJFQZN33E4ttL0zaiSpQDCBTK/ycQolalcCRftupCnqKjotddeA4D169eLL7GmE5q+MzCwJpZ+CoDfizY/cjpfDoevp/DU2SAhAKxbt27hwoU2m23Lli3pPzvHBR2OrSxrj1X4WwDTT8/9AB+xrNvh2Mpx3lhvToYskZAgiLa2NoqiDhw48
OGHH6b57F7vgVj+AwAFwFLR5scAgwDAMPfd7l2pOnuWSAgAxcXFra2tALBhwwa7PaYhJgSOC3u9/4hTuCjCgkKBz3ckVUbMHgkBYMOGDQsWLPjxxx/r6+vTdtJQ6ALLOmOVKAD+INr8GGB4zhfH+QKBsykJIKsklMlke/fupShq//79x44dS89Jw+HLcUrEFgyILTjajmMjGzoVEbz11ltbtmwpKCjo7Ow0Go0pP37EXNf799+3Ws+5XLTTyQSDbCDA8c+Lix9btWrr3LkVAABwEmBnxHFUqoVG45/HHw+Sc/VGZvPmzUePHv3qq6+2bt26e/fukd/s9/ujZyBHI7ynt7c3wX/6a9e+nTfv6ty5jwMQANMBCICIHVNjniyUUCaT7dq169FHH92zZ4/JZCoqKhLPQHY4HOLNUCg0poOTJMnP2TMajfyT3Nze3NwurZbU60nxY0FBZV7eip/2mwXwLMDJnx/KHH38JMjCRMrz+uuvv/nmm4lc1o83ATbmTNgpU6aILyEBQDB4xm7/Y6wDEwDbAR77aTMAsAmgVyg2GJrU6spkP5/oNNkqIcMwvb2927ZtE+a6iq0jPIrvDUwOjgtZrc+ybMxlrCwA7wIIlyz+B/AnAA6AIIgci+WETKYb59khiyVMJ2733zyeeJXuIgDx3SF/BfgcADSa53W61CzvmFWdCqmgqDqSjHdT6wmAb0SbqwHySdJMUSm7TI1dmBrC4Rt2+8tx0ukUgL8K6ZQgLplMv8jJmZOqU2MXpgaFosRk+jtJFsYq7APYL2xw3Gya/mUKT40lTBkKRanZ3KHR1BKEBuDxnxeeAOgUNgYHBxkmZYsB4kSaYhiGcToHQiFGpToL8F8AjiQtCsUsufyJgQGf8G0rlUqTyTTyoRIES5hKAoGA0+nkv9Loibter3dwcHg9VYPBMKbl8uKBE2kqES8ow7KsWDAA0Gg04rn2qUqnWMJUIpfLxbeP8gOw4jcYDAZhrQCWZZNYcjAaLGGKoShKvASd+OZgACBJUnz7WjAYjHhDEmAJU49erycIgiAIrVYbfbUrOp2OPBlqVHBzZkLw+/0KhUJcNYrh1xMSvnl+DD3pc2EXTghqtTqefhBVZQYCgXjLQyUCllAaKIoSp1OXy5V0OsUSpgmWZfnpFgJ8lSmUJt06xRKmg0AgYLPZHA6H2GqpSqdYwgnH6XTy4jEMI57ABlE9kOTSKZZwwhHfqOHz+YRF1nkiOvsRAzqJgCWccCiKErdOI6w26oDOqGAJJxyCIAwGg7DJMEzEbXPjTKdYwnSgUCh4q/HLYETfcxXROh1TOsWjM2mCX/FihNW33W63eD5+xPqzI4BdmCYIgqAoaoQljbVabXLpFEuYQej1wz8BkHg6xRJKSSgUEldkQpXJk2DrFEsoDRzHud3ugYGB6M6+uAeSyKUoLKEE8Eu18Y0Xr9crHjuN7oGI2zgxwRJKAEmSfLuGIAi9Xh/xQ3PidKpSqSiKGvkXmbGEEsArp1QqzWZzzCVJKYpSKpVtbW1Go1F8D1xsLTlMBrN9+/aIx2hw1z7TEZwX7ze2cSLNaBobG3nlGhoa4tWIWMLMRdCPJ56KWMLMJTpzxsyluC5EHuxC5MESIg+WEHmwhMiDJUQeLCHyYAmRB0uIPFhC5MESIg+WEHmwhMiDJUQeLCHyYAmRB0uIPFhC5MESIs//AUQF4Abdh4VgAAAAAElFTkSuQmCC\n", 112 | "text/plain": [ 113 | "" 114 | ] 115 | }, 116 | "execution_count": 4, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "Draw.DrawMorganBit(ibuprofen, 854,bi)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 
127 | "execution_count": 5, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "[(1, ((1, 0), (10, 0))),\n", 134 | " (79, ((3, 1),)),\n", 135 | " (80, ((3, 0),)),\n", 136 | " (283, ((0, 1), (2, 1), (11, 1))),\n", 137 | " (310, ((5, 2), (9, 2))),\n", 138 | " (389, ((14, 1),)),\n", 139 | " (650, ((13, 0),)),\n", 140 | " (807, ((12, 0), (14, 0))),\n", 141 | " (854, ((1, 2),)),\n", 142 | " (857, ((4, 2),))]" 143 | ] 144 | }, 145 | "execution_count": 5, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "list(bi.items())[:10]" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 6, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAXFElEQVR4nO3de1QTZ/oH8AkhBMQVkOoiWqtcFCXAgrIWFXS9dK3dtqtuRUEuChWhiqKiuFARlJuyuFr3iOKlrVZ7ZNvttqvU2sUL1S6CcjFcBBRFqyhKkoIkIbffHzmb5YegM8lMJjTfzx85GOdJvvF4Hp93zLzD0Wg0BAAA6MuC7QAAAAMb2igAgEHQRgEADII2CgBgELRRAKOSSqWtra2NjY1sBwHaWLIdAGBgk0qlov+SyWQ9f9knmUxGEISVldWHH34YGxvr6OjI9icAQ3HwhScAHbVaLZFIRCKRWCyWSCR9Pup+EIlEEolErVZTegs+n29nZyeXyyUSSWho6PHjxxn6LGA0aKPwC/fi8bDX/Pj48WOVSkXp9a2trR0cHGxsbLQ/vJiNjY2DgwNBEHfu3PHy8urs7Pzyyy8XLFjAzEcHI0EbhYFE2/LIrJ21h4nFYqp/w3t1wxf3R0dHRz6fr99n+eijj+Lj40eMGCEUCocOHarfi4ApQBsFk9PZ2ZmamqpbR/dcYiuVSkovZWlpaW9vb2dnZ29v7+DgoPu5v0d7e3uGPtTz1Gr1rFmzLl68GBER8fHHHxvtfYF2aKNgWoRC4Zo1ay5cuNDn71pbW5NZPuuOcXJysrAw3a+jNDc3e3t7d3Z2fvXVV++++y7bcUBPaKNgWhYvXlxYWBgYGBgWFqYdIXtOi1ZWVmwHpNnu3bvXr1/v7OwsFAq1p01hwEEbBRNSW1vr5eVlZWXV1NQ0cuRItuMYg1qtnjlzZklJyYoVKw4fPsx2HNCH6a53YKD75z//2dXVRakkLS1NrVavXLnSTHooQRAWFhaHDh2ysbE5cuRIUVER23FAH5hGgRFVVVW+vr5jxoy5efMmj8cjU2KGo6hObm5uYmLiyJEjhUKhMf+bC2iBaRQYkZ6ertFoFixYQLKHakvUavX7779vbj2UIIj169dPnz79p59+2rx5M9tZgDJMo0A/oVDo4+NjZWV169YtZ2dnMiV1dXUCgYDH4zU1NY0aNYrphCbo5s2bv/nNb+RyeVFR0e9//3u24wAFmEaBfqmpqWq1OjY2lmQPJXqMoubZQwmCGD9+fGpqqkajiYmJ6ejoYDsOUIA2CjTTaDReXl5OTk6bNm0iWVJXV3f
q1Ck+n2/mS9rExER/f/+7d++a+Z/DgINFPTBCoVCQPysaEhJy8uTJDz74YN++fYymMn11dXV+fn5yufzs2bNz585lOw6QgjYKLNOeFbW0tGxoaHjttdfYjmMQ7VX8EolEIpEMHz58zJgxerxIRkZGSkrKmDFjbty4MXjwYLozAv3QRoFlA2IUvX79emtra89d8uLj43t9o8DPz6+iokL3y61bt6alpenxXkqlMiAgoLy8fM2aNXv37jU0OjAPbRTY1NjYOGHCBC6Xa4RRVKPRiMViW1vbXleUHjx4sL6+vueOokePHvXy8up5zJQpU65evdrzmStXrgQEBPR8JjAw8OrVq7pLV1esWBEbG6tf1Orqan9/f6VSWVxcPGPGDP1eBIwGu98DbSoqKnx9fSmVbNu2TaVSxcTE6NFDu7u7e/Y+kUg0Y8aM4cOH9zwmLi7u8uXLuoU2QRAlJSXTp0/vecyJEycuXrzY85nHjx/3eq+pU6cOHTq052ZRz3+joLi4uL/TwdevXz99+vSHH35I8qN5e3snJyenpqYuX768uroaS3tTpwGgg1AotLCwmDlzpkqlIlnS0NBgaWnJ4/Hu3LmjxzvOmTOn11/mb7/9ttcx8+fP73mAvb39uXPneh1z/Pjx3NzcgoKCwsLCc+fOlZWVdXR06JGnP2KxeMiQIQRBnDlzhnyVQqGYNGkSQRDr1q2jMQwwAW0U6LF48WKCIOLj48mXhIaGEgQRGxur3zsuWbLE0dHR1dV10qRJs2fPXrRoUWlpaa9jamtrKyoqmpub29vb9XsXWvzlL38hCMLZ2ZlSjMrKSh6PZ2FhcenSJeaygeHQRoEGNTU1FhYWfD7//v37JEt0o2hzczOT0UyCSqXSnkmIjo6mVLh161aCIMaNG9fV1cVQNjAc2ijQIDg4mCCINWvWkC9ZtmwZQRCrVq1iLpVJqa+vt7GxIQiiqKiIfJVCodCebt64cSNz2cBAaKNgqNraWu0oeu/ePZIljY2N5jOK6uzcudPBweGLL76gVFVRUaFd2peUlDAUDAyELzyBoWQyWUFBgUgk0q5AyQgPDz927FhMTEx+fj6j2UyKSqVqa2tzcnKiWpicnJyZmTl+/PiKigrtSAsmBW0UjK2pqWnChAkcDufmzZtjx45lO84AIJfLJ0+eLBQKN2/enJ2dzXYc6A1bk4Cxbd++XalULl++HD2UJD6f/+mnn/J4vNzc3F5XAYApwDQKRnXr1i0PDw+MonpISkrKycnx8PCoqKiwtrZmOw78D6ZRMCrtKBoZGWnmPVSj0ezbt6/X1VMvlpaW5unpWV9fv2PHDuaCgR4wjYKeGhoa/vOf/4SEhFhakr2kGKOoztGjR1esWOHi4lJdXW1ra0uyqrS0dNq0aRwO58cff5w8eTKjCYECVr8nAANYSEgIQRDJycnkSyIjIwnqX0H/RVIoFNo+SOm6L41Gs2HDBoIgJk6cKJPJGMoGVGEaBX3osTOTbhStr693cXFhOqHp023jdP78+aCgIJJVcrncz8+vtrZW7434gHY4Nwr6SEtLU6lUUVFR5HdmysjIUCqV4eHh6KFa3t7eW7ZsUavVkZGRnZ2dJKv4fP6hQ4e4XG5mZua1a9cYTQgkYRoFyhobGydOnMjhcBoaGkhu8I5RtE8KhcLf33/QoEGnTp2idC+/hISEv/71rz4+PlevXu21fSoYH6ZRoKygoECpVEZFRZG/SYZ2FA0LC0MP7YnH4xUVFZWUlFC9H2pGRoa7u3tVVRW+jW8KMI0CZWq1urCwMCAgYPTo0WSOv3v37rhx41QqVV1dnbu7O9PxzMSVK1cCAwMtLCxKS0v9/PzYjmPWMI0CZRYWFsHBwSR7KEEQ6enp3d3dERER6KE0mjp1alxcnHZZoFAo2I5j1jCNArMwijKnq6vLx8enqalpx44dycnJbMcxX5hGgVnbt2/v7u4ODw9HD32ptra2jRs3yuVykscPGjSooKCAw+Gkp6cLhUJ
Gs8ELYBoFBulG0dra2nHjxrEdx9TNnDnz4sWLSUlJWVlZ5Kvi4uL279/v6+tbWlra3z31gFGYRoGsjRs35uXldXV1kS/ZsWNHd3d3WFgYeigZOTk5XC43Nze3rKyMfNWuXbtcXFwqKiry8vKYywYvgGkUSNHjcviWlhZ3d3eMopRs2rRp165dEyZMuH79OvltnIqLi+fMmWNlZXXt2jVPT09GE8LzMI0CKXrszKQdRZctW4YeSt727dsnTpxYV1eXkZFBvmrWrFnR0dFyuTwqKkqlUjEXD/qENgovd+vWrc8++4zH423ZsoVkSUtLyyeffMLlcsmXAPHfaz15PB7Vbpibmzt69OjS0tLdu3czlA36xea+KDBA6LEz08qVKwmCCA8PZy7VLxj5mwP29P3333M4HD6fX1NTQ3skeAGcG4WXUKlUc+fO/eGHH/Q4K1pTUzN+/HimE4JOVFTUkSNHAgICSkpKuFwu23HMBRb18BJcLre4uLi6upr8WdHMzMzu7u6QkBD0UCPLy8t79dVXf/zxx48++ojtLGYE0yjQ7N69e25ubhhF2XLmzJm33npr0KBBlZWVuOTBODCNAs20o+jSpUvRQ2lx4cKF/Px88sfPnz8/IiKiq6srMjJSrVYzFwx0MI0Cne7du+fu7q5UKoVCoYeHB9txBrympiYPDw8ul1teXu7l5UWySiKRCASC+/fv7927d82aNYwmBALTKLyAVCqlWpKVlSWXy5csWYIeSgs3N7dVq1Zp98civ42TnZ3d/v37CYJISkpqampiMiAQBIEvPEE/bt++7eDgQOmOdS0tLXw+n8vl1tXVMRfM3HR2drq6uhIEkZmZSakwNDSUIIiZM2eq1WqGsoEWplHoW0ZGhkgkevDgAfmS7OxsuVweHByMUZRGtra22m2c0tLSKG3jtGfPHicnJ6qnVkEPODcKfdBjZybtWVGFQlFVVSUQCJhOaG5WrVr19ddfnzx5csaMGeSrvvnmm3feecfW1raqqko70gIj2B6HwRRFR0cTBBEZGUm+JC4ujiCIkJAQ5lKZs59//rm9vV2PwiVLlhAEMWvWLCztmYNpFHrTY2emhw8furq6yuVyjKKm5unTp56eno8ePTpw4ID2Cl2gHc6NQm8qlertt9+mtEloRkaGVCoNDg5GDzU1jo6OBw4cIAgiMTGxpaWF7Ti/TJhGoW9qtdrCgtS/shhFTd97773397//ffbs2efOneNwOGzH+aXBNAp9I9lDCYLIzMyUSqWLFy9GDzUOhUKRkZHR0NBAvmT//v3Dhw//97//ffToUeaCmS1Mo2AQ3ShaWVlJ/jIbMMS2bdvS0tKmT59+8eJF8v/aff7550uXLrWzs7tx48arr77KaEJzg2kUDJKVlSWVSt977z30UKNJSEgYNWrUDz/8QGkbpyVLlixatEgikaxatYq5bOYJ0yjoTzuKymSyqqoqtFFjOn369B/+8Aeq2zi1tbV5enq2tbV9/PHHERERjCY0K5hGQX/Z2dkYRVnx1ltvhYWFdXV1paenk68aNmyY9u6ha9euvX//PmPpzA6mUdBTa2uri4uLTCarrKz09vZmO47ZaW9v37VrV0pKiq2tLaXCd95555tvvgkNDT1+/DhD2cyNJdsBYKDSjaLooawYOnRoVlaWHoX5+fm/+tWvdu7cSXsks4VpFPSBURRAB+dGQR85OTlSqXTRokXooQCYRoGy1tZWV1dXqVRaUVHh4+PDdhwAlmEaBcp27tzZ1dW1cOFC9FAAAtMoUPXo0SMXFxeMogA6mEaBGu0oumDBAvRQAC1Mo0ABRlGA5+F7o/ASYrFYLBZLJBKxWLxnzx6MogC9oI2aHalUKuqHTCbr9bttbW1KpVJXa2lpOWjQoPfff5/F/ACmBov6gU0qlWrnRO2j9geRSKR7ptdjZ2cn1bcYMmSInZ2dvb29vb393bt3W1pa5s6de/bsWez+C6CFNmpaeg6Dz8+Gfc6PVN/C2tra4Tk
2NjZ9Pj9s2DAej6erffLkiUAgePTo0cGDBzGTAmihjTKrzz7YX3988uSJQqGg+hZ9tr/++uOvf/1rLpdryCc6depUcHDwkCFDbty4MXr0aENeCuCXAW2UNmKxOCwsrOcK+ueff6b6IoMHD9auoJ9/dHBw0C2udc/b2Ngw8VleTHtjnzlz5nz33XdY2gOgjdKmq6vr+S3Lnh8V+1s+Ozg4ODo68vl8VsJT8uTJE09Pz8ePHx85cmT58uVsxwFgGdoonb7++uue86O9vT3biZhy8uTJkJAQ3NgHgEAbBb0tWrToyy+/nD9//unTp9nOAsAmtFHQU2trq6enZ3t7+yeffBIeHs52HADWoI2C/o4dOxYeHm5nZ1dTUzNy5Ei24wCwA1uTgP7CwsL++Mc/4p69YOYwjYJBHj586OnpKRKJPvvss5CQELbjALAA0ygYZMSIEdp79sbHx7e2trIdB4AFaKNgqMjIyDfffPPp06cxMTFsZwFgARb1QIMHDx4IBAKRSPT5558HBwezHQfAqDCNAg2cnZ219z1fvXr1o0eP2I4DYFRoo0CP6OjoefPmPXnyJD4+nu0sAEaFRT3Q5qeffhIIBGKxuLCw8E9/+hPbcQCMBNMo9KuwsJDS5UkjR47Mzs4mCCI2Nvbx48eM5QIwLZhGoW9isdjNze3p06eUtnHSaDTz5s377rvvli5deuLECUYTApgItFHo14kTJ0JDQ+3s7IRC4ahRo0hW3b1718vLq6Oj44svvli4cCGjCQFMARb10K+QkJCFCxdSvdbztddey8rKIgjigw8+aG9vZywdgKnANAovotvG6dNPPw0LCyNZpdFo3njjje+//37ZsmXHjh1jNCEA6zCNwos4OTnt3r3bw8PD3d2dfBWHwzl48ODgwYOPHz/+j3/8g7l4AKYA0yi8nFwu1+PuJnv37l27du2IESNqamocHByYCAZgCjCNwsvpd4eo1atXBwUFPXz4cP369bRHAjAdmEaBQc3NzV5eXs+ePfvqq6/effddtuMAMALTKDBo7Nix6enpBEHExcWJRCK24wAwAm0UKOjs7Fy9evWpU6fIl6xbt2769OkPHjxITExkLhgAi7CoBwq0N18aNmxYTU3NsGHDSFbdvHnT19dXKpUWFRXNmzeP0YQAxodpFChYtmzZvHnz2traVq9eTb5q/PjxaWlpBEFER0eLxWLG0gGwA9MoUNPS0iIQCDo6Oiht46RWq4OCgi5fvhwTE5Ofn89oQgAjQxsFyvLz82NjY1955ZWamprhw4eTrKqvr/f19ZXL5d9+++0bb7zBaEIAY8KiHiiLiYmZNWvW/PnzraysyFd5eHhs3bpVo9GsXLmyo6ODuXgARoZpFPSh33VNSqVy6tSpZWVlcXFxf/vb35gIBmB8aKNgVHV1dX5+fnK5/OzZs3PnzmU7DgANsKgHo5owYUJycrJ2ad/Z2cl2HAAaoI2CsSUlJU2ePPnOnTvJyclsZwGgAdooGKq6uvrtt98m/4VQS0vLw4cPW1lZ7du379KlS4xmAzACtFEwVEJCwr/+9a+EhATyJd7e3n/+85/VanVkZCSW9jDQ4b+YwFC3b9/29vZ+9uzZmTNn3nzzTZJVSqXy9ddfv3btWkJCQl5eHqMJARiFNgo0yMvL27Bhg7Ozs1AoJL9Dc1VVlb+/v0qlunDhQmBgIKMJAZiDRT3QQLeN06ZNm8hX+fj4JCUlqdXq6OhoqVTKXDwARqGNAg0sLCwKCgqcnJwCAgIoFaakpHh5eTU0NKSmpjKUDYBpWNQDbWQymbW1NdWqioqKKVOmqFSqS5cuTZs2jYlgAIzCNAq00aOHEgTh6+u7ceNG7dJeJpPRngqAaWijwL7U1FSBQFBfX6/dlhRgYMGiHkzC1atXp06dyuFwrly54u/vz3YcAAowjQL9NBrNgQMHkpKSyJf89re/3bBhg1KpjIiIwNIeBhZMo0C/xsZGgUCgVCqLi4tnzJhBskoul/v5+dX
W1qakpGzfvp3RhAA0wjQK9HN3d09JSVGr1cuXLyd/rSefzz98+DCXy83Ozi4vL2c0IQCN0EaBEVu2bJk0aVJzc3NKSgr5qtdff33t2rVKpTIqKqq7u5u5eAA0wqIemFJdXe3v769UKs+fPx8UFESySiaT+fn51dXVpaambtu2jcmAAPTANApM8fb21s6kr7zyCvkqa2tr7dI+IyPj2rVrzMUDoAumUWCQQqHgcDiWlpZUC9etW7dnzx4fH5+ysjIej8dENgC6YBoFBvF4PD16KEEQmZmZbm5uVVVV2dnZtKcCoBemUTBRly9fDgoKsrS0LC8v9/LyYjsOQL8wjYKJmjZtWmxsbHd3d3h4uEKhYDsOQL/QRsFInj59GhYWVlpaSr4kJyfH1dW1srJy165dzAUDMBAW9WAk6enpqampEydOvH79Op/PJ1l1/vz52bNnW1lZlZeXCwQCRhMC6AfTKBjJ5s2bPT09a2trKW3j9Lvf/S4mJkYul0dERGBpD6YJ0ygYT2lp6bRp06hu4/Ts2TNvb+/bt2/n5ORQukkJgHFgGgXjmTJlyvr165VK5aFDh8hX2draFhQUcDicrVu31tTUMBcPQD9cXG8HxhQYGOjm5paSksLhcMhXjR079v79+2VlZRKJZOHChczFA9ADFvUwMEgkkj179iQmJtrY2LCdBeD/QRsFADAIzo0CABgEbRQAwCBoowAABkEbBQAwCNooAIBB/g8JdlbqUJBoawAAAABJRU5ErkJggg==\n", 162 | "text/plain": [ 163 | "" 164 | ] 165 | }, 166 | "execution_count": 6, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "atom_id, radius = bi[310][0]\n", 173 | "path_ids = Chem.FindAtomEnvironmentOfRadiusN(ibuprofen, radius, atom_id)\n", 174 | "substructure = Chem.PathToSubmol(ibuprofen, path_ids)\n", 175 | "substructure" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "## Morgan fingerprintが分子の中のどこを指しているか確認する\n", 183 | "\n", 184 | "`Mol.GetSubstructMatch`を利用する" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 7, 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "name": "stdout", 194 | "output_type": "stream", 195 | "text": [ 196 | "substructure cccc(c)C\n" 197 | ] 198 | }, 199 | { 200 | "data": { 201 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nO3deVRT97YH8G8IUxgkEaRotQpaqxVBAQdAWxxQtAo41daqndQq2muLQ3v1tr31Vh/gWNs6XXxqW9veZ63iWAFrZXBWBNQ6oDhhpYJJCJBEIHl/hJuQEJCQ5JwM+7NYWTnJL2S3S7f7d34TR6lUghBCSGs5sB0AIYRYN0qjhBBiFEe2AyCEaONwtC7ptpvFozRKiCXhcHTzZuNXiIWhTj0hFkNvxlQqdetTYmEojRJCiFEojRJCiFEojRJCiFEojRJiVSoq2I6A6KI0SoiVUCqxbBk6d8bNm2yHQrRQGiXEYugdlFcP33M4KCqCSITp06FQMB8daQqH1tQTYlmamX4vEiEwECUl+OorzJvHcFykKZRGCbEqBw5g7Fi4uaGgAF27sh0NAahTT4iFksshk+l5fcwYTJmC6mrMnEmrmywEpVFCLE9eHkJDsXSp/nfXr4efH44dw+bNzIZF9KNOPSGWJz8f/fqhrg7Hj2PQID0N9u1DXBzc3VFQgIAAxuMjWqgaZcehQ4eEQiHbURBLFRyMjz6CQoEZMyCV6mkQG4vJk1FVRV17S0DVKAsePHjQtWtXV1fXmzdvtm3blu1wiEV68gRhYSgsxOLFSE7W06CsDIGBKC3Fli2YOZPx+IgGVaMsSElJkclk0dHRlENJk5ydsWMHnJywahVyc/U08PHB+vUAsHAh7t5lODrSEFWjTHv48GFAQIBMJrt48WJQUBDb4RDL9ve/IykJPXogLw+urnoaTJqEn3/G8OFIT6f99NhC1SjTkpOTpVLphAkTKIeSp/vnP9GrF65exbJl+hts3AhfX2RmYvt2RgMjDVA1yqiHDx927dpVKpXm5eUFBwezHQ6xBqdPIzISHA5OnEC/fnoa/PgjpkyBlxcKC9GpE+PxEapGmZWcnFxdXT1+/HjKoaSlBgxAYiJqa/Hmm/on5L/+OsaPh1iM2bMZD44AVI0yiUpR0kpyOUJCcOUK/vEP/Otfeho8fIhevfD4MXbswPTpjMdn76gaZU5KSgqVoqQ1XFywdSu4XCQl4dw5PQ38/LBuHQDFp58++vNPpsOze1SNMqS0tDQgIIBKUdJ6iYlYuxZBQTh7Fs7Ojd+/P2fO+LS09v36paWlMR+dPaNqlCGqUnTcuHGUQ0krffEFnn8eBQVYsULv+9xPPy2Syfbt27dz506GQ7NzVI0yoayszN/fv6qq6uzZs6GhoWyHQ6zWyZMYNAgODjh9GiEhjd/fvn3722+/7e3tfenSJT8/P+YDtE9UjTIhKSmpsrIyPj6ecigxSng45s1DbS3eeQc1NY3ff+utt+Li4srLy9977z3mo7NbVI2anboUPXPmTFhYGNvhECtXVYXgYNy8+XDdOr/58xu//+DBg8DAQKFQ+NNPP02ePJn5AO0QVaNml5ycXFlZGRcXRzmUmIC7uyI19auIiM6LF1+6dKnx+x06dEhJSQEwb9680tJSxuOzR5RGzausrGzTpk0Alja1BS8hBnKIirrWt++TJ0+mT59eo69rP2PGjJiYmLKysvfff5/x6OwRpVHzSklJoVKUmFxycnLXrl3z8vJWrVqlt0Fqaiqfz9+1a9fPP//McGx2iO6NmpHqrmhlZeWZM2f66V0NTUhrHTt2bNiwYc7OzufPn+/Vq1fjBps3b549e7aPj8/ly5d9fX2Zj9B+UDVqRitXrqysrIyNjaUcSkxuyJAhs2bNksvl7777bl1dXeMGs2bNGjFiRFlZ2QcffMB8eHaFqlFzKS8v9/f3l0gkVIoSM6mqqgoKCrp161ZKSsqiRYsaN7hz507v3r0lEsnu3bvHjx/PfIR2gqpRc1m5cqVEIhk7dizlUGIm7u7u//73vzkczieffHLlypXGDTp37pyUlARg9uzZjx49YjxAe0HVqFlQKUoYM3PmzNTU1IE
DB+bk5HC5XJ13lUrlyJEjMzIypk6d+t133zEUk84+/A2TDIej5ww+vS9aD6pGzWLVqlUSiWTMmDGUQ4m5rV69ulOnTqdOnfryyy8bv8vhcLZs2eLh4fH999/v2bOHiYBUObHhj62fbkLVqOmpS9HTp0/379+f7XDsQ3U1Cgtx8yb++gtyObhc8Pno3Bm9e8MOlpZnZmaOGDHCxcXlwoULPXv2bNxg/fr18+fPb9++/aVLl8x7kGJTdaX6dVusRimNmt7f//73pKSkV1555cCBA2zHYgfq6pCVhZMn9a4xB4Bu3TB6NAQCZsNi2ttvv719+/bw8PDs7OzGXXuFQhEVFZWTk7Njx45p06a1/mvq6iASQSyGUFj/RP3YvTveeIPSKDEBOy1FlUpUVKC6Gm5u8PSEA1M3iyor8eOPePDgKc2cnTFxIp5/npGY2CEWiwMDA+/fv79u3br5+tbaFxUV3bt3b8iQIQ1flEqlYrFYJBI1fBQKhdOUyg5372plSdVjZWWTEYwZg/37W5RG9bLmRERp1MSWLFnyP//zP/ZSiiqVuHIFFy/izh1NMejkhOeeQ58+6NXLvDfFZDL87/+ihQPQXC6mTEFAgBnjYdvBgwfHjBnj5uaWn5/frVs31Yt1dXVJSUmq/KimTppyuVzvr7oVGemfm6vnDQ4HfD4EAnh5gc/XeuzZE5MnUzVKjGVfpeiDB0hLw19/NdmgXTvExeHZZ80VwM8/4/JlA9rzeEhIgIeHueKxANOnT//uu+8iIyOzsrIcHBwePHiQkJBw8ODB2tpave1dXFy8vLz4fL7qUW1Gp07dHRz05Mo2bZ4SAaVRYqSlS5euWLFi9OjRBw8eZDsWM7t8GXv2QN/iGS1cLuLjERho+gBu38aOHS1trB4sDgnB2LGmD8ZiiESiwMDAkpKSr7/+eu7cufPnz1+/fn1gYODUqVMFAkHDjKl65PF4Jo6A0igxxuPHj/39/SsqKk6dOjVgwAC2wzGnmzfxww9QKFrUmMPBlCn4bx/TZH78EdevG/wpLheJiXBzM3EwliQtLS0+Pj4kJGT//v3dunWTyWQXL14MCgpiLoLGObHhK7aYRmneqMmsWbOmoqJi1KhRNp5DpVLs2dPSHApAqcSePZBKTRnDkycoKmrNB+vqcO2aKSOxPHFxcd9//31OTk5KSopUKp04cSKjORT/rf0b/lhzimwJqkZNQyQS+fv7i0SinJycyMhItsMxp4wMnDjR0sbq3nR4OEaMMOp7a2shk9X/3LuH9PRW/p7QUIwZY1Qk1uDhw4ddu3aVSqVMl6J2yZHtAGzE6tWrRSJRTEyMjefQujpcuGBAe/VIfV4ehg6FY4M/b7W1kEohk0EqRW2t1qXexyYGSQxWUWGa32PZkpOTq6urWShF7RJVoyagLkWzs7MHDRrEdjjmVFyMb79t5WefeQYODvVpUS5nraPXrRveeIOdr2aKuhTNy8ujA70ZQNWoCaxZs0YkEo0cOdLGcyiAhw9b/1kLORfI3Z3tCMwuJSWlurp6woQJlEOZQdWoscRicZcuXeyiFAWQmQm9s7IZwOXC1bX+x9ERd+608vfExMCmxwBLS0sDAgKoFGUSVaPGUpWiI0aMsP0ciqZX8rWCoyNcXeHkVP+Ex2vu0ckJrq5aH9+6Fffvt+Z7u3c3SfgWS1WKjh8/nnIoY6gaNYq6FM3Kyho8eDDb4ZjfuXNo9cqCAQMQHFxfTrq4GLvuvrAQv/xi8Keefx5Tphj1vZaNSlFW0LxRo6xdu1YkEkVHR9tFDgXQsWPrPxscjPbtIRCAxzPB3iWBgQYvM+VyMXy4sd9r2VauXFldXR0fH085lEmURltPLBavX78ewCeffMJ2LEx55plW7jgnEJh4308OBxMmwKC1jCNHwqYPyCwrK9u8eTOHw1myZAnbsdgXujf6dBUVFTo7iameHDp0SCgUDh8+3F5KUQAcDiIiWtOvHzjQ9Ls9CQSYNg07d6Kq6umNnZy
g7xRiW5KcnFxZWRkfHx8WFsZ2LPbFHtOoVCoVNiKTyfS+XlZWVtPUfsCAt7e33fWeQkJw4QL+/NOAj/j5wUx/sdu3x6xZOHAAN248pWVNDY4cwbhxZgnDApSVlW3atInD4SxdupTtWOyO1Q8xyeXyxpvONt6JVv1EIpEY+hUeHh46++KoHktKSr799lt3d/f8/PyuXbua47/OQgmF2Lq1RTUgAHd3vPuu2Tefv3sXFy6gqEgTlaMjOnWCs7PWIvrJk9Gjh3kjYcnixYtXrlwZFxe3d+9etmOxO1aZRquqqsLCwlTJUSaTGfpxgUCgNy3qbLyo3lis8ZEMaq+99tp//vOfoUOHZmZmcmz93C4tZWXYuhVP/Z/P52PKFLRrZ/Z4ZDJkZqKgAG++CTc3cLnw9ASHA4UCqama2tnDAwkJht1RtQZlZWX+/v6VlZV0Ei0rrDKNKpVKJyenuv9udunq6ipoAo/H03nX19fX0dFktzLKysoCAwNLS0s3b948a9YsU/1aKyCVYt06PHnSZAMHB4SEYNgw3fme5nDtGvbvr69Dn3sOb72ldR+2tBRbtmi2pAoORny82UNi1kcffZSSkhIbG5uWlsZ2LPbIKtMogKtXr7Zp08bLy8ud7bV9u3btevXVV9u0aVNYWPjcc8+xGwxzjh5FTk79cxcXdOyI8nLIZHB1hY8PnJ3B42HIEIZWXubno2FPtvE6pWPHkJWluXztNbzwAhOBMaKsrCwgIEAikVApyhZrHWLqYTF3uCZNmjRx4sSff/75nXfeycjIsIuuvVSKM2c0l1FRGDhQc1lXh/XrUVGBggKEhuKll8zeiQ4OxtWruHq1/vLoUTz/PBoeI/zSS7h6VXPeycGD6NyZiTKZEStXrpRIJLGxsZRD2ULzRk1g48aNvr6+R48e3bZtG9uxMOLECU133sMDoaFa7+bl1W9GV1ODc+dMtsFd80aP1qTFmhrs3au1gxSXi3HjNHP+JZLWb1dqYcrLyzdu3AjgH//4B9ux2C9Koybg4+Ozbt06AImJiffu3WM7HDPTKUUjI+HkpLlUKLT2LunXD56eTETl6YmRIzWX9+7h3DmtBn5+iIjQXObltXL/fAujKkXHjh1LpSiLKI2axuuvvz5hwgSxWDx79my2YzGzkyc1pai7u24peuECRKL6546OCA9nLrA+fbROfMrIgFCo1SAqSmsV0759T59pYNnKy8s3bNgAu1pHZ5EojZrMxo0b27Vrd+jQoR0tP7HS6hhUioaFMVSKqsXGanXt9+3T7drHx2t17TMzGQ3P1FatWiWRSMaMGUOlKLsojZpMu3bt1qxZA2D+/Pn3W7eHm+U7eRJyef1zd3fdtUl5eVqlaMNONDM8PREdrbm8fRvnz2s1aN9eq0A+fx43bzIUm6mVl5d/8803oFLUAlAaNaWpU6eOGzdOLBbPmTOH7VjMQCbD2bOay8alqHoKFNgoRVX69kXDFWXp6bpd+yFDtJYD7N/f3OxXC7Z69WqJRPLKK6/079+f7VjsHaVRE/vmm2/atm174MCBnTt3sh2LqZ08qbmZ6OamZ4Ce3VJUhcPB2LFwdq6/rKnB/v26Xfu4OM38fLHYGrv25eXlX3/9NYBPP/2U7VgIpVFTa9++vbpr/9CYk4ssjUyme1dUnarQqBQNDWWnFFXx8tLq2hcXIy9Pq8Gzz2pNdD17FrduMRSbiaxZs0YikYwePZpKUUtAadT03nzzzbi4uPLycptaHqpTijZ/V5T1U6ZDQ3W79mKxVoNhw+Djo7m0qq7948ePv/rqK1ApajEojZrFhg0bBALB/v37f/rpJ7ZjMQW53GpKURUOB2PGaIKUy/WM2jfs2otEOHqU6SBbS1WKjho1aoBNn81nRSiNmkWHDh1WrlwJYN68eaUWcrCwMZovRS9etKxSVIXP1zoy5NYt5OdrNejYEQ17xGfPtv60UQaJRCLVAD3tK2o5KI2ay7vvvhsTE1NeXv7ee++xHYtx5HK
cPq25bFyKZmdrLkNC2C9F1cLCEBCgufz11/pVqmrDhmmW3iuV2LvX8rv2q1evFolEMTExkRbyzxWhNGpWqampfD4/LS1t165dbMdihJaXolyupZSiKjqj9nI5DhzQauDkhPh4ra79sWOMRmggkUikGqCnUtSiUBo1o2effTY5ORlAQkLCX+rthayLTikaEfGUu6Jt2jAXW0vw+Rg6VHN54wYKCrQadOqEhkuATp/G3bsAoFCgvBx//onSUsspUdesWSMSiUaOHDlo0CC2YyEa1rrfqLVQKpWjRo06cuTIa6+99uOPP7IdjuF+/x3Hj9c/5/HwwQdaaTQvD/v21T/ncvG3v1lcGgWgVGLHDs19T1dXJCRo3XmoqcHGjZpZ+p6e8PPD7dtoeAaXnx9690ZoKFxcmIpbl1gs7tKli0gkys7OpjRqUagaNS8Oh7N582ZPT8+ffvpp9+7dbIdjIEPvilpgDgXA4SAuTrPgSibT07WPjdVcSiS4cQM65xg+fIiMDKxfj0uXzBxuk1Sl6IgRIyiHWhpKo2bXuXPnpKQkAHPmzHn06BHb4Rji1CnNXVEeDzr7X+Tnayo4S7srqkMgwJAhmsvr11FYqNWgS5cW7YdfXY3du1mZGiUWi9evXw/aV9QiURplwpw5c6Kjox89evThhx+yHUuLyeU4dUpzqXNXVKnUuisaEgIvL+Zia4WBA9HwiJfDh1FZqbksLUVxcUt/VU6O1kZWjFCXooMHD2b4q8lTURplAofD2bJli4eHx86dO/fs2cN2OC2jU4rqLDrMz8fjx/XPLbwUVeFwEBsL9WmGUqmma69Q4Jdf6seRWjhU8NtvmtNGzU8sFquWLVEpapkojTKkS5cuK1asADB37tzH6gRkIRQK3LqF33/Hnj3YtQsHDiA316ZKURVvb0RFaS6vXcPlywCQl6c5pqmFR2kpFExuaLJ27VqhUBgdHU2lqGWy1iPtrNHcuXN37959/PjxxMTE7du3sx0OAEChwNmzyM2FRNJkG72laHl5/XMHBysoRdUiInDtGtQHvRw6hC5ddI8baaFbt/D4sdbBeQYSCoUikUgsFqsfdS5Vj0KhUDVbjlbQWyxKo8xxcHDYtm1bUFDQjh07xo0bFxcXx3JAFRX4z3/w4MFTmrVrp7WvqJWWoiqqrv3mzfUH7VVXY98+NN6IS6lsUVl6/XrDnaKkUqmwCTKZTOfdR48e1bb4sD/VKeI2tWGYbaF5o0xbt27dhx9+2KFDh0uXLgmqq3HlCkpK6hcpurrimWfwwgvo3l1z1oWZiMXYtk1336OmBAVpVvtcvIi0tPrXHRzw/vvg880VpJnk5NSPtjs5ITi4mWq0rLr6Rnm5WC4XyWRimUwkkwn/+0Qsl4scHMRcrqqKrK6uNjQKLy8vPp/f1GND+/btW7BggY+Pz+XLl30bHidFLAOlUaYpFIqoqKjs7Ox3Bg/eOmyY/kZeXhg+HIGB5gqirg5btxo2SDJ8OCIjoVTim280PfqwMLzyijkCNC+FAlu3wtERcXG4e1fzr4K2k/fuHbxxY3lWVgt/q6urq0AfHo/X+K127do5Nazxm6VUKmNiYtLT019//fUffvihhZ8ijKE0yoLraWl9Jk2S1tQceuONUc8/32S74GDExpqlLM3NNXiEhMvFvHm4cwd799a/YqWlqEp1NXg8cDgoKEATcycGpqaevn+/W9u2AQIB39VV/eOlenRx4Xfv7jVhgqp+9DTzhix37tzp3bu3RCLZvXv3+PHjzfpdxFB0b5RxeXndL15cNmTIovT0mfv3X0pI4KsPs9SRn4+aGkyc2NLh4xaqq8OJE635VG6u1uTKvn2tNYcCcHOrf9LEjd3DN26cvn//GQ+P/Dlz3JoqG/v3N2OPQZtqEcfcuXNnz549ePDgdg2PkyJsowlPzCotxcGDABLDwwc991xJRcVHGRnNtb9yRWvikUkUFcHwG3kAUFiomVbJ5cI2Jt+0bw8ut/HLy7OzASy
KiGgyhwLo2NF8cTWmXsSRmJjI5PeSp6I0yqzDh1FXB8CBw0mNjeU5Of37/PkjRUXNfeTYMa31Nsa7fbuVH5TLMXEiJk2Ctzf69LGmAfpmODujWzed134tKsq9e9fHze09nV0BG3JyQvfu5o1Nm3oRx/fff281izjsA3XqGVRS0nB/9Rd8fD57+eWPMzPfO3CgcM4cz6a2DqqpwblzWvPGddTWorYWNTWQySCVNveoatZ8Um5+ro9QiBdfRM+elrN3nAlERuLatYYvqIaVPho0yKPhogMdbOz21KVLl+XLl8+fP3/u3LlRUVECgYDhAIhelEYZdOWKzgsLIyJ++eOPMyUlH2VmbmhmyPvCBfB4+pOjVKoqb02m+fuwqn2POBwW94szvU6dEBysPmLkSFFRzt27Pm5us5spRT098fLLDIWnbd68ebt3787KykpMTNy2bRsrMRAd1Kln0P37Oi9wHRx2jBvn6ui46ezZymbWdEsk+PVXHD+O06dRUIDr13HvHh49QmWliXPoU/F4jH4dY0aPRvv2qqdfZGUBWBwZ2WQp6uSEV19FUwODZubg4LB9+3YPD4/t27enNTFVizCM0iiD9M117+Hj88nLLysBqWpNi2kH5U2u4aHEtsTZGdOmoUuX9Js3VaXoHJ1dAdXc3DB1KsODSzr8/f2XLVsGICEhQajeq5Cwhzr1DFIo9L68ODLSgcPxNqbQc3SEqyt4vCYfHR3h6Fh/KRSidZ1BgcCYJeSWjsfD9OlfrF2LpkpRDge9eyM6Gh4eLISnbf78+Xv27MnOzl60aFFqairb4dg7mn7PoM2b9SzfbqG+fdGmTZMp0iBKJTZsQFmZwTEMHWojk5yakJGRMWLECB8fn1tpaZ4PH+Kvv/DkCRwcIBCgc2cEBlrUPNnr16/36dNHKpUePnw4JiaG7XDsGlWjDPLza2UadXfXOuXCSBwOhg7F//2fYZ/y8NDd58nm/Otf/wKwaNEiz4gItmN5uu7du3/++eeLFy+eMWPGpUuX+JaU4u0N3RtlkHqaoaE9gGYWjLZOz57o3duA9qqNkWxpdL6RzMzM7Oxsb2/vOXPmsB1LSy1YsCAyMrKkpOTjjz9mOxa7RmmUQd271x/6Zug4UlPDHcaIjUVAQEsbv/ii6VO5hdGUomZeHW9CDg4Oqamprq6uW7ZsSU9PZzsc+0VplEFcLpra0qkZgYHo0MH0wTg6YsoUDBjQ0o01LW3HfpPKzMzMysry9vZOSEhgOxbD9OjR47PPPlMqlbNmzZI0s/c2MSdKo8wKCkJQkAHtBQKMHm2uYLhcxMRg5ky8+KKedeXu7prBq5oa7N1r8L0I66EqRRcuXGhFpajawoUL+/Xrd+fOHeras4VG6hlXV4fdu/HHH09vKRBg+nSGRoefPEFJCUQiKBRwdoavL3x9kZ+vtRfnqFE2Ocp09OjR4cOHe3t7FxcXW2MaBfDHH3+EhITI5fIjR45ER0ezHY7doWqUcVwuJk1CdDSa37U3MBAzZzI3w8bZGf7+6NsXoaHo3RvPPAMOB336aG3bkZkJW5zsrZrKvmDBAivNoQB69uy5dOlSVde+0rQb2ZAWoGqUPZWVOHsWV65oTeF0c0P37ujXzyz3Q1tBIsGGDZqTlrt0wfTplr7UyhC//fbbsGHDrLoUVamtrQ0PDz937tzf/va3L7/8ku1w7AulUQsgk0EshkIBDw9Y4N/kCxewf7/m8pVX0MyeHdYmKirq+PHjy5cvX7JkCduxGKugoKBfv361tbXHjh176aWX2A7HjlCn3gKoTrJr394ScyiAkBCtrn16us107Y8dO3b8+PG2bdu+//77bMdiAkFBQUuWLFEoFG+99RZ17ZlEaZS0QGysZkOjmhrs328bo/aff/45gMTERKvuzje0dOnS0NDQ4uJiOtSeSdSpJy1z/jwOHNBcjh2LkBD2ojGBnJycwYMH8/n84uJiW1pJmZ+f369fv7q6ut9//32wTe+BYDmoGiU
tExKCrl01l+npLT3j3lJ98sknABYsWGBLORRAcHDwxx9/rFAoZsyYIZVK2Q7HLlA1SlpMLMaGDZrjQ/z9MW2alY7a5+bmDho0yPZKUZUnT56EhYUVFhYuWrQoJSWF7XBsH1WjpMW8vDB8uOayuBgXL7IXjVFUpWhiYqLt5VAAzs7OO3bscHJyWr16dW5uLtvh2D5Ko8QQYWFaG5ocOYKKCvaiaaXc3Nxjx47x+XzbGKDXq2/fvgsXLlR17WXqab/EPCiNEkNwOBg7Fuqd4eVyrXEnK6EaxbbVUlTts88+CwwMvHr1qmpCAjEfujdKDHf6NH79VXMZH4/gYPaiMcyJEyciIyO9vLxu375t22kUwJkzZyIiIjgczokTJ/qZY7tFAoCqUdIa/fujc2fN5eHDVtS1t5NSVKV///4LFiyora198803qWtvPpRGieE4HMTHa3XtDx5kNaCWOnHixNGjR728vGz4rqiOZcuWvfjii3/88cfy5cvZjsVmURolrcLnY8gQzeX16ygsZCUQsVh8586dgoKCrKwsuVzefOPPPvsMwIcffigQCBiJjn0uLi5bt27lcrlJSUnnzp1jOxzbRPdGSWspldi+HXfv1l/yeEhIMP7wYalUKtRHJpM1fqusrKympkb92Zs3bwY0fTKK+q5ocXGx/aRRlQULFqxZsyYoKOjs2bPOjc+OJsahk0FJa6nOudu0CbW1ACCV4sABvPZaM58oLCw8efKkSCQSiURisbjxYyuOwfD09PTy8uLz+Xw+v/maQFWKfvDBB/aWQwEsX7788OHDBQUFK1as+Oc//8l2OLaGqlFinBMnkJGhuYyORnU17t6FUIjaWvB48PVFt24IDISr6xdffKGa994MV1dXgTYej9f4RRUfH58W1lYnT56MiIiwz1JU5eTJk4MHD+ZwOKdOnQoNDWU7HJtC1SgxTng4rl7FvXv1lw1TKgCZDEIhrl1DRgYiI0NDQmbMmKGqHFUlpM6jl5eXmTHok74AAAK3SURBVMK051JUJTw8fO7cuT/88MOjR4/YjsXWUDVKjFZWhk2bUFf39JYdO+L11+HmZv6YtKhK0TZt2ty+fdtu0yiAqqqqqqoqX19ftgOxNTRST4ym2rq/Je7fx7ffajY3YYrqbqA9l6Iq7u7ulEPNgapRYpzKSmzYAIM2ZAsKwrhxZgtI16lTp8LDw9u0aVNcXNy2bVvGvpfYD6pGiXF++82wHAqgoEBzL9X8VKXo/PnzKYcSM6FqlBihqgpr17borqiOHj0webKpopBIJE1NoioqKtqzZw+VosSsaKSeGOHatdbkUABFRaipgZOT3jf1zsDXO/1eKBSWl5c/afZm68SJE4cOHUo5lJgPpVFihJKSpzZ5UlcnksnEMplIJhPL5UKpVPVEVFoqUiga148Vhu9y4u7urp41pTOJSiAQDBgwINh6NqAi1ojSKDFCs8cxvbpr14Hr16UNFmtqSU9v6oNNTSxteCkQCNSXjo70x5iwif78ESM87ca6Koe6OjoKeDyBq6vWY0iI4IUXGq9Q8vX1pbRIrAsNMREj/PJLMxs7VcjlXA7HvanFmtOnw9/fXIERwiCa8ESM0L59M2+2cXFpModyOPDzM0tIhDCO0igxQvfurfxgp07g8UwaCiGsoTRKjODtjab392wOnQtEbAilUWKc6Gg4GPinqGNH9OplnmgIYQGlUWIcPz9ER2sunzpiyeNh/HhwOGYNihAmURolRhs4EFFR9c+bz4/u7pg2Dfa9zRKxPTThiZjI1as4dAjNnALSrRvGjkWbNgzGRAgTKI0S06mpQX4+Ll3C/fuatfZubujWDSEhWkfbE2JDKI0SM1AoIJFAoYCzM9zd2Y6GEPOiNEoIIUahISZCCDEKpVFCCDEKpVFCCDEKpVFCCDEKpVFCCDEKpVFCCDEKpVFCCDHK/wMyULg0aNexkQAAAABJRU5ErkJggg==\n", 202 | "text/plain": [ 203 
| "" 204 | ] 205 | }, 206 | "execution_count": 7, 207 | "metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [ 212 | "print('substructure', Chem.MolToSmiles(substructure))\n", 213 | "ibuprofen.GetSubstructMatch(substructure)\n", 214 | "ibuprofen" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "`bi`変数には、`(atom_index, radius)`が格納されている" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 8, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "name": "stdout", 231 | "output_type": "stream", 232 | "text": [ 233 | "atom_index 1 radius 2\n" 234 | ] 235 | } 236 | ], 237 | "source": [ 238 | "atom_index, radius = bi[854][0]\n", 239 | "print('atom_index', atom_index, 'radius', radius)" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "## RDKit fingerprintの取得と可視化" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 9, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "from pprint import pprint " 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 10, 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "name": "stdout", 265 | "output_type": "stream", 266 | "text": [ 267 | "[(19, [[0, 2, 3, 14, 1], [0, 2, 3, 4, 1]]),\n", 268 | " (103,\n", 269 | " [[4, 5, 6, 7],\n", 270 | " [4, 5, 6, 14],\n", 271 | " [4, 5, 14, 8],\n", 272 | " [4, 14, 8, 7],\n", 273 | " [5, 6, 7, 8],\n", 274 | " [6, 7, 8, 14]]),\n", 275 | " (105, [[5, 6, 9, 11, 13], [7, 8, 9, 11, 13]]),\n", 276 | " (110, [[9, 11, 12, 10]]),\n", 277 | " (161, [[4, 5, 6], [4, 5, 14], [4, 14, 8], [5, 6, 7], [6, 7, 8], [7, 8, 14]]),\n", 278 | " (173, [[0, 2, 1]]),\n", 279 | " (194,\n", 280 | " [[4, 5, 6, 7],\n", 281 | " [4, 5, 6, 14],\n", 282 | " [4, 5, 14, 8],\n", 283 | " [4, 14, 8, 7],\n", 284 | " [5, 6, 7, 8],\n", 285 | " [6, 7, 8, 14]]),\n", 286 | " (254, [[9, 11, 13]]),\n", 287 | " (261, [[9, 
11, 12]]),\n", 288 | " (294, [[4, 5], [4, 14], [5, 6], [6, 7], [7, 8], [8, 14]])]\n" 289 | ] 290 | }, 291 | { 292 | "data": { 293 | "text/plain": [ 294 | "[19, 103, 105, 110, 161, 173, 194, 254, 261, 294]" 295 | ] 296 | }, 297 | "execution_count": 10, 298 | "metadata": {}, 299 | "output_type": "execute_result" 300 | } 301 | ], 302 | "source": [ 303 | "rdkbi = {}\n", 304 | "rdkfp = Chem.RDKFingerprint(ibuprofen, maxPath=5, bitInfo=rdkbi)\n", 305 | "pprint(list(rdkbi.items())[:10])\n", 306 | "list(rdkfp.GetOnBits())[:10]" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 11, 312 | "metadata": {}, 313 | "outputs": [ 314 | { 315 | "data": { 316 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAJYAAACWCAIAAACzY+a1AAAKV0lEQVR4nO2dfWwT5xnAnzv7/JWcP4AkWlKBgLCMDZoGMhjSmLpuEqhAIWgSqfjcFAwR4mOiLRWtSmmL2ioVqbqVELXqh0BqKTC15Q+6NhFlEp2UQcdEIU2cBDoGSRzAPjvGH+e7d39YM+Zwvojx3aM9P1mn+PF7z/tcfn7vXjvvKRxjDAjM8HoXQIwXUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUqgfHHfnkY5oGowCc67rGi2qGpLl7xkL8XyRIFRwnE2vSvSB4yDz30tono4FHRQmk/8Oh/8ci50GUFIRjrPZ7Y+LYj3Pe/JfzwMnGARJumsrCDlMzw3/r0YU5Xo8flZR+njebjaXWyxzOc4ynv5isb8Fg88xFtWUAcB4ftKECW8Jwo/Hk/9Bk0wmg8GgJEnBYDAQCKR+yNwGg8F3CwuLLl4ESQJJgkAgS5apU+Hy5Syj8N4z5yiG5pAKFaUvFHojFjsNcKcBz08Qxc0ORw3AqE7TGmT5+5s3/8BYYqgGPD+xqOgjnp9wH8nzQE9PzxdffLFly5bhm12fN+9HbW13njud4HKB231nW1oKDQ3ZFY797JpdoSx33rq1RVWzvYMAHI4nXK7n72MqdOPG72X5wvBtHI4VLtfzY82cH1atWnXs2DG73V5SUuJ2u91ut8vl0mzdbvcvJ06cVFAAbjd4POByAZ/tF5XVVq4Uqmr4xo1aRekfahcAJopbCwvXj5g9E1nuuHFj9Yj1cJxQUtLCcY4xJc8Dly5dmj17tiAI3d3dZWVlOciYedpMWbgvhVmmM5HIoaH9Qeq8Ojj4jsPxxJhmH4nEPxMJFgopoVBSkpRwWJEkZdYsx9Sp1szkjCVk+aLF8vPRZ84PL730kqqqXq83N/4g23VOExndHPVehSwaPTGK3mOx2FcctywQCMRisWg0GhiJaFQKBm9r8ixd6qmosO3cWZoZVJRboyk9n7S3tx89etRqtT7zzDN616JFq1BR/IoyoAl2dESbm/3hsCJJyVBICYUUSVLC4SeTSXVMnQkCJ4omp9PkdJpcLjPHwcmTwZMn4bHHXFVVBelmPF94fwfz4EgN
wY0bNz700EN616JFey1MJrsGBmo1jdraBles6Lh3Z5vNZrfbbTabZ1hSbQoKfrBYXtBMm/ftu/b2233l5bavvppptaYu+3xJyUmen5jTwxwX7e3ts2bNEgShq6vLgAq1ozDr5W3GDFtDwxSn0+R2m5xOc2oYlZQsLyraO5a+fuL3NylKb2bo6adLW1qkjo5oY2Pvs8+WAYDV+gtD+QOAl19+WVXVuro6A/qDrDNSv3+FovxnxD1drhccjifG1Fks1hoI7NIEv/02snx5B8fB559XVFV5Jk78wFCf7n0+38yZM00mU2dn55QpU/QuJwtZPrIUFKwceTfeabf/dqyd2Wy/KSz0aoJz5hR4vcXJJNu+/Qeb7TlD+QOAF198UVGUuro6Y/qDrAodjlqzuXz43ZzOPyYSpv7+/s7Ozra2tp6eHk2DY8eOLVu2bOHChQ8//PDkyZNdLteePXsAQBS9Hs+rJlNJZuNdu8oqKpw+X7SxsQ2MhM/nO3LkiMViMeBE9A4sG8lkr99fc/363GyP6tdfr7XZ7vrDwlNPPaXJsH//fk1H27ZtS7/a0XFp8eIFHR0NodBb4fAH8fi/vvnmjMlkMpvNZ8+ezVqSLqxevRoA6uvr9S5kOLIrZIyp6qAk7e/tXZjpb2BgdSz294MHDwKAIAhFRUXl5eXV1dWNjY2a3bu7uz/77LPTp0+fP3/+ypUrgUAg89VVq1YBwJIlSzKDO3bsAIDKysp4PJ6z4xsHnZ2dZrNZEITLly/rXctwDKkwhapG4/F/3L59IhptkeUfUsF4PB6JRMbTq9/vLyoqAoAPP/wwHYxEIjNmzACAvXv3jid5rlizZg0AbN68We9CRmAEhQ+OQ4cOAYDL5bp69Wo6eObMGZ7nzWbzuXPn9Coshc/nQzEEmY4KGWM1NTUAsHTp0szg1q1bAeCRRx5JJBJ6FcYYW7t2LQBs2rRJxxpGiZ4Kr127VlZW9uabb6qqmg5GIpHy8nIAeOWVV/QqLD0Ee3p69Kph9OipkDEWi8XuDZ46dYrjOIvFcuHChfyXxFANQaa7wqGor68HgKqqqvyfTnENQWZYhYODg9OnTweA1157Lc9dr1u3DgC8Xm+e+71vDKqQMdba2spxnNVq/e677/LWaVdXF64hyAyl8PDhw5r3vtfrBYD58+cnk8n81LB+/XoA2LhxY366ywlGUdjX1+d0OgHg6NGj6aAkSZMnTwaAhoYGWb4yOHgkFHorFDpw+/ZfVTWc8xowDkFmHIWMsebmZgCYNGlSf39/OtjS0sJxnNVq+vrrn2V+1dfbuzAcfoexXI7ODRs2AEBdXV0Oc+YBAylUVXXRokUAUFtbmw4mEp1PPlkKAHPnFly9OkfznfvNm9tUNTdT1vQQ7O7uzknCvGGg22I4jmtubhZF8eOPPz5+/DgAMHY7ENi5Z09xaanl3LnIe+9pFvVw8fiZcPhPOel93759yWRy3bp106ZNy0nCvDHCgvz8c+DAgaampvfff7+6unpw8N1w+CAAtLZKa9d22e18S8tP7160CACm4uK/mExjWBioqqokSYFAILV+XpIkn8+3e/duAGhvb0991Y4IwylkjMmybLFYAGBgoCaZvJqK79hx5ZNPblZXF376aYVmebQobjaZVg+/BDJzpaTf71cURdPvggULKisrm5qa8nKUucRwCtOoari//9fpp5KkPProxf5+ubLS4fGYU2shU49YbGxrIQHA4/Fo1tKvXLly+fLlOT2CPGFchYrS5/cvzYx8+WXw+PFbJ05kudNjmIWQ9y6TLC4uNpt1u7Ey5xhXIWOxvr5fAdw1wsJhpbVVSq0kFkWTy2USRdOECYs8nlf1qlN3jPtm5DibIMyU5YuZQVE0rVihvXXNYqnKY12Gw0AfKu6loOB3I7bhuEK7fXEeijEshlZoty+xWOYM38bp3M7zzvzUY0wMrRCA93jeEIRZQ7zKieImh6MmrxUZD+NOZ9IwJkcihyORj1T1zk1r
gjBbFOut1nk6FmYQECj8H6osdypKL8cJZvMMzXrw/2cQKSSyY/BrITEypBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9/wWgphVBBOcPowAAAABJRU5ErkJggg==\n", 317 | "text/plain": [ 318 | "" 319 | ] 320 | }, 321 | "execution_count": 11, 322 | "metadata": {}, 323 | "output_type": "execute_result" 324 | } 325 | ], 326 | "source": [ 327 | "Draw.DrawRDKitBit(ibuprofen, 105, rdkbi)" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 12, 333 | "metadata": {}, 334 | "outputs": [ 335 | { 336 | "data": { 337 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAJYAAACWCAIAAACzY+a1AAAHZUlEQVR4nO2c22/TVhzHj+0QN1lD4m5jEi8gGlJCYYNWAoT2D0C5tOUmxGNfpr3saUKbaMUbfdj2NK0F2kD7gCgFSrlJSGOPMO1lk9aJqUxNOzVVC4jc2sSJsb0HF1PSXJzUifc7/n0UVUlqH/+iT77H5yTOYVRVJQhkWKsLQNYKKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKrQChlm+5f2XwSff4jCpKMQYmgx90bSchxWBKaw5K4WZsQAeKqwV2SxhmDzOVLV4P1kS7EgrJZ0m0ejyTRTfe5j3tmlTlQpBhYQQoqpqLBaLRqOxWCwej+t/8z78PhBof/So7D4wlTK6ZZmhtK/Cly9f9vT0PHz4MBaLJRIJ4zu+2riRqCqpqyOCQFyu5TvFby4XEQSjbla/OYruyNhzSdlUKrVlyxaGYebn57VnfG/xer05f1fe8fl8G3w+j89HOK7so+Y9F+Y8n3ebQjsSQmybwv7+/oWFhZ07dz558kQQBJ/PV4ujaiOXHBlF9RjBjikURbGxsXFubu7BgwcHDx6s6bFLzgsxhUbo6+ubm5trbW09cOBArY+tmdDPbWbkx3YptDKC1cF2U/v+/n7LIlgd7JVCURT9fn8kErl//35bW5vV5ZiDvVJ48eLFSCTS0tJCRxeqYaMU6hG8d+/eoUOHrC7HNGyUQj2C1HShGnZJoR7Bu3fvHj582OpyzMQuKbx06VIkEtm9ezdNXaiGLVKYyWQaGxsjkcj4+PiRI0esLsdkbJFCPYKUdaEa9Kcwk8n4/f7Z2dk7d+4cPXrU6nLMh/4UXr58eXZ2dteuXfR1oRqUp1CP4NjYWHt7u9XlVAXKUzgwMKBFkMouVIPmFGaz2UAgMDMzc/v27Y6ODqvLqRY0p3BgYGBmZmbHjh0UR5BQnEKbRJBQnMLBwUE7RJDQmkJJkgKBwPT09K1btzo7O60up7rQmcLBwcHp6enm5mZaJxIroTCFegRv3rx57Ngxq8upOhSmMBQKaRGkexSjQ1sKJUlqamoKh8Ojo6PHjx+3upxaQFsKQ6FQOBxubm6mfhSjQ1UK9QjeuHHjxIkTVpdTI6hK4ZUrV8Lh8Pbt2+0witGhJ4V6BEdGRk6ePGl1ObWDnhRe
vXpVi6BNRjE6lKRQj+D169dPnTpldTk1hZIUDg0NhcPhYDBon1GMThk/TlNVSVFeEaIwzAcs66taSWUjSdKFCxcIIT09PSxLyZvSOKUVKkoinR5Ppx9L0t+EvNGeZNkPeX6v293udLZUucLSDA8PT01NBYNBW41idIqfC9WlpdFk8idVXSy0Bc/v8XrPcdzGahRnBEmStm3bNjU1de3atdOnT1tVhoUUVKiq2VisWxQfl2yCZT2C8J3T2Wp2bYYIhUJdXV3BYHBiYsKGvSgpPJxRY7FzoviYkNLrbChK8vXrryTpL3MrM4Isy729vYSQ7u5ue/ojhRQuLY2I4i+EMIQYmnKoqhiNfqOqhlfHMYnh4eHnz59v3brVnmdBjTwKFSWeTPYRQgz605DlucXFIZOqMnjE5QieP3+eq2AVGFrIMyJNpcZUdamCtlKp0fr6LoZxrrmqd0iSpC+hlbPA1tOnTycnJ5uamuw2l88hj0JR/LmythQlkc3+xvOfF9kmnU6LophOp6NF0beZn58vMmY+c+ZMZ2ennSNIVitU1YwkTRrfP5GQk0k5kZDjcTmZlDOZgUzmj5VZyYlONpstqz6O47TFswRBWL20Vltbm9/vL6tB+shVKMsLhChFdjh79t/ff19KJOR4/E0iIa9KyD+EFDsj1tXV5ZjQ1s8qtPKZx+Op7IXZh9UdaYmUhMPixMS7kSfPsz4f5/VydXUsz7MNDZ9s2LBfKIDL5RIEweyXYHdyp/ay/OLFi2ILejx7lpYk1evlvF7O4+E47r2Jo9vd6fV+W5VKkQLkppDjPmbZ9YpScH3OYNBVrDlHozl1IYZZPS9knM49FTXFEEJ4fu8aC0LKJc/U3u2u7EcIqtP5qcOxeW31IGWTRyHP73M6P6ugrfr6L9ZcD1I2eT8jZbzeboZxl9WQ293B85X1wMiayP8xt8OxWRB6jX9UxvP71q//2ryqkDIo+AUNz+9vaPiRZT8q2YTb3S4IP5j70ShinBJXsClKYnFxMJUay/tF0rp1zR7PlzgKtRZDFyGqaiqT+TWb/VNRFlRVYVm3w9HI8/twFvh/gJLrSO2MTS9WoAlUCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCJ7/AA9JQm+DyaDOAAAAAElFTkSuQmCC\n", 338 | "text/plain": [ 339 | "" 340 | ] 341 | }, 342 | "execution_count": 12, 343 | "metadata": {}, 344 | "output_type": "execute_result" 345 | } 346 | ], 347 | "source": [ 348 | "Draw.DrawRDKitBit(ibuprofen, 254, rdkbi)" 349 | ] 350 | }, 351 | { 352 | "cell_type": "markdown", 353 | "metadata": {}, 354 | "source": [ 355 | "- Morgan fingerprintは、原子から同心円状に結合をたどって行きます。\n", 356 | "- 一方でRDKit fingerprintでは、直線的な部分構造を取り出すようです。\n", 357 | "- `rdkbi`では、部分構造に含まれる結合のidが取り出されています。" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 13, 363 | "metadata": {}, 364 | "outputs": [], 365 | "source": [ 366 | "epinephrine = Chem.MolFromSmiles('CNC[C@H](O)c1ccc(O)c(O)c1')" 367 | ] 368 | }, 
369 | { 370 | "cell_type": "markdown", 371 | "metadata": {}, 372 | "source": [ 373 | "## 分子をまとめて表示" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 14, 379 | "metadata": {}, 380 | "outputs": [ 381 | { 382 | "data": { 383 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZAAAADICAIAAABJdyC1AAAfkklEQVR4nO3deVhUVR8H8N8MiyDKbm6pgFtAaYqISuaruBSSubxkvoqKJWTqhCtmKVZqpKnoIxZqIC5ouKO4hOCCCypSCq6FIgLuMCSDI7Oc949jtxEQWebeO2f4fR4fHzjBPYccvnPuuWeREEIAIYRYIBW7AQghVF0YWAghZmBgIYSYgYGFEGIGBhZCiBkYWAghZmBgIYSYgYGFEGIGBhZCiBkYWAghZmBgIYSYgYGFEGIGBhZCiBkYWAghZmBgIYSYgYGFEGIGBhZCiBkYWAghZmBgIYSYgYGFEGIGBhZCiBkYWAghZmBgIYSYgYGFEGIGBhZCiBkYWAghZmBgIYSYgYGFEGIGBhZCiBkYWAghZmBgIYSYgYGFEGIGBhZCiBkYWAghZmBgIYSYgYGFEGIGBhZCiBkYWAghZmBgIYSYgYGFEGIGBhZCiBkYWAghZmBgIYSYgYGFEGIGBhZCiBkYWAghZmBgIYSYgYGFEGIGBhZCiBkYWAghZmBgIYSYgYGFEGIGBhZCiBkYWAghZmBgIYSYgYGFEGIGBhZCiBkYWAghZmBgIYSYgYGFEGKGqdgNQAgZHonk348JEa8d5WFgIYQqw+WURGI4mYW3hAghZmBgIYSYgYGFEHo5ej/48CGMHAnJyWK3BsewEEIVEfLCuPvGjRAfD+fOQWYmNGokXrOwh4UQqhQhoNE8H27/4gvw9IScHAgNFbdRGFgIocps3w5ubnD/PgCAqSnExoKFBfz0EyQlidgoDCyD8PTp071793766adnz57Nz88XuznG4+TJkwcPHhS7FWyKjobr1yE4+Pmnrq7w9ddACAQFQUmJaK0iSDyFhYXx8fEBAQGNGzem/xw2NjZOTk7Xr18Xu2lGwtPTEwA2btwodkMYlJdHbG0JANm+/XmJSkW6dSMAZOpUsRqFgSWC27dvr1q1ql+/fqamzx96SCQSLy+v+fPnd+vWDQAcHBzS0tLEbibz9u/fDwDNmjVTKBRit4VNUVEEgDg6kvv3n5dcvEjMzYlUSo4dE6VFGFjCyc7OjoiI8Pb2lvzz/MXExMTb2zsiIuLOnTv0axQKxfvvvw8AjRo1+u2338RtMOu6d+8OAMuXLxe7IczSasmgQQSAfPzxv4ULFhAA4uxMnjwRvkUYWPzSaDTp6elhYWFvvPEGdxvesGFDPz+/2NjYoqKiit+iUqnGjx8PAA0aNNjO9cZRDSUmJgJA06ZNsXtVJzk5pHFjAkB27HheolIRDw8CQEJChG8OBhYvlEplUlKSTCZr0aIFl1MODg4BAQHx8fElJSVVf7tWq50+fTrtgq1du1aYNhsZLy8vAFi2bJnYDWFfZCQBIE2akAcPnpdkZGgdHdf37n3y5EmB24KBpU8KhSIhISEgIMDGxobLKScnJ5lMlpSUpFKpXnkFrVbLfRweHk6Ht8LCwnhstDE6cOAAdq/0RqslAwYQADJmDFe25JtvAKBDhw6lpaVCtgUDSw8ePHgQGxvr5+fXoEEDLqfc3NxCQ0NTU1N1M6hq+/fv79mz5wPufYyQNWvWSKVSAJDJZBqNhp/mGyFvb2/sXulTdjaxsiIAJCGBFpSVlXXp0gUAZs6cKWRDMLDqKj8/v1evXjSkTE1N+/btu3LlypycnJpeR61Wu7u706TjxuAJIbt27bKwsACAMWPGlJWV6bXtxo
lOvHJ0dHwixqiw0Vq16oGn5/udOj1+/JgW/P7772ZmZlKpNDU1VbBWYGDVVZ8+fczMzHr37r1hw4ZHjx7V5VJ3797t3LkzALRu3fratWtceXJyMp2o5efnx0sPHODfP1xJuQ/Y8c477wDAjz/+KHZDjIpWo+nduzcAjBs3jiucO3cuAHTs2FGwG0P2Xo4GpaysrGHDhhKJ5OHDh3q5YFFREf19s7e3P336NFd+7ty5Jk2aAMC7774rl8v1Ute/dFOJfsxsYB06dAi7Vzy5efNmo0aNAGDPnj20RKlUvvnmmwAQGhoqTBsYezkamvPnz9N3GD1eU6FQDB48GACsrKwOHz7MlV+5cqVVq1YA8Oabb+bl5emnMjouVmlgletzMYLG/dKlS8VuiHFavnw5ALRo0aKwsJCWZGRkmJmZmZiYnD17VoAGMPZyNDSrVq0CgPHjx+v3smq1esKECQBgbm7+66+/cuX5+fn0Dc3Z2fnGjRu1r+DpU5KURGQy0qIFKSgwmh4Wdq/4pvnnxnDChAlcYWhoKAC88cYbT58+reV1qz0owdLL0QCNGjUKAKKiouinBQUFd+/e1cuVtVrtzJkz6VSsn3/+mSt//Phxz5496YqT33//vWYXLSwkmzaRESOeP/GhfzZvrupVwlRg0e7VkiVLxG6IMbt+/bqlpSUAHDhwgJYolUr6vGju3Lk1u5ZSSWhPrdpvmTV4Oebm5g4aNEj3ARZycnICgEuXLtFP6VvNDz/8oK/rh4eH03U8umMEJSUlgwYNAgBbW9sTJ0688iKqO3dIZCTp35+YmT0PKYmEeHqSRYvI5cvPv4j9QffDhw9j90oYS5cuBYCWLVtySzXOnDljY2MTERGRl5eXlZV16tSpxMTEuLi4NWvWRC9fTkJDSVAQGTmSDBpEevQgb7xBmjUjlpYEgLRqRUgNBiVq8HIcOnQonQZZp5sRI3Lv3j0AaNy4sVqtpiW0t5yYmKjHWmJiYuga6SlTpnBTsZ49e/bRRx/R5Ts7d+6s9Bu5pYtLevZ8/g9vYkK8vUlEBDHGdx36P1+P7xboZTQaDe3MBgUFqVSqjIwMR0dHbiV/OR3s7V9IH90/ZmbEyYkQfnpY3AMs3EuA2rVrFwD079+ffso9Mazj5IaKdu/eTadijR49mpuKpVarg4OD6T3jL7/8Qgs1Gs2ZM2dCQ0M7dOjAvWJaOzpq/f3Jxo3kn4FS4/Pbb7/RV+bff/8tdlvqhWvXrllaWkokkhkzZpibm9NXmqWlZfPmzV1dXXv06PHee++NHDkyODh43pdfku+/Jz/9ROLiyIED5NQpcvkyycsjugvUqj0oUbMOv0Kh8PX1xb0EqFmzZgHAvHnz6Kfnzp2jQ4981JWSkmJtbQ0AgwcP1l1uwi3fmThxYnBwcPPmzbmccnR0nDBhwt69ewVePCEK2r0KDw8XuyH1SHh4+IABA5ydnQEgMjLy2bNndbocT4PuKpUqMDAQcC+Bf4Z4uaHHlStXAkBgYCBP1aWnp9OpWF5eXrqduBUrVkgkEvqfAKBNmzZBQUEJCQn1Z1p8UlKSEXSviouLc3NzL126lJqaum/fvs2bN69evXrRokWzZs0KCgoaO3ZspXt7iEij0cTExABAu3btqrNOVi9qM6Sq1WpnzJgB9XsvAe4GkFup8PHHHwMAr/9Drl692rp1awCIjY3VLW/Xrh2dXVHj54ZG4d133wWA77//XuyGvECr1WZnZ1+4cCElJWXXrl0xMTErVqxYsGBBSEhIYGDgsGHD+vbt27VrVxcXFzs7O7pitAoSicTOzm737t1i/1j/UqvVHTt2BGE3dJWQ2h5C/cMPP8yZM0cikcyfP3/BggW1uwi7zp075+Xl5erqeuXKFVri5OR0+/btzMxMOlWKJ3fu3NmzZ8/UqVO5EoVCYWtrCwByudzKyoq/qg1TcnJy//79HRwcbt26xe00LbqysrKdO3eOHj26+r9fVlZWtra2tra2NjY25f62s7
M7e/ZsdHR0s2bNLl++bG9vz2vjq2njxo3jxo1r167d1atXXzbirne1ryY0NNTGxmby5MkpKe3lcli+HF71JmFUzpw5AwB0ShQAFBQU3L5929ra2tXVldd6W7VqpZtWAHD+/Hm1Wu3h4VEP0woAvv32WwCYOXOm4aQVAMTExHz22WfW1tYuLi4VA4j7287Ojvu06t/5iRMn5ubmHjlyJCQkZOPGjYL9IC+j0WgWL14MAPPmzRMsrQDqPMtm9+6zdDrF+PFEqNtYgzBy5EgAWLduHf10x44dADBw4EDhW0JfN1OmTBG+atElJyeD4Y1elZWV0aFo/Q7yckv5DOHGkIamkKNXVF2jcejQ7omJ8OGHsGEDPH4Mv/4KlpZ1vORL3bt3Ty6XOzs76247JZZyPaxyn4rYEiOgUqmKi4vlcnlxcXFRUZFcLqcfV/w7OzsbAGQymaF1r27duuXu7j58+HA9XtbZ2XnhwoUhISGff/55nz597Ozs9HjxGtFoNIsWLQLhu1d6Oaq+b19ISQFfX9i3D957DxISQGe7TT24detWQkLC/v37jx075uXllZub++uvv4r7+1lQUJCbm6t7AyhWahBC0tLSRKm61rZv315YWCiXy4uKiiqNIYVCUf2rSaVSuVzOW2NrTKVS0bkmYWFhrxxKrya1Wk1zYerUqbt27Tpx4sT06dPpEzpRbN269fr1623btv3f//4ndN366qpdvUpatyYA5M03SX5+Xa+m1WrPnz8/d+5cNzc3rqmWlpb0GVnjxo2PHDmij1bXUrkbwLKyMjqJrlDwmZk3btwAgKZNmwpcb104OjpW/Zo0MTGxt7d3cXHx8PDo16/f8OHDAwMDp02btmDBgoiIiA0bNuzevfvo0aMZGRn79u2je8idOnVK7B/ruaioKABwc3PT1w6x8fHxLi4uubm59NPs7Gw6WMnt8SIwtVpNT1TZsGGD8LXrc6VYfj55663nJwD9+WdtrlBWRpKSSEjIzNdff517+drb2wcEBOzcuVOhULxsGwOB0Vkd3FbrtI/j5uYmfEtiY2MBYNiwYcJXXWtTpkwJCgqaPXv24sWLIyMjt2zZsn///pMnT2ZmZt65c6emo1F62CpAf7jRK+7FeefOnQULFtRlChW9r/T19eVKli1bBi/u8SKkzZs3A0Dbtm0FHr2i9Ly09fFjQheuNWtGqj8lqLSUJCSQoCDy2msEgHTv/gEAtGrVik6ALDeD9mXbGAiJ7onMbVa1YsUKAPjkk0+Eb8lnn30G9Xt/gtpvFcCDtWvXluteTZkyBeq2AdGDBw/orGBu8h23lO/TTz/VQ6NrgutexcTECFw1pf+1+CUlZNAgYm5OXrl05+FDEh1Nhgx5vmyb/nF3J0uXpqenp1d9dkOl2xgIQ6lUWlhY6N4A0nXI69evF7glhBC6pbKQm2oboLS0NBMTE1NT03PnzonYDK57tW3bNlpSUFBgaWkplUozMzPrcuVNmzYBgI2NDbdXCl3KBwAHDx6sa7trYsuWLSJ2rwhP+2EplURnd99KXLxI+vQhJibPQ0oqJb16kSVLanYjWek2BgKg4+vu7u5cCb2BvXLlimBtoEpKSkxNTc3MzPAwK3qT7urqqlQqxWrDunXraBvKda8+1j02ubaGDRsGAH5+flzJkiVL4MU9XvimVqvpU6bo6GhhaqyIr92Oql59nZ9PpNJ/Nzup9SD9nj17Km5jwDe6SyzXG8/PzwcAW1tb4Y/horOQPD09Ba7XACmVSvp8Zv78+aI0oKyszMXFBQC2bt1KS7juFbddWl0UFBTQeQybN2+mJRqNhp5mFhwcXPfrVwftXrVp06au65zrQJzAIoQcOkSKi/VQ0cu2MeCPv78/AHA7uty4cWP48OEBAQECVF3OwoULAUAmkwlftQE6ffo0vTFMT08Xvvb169eX617RBQkjR47UVxV0HoODgwO3q+3Vq1fp6ITu3v880Wg0dM0Z98oXBY+BVXEfLp6ONeC2Me
jevbve96KqSKwbwIroWRXcWzoKCQkBgM6dOwvcBeC6V3FxcbSEdq8kEoleulecDz/8EACGDBnCldB1Dm3atOF7rn9cXJzo3SvCdw9LsE13uW0Myp1Cql/Pnj2jvWJra2vRz2HWarV0QlMtDm01VgqFon379gDwzTffCFnvL7/8Uq57JZPJAOCjjz7Sb0X5+fn0xpAb11epVJ6engAwefJk/dali+teifJkSZeRBBYhpKCg4K233gIAJyen69ev6/HKCoUiISEhICCAborQokULU1NT0fs1165do40RtxmG5tSpU1Kp1NTU9MKFC8LUqFaraUpy3au7d+/S7tXFixf1Xh2993R0dLx37x4tuXLlCr0x5G9Pza1btxpC94qIOIbFh8LCQjpDqmnTphkZGXW82r1799auXevr66u7dLFz5850c0upVLp69Wq9NLt26IjGiBEjRGyDYaLP5t5++21hnsNER0cDQPv27bmt/b/44gsA8Pf356nG9957r9z1v/vuO/pWzccBHBqNhnYFuKX+ImLmTJRqKikpof+cjRo1SkpKqsUVbt26FRER0b9/f25Vp1Qq9fDwCAsL446Pj4iIEGsWGCcoKAjwQPbKKBQKuqPhwoUL+a5LrVbT7fO3bNlCS3jtXlF5eXm0s8/tBqFSqbp168bTE5ht27YBQOvWrUXvXhHjCyxCyLNnz+jmnzXaxDkrKyssLMzDw4PrTFlYWPTv3z8iIqKgoKDi18fGxtJE+/zzz0UZz6Jveoazhs6gHD16VCKRmJub13HG5ivRfm779u25iZR04P+///0vr/X+/PPP9Mbw/v37tOTixYvm5uZSqfT48eN6rEir1dJXmoHsLWyEgUUI0Wq106ZNo8t3qujHqtXq1NTU0NBQOgZB2dra+vv7x8bGFr9q2sXevXvpbONhw4YJPF/x77//NjExMTc3N4QFdIZp0qRJANClSxf+bgy57hU3N+ru3bt04+w//viDp0oprVY7cOBAABg1ahRXGBYWBgDOzs61uDF82Y7yQ4YMoaNXBnJEgHEGFsWdKFPuMJXS0tKEhISgoKCmTZtyOdWkSZOAgICKSxerduzYMRsbGwDw8fHh6bmyXC6Pi4u7ffu2biE9dsHLy4uPGo1DSUkJnWrA31E6L+teCTOwmJOTQ3cB4w6mVKlUXbt2BYBp06aVlpbm5+dnZWWlpqYmJCTExsZGRESEh4eHhYXJZLKAgAA/Pz9vb283N7fmzZtz53RVqkWLFtxZdqIz5sAihKxevZruSSSTyR49ehQfHx8QEKC72ZuLi4tMJktNTa166WIVMjMzW7RoAQCenp4PHz7UV8sfPHgQGxvr5+dHh/zLLW/+5ptv6OtSX9UZpeTkZIlE0qBBg6ysLL1fnOtebdq0iZYI1r3iREZGuru7nz9/nivJyMgwNTWlG5PWSKNGjVq2bOnu7t6rVy9fX99Ro0ZNmjRpzpw506dPb9CgAa+PIGvEyAOLEBIXF2dmZkZvD+m/jVQq7dGjR3h4ODeIXkfZ2dl0lNfV1ZXbt6h2rl27Fh4e3qNHD27vN1NT0759++7YsUP3y+iDhfj4+Lo13PhNnDiRdkW5R3j6smHDBnhxj2A6CiHkc1utVltuLKKwsNDKyqphw4a2trZOTk5vv/12nz59Pvzww7Fjx8pksnnz5i1btmz9+vU7duxISkpKT0//66+/Hj58WPVKZl4fQdaU8QcWIeS7776jc3O8vb0jIiLy8vL0XsXdu3fpxglt2rSpRQ5WOuTv5+cXFRXFTbfhaLVaem4Kf1NkjUZxcTGdUbx06VI9XrbiCVdqtbpz585Cdq8qNX/+fND32QK8PoKsqdof88WQsWPHbtq06Z133klNTeWvlqKiog8++ODUqVP29vaJiYk9evSo+uvpsfLbt2/ftWtXXl4eLbS3t/fx8fHz8xs2bFil+5TfuHEjKipq+fLlDg4Ojx490v+PYXSSk5MHDBhgbm6ekZGhu4Hty6jVam
4XebqjvO4OzvSDmzdvZmVlOTs7//nnn1zPXaPRHD9+vF+/fjz/QC9VXFzs5OQkl8tPnDhBZwvqy6VLlzw9PdVq9dGjR+kpkKIROzF59/TpUzourvs8hScKhcLX1xcAGjVq9LL1qE+fPt2+ffvo0aPpVBqqTZs2MpksJSXlZZ3zcl0wBwcHEG9nAubQXWq7dOly4MCBbdu2RUVFhYeHz5kzZ9KkSaNGjXr//fd79erl5ubWsmXL6h+VZm9v37FjR7G2haoUfUo4YMAAPi5Ozx6t3SNIPTL+wNq+fTt9hc2YMUOA6lQqVWBgIACYm5tXOsb06NEjbkpq1UP+ZWVlSUlJkydPLrdh9NixY0NCQujA3KRJk0Rf1Wj4ioqKWrZs2bJly+okkVQqpTvKd+3ateKO8jExMbt37z548KCTkxMY0nHTcrmcLjM8ceIEH9dXqVT0/VLcRz3GH1jcUUvLli0TpkatVkv3kzMxMYmKiqr4BTKZ7Mcff/zrr78q/XZu1sVrr73G/RZxG0Zz02H27dtHZ4ENHToUZ2O9Uk5OTmRk5IABA/z9/YOCgmbNmrVo0aLVq1dv3rx53759qampmZmZubm51Z+bwusjyFqg3Ste5x/88ccf9MgPnjKxOow8sIqLiy3/OSiRW+AuDG4WGHdWRdUePXoUGxvr7++v+0y66i5YWloavTfs16+fQZ0kWk/Q1VF8PIKsKa57pd9p7hXNmzcPADp06FBaWsprRS9j5IGle3ab8G8La9asobMTpk6d+rIbt5ycnKioKD8/P3qLBzpLF69evfrKKrKysuidTrdu3R48eKDvnwBVhadHkLVAB5gEmN757NkzulJn5syZfNdVKSMPLLp8gcrOzha+Abt376abOI8ZM0Z3cUNWVlZ4eLi3tzddRA0A3KyL/BruGH3z5k26tKjus8BQTR05coTeGF6+fFmsNgjWvaIyMjLojaEoR58Yc2Ddv39f9xxtsTqxKSkpdILC4MGDjxw5EhoaSmdIUw0bNvTz84uNjZXL5bWugpsF9lH37qQa/TKkR5988gkA9OjRQ6wbQ7rswcfHR7Aav/zySwDo2LGj8IOnxhxYq1evfg1A8s/DNRFbcvbsWQcHB64zBQDNmzcPDg4+dOiQvrbsKCoqGj948DMnJ+LoSEQ97aq+KS4ubtWqFQAsX75clNrpLGJhuleUUqmkG5DOmTNHsEopYw6sE87OaoAigESAfh07ituYK1eurFu3ztPTc/bs2adPn+ZlLoJSSYYPJwDEyoocOqT/66OXSExMpJ3lGzduCFz1t99+S5+6CFzvhQsXzMzMTExMBD4L0ngDKydH99yLdJ1jBI2ZWk0mTCAAxNycCPtUtJ4bN24cAPTq1UvIaXFc9+rYsWOCVcqZPXs2HTkV8sbQeAPr++9fOKgnMFDsBglFqyWzZhEAYmJCfvpJ7NbUF3K5nM7vXbVqlWCV0mXJffv2FaxGXUql0t3dHQC++uorwSo13sDq1OmFwPr6a7EbJKzwcCKREAAi3ibO9c3+/fvpjeGfNTrBvLbE7V5RaWlp9CxI3S1ueGWkgXX16gtpBVAf+xobNhBTUwJAJk8muHxHEGPGjAGA//znP7XeXq36aPfK29ub74qqRhd1dOrUSZgd3400sObNKx9YCQlit0kMO3aQBg0IABH1tN76gy5aBIDIyEheK3ry5Ak9lfLo0aO8VvRKSqXS1dUVAKq5oqOOjHR7mQ4d4M8/XyhJTwed3abqkZQUiI2F6Gj4ZxcUxKt9+/YNGTLEysrq4sWLbdu2reZ3lZSUlNvBRvfTgQMHjhgxQvfrFy1a9PXXX3t7e588eZKHH6Jmzpw507t3b4lEkpaW5sH3b5kAoSi0s2fLd68AyN27YjdLPLr/H7iSch8g/aGHNvXr16/ijeG6desmTJgwfPhwHx8fDw8PFxcXBwcHk1e9l5RbB/PkyZMmTZoAQEpKioA/VlXoUYydO3fm+6wKY+xhTZsGEREvlJ
iaglJZf7sYEglw/8r0Y65E9z8hPXn8+LG7u/v9+/ejoqLoAmnO6NGj4+LiKn4L3dTYxsbG1taW+8DGxsbOzs7W1rZr1670PHpq8eLFX331lYF0r6jS0tJOnTrdunUrMTGR7t/NE6MLLK0WWreG/PwXCps2hXv3RGqQAag0sDhG9gIwDHv37h06dKi1tXVmZiZdIE0dOXIkJyfHVgfNJm7p+yspFApnZ+eHDx8mJyeLuLtpRXQ6tH53Oq0Er/03EaSkVHI/2L692M0Sle6/Mv0Ybwn55+/vDwA+Pj76fWK4ePFiAOjVq5cer8kQ01cFGmssLCq5zbGwEKk1BgZvAAW0Zs2a48ePJycnR0dH0wXS1aFWq7mxdg43Bv/48eM9e/YAAF2RUw8ZXWD17Anr18Phw3D6NPxzsgN06yZqm8RW7h4QCcLR0XHlypWjRo2aNm1a27ZtLSwsuNypeLYF93dJSUnVl+3SpcvgwYN9fHyE+SkMjRGNYVUcl5FIID4eTExgxAjsWSBRDB8+/PDhw6WlpdX8eqlUyo21c0PvuuPxr7/+ev/+/XltsyEzrsDCZ2HIwNy/f//SpUtTpkzhEqfccHu5v62trcVuskEz9sDiGM2PiVA9JhW7ATwjBKMKIaNhjIGFN4AIGSkjekqIz8IQMnZGNIaFEDJ2xnhLiBAyUhhYCCFmYGAhhJiBgYUQYgYGFkKIGRhYCCFmYGAhhJiBgYUQYgYGFkKIGRhYCCFmYGAhhJiBgYUQYgYGFkKIGRhYCCFmYGAhhJiBgYUQYgYGFkKIGRhYCCFmYGAhhJiBgYUQYgYGFkKIGRhYCCFmYGAhhJiBgYUQYgYGFkKIGRhYCCFmYGAhhJiBgYUQYgYGFkKIGRhYCCFmYGAhhJiBgYUQYgYGFkKIGRhYCCFmYGAhhJiBgYUQYgYGFkKIGRhYCCFmYGAhhJiBgYUQYgYGFkKIGRhYCCFmYGAhhJiBgYUQYgYGFkKIGRhYCCFmYGAhhJiBgYUQYgYGFkKIGRhYCCFmYGAhhJiBgYUQYgYGFkKIGRhYCCFmYGAhhJiBgYUQYgYGFkKIGRhYCCFmYGAhhJjxfyjNBatPUF9WAAAAAElFTkSuQmCC\n", 384 | "text/plain": [ 385 | "" 386 | ] 387 | }, 388 | "execution_count": 14, 389 | "metadata": {}, 390 | "output_type": "execute_result" 391 | } 392 | ], 393 | "source": [ 394 | "Draw.MolsToGridImage([epinephrine,\n", 395 | " ibuprofen], molsPerRow=2)" 396 | ] 397 | }, 398 | { 399 | "cell_type": "markdown", 400 | "metadata": {}, 401 | "source": [ 402 | "### 図の保存" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": 15, 408 | "metadata": {}, 409 | "outputs": [], 410 | "source": [ 411 | "img = Draw.MolsToGridImage([epinephrine,\n", 412 | " ibuprofen], molsPerRow=2)\n", 413 | "img.save('./molecules.jpg')" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 16, 419 | "metadata": {}, 420 | "outputs": [ 421 | { 422 | "name": "stdout", 423 | "output_type": "stream", 424 | "text": [ 425 | "molecules.jpg\n" 426 | ] 427 | } 
428 | ], 429 | "source": [ 430 | "!ls molecules.jpg" 431 | ] 432 | } 433 | ], 434 | "metadata": { 435 | "kernelspec": { 436 | "display_name": "Python 3", 437 | "language": "python", 438 | "name": "python3" 439 | }, 440 | "language_info": { 441 | "codemirror_mode": { 442 | "name": "ipython", 443 | "version": 3 444 | }, 445 | "file_extension": ".py", 446 | "mimetype": "text/x-python", 447 | "name": "python", 448 | "nbconvert_exporter": "python", 449 | "pygments_lexer": "ipython3", 450 | "version": "3.7.0" 451 | } 452 | }, 453 | "nbformat": 4, 454 | "nbformat_minor": 4 455 | } 456 | -------------------------------------------------------------------------------- /notebooks/4.2-doe_orthogonal.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"4.2-doe_orthogonal.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyN7Xwmki2kvYSR6WBiJRQEB"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"h7TTRpRxBnJx","colab_type":"text"},"source":["# 実験計画法 (Design of Experiments, DoE)\n"]},{"cell_type":"code","metadata":{"id":"zfLPOz07sOUq","colab_type":"code","outputId":"dfbeb457-36c2-4945-8844-a290538d7657","executionInfo":{"status":"ok","timestamp":1581085543339,"user_tz":-540,"elapsed":6733,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":235}},"source":["import numpy as np \n","import pandas as pd"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Collecting pyDOE2\n"," Downloading https://files.pythonhosted.org/packages/70/1f/8a2e1b57f6dd22cd2002e4332e229e87a3858d560c516b50ab7fe5bb075c/pyDOE2-1.3.0.tar.gz\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from pyDOE2) 
(1.17.5)\n","Requirement already satisfied: scipy in /usr/local/lib/python3.6/dist-packages (from pyDOE2) (1.4.1)\n","Building wheels for collected packages: pyDOE2\n"," Building wheel for pyDOE2 (setup.py) ... \u001b[?25l\u001b[?25hdone\n"," Created wheel for pyDOE2: filename=pyDOE2-1.3.0-cp36-none-any.whl size=25519 sha256=59ddb72ba21f0659cee848d43b576914d01c0b4afb40d9695ea2f0e57b472f1b\n"," Stored in directory: /root/.cache/pip/wheels/60/88/1d/b334ee00dd83e82d111ec56c3be91573d335c93870698037f1\n","Successfully built pyDOE2\n","Installing collected packages: pyDOE2\n","Successfully installed pyDOE2-1.3.0\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"Nv_ovqPcjmNX","colab_type":"text"},"source":["## pyDOEによるデモ \n","\n","pyDOEというパッケージにいくつか実験計画法の手法が登録されている。
\n","所望の手法があれば利用すると良い。
\n","以下はfull factorial手法の例(2因子、水準数はそれぞれ2と3)。"]},{"cell_type":"code","metadata":{"id":"Grr-TKrRuZcR","colab_type":"code","outputId":"5e559eef-28ff-48ff-b19a-2f3c304d8687","executionInfo":{"status":"ok","timestamp":1581085543758,"user_tz":-540,"elapsed":6906,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":125}},"source":["!pip install pyDOE2\n","import pyDOE2 as pyDOE\n","pyDOE.fullfact([2, 3])# 2因子 3水準"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[0., 0.],\n"," [1., 0.],\n"," [0., 1.],\n"," [1., 1.],\n"," [0., 2.],\n"," [1., 2.]])"]},"metadata":{"tags":[]},"execution_count":24}]},{"cell_type":"markdown","metadata":{"id":"nZsLUHT-j5vk","colab_type":"text"},"source":["## 直交計画表の関数を作る \n","\n","直交計画表を作る関数を定義する。
\n","因子数を入力したら直交表が出力される。
"]},{"cell_type":"code","metadata":{"id":"yZQ7dWnNBz_9","colab_type":"code","colab":{}},"source":["def orthogonal_table(n_factors=7):\n"," \"\"\" Orthogonal table \"\"\"\n"," n_pow = np.ceil(np.log2(n_factors))\n"," n_bases = 2**int(n_pow)\n"," bases = np.array([list(map(int, '{:09b}'.format(val)))for val in range(n_bases)])\n"," kernels = [bases[:,np.where(row>0)[0]] \n"," for row in bases if len(np.where(row>0)[0])>0]\n"," indexes = [np.where(row>0)[0].tolist()\n"," for row in bases if len(np.where(row>0)[0])>0]\n"," indexes = sorted(indexes,key=lambda x:len(x))\n"," D = np.zeros((bases.shape[0], len(indexes)),dtype=np.bool)\n"," for enum, ix in enumerate(indexes):\n"," for i_ in ix:\n"," if ix[0]==i_:\n"," D[:, enum] = bases[:, i_]\n"," else:\n"," D[:, enum] = D[:, enum] ^ bases[:, i_]\n"," return np.array([[2 if v else 1 for v in row] for row in D])"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"U2-xNYwhDv1t","colab_type":"code","outputId":"8904f803-b1fb-473a-f311-9bdf2b3c028d","executionInfo":{"status":"ok","timestamp":1581085543761,"user_tz":-540,"elapsed":6456,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":71}},"source":["for i in [7, 14, 21]:\n"," X = orthogonal_table(i)\n"," print(len(X), len(X[0]))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["8 7\n","16 15\n","32 31\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"L4P3cWoQA6PK","colab_type":"code","outputId":"692836a8-1851-447d-aab5-1ab71810a9d8","executionInfo":{"status":"ok","timestamp":1581085543762,"user_tz":-540,"elapsed":6232,"user":{"displayName":"Shojiro 
Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":294}},"source":["D = orthogonal_table(7)\n","columns=[''] * D.shape[1]\n","columns[0]='A'\n","columns[2]='B'\n","columns[4]='C'\n","columns[6]='D'\n","#割付\n","df=pd.DataFrame(D,columns=columns)\n","df['measure'] = [40,46,28,18,32,26,32,58]\n","df"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ABCDmeasure
0111111140
1211221246
2121212228
3221122118
4112122232
5212212126
6122221132
7222111258
\n","
"],"text/plain":[" A B C D measure\n","0 1 1 1 1 1 1 1 40\n","1 2 1 1 2 2 1 2 46\n","2 1 2 1 2 1 2 2 28\n","3 2 2 1 1 2 2 1 18\n","4 1 1 2 1 2 2 2 32\n","5 2 1 2 2 1 2 1 26\n","6 1 2 2 2 2 1 1 32\n","7 2 2 2 1 1 1 2 58"]},"metadata":{"tags":[]},"execution_count":27}]},{"cell_type":"code","metadata":{"id":"zfuFbC-ZBS6X","colab_type":"code","colab":{}},"source":["\n","df_alloc = df[['A','B','C','D','measure']]"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"RcODuW_pEmBw","colab_type":"code","outputId":"44c22721-c592-4496-820c-1c14abfa730b","executionInfo":{"status":"ok","timestamp":1581085543763,"user_tz":-540,"elapsed":5749,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":294}},"source":["df_alloc"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ABCDmeasure
0111140
1212246
2111228
3212118
4122232
5221126
6122132
7221258
\n","
"],"text/plain":[" A B C D measure\n","0 1 1 1 1 40\n","1 2 1 2 2 46\n","2 1 1 1 2 28\n","3 2 1 2 1 18\n","4 1 2 2 2 32\n","5 2 2 1 1 26\n","6 1 2 2 1 32\n","7 2 2 1 2 58"]},"metadata":{"tags":[]},"execution_count":29}]},{"cell_type":"code","metadata":{"id":"K5d9r4diEnSD","colab_type":"code","outputId":"6b036a29-2e51-4ffc-d581-0861898c80e8","executionInfo":{"status":"ok","timestamp":1581085543764,"user_tz":-540,"elapsed":5123,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["print(df_alloc['measure'].sum()/8)\n","m = df_alloc['measure'].mean()"],"execution_count":0,"outputs":[{"output_type":"stream","text":["35.0\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"5-YF9QYcQuP7","colab_type":"text"},"source":["## ANOVA\n","\n","分散分析(ANalysis Of VAriance, ANOVA)を実施する。
\n","直交計画法などは、それぞれの因子が直交
\n","(無相関)していると仮定しているので、
\n","応答(目的変数のこと)の分散が
\n","因子ごとの分散の和で表せる。"]},{"cell_type":"code","metadata":{"id":"6GW7JYqGQdFj","colab_type":"code","colab":{}},"source":["INPUT=df_alloc.columns[df_alloc.columns!='measure']"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"cfw7ZFMXFx8d","colab_type":"code","colab":{}},"source":["effects_factors = [np.array([df_alloc.loc[df_alloc[factor]==level]['measure'].mean() - m for level in [1, 2]])\n"," for factor in INPUT]"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"oTCP4QmZGLZH","colab_type":"code","outputId":"3cb47018-829e-4724-ebe6-786bfac51c08","executionInfo":{"status":"ok","timestamp":1581072454408,"user_tz":-540,"elapsed":718,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["print(*effects_factors)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[-2. 2.] [-2. 2.] [ 3. -3.] [-6. 
6.]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"raKD7OaDOnvF","colab_type":"code","colab":{}},"source":["ss_total = np.sum(df_alloc['measure']**2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"zFUJWUvmOs_3","colab_type":"code","colab":{}},"source":["CT = m ** 2 * df_alloc.shape[0]"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"7HJbRKPbO0E8","colab_type":"code","outputId":"feeb02d4-9e11-44f8-cc80-3078806eccb9","executionInfo":{"status":"ok","timestamp":1581072570142,"user_tz":-540,"elapsed":616,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["squaresum = np.array([np.sum(effect**2.)*(df_alloc.shape[0]/2.)for effect in effects_factors])\n","squaresum"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([ 32., 32., 72., 288.])"]},"metadata":{"tags":[]},"execution_count":70}]},{"cell_type":"code","metadata":{"id":"gjPBdnETOXQg","colab_type":"code","colab":{}},"source":["diff_factors = np.array([np.array([effects_factors[ix_factor][val - 1] for val in df_alloc[factor]])\n"," for ix_factor, factor in enumerate(INPUT)])"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"CnoqG3_1RnnB","colab_type":"code","outputId":"7f49e222-4144-4bcc-cd54-67d6525300cb","executionInfo":{"status":"ok","timestamp":1581072761081,"user_tz":-540,"elapsed":625,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":179}},"source":["e = df_alloc['measure'] - diff_factors.T.sum(axis=1)\n","e**2."],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0 2209.0\n","1 
1849.0\n","2 529.0\n","3 729.0\n","4 841.0\n","5 625.0\n","6 1681.0\n","7 2025.0\n","Name: measure, dtype: float64"]},"metadata":{"tags":[]},"execution_count":80}]},{"cell_type":"code","metadata":{"id":"zRgQbAB7RwLj","colab_type":"code","outputId":"bd748549-5cf0-4893-aa7c-b876c55bc9b2","executionInfo":{"status":"ok","timestamp":1581072954824,"user_tz":-540,"elapsed":680,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":53}},"source":["if squaresum.sum()+np.sum(e**2.)==ss_total:\n"," print('SS_total', ss_total, ':', 'SS_factors', squaresum.sum()+np.sum(e**2.))\n"," print('It is true that: SS = S_a + S_b + S_c + S_d + S_e')"],"execution_count":0,"outputs":[{"output_type":"stream","text":["SS_total 10912 : SS_factors 10912.0\n","It is true that: SS = S_a + S_b + S_c + S_d + S_e\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"Xs5W03tJSH7L","colab_type":"code","outputId":"9f10af74-5f93-40e7-e00a-1c34c7b677a1","executionInfo":{"status":"ok","timestamp":1581073126460,"user_tz":-540,"elapsed":567,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["CT/1, squaresum/1\n","np.sum(e**2.) / (df_alloc.shape[0]-1-len(INPUT))"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["3496.0"]},"metadata":{"tags":[]},"execution_count":103}]},{"cell_type":"code","metadata":{"id":"KCw4R7yDSNcP","colab_type":"code","colab":{}},"source":["v_factors = squaresum/1\n","v_error = np.sum(e**2.) 
/ (df_alloc.shape[0]-1-len(INPUT))"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"6NlI74HlTdLh","colab_type":"code","outputId":"936de711-31f9-42b6-a284-0d1ccc08ffc7","executionInfo":{"status":"ok","timestamp":1581073173546,"user_tz":-540,"elapsed":496,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["v_factors / v_error"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([0.00915332, 0.00915332, 0.02059497, 0.08237986])"]},"metadata":{"tags":[]},"execution_count":106}]},{"cell_type":"markdown","metadata":{"id":"FFuXUsu4TlhP","colab_type":"text"},"source":["## 結論\n","\n","F値について、以下が成り立つ。\n","\n","$$\n","F_3^1=\\left\\{ \\begin{array}{ll} 10.13 & (\\alpha = 0.05) \\\\ 34.12 & (\\alpha = 0.01) \\end{array} \\right.\n","$$\n","\n","これと上記の結果から、
\n","「**今回の割付では因子A, B, C, Dの分散は
\n","誤差分散との有意差があるとは言えない**」
\n","と言える。\n","\n","もう少しわかりやすく言い換えると、
\n","今回のような実験結果が得られたときに、
\n","因子A~Dは誤差項と同じ程度の分散しか持っていない
\n","ということになる。応答(目的変数)を
\n","重回帰モデルで予測する際に有効な変数は
\n","今回は取得できていないということになる。\n","\n","実際の実験に利用する際には、実験結果である物性値に
\n","影響を与えている因子が特定できなかったことを意味する。"]},{"cell_type":"code","metadata":{"id":"9knTbPZqTqeo","colab_type":"code","outputId":"a9f0d8a1-e758-414e-b892-695d0d92ea54","executionInfo":{"status":"ok","timestamp":1581085734655,"user_tz":-540,"elapsed":1703,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":281}},"source":["import numpy as np \n","from scipy.stats import f\n","import matplotlib.pyplot as plt\n","dfn = 1 # degree of freedom, nominator\n","dfd = 3 # degree of freedom, denominator\n","rv = f(dfn, dfd)\n","x = np.linspace(f.ppf(0., dfn, dfd),\n"," f.ppf(0.99, dfn, dfd), 100)\n","fig , ax = plt.subplots(1,1)\n","ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')\n","plt.vlines(10.13,ymin=0,ymax=.5, colors='blue', linestyles='dashed')\n","plt.vlines(34.12,ymin=0,ymax=.5, colors='blue', linestyles='dashed')\n","plt.xlabel('$F$ 
value');\n","plt.ylabel('probability');"],"execution_count":0,"outputs":[{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAYIAAAEICAYAAABS0fM3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3deZRV1Zn38e9TVcxjgIIglimCJYYY\nx4pDcKA7+EYTBU0c3zZv6DdZRI3LuDKKsTWtGYhDYicxvtLROKRtx8SuRGw1RJMoUQFB04BgISQU\nKBRB5kGq6nn/OKfwUtyqukXdXefee36ftfY60773Ppzlrse99xnM3RERkfQqSzoAERFJlhKBiEjK\nKRGIiKScEoGISMopEYiIpFxF0gF01YgRI7y6ujrpMEREisqCBQs2uHtltmNFlwiqq6uZP39+0mGI\niBQVM/tre8c0NCQiknJKBCIiKadEICKSckoEIiIpp0QgIpJySgQiIimnRCAiknJBE4GZnWFmy8ys\n3syuznJ8mpk1mtmiuHwhVCzf+MY3mDhxIi+99FKonxARKUrBbigzs3LgduB0oAGYZ2Z17r6kTdWH\n3P2KUHG0Wrp0KXPnzmX9+vWhf0pEpKiE7BEcD9S7+5vu/i7wIDA14O91qH///gDs2LEjqRBERApS\nyEQwBlidsd0Q72vrM2b2mpk9amZV2b7IzKab2Xwzm9/Y2HhAwfTr1w+AnTt3HtDnpWOTJkVFRMII\n2caSniz+DVDt7kcCzwD3Zqvk7rPcvdbdaysrsz4zqVPqEYiIZBcyEawBMv8P/+B4317u/nd33x1v\n/hw4LlQwrT0CJQIRkX2FTATzgBozG2tmvYGLgLrMCmY2OmNzCrA0VDCtPQINDYmI7CvYVUPu3mRm\nVwBPAeXA3e6+2MxuAOa7ex1wpZlNAZqAjcC0UPFoaEhEJLug7yNw99nA7Db7rstYnwHMCBlDK00W\nh3XnnUlHIFLaQraxonsxzYFSjyCs8eOTjkCktIVsY0lfNdRjNFkc1m9+ExURCSNkG0tdj0BDQ2Hc\nemu0PPvsZOMQKVUh21hqegQaGhIRyS41iUCTxSIi2aUmEahHICKSXWoSgSaLRUSy02Sx5MX99ycd\ngUhpC9nGUpMI1CMIqyrrc2NFJF9CtrHUDA2pRxDWQw9FRUTCCNnGUtMj0GRxWHfcES0vvDDZOERK\nVcg2lpoeQa9evSgrK2PPnj00NTUlHY6ISMFITSIwMw0PiYhkkZpEAJowFhHJJlWJQPMEIiL7S81k\nMejKoZAefTTpCERKW8g2lqpEoKGhcEaMSDoCkdIWso2lcmhIPYL8u+eeqIhIGCHbWKoSgXoE4SgR\niISlRJAnmiwWEdlfKhOBhoZERN6TqkSgoSERkf2lKhGoRyAisj9dPip5MXt20hGIlLaQbSxViUCT\nxeHEp1ZEAgnZxjQ0JHnxs59FRUTCCNnGUpUINDQUzsMPR0VEwgjZxlKVCNQjEBHZX6oSgXoEIiL7\nS1Ui0GSxiMj+giYCMzvDzJaZWb2ZXd1Bvc+YmZtZbch4NDQkIrK/YJePmlk5cDtwOtAAzDOzOndf\n0qbeIODLwEuhYmmloaFwnnsu6QhESlvINhayR3A8UO/ub7r7u8CDwNQs9W4EfgDsChgLoKEhEZFs\nQiaCMcDqjO2GeN9eZnYsUOXuT3T0RWY23czmm9n8xsbGAw6otUegoaH8u+WWqIhIGCHbWGKTxWZW\nBvwQ+Gpndd19lrvXunttZWXlAf+megTh/Pa3URGRMEK2sZCJYA1QlbF9cLyv1SDgCOA5M1sFnAjU\nhZww
Vo9ARGR/IRPBPKDGzMaaWW/gIqCu9aC7b3b3Ee5e7e7VwIvAFHefHyog9QhERPYXLBG4exNw\nBfAUsBR42N0Xm9kNZjYl1O92JPOqIXdPIgQRkYIT9Omj7j4bmN1m33Xt1J0UMhaAiooKevXqxZ49\ne3j33Xfp06dP6J9MjTjHikggIdtYqh5DDdHw0ObNm9mxY4cSQR49+WTSEYiUtpBtLFWPmABNGIuI\ntJW6RKAJ4zBuvDEqIhJGyDamRCB5MWdOVEQkjJBtLHWJQENDIiL7Sl0iUI9ARGRfqUsE6hGIiOwr\nlZePgnoE+TZ8eNIRiJS2kG1MiUDy4rHHko5ApLSFbGMaGhIRSbnUJQL1CMKYMSMqIhJGyDaWuqEh\nva4yjD//OekIREpbyDaW2h6BhoZERCKpTQTqEYiIRFKXCDRZLCKyr9TNEahHEMbBBycdgUhpC9nG\nUpcINFkcxi9/mXQEIqUtZBtL3dCQJotFRPaV2kSgHkF+XXVVVEQkjJBtLLVDQ+oR5NeiRUlHIFLa\nQrYx9QhERFIudYlAk8UiIvtKXSLQZLGIyL5SO0egHkF+HXZY0hGIlLaQbSx1iUA9gjBmzUo6ApHS\nFrKNpW5oqG/fvgDs2rWLlpaWhKMREUle6hJBWVnZ3mSgXkH+TJ8eFREJI2QbS93QEETDQ7t27WLn\nzp0MGDAg6XBKwvLlSUcgUtpCtrGcegRmVh4uhJ6nCWMRkffkOjT0hpndbGYTgkbTQzRhLCLynlwT\nwVHAcuDnZvaimU03s8GdfcjMzjCzZWZWb2ZXZzl+qZn9xcwWmdnzPZVodHexiMh7ckoE7r7V3f/d\n3T8GfBO4HnjLzO41s0OzfSYeTrodOBOYAFyc5Q/9A+7+EXc/GrgJ+OGB/kO6QkND+Xf00VERkTBC\ntrGcJovjP+qfAv4ZqAZuBf4DOAWYDWS71eF4oN7d34y/40FgKrCktYK7b8moPwDwLv8LDoCGhvLv\nttuSjkCktIVsY7leNfQG8Cxws7vPzdj/qJmd2s5nxgCrM7YbgBPaVjKzLwFfAXoD/5jti8xsOjAd\n4JBDDskx5PapRyAi8p5c5wj+j7t/PjMJmNlEAHe/sjsBuPvt7j6OaMjp2nbqzHL3Wnevrays7M7P\nAZojCOGSS6IiImGEbGO5JoIfZ9n3k04+swaoytg+ON7XngeBc3KMp1uUCPKvoSEqIhJGyDbW4dCQ\nmZ0EfAyoNLOvZBwaDHR2b8E8oMbMxhIlgIuA/93m+2vc/Y1481NEQ1DBDR4cXfC0efPmnvg5EZGC\n1tkcQW9gYFxvUMb+LcB5HX3Q3ZvM7ArgKaKkcbe7LzazG4D57l4HXGFmk4E9wDvA5w7sn9E1I0eO\nBGD9+vU98XMiIgWtw0Tg7n8A/mBm97j7X7v65e4+m+iqosx912Wsf7mr35kPSgQiIu/pbGjoNne/\nCvipme13aae7TwkWWUBKBPl30klJRyBS2kK2sc6Ghu6Pl7eEC6HntSaCxsbGhCMpHd//ftIRiJS2\nkG2ss6GhBfHyD+FC6HnqEYiIvKezoaG/0MHdvu5+ZN4j6gGt9yIoEeTPZz4TLR97LNk4REpVyDbW\n2dDQWfn/yeQNHjyY3r17s337drZv3653EuTB3/+edAQipS1kG+tsaKjLVwoVAzNj5MiRNDQ00NjY\nqEQgIqnW4Z3FZvZ8vNxqZlvaLnsmxDA0YSwiEumsR3ByvBzUUb1ipAljEZFIzu8sNrNjgZOJJo+f\nd/eFwaLqAUoE+fXxjycdgUhpC9nGcn0fwXXA+cCv4l33mNkj7v6dYJEFpiuH8utf/iXpCERKW8g2\nlmuP4J+Ao9x9F4CZzQQWAUWbCNQjEBGJ5PoY6rVA34ztPnT8SOmCp8ni/DrzzKiISBgh21hnN5T9\nhGhOYDOw2MyeibdPB14OE1LPUI8gv/TWT5GwQraxzoaG5sfLBcCvM/
Y/FySaHqREICIS6ezy0Xt7\nKpCepkQgIhLJ9aqhGuD7wAQy5grc/YOB4gou86ohd8fMEo5IRCQZuV419AvgeuBHwD8A/0zuE80F\nqV+/fgwcOJBt27axZcsWhgwZknRIRe2sknwqlUjhCNnGck0E/dx9jplZ/Pyhb5vZAuC6zj5YyEaO\nHMm2bdtYv369EkE3fe1rSUcgUtpCtrFc/69+t5mVAW+Y2RVmdi7Ru4yLmuYJRERyTwRfBvoDVwLH\nAZ+lh140H5ISQf5MmhQVEQkjZBvLaWjI3ecBxL2CK919a5hwepYSgYhIjj0CM6uN31b2GvAXM3vV\nzI4LG1p4urtYRCT3yeK7gcvd/U8AZnYy0ZVERfmqylZ68JyISO5zBM2tSQDA3Z8HmsKE1HM0NCQi\n0vmzho6NV/9gZncC/0n0rKEL0WMmJMMFFyQdgUhpC9nGOhsaurXN9vUZ657nWHqcEkH+XH550hGI\nlLaQbayzZw39Q7ifTp4mi/Nnx45o2b9/snGIlKqQbSzXq4aGmNkPzWx+XG41s6K/FXfEiBEAbNiw\ngebm5oSjKW6f/GRURCSMkG0s18niu4GtwAVx2UJ01VBRq6ioYPjw4bS0tLBx48akwxERSUSuiWCc\nu1/v7m/G5V+Bon3yaCZdQioiaZdrItgZ3zsAgJlNBDp9X46ZnWFmy8ys3syuznL8K2a2xMxeM7M5\nZvaB3EPPD00Yi0ja5XpD2aXAfRnzAu/QybOGzKwcuJ3otZYNwDwzq3P3JRnVFgK17r7DzC4DbiK6\nNLXHaMJYRNKu00QQP19ovLsfZWaDAdx9Sw7ffTxQ7+5vxt/zIDAV2JsI3P3ZjPovApd0Ifa8UI8g\nP6ZNSzoCkdIWso11mgjcvcXMvgE8nGMCaDUGWJ2x3QCc0EH9zwNPZjtgZtOB6QCHHHJIF0Lo3KhR\nowBYu3ZtXr83bZQIRMIK2cZynSP4nZl9zcyqzGxYa8lXEGZ2CVAL3JztuLvPcvdad69tndzNl0MP\nPRSAN954I6/fmzYbNkRFRMII2cZynSO4kOhO4rb3tnV05dAaoCpj++B43z7MbDLwLeA0d9+dYzx5\nc9hhhwGwfPnynv7pknLeedHyuecSDUOkZIVsY7n2CCYQTfy+CiwCfgJ8uJPPzANqzGysmfUGLgLq\nMiuY2THAncAUd09kkL6mpgaIegQtLS1JhCAikqhcE8G9wIeAHxMlgQnxvna5exNwBfAUsJRojmGx\nmd1gZlPiajcTvfLyETNbZGZ17XxdMEOGDGHUqFHs3LmThoaGnv55EZHE5To0dIS7T8jYftbMlrRb\nO+bus4HZbfZdl7E+OcffD2r8+PGsW7eO5cuX530yWkSk0OXaI3jFzE5s3TCzE4D5YULqeZonEJE0\ny7VHcBww18z+Fm8fAiyLX1/p7l7UbyprTQTLli1LOJLiddllSUcgUtpCtrFcE8EZ4UJInnoE3Xdh\nj94PLpI+IdtYTonA3f8aLoTkKRF03+r41sGqqo7riciBCdnGcu0RlLRx48ZRVlbGqlWr2L17N336\n9Ek6pKLz2c9GS91HIBJGyDaW62RxSevduzdjx46lpaWFFStWJB2OiEiPUiKIaXhIRNJKiSCmRCAi\naaVEEBs/fjygS0hFJH00WRxTj6B7vvrVpCMQKW0h25gSQUyJoHvOPjvpCERKW8g2pqGh2JgxY+jX\nrx/r169n06ZNSYdTdJYti4qIhBGyjSkRxMrKyvY+klq9gq774hejIiJhhGxjSgQZWieMlQhEJE2U\nCDJonkBE0kiJIMOECdErFxYuXJhwJCIiPUeJIMNJJ50EwNy5c/XaShFJDV0+mqG6upqDDjqItWvX\nsmzZMj70oQ8lHVLRuPbapCMQKW0h25h6BBnMjIkTJwLw/PPPJxxNcZk8OSoiEkbINqZE0MbJJ58M\nwAsvvJBwJMVl0aKoiEgYIduYho
baaE0E6hF0zVVXRUu9j0AkjJBtTD2CNo488kgGDBjAihUrePvt\nt5MOR0QkOCWCNioqKvZePaThIRFJAyWCLDRPICJpokSQha4cEpE00WRxFieccALl5eUsXLiQ7du3\nM2DAgKRDKnjf+17SEYiUtpBtTD2CLAYNGsRRRx1FU1MTL7/8ctLhFIWPfSwqIhJGyDamRNAOzRN0\nzdy5URGRMEK2MSWCdrQmgjlz5iQcSXG45pqoiEgYIduYEkE7Tj/9dHr16sUf//hH1q1bl3Q4IiLB\nKBG0Y+jQoXziE5+gpaWFxx57LOlwRESCCZoIzOwMM1tmZvVmdnWW46ea2Stm1mRm54WM5UBceOGF\nADz00EMJRyIiEk6wRGBm5cDtwJnABOBiM5vQptrfgGnAA6Hi6I4pU6bQp08f/vSnP7F27dqkwxER\nCSJkj+B4oN7d33T3d4EHgamZFdx9lbu/BhTkW2AGDx7MmWeeibvzyCOPJB1OQbvttqiISBgh21jI\nRDAGWJ2x3RDv6zIzm25m881sfmNjY16Cy1Xr8NDDDz/co79bbI4+OioiEkbINlYUk8XuPsvda929\ntrKyskd/+6yzzqJfv37MnTuX1atXd/6BlPrd76IiImGEbGMhE8EaoCpj++B4X1EZOHAgn/rUpwD1\nCjryne9ERUTCCNnGQiaCeUCNmY01s97ARUBdwN8L5oILLgDg/vvvx90TjkZEJL+CJQJ3bwKuAJ4C\nlgIPu/tiM7vBzKYAmNlHzawBOB+408wWh4qnO84++2xGjhzJq6++yu80/iEiJSboHIG7z3b3w9x9\nnLt/N953nbvXxevz3P1gdx/g7sPd/cMh4zlQffv25ar4PXEzZ85MOBoRkfwqisniQnDZZZcxaNAg\nfv/73+uJpCJSUpQIcjR06FAuu+wyAH7wgx8kHE3hufPOqIhIGCHbmBXb5Gdtba3Pnz8/kd9+6623\nqK6uZs+ePSxZsoTDDz88kThERLrKzBa4e222Y+oRdMHo0aOZNm0a7s5NN92UdDgF5Te/iYqIhBGy\njalH0EX19fWMHz8egHnz5nHssccmFkshmTQpWj73XJJRiJSu7rYx9Qjy6NBDD+XKK6+kpaWF6dOn\n09TUlHRIIiLdokRwAG688UaqqqpYsGABt99+e9LhiIh0ixLBARg4cCA//elPAbj22mv1DCIRKWpK\nBAdoypQpfPrTn2bbtm1cfvnlevSEiBQtTRZ3w5o1a5gwYQJbtmxh5syZfPOb30w6pMS0doqqqjqu\nJyIHprttTJPFgYwZM4b77rsPgBkzZvDEE08kHFFyqqqUBERCCtnGlAi6aerUqdx44424OxdffDFL\nly5NOqREPPRQVEQkjJBtTIkgD771rW9x/vnns3XrVs4+++xUvt/4jjuiIiJhhGxjSgR5YGbcc889\nHHvssaxYsYJTTjmFVatWJR2WiEhOlAjypH///jz99NMcd9xxvPnmm5x66qksX7486bBERDqlRJBH\nw4cPZ86cOUycOJHVq1dz6qmn8sILLyQdlohIh5QI8mzIkCE89dRTTJ48mXXr1nHaaadx66236j4D\nESlYuo8gkD179nDNNddwyy23AHDOOecwa9YsKisrE44sjA0bouWIEcnGIVKqutvGdB9BAnr16sXN\nN9/Mr3/9a4YMGcLjjz/O4Ycfzl133UVLS0vS4eXdiBFKAiIhhWxjSgSBnXPOObzyyitMnjyZjRs3\n8oUvfIHTTjuNl156KenQ8uqee6IiImGEbGNKBD3ggx/8IE8//TQPPPAAo0aN4vnnn+fEE0/krLPO\nYsGCBUmHlxdKBCJhKRGUADPj4osv5vXXX2fGjBkMGDCAJ554gtraWiZPnszjjz+udxuISCKUCHrY\n0KFD+d73vsfKlSv5+te/Tv/+/ZkzZw7nnnsu48aN4/rrr9f9ByLSo5QIElJZWclNN93EmjVr+NGP
\nfkRNTQ1/+9vfuOGGGxg/fjy1tbXMnDmTxYsX69JTEQlKiSBhQ4cO5aqrruL111/nmWeeYdq0aQwa\nNIgFCxYwY8YMjjjiCMaNG8fll1/OI488QmNjY9Ihi0iJ0X0EBWjnzp08+eST1NXV8cQTT7Ch9QLi\n2IQJEzjppJM48cQT+ehHP8qECRPo1atXQtFGduyIlv37JxqGSMnqbhvr6D4CJYIC19zczLx585gz\nZw7PPvssL7zwArt27dqnTu/evTniiCM4+uijmTBhwt5SVVVFWZk6fSKiRFBSdu/ezcKFC3nxxRf5\n85//zIIFC1ixYkXWun369GHcuHHU1NQwduxYqqurqa6upqqqiqqqKkaMGIGZ5SWun/0sWl5+eV6+\nTkTa6G4bUyIocVu2bOHVV1/ltddeY+nSpSxZsoSlS5fy9ttvd/i5vn37Mnr0aA466CBGjx7N+9//\nfkaOHMmoUaOorKyksrKSESNGMHz4cN73vvdRUVHR7ndNmhQtn3suf/8uEXlPd9tYR4mg/ZYtRWPw\n4MGccsopnHLKKfvs37p1K/X19dTX17Nq1SpWrlzJqlWrWL16NQ0NDWzatImVK1eycuXKnH5nyJAh\nDBs2jKFDh+4tgwcPZsiQIaxcOYSKisHceecgBg2KysCBAxk4cCADBgzYp/Tt2zdvPRER6b6gicDM\nzgD+DSgHfu7uM9sc7wPcBxwH/B240N1XhYwpTQYNGsQxxxzDMccck/X4tm3beOutt1i7di1r165l\n3bp1e8uGDRvYsGEDjY2NbNy4kXfeeYfNmzezefPmDn/z0ktzi61///7069cva+nbty99+/alT58+\n+y179+69d5mt9OrVa++yvVJRUbF3ma2Ul5dTUVFBWVmZEpakQrBEYGblwO3A6UADMM/M6tx9SUa1\nzwPvuPuhZnYR8APgwlAxyb4GDhxITU0NNTU1ndZtaWlh06ZNbNq0iXfeeWfv+ubNm9m0aRM//vEW\nmpu3cuaZW9myZQvbt29n27ZtbN26le3bt+/d3rlzJ7t27WLHjh3saL0MooC1JoXy8vL91svLyykr\nK9tnu+2+1vVsy7brZrZ3vaPSWi+zftt9HS3brrctB3osswA518ms297nstVp73Nt19vWb++7Otvf\n0ffmWqe9z+SyvmmT0bv3+4Hx5FvIHsHxQL27vwlgZg8CU4HMRDAV+Ha8/ijwUzMzL7aJixQoKytj\n2LBhDBs2LOvxurpoOWtW59/V3NzMzp0795YdO3awa9euvdu7du1i9+7d7Ny5k927d+9X3n333b3L\n1vU9e/awZ8+efdbblqampn3WW7ebm5v3W29qasLdaW5uprm5OY9nUuTAjRo1DfhF3r83ZCIYA6zO\n2G4ATmivjrs3mdlmYDiwz4XzZjYdmA5wyCGHhIpXuqErE1jl5eV75w8KWUtLy95E0NTUlHW9ubl5\nn3qZ25n7M9czE4y77z3m7vttt7feut26zLbeXp3M/dlKR8dyOQ50eDyzTmbd9j6XrU57n2u73rZ+\ne9/V2f6OvjfXOu19Jpf11uUZZxxGCEUxWezus4BZEF01lHA4khKtwy5J36wnElrIu43WAFUZ2wfH\n+7LWMbMKYAjRpLGIiPSQkIlgHlBjZmPNrDdwEVDXpk4d8Ll4/Tzg95ofEBHpWcGGhuIx/yuAp4gu\nH73b3Reb2Q3AfHevA+4C7jezemAjUbIQEZEeFHSOwN1nA7Pb7LsuY30XcH7IGEREpGN6IpmISMop\nEYiIpJwSgYhIyikRiIikXNE9htrMGoG/duEjI2hzp3IRUMw9QzH3jGKMGYoz7o5i/oC7V2Y7UHSJ\noKvMbH57z+AuVIq5ZyjmnlGMMUNxxn2gMWtoSEQk5ZQIRERSLg2JIIcHIxccxdwzFHPPKMaYoTjj\nPqCYS36OQEREOpaGHoGIiHRAiUBEJOVKOhGY2RlmtszM6s3s
6qTjyYWZrTKzv5jZIjObn3Q82ZjZ\n3Wa23sz+J2PfMDN7xszeiJfvSzLGttqJ+dtmtiY+14vM7JNJxtiWmVWZ2bNmtsTMFpvZl+P9BXuu\nO4i5YM+1mfU1s5fN7NU45n+N9481s5fivx8PxY/TLwgdxHyPma3MOM9H5/R9pTpHYGblwHLgdKLX\nZM4DLnb3JR1+MGFmtgqodfeCvZHFzE4FtgH3ufsR8b6bgI3uPjNOuu9z928mGWemdmL+NrDN3W9J\nMrb2mNloYLS7v2Jmg4AFwDnANAr0XHcQ8wUU6Lm26O3wA9x9m5n1Ap4Hvgx8BfiVuz9oZv8PeNXd\n70gy1lYdxHwp8Ft3f7Qr31fKPYLjgXp3f9Pd3wUeBKYmHFNJcPc/Er0/ItNU4N54/V6ixl8w2om5\noLn7W+7+Sry+FVhK9J7vgj3XHcRcsDyyLd7sFRcH/hFo/YNaaOe5vZgPSCkngjHA6oztBgr8P8iY\nA0+b2QIzm550MF0wyt3fitffBkYlGUwXXGFmr8VDRwUzxNKWmVUDxwAvUSTnuk3MUMDn2szKzWwR\nsB54BlgBbHL3prhKwf39aBuzu7ee5+/G5/lHZtYnl+8q5URQrE5292OBM4EvxUMaRSV+3WgxjDne\nAYwDjgbeAm5NNpzszGwg8BhwlbtvyTxWqOc6S8wFfa7dvdndjyZ6t/rxwOEJh9SptjGb2RHADKLY\nPwoMA3IaMizlRLAGqMrYPjjeV9DcfU28XA/8mug/ymKwLh4fbh0nXp9wPJ1y93VxY2oB/p0CPNfx\n+O9jwH+4+6/i3QV9rrPFXAznGsDdNwHPAicBQ82s9S2OBfv3IyPmM+KhOXf33cAvyPE8l3IimAfU\nxDP/vYneh1yXcEwdMrMB8QQbZjYA+F/A/3T8qYJRB3wuXv8c8F8JxpKT1j+msXMpsHMdTwjeBSx1\n9x9mHCrYc91ezIV8rs2s0syGxuv9iC4wWUr0x/W8uFqhnedsMb+e8T8IRjSnkdN5LtmrhgDiS9Ru\nA8qBu939uwmH1CEz+yBRLwCi90k/UIgxm9l/ApOIHnm7DrgeeBx4GDiE6DHhF7h7wUzOthPzJKKh\nCgdWAV/MGHtPnJmdDPwJ+AvQEu++hmjMvSDPdQcxX0yBnmszO5JoMric6H+OH3b3G+L2+CDREMtC\n4JL4/7QT10HMvwcqAQMWAZdmTCq3/32lnAhERKRzpTw0JCIiOVAiEBFJOSUCEZGUUyIQEUk5JQIR\nkZRTIhARSTklAhGRlFMiEImZ2RfN7K2MZ7n/spvf1+mNPCKFoKLzKiKp8RHgWne/K+lARHqSegQi\n7zmS6Lb8/ZjZTDP7Usb2t83sa/H64/Fjwxdne3S4mVXbvm9G+1r8UpzW7Uvit00tMrM745cqifQY\nJQKR93wY+EX8B/l3bY49RPSWrVYXxPsA/q+7HwfUAlea2fBcf9DMPgRcCEyMHyncDPzTgf4DRA6E\nhoZEiN61C7zt7kdmO+7uCyS1SMoAAAELSURBVM1spJkdRPRQr3fcvfXFR1ea2bnxehVQA/w9x5/+\nOHAcMC96YCT9KLDHSkvpUyIQiXwEWNxJnUeIHkv8fuLegJlNAiYDJ7n7DjN7Dujb5nNN7Nv7zjxu\nwL3uPuOAIxfpJg0NiUSOpPNE8BDRey3OI0oKAEOIegc7zOxw4MQsn1sHjDSz4fGrA8/KODYHOM/M\nRgKY2TAz+0A3/h0iXaZEIBL5CLCkowruvhgYBKzJeJb+fwMVZrYUmAm8mOVze4AbgJeJ3of7esax\nJcC1RO+pfi0+Prrtd4iEpPcRiIiknHoEIiIpp0QgIpJySgQiIimnRCAiknJKBCIiKadEICKSckoE\nIiIp9/8B5CRCQKC+O6oAAAAASUVORK5CYII=\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"rk3eUKQmBa6t","colab_type":"code","colab":{}},"source":[""],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /notebooks/4.4-deap_d_optimal_design.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"4.4-deap_d_optimal_design.ipynb のコピー","provenance":[{"file_id":"https://gist.github.com/sshojiro/1806ea69ce0b190a38a516bc050d36a9#file-4-4-deap_d_optimal_design-ipynb","timestamp":1587451718335}],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"21oYBLXofBGZ","colab_type":"text"},"source":["# 遺伝的アルゴリズムによるD最適化計画法\n","\n","`deap`という遺伝的アルゴリズムのパッケージを利用する。\n","\n","D最適化基準はデータ行列$X$に対して行列式$|X^TX|$で与えられる。\n","\n","触媒データを利用してサンプル選定を行う。\n"]},{"cell_type":"code","metadata":{"id":"pbZ_ueuCfAFQ","colab_type":"code","outputId":"601b0d83-a942-469a-c63c-c89cf8adb391","colab":{"base_uri":"https://localhost:8080/","height":233}},"source":["!wget https://raw.githubusercontent.com/funatsu-lab/support-page/master/data/catalyst/journal_data.csv\n","!ls journal_data.csv"],"execution_count":0,"outputs":[{"output_type":"stream","text":["--2020-01-11 13:24:44-- https://raw.githubusercontent.com/funatsu-lab/support-page/master/data/catalyst/journal_data.csv\n","Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n","Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 5502 (5.4K) [text/plain]\n","Saving to: ‘journal_data.csv’\n","\n","journal_data.csv 100%[===================>] 5.37K --.-KB/s in 0s \n","\n","2020-01-11 13:24:49 (81.6 MB/s) - ‘journal_data.csv’ saved [5502/5502]\n","\n","journal_data.csv\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"9tpOfCCFgDBs","colab_type":"code","outputId":"f6c9f057-1397-4125-8f53-0e39d1a4667b","colab":{"base_uri":"https://localhost:8080/","height":145}},"source":["# deapのインストール \n","!pip install deap"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Collecting deap\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/81/98/3166fb5cfa47bf516e73575a1515734fe3ce05292160db403ae542626b32/deap-1.3.0-cp36-cp36m-manylinux2010_x86_64.whl (151kB)\n","\r\u001b[K |██▏ | 10kB 20.5MB/s eta 0:00:01\r\u001b[K |████▎ | 20kB 7.3MB/s eta 0:00:01\r\u001b[K |██████▌ | 30kB 8.4MB/s eta 0:00:01\r\u001b[K |████████▋ | 40kB 6.5MB/s eta 0:00:01\r\u001b[K |██████████▉ | 51kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████ | 61kB 7.9MB/s eta 0:00:01\r\u001b[K |███████████████ | 71kB 8.8MB/s eta 0:00:01\r\u001b[K |█████████████████▎ | 81kB 8.3MB/s eta 0:00:01\r\u001b[K |███████████████████▍ | 92kB 9.2MB/s eta 0:00:01\r\u001b[K |█████████████████████▋ | 102kB 9.5MB/s eta 0:00:01\r\u001b[K |███████████████████████▊ | 112kB 9.5MB/s eta 0:00:01\r\u001b[K |█████████████████████████▉ | 122kB 9.5MB/s eta 0:00:01\r\u001b[K |████████████████████████████ | 133kB 9.5MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▏ | 143kB 9.5MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 153kB 9.5MB/s \n","\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from deap) (1.17.5)\n","Installing collected packages: deap\n","Successfully installed deap-1.3.0\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"udlussIVfKgP","colab_type":"code","colab":{}},"source":["from pandas import read_csv\n","from pandas import 
DataFrame\n","from sklearn.preprocessing import scale\n","from sklearn.model_selection import train_test_split\n","from random import randint\n","from deap import creator, base, tools, algorithms\n","from numpy import log as np_log, where as np_where, array as np_array, arange as np_arange, exp as np_exp, unique as np_uniq, power as np_pow\n","from numpy.linalg import det as np_det\n","from numpy.random import permutation as np_perm\n","from tqdm import tqdm\n","from pprint import pprint "],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"dtd7pDhAfNBW","colab_type":"code","colab":{}},"source":["df = read_csv('./journal_data.csv', header=0, index_col=0)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"ATUFlglQfZH5","colab_type":"code","outputId":"4dc8921b-d5ec-42b3-a8f1-10e5d2cfa647","colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["df_train, df_test = train_test_split(df, test_size=.33, random_state=66)\n","print(df_train.shape, df_test.shape)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["(50, 23) (25, 23)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"wXhgWaahvwVp","colab_type":"code","colab":{}},"source":["INPUT=df_train.columns[:-3].tolist()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"dpNxbzZVuYIN","colab_type":"code","outputId":"19eeec62-380c-4b71-e6da-d88a8d24f87b","colab":{"base_uri":"https://localhost:8080/","height":53}},"source":["Xtrain = scale(df_train[INPUT])\n","print('determinant', np_det(Xtrain.T @ Xtrain))\n","print('scaled determinant', np_det(Xtrain.T @ Xtrain)**(1/len(INPUT)))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["determinant 1.0109311742906108e+27\n","scaled determinant 22.399384226277576\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"OC7n49ZmfkzB","colab_type":"text"},"source":["## トレーニングデータからD最適化計画を取得する\n","\n","50個から17サンプルを取り出す事を考える。
\n","すると${}_{50}C_{17}$ 通りも調べないといけなくなり、効率が悪い。
\n","そこで、最適化手法の1つである遺伝的アルゴリズムを利用する。"]},{"cell_type":"code","metadata":{"id":"ODAAhenogGil","colab_type":"code","colab":{}},"source":["def d_criterion(X):\n"," return np_log(np_pow(np_det(X.T @ X), (1/X.shape[1])) )"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"iu6qEIv6hFBC","colab_type":"code","outputId":"60dd9ac1-07f5-4a1d-8c5f-b5510be5753c","colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["d_criterion(Xtrain)"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["3.1090334685848173"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"markdown","metadata":{"id":"zNo8xo3mjSwC","colab_type":"text"},"source":["## Deapを用いた実装\n","[公式サンプルコード](https://github.com/DEAP/deap)"]},{"cell_type":"code","metadata":{"id":"0Y52w6SlxP1a","colab_type":"code","outputId":"6d3f7e2f-94a8-4c05-dbf3-23f46d200e48","colab":{"base_uri":"https://localhost:8080/","height":53}},"source":["def perm(n_max, n_out):\n"," return np_perm(np_arange(n_max)).tolist()[:n_out]\n","print(len(perm(50, 17)))\n","print(perm(50, 17))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["17\n","[24, 16, 20, 18, 37, 45, 2, 31, 25, 22, 38, 9, 44, 6, 19, 27, 7]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"EvPylurohoeg","colab_type":"code","colab":{}},"source":["n_pop = 300\n","n_dim = 20\n","n_gen= 100\n","n_samples=Xtrain.shape[0]"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"kN-7OyE-mc4I","colab_type":"code","colab":{}},"source":["def d_opt01(individual):\n"," \"Deap evaluation function. 
\"\n"," if sum(individual)!=n_dim:\n"," return -9999.99,\n"," x_in = np_array(individual)\n"," x_sc = Xtrain[np_where(x_in==1)[0], :]\n"," return d_criterion(x_sc),"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"45CKGDsWhhe0","colab_type":"code","colab":{}},"source":["creator.create(\"FitnessMax\", base.Fitness, weights=(1.0,))\n","creator.create(\"Individual\", list, fitness=creator.FitnessMax)\n","\n","toolbox = base.Toolbox()\n","toolbox.register(\"attr_index\", randint, 0, 1)\n","toolbox.register(\"individual\", tools.initRepeat, creator.Individual, \n"," toolbox.attr_index, n=n_samples)\n","toolbox.register(\"population\", tools.initRepeat, \n"," list, toolbox.individual)\n","toolbox.register(\"evaluate\", d_opt01)\n","toolbox.register(\"mate\", tools.cxTwoPoint)\n","toolbox.register(\"mutate\", tools.mutFlipBit, indpb=0.05)\n","toolbox.register(\"select\", tools.selTournament, tournsize=3)\n","population = toolbox.population(n=n_pop)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"wVRBVnE8hqKF","colab_type":"code","outputId":"07f2cafa-5f9c-4d4f-9157-99531274bb3b","colab":{"base_uri":"https://localhost:8080/","height":71}},"source":["for gen in tqdm(range(n_gen)):\n"," offspring = algorithms.varAnd(population, toolbox, cxpb=0.5, mutpb=0.1)\n"," fits = toolbox.map(toolbox.evaluate, offspring)\n"," for fit, ind in zip(fits, offspring):\n"," ind.fitness.values = fit\n"," population = toolbox.select(offspring, k=len(population))\n","top10 = tools.selBest(population, k=10)"],"execution_count":0,"outputs":[{"output_type":"stream","text":[" 0%| | 0/100 [00:00\n","全て揃っていることが分かり、最適化に成功している。"]},{"cell_type":"code","metadata":{"id":"Ixvgu_IbyTea","colab_type":"code","outputId":"d52dfac7-5a6d-4a76-fd6b-e78d070341ac","colab":{"base_uri":"https://localhost:8080/","height":197}},"source":["for top in top10:\n"," print(top)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 
0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"f7uE3QYOd2x4","colab_type":"text"},"source":["**D最適化基準を標準化する場合**\n","\n","$d$次元のデータ行列$X$に対して以下のように定めることもある。\n","\n","\\begin{equation}\n","D = \\det|X^TX|^{1/d}\n","\\end{equation}\n","\n"]},{"cell_type":"code","metadata":{"id":"IMdwR00FMVpy","colab_type":"code","outputId":"09e72fd5-8c47-4b3c-d01f-bad056377897","colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["xin = np_array(top10[0])\n","mat_ = 
Xtrain[np_where(xin==1)[0], :]\n","print('D criterion', np_pow(np_det(mat_.T@mat_), 1./mat_.shape[1]))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["D criterion 8.602635504346852\n"],"name":"stdout"}]}]} -------------------------------------------------------------------------------- /notebooks/8.1.2-structure-generation-brics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# BRICSを使った構造生成" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "df = pd.read_csv('../data/delaney-solubility/delaney-processed.csv')" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "2019.09.3\n" 30 | ] 31 | }, 32 | { 33 | "name": "stderr", 34 | "output_type": "stream", 35 | "text": [ 36 | "RDKit WARNING: [20:33:06] Enabling RDKit 2019.09.3 jupyter extensions\n" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "from rdkit.Chem import MolFromSmiles\n", 42 | "from rdkit import Chem \n", 43 | "import rdkit \n", 44 | "print(rdkit.__version__)\n", 45 | "df['mol'] = df['smiles'].apply(MolFromSmiles)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 6, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "from rdkit.Chem import BRICS, Recap" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 9, 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "name": "stdout", 64 | "output_type": "stream", 65 | "text": [ 66 | "1個目の分子からBRICSにより見つかるフラグメントの数 6\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "print('1個目の分子からBRICSにより見つかるフラグメントの数', len(BRICS.BRICSDecompose(df['mol'][0])))" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 
5, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAffUlEQVR4nO3deVxUR7Y48HO7kR1FQNlERFFwVEQWReIYVIjLoE9UGI2Kxph2yQv6GZ1p85IJZpL4msz7KZPkmWlc3nSMGtttjBmZCHGNURFBY1BQAUVE9n2nu+v3R+lNi4osd2m6z/eTTz7YdFcdoDlU1a1bhyGEAEIIoe6SiB0AQgj1bphGEUKoRzCNIoRQj2AaRQihHsE0ihDXGObxf/Rj9kFkpDCNIsQphgFCADfAmBIzsQNAL9Tc3Hzo0KHKyko7O7tFixZZWlqKHRHqCkLaD0iRkWJw36gBys3N3bFjx+7du8vKyiwtLZubmwMDAw8dOjRkyBCxQ0MvQ0ej7Aft/omMEU7qDYhWqz127NiMGTNGjBiRkJBQVlYWGBgol8uHDh169epVf3//o0ePih0jehmaOukItLhY7GiQIAgyAI8ePVIoFJ6envSHYmFhER0dnZKSQj9bU1Mzb948AGAYRi6XazQacaNFL5eWRgCIv7/YcSAhYBoVWXp6+tKlS/v06UMT6PDhwxUKRXl5ebun6XS6xMREMzMzAAgLCysuLhYlWtRZ+/cTADJ/vthxICHgpF4cNTU1SUlJY8aMCQoK2rNnj06ni4yMTElJycnJkcvljo6ODQ0NSUlJ169fp89nGGbdunWpqakuLi5nzpwJCgq6ePGiuF8C6khuLgDA0KFix4EEIXYeNznp6ekymczGxoZ+/11dXeVy+f3799knZGdny+VyBwcHAFi+fHm7lxcWFoaGhgKAmZmZQqEQNnbUaStWEADy5Zdix4GEgGlUIE1NTWq1+pVXXqHZUyKRhIeHq9XqtrY2+oSWlpb9+/dPnjyZ/Qs3adKkgwcPPttUW1ubXC6nz1m0aFF9fb2wXwrqhLAwAkBOnhQ7DiQETKMCGT16NE18jo6OGzduvHPnDvupwsJChUIxaNAg+gQ7OzuZTHbt2rWOGzx69Gi/fv0AwNfX95dffuE5fNRFHh4EgOTmih0HEgKmUSG8/vrrzs7OPj4+SqWyoaGBPqjValNSUqKjo+mFIwAYOXKkQqGoqqrqZLPZ2dk0O9vZ2X3zzTe8hY+6qLmZSCTEzIy0toodChICplEhjBo1CgAyMzPpP0tLS7ds2cLupbe0tFyyZMmFCxe60XJdXd3ChQtpOzKZrKWlhdPAUbdkZxMAMnSo2HEggWAa5Z1Op7O2tgaAmpoa+khqaipNfMOGDVMoFKWlpT3sQqlUmpub0+XUhw8f9jhk1COPUlIqAgIaoqPFDgQJBDc88a64uLixsdHJyalv3770kalTp65Zs+bf//737du35XL5gAEDetiFTCa7cOGCp6fnjz/+6O/vn5KS0uOoUfcdys52zMj4g4OD2IEggWAa5V1ubi4ADBs2jH2EYZjt27dPnz5dIuHs+x8UFJSWljZt2rSysrK1a9e2tbVx1TLqKvoTH4qbRk0GplHe5eXlgSC/VAMHDvz+++8nTJhQWFioUqn47q4XaHfWp/4xoHyiP3H9P5zIuGEa5d2zo1H+SKVSFxeX5uZme3t7AbrrTQQ8BlTInzgyBJhGecf3aLS2tnbr1q3JycnCdNfLPDv8ZI8B5QchJD8/H/BHYErw2Gbe8T02yc7O3rBhg7+//8yZMwEAf4efwp71yeL53M9nrygio4ejUd7xPTzUX4krLi6ur693dHTESX17+seA/ulPsHkzT8kUZ/QmCEej/Kqvry8tLbW0tHRzc+OpC/3fW7xG/BQ2UdIP6P+zssDPD3Q6uHED/u//gOsx461btwDAy8uL22aRIcPRKL/y8vIIIV5eXhzubXq2C3iSOvEa8cuNGgXHjkH//nDkCAQFwc8/c9Xw7
du3N23atGHDhsGDB1+5cqWoqIirlpGBwzTKLwGGh/ppFGeUnRIZCZmZEBwMd+7AhAmwa1dPGmttbT1w4EBYWJiPj09CQkJdXV1NTU1ubm5AQMCZM2c4ihgZNEyj/BJgeKifOvEyfWd5esLZs7B8OTQ3w8qVsGYNtLR0tY2ioqKEhIRhw4YtXLjw7Nmz9GiuzMzMu3fvvvbaayUlJeHh4Zs3b9bpdHx8BciAiH03qpFbu3YtACQmJvLUfnNzs0QiMTMza21tJYTQ80xPnz7NU3dGSKUi1tZ1o0f/dsIEugLzUs8ezeXr66tQKCorK9nnaDSa+Ph4upITGRmp/ylkfDCN8mv69OkAcPz4cZ7az87OBoChTw4TcnFxAYCCggKeujNO6elR48cDgKOjY3JycgdPrKysTExMZC8fmZub61cefNZ3333Xv39/APD09ExLS+MyZoDH/9GP2QeRGPD7zi9vb28AyMrK4qn9f/3rXwAQERFBCGloaGAYxsLCQqvV8tSdsaqpqYmKioIX116lpV+srKxoAu380Vz3798fP348AFhaWu7YsYObcNvlTUyjYsPvO480Go25uTnDMI2NjTx18dlnnwHAqlWrCCE///wzAPj4+PDUl3HT6XQKhUIqlQLAlClTaO3V2tpapVI5duxYmj3Z0i9dqnHd1NQUFxdHW1i6dGmP3gx1deTo0afSJTssZQenSHD4fecRvaHI3d1d/8Hm5mYOu1i/fj0AfPrpp4SQf/7znwAwa9YsDtvnhE5Hnhy1auhOnjxJzy10d3efN2+era0tTX+urq7vv/9+T1ZLVCoVPXZ23Lhxud0oLpKdTeRy4uDwVLrE0ahhwCv1PHp2+1FqaqqPj8+lS5e46sLAdzvV1kJSEowdC6tXix1K50RERGRkZISEhJSWlh47dqy+vj4wMFClUt2/f/+jjz7y8PDodsuxsbEXLlwYNmxYZmZmQEDA0aNHO/Wy1lb45ht49VXw9YWEBKishEmTIC1NmKOqUGeJnceN2VdffcUwTGBgILtYOWvWLACwsLDYvn07J13olyd5++23AWDbtm2ctNxD6enkzTeJtfXjwZOXV2+qS5STkwMA/fv3v3XrFrct19TUzJ8/H168CMvKz88v/e//JgMHPv4O9u1L1q4lP//MbTyIE5hGeaTT6ejlhZkzZ1ZUVJCnayMvXry4h7WR25UnmTFjBgAcO3aMm+i7pbGxafduEhz8+HefYUhEBDl8mNAy0r1l9kmrvEyePJmPxnU6XWJiIt0sFRYWRhdhWfq7qba/8goBICNHksREUlfHRzCIE4b9du79Tp065ezsDAAeHh6XLl2iD+7bt8/GxgYAfH19e3IRX6fT3bx588SJE/SfI0aMAACxii3TgiiOjo4jR9YDkH79iExG9GO5f/85W3QMk1KpBIA33niDvy7OnDlDd6cNGjTop59+IoSUlJS0K3S4avly0q1Ch0hghv12NgoPHjwICQmhc3l2H/6tW7fofNzOzk6tVve8F61Wa2FhwTAMW8BZGK2trYcOHZo2bRrzZKluyZL9//gHaWpiAyMpKSQ6mpiZ/ZpDDTyN0hnDRx99xGsvhYWFoaGhAGBubh4SEkKLEgLAsGHDPv3007KyMl57Rxwy7LezsWhubma3vCxZsoTO5evq6n7/+9/TZbK4uLjWnq0d3rt3DwDc3Nw4CvnlioqKPvjgA/bkKhsbm7feeisjI4N9QkkJUSiIl9fjvGlh0WvS6IIFCwBg3759fHdEF3mkUqm9vX33dlMhQ2DYb2fj8vXXX9O5/MiRI2/evEkfZGsj//a3v+12beTz589HRETY2dk5OzsLNoo5ffo0TaA+Pj4KhYIu/lLp6UQmI1ZWjzPmoEEkPp6UlvaatdGAgAAAYBdheKXRaPr06cMwTE5OjgDdIT4Y9tvZ6Ny6des3v/kNncsfPHiQPnj+/Hk6phswYEBqamrnW6usrNy2bZuPjw9NZ/Qem8GDB1++fJmf8
J+i0+nWr19/5swZ9pGampovvvgiKuoBzZ5SKZkzhyQnE/amqsZGcvWqAKH1FD30Wpg/SMJPIxDnMI0Krba2Njo6ut1cvri4eMqUKQBgZmbWmXNM6L2JdGwLAK6urnK5/NKlS3StTX8RVhg3b96Mi4ujm9WDgt53cSFyOcnP//UJt28TuZw4OZEBAwin9x9wr7y8nP6dE6a7H374gc5FhOkO8QHTqDiUSmWfPn3orpqioiKidyaQUql80auamprUajU9xgn07k1so/uJuN5Q9VJNTU1fffXVxIkTaY8Mw0yZMkWtPsQu87a2kkOHyLRphGEeT/DHj38qvRqgtLQ0APD39xemux07dgDAsmXLhOkO8QHTqGjYufzAgQN/+OEH+mB6evpzn5yTk0O3E9GEZW9vL5PJXrRZat++fXRg2MMNVR24e/fuxo0bnZyc2HjWrVunv1n9wYMHW7Y0uLk9zp42NmTlyt4xo9+/fz8AzJ8/X5ju3n33XQD48MMPhekO8QHTqJhKS0vDw8PpXF6hUOh0unZP0Gg03377bXh4OLudKDAwUKlUvnRXU3Z2Nrcbqih2c3hQUJB+POywV6fTsbvHw8J+BCAjRhCFguhdfzJ0H3/8MQD88Y9/FKa7mJgYAPj666+F6Q7xAdOoyPTP950zZ05VVRV9/OHDhwqFgr2Jmz1ZvfMtsxuqAEAmk/VwQ1VRUdFf/vKXQYMGsdub3nnnHf0zNMvLy//617/SgwHpXsi33orXu/7Ua6xYsQIA/v73vwvTXWBgIADQHfiol8I0ahC+/fZbenV4+PDhSUlJHZ+s3iXshqpJkyZ1b0PV+fPno6Oj6UoujVChUJSXl7NPKC4ujo2NtbS0pE/w9PT85JNP2t3j2IuEhYUBwMmTJ4Xpjp7rXFJSIkx3iA+YRg3F7du3/fz8AIDeJs+erP7sTL+r0tLSPD096YaqDo5qb6e6ulqpVI4ePZomR6lUGhkZ+dx4GhsbHRwcnr3e1UvRGUB3zrLruqqqKgCwtbUVoC/EH4awtbyR2BobG7/44gsAIISsWLGCHnzJifLy8tdffz0lJUUqlb7//vsffPBBBwWfr169mpSUtHfv3oaGBgBwdXWNjY1du3bt4MGDX/SS7777btSoUUZQnL2lpcXa2loikTQ2NrIDcP6kp6cHBwf7+fldv36d774Qj8TO40gg+ouws2fPZhdhWS/dTWUKbt26BXq1rfh24MABAIiKihKmO8QTMxEzOBKSVCrdvHlzUFBQbGzs8ePHg4ODDx8+TJcRbt++vXv37p07d1ZUVACAvb19TEzMunXr6A1XJkWAgtj6DPCkbdQNmEZNS2Rk5LVr1xYsWHDlypUJEyYsW7YsJyfn7NmzhBAACA0NXb16dXR0NHu9yNQInNf0ixeg3gvTqMkZPHjwuXPn4uLiduzYceTIkbKyMjs7u0WLFq1Zs8bf31/s6EQmcF7DNGocMI2aIktLy6SkpEuXLt24cePtt9/esmVL3759xQ7KIGRmZgKAYNfKcFJvHLCknemqr68HgLi4OMyhdXV1SUlJ/v7+586dc3JyUiqV1dXVfHfa1tZWWFgolUo72AKBegXc8GSiNBqNlZWVTqdrbGy0sLAQOxzRXLt27csvv9y3bx/9o+Lk5NTU1NTQ0ODj43P48GF6Qy1P7ty5M2LEiCFDhtBC3Kj3wtGoibp3755Go/Hw8DDNHNrS0nLw4MGIiIhx48YlJSWxhZSLioquXbvm5+eXk5MTHBy8e/du/mLAGb3RwLVRE2W6Fzfu3gWlMjkrKyY5GQDs7e2XLVu2evVqX19f+nlvb+/Lly+/8847O3fufPPNNy9fvvz555+zhZI4ZLo/AqODo1ETZXJDIZ0OUlMhJgZ8feF//ud3WVkh48fv2rXr4cOHiYmJbA6lLC0td+zYoVKprKyskpKSQkNDOZ9363Q6WsaZLWaFejGx9/8jcWzcuBEAtmzZInYg/Hv0iCgUxNPz8dGnlpYkO
pr8+GNnXnrlyhVa8fjN8HDy/fechFNZWZmYmEgHoYMGDfLw8Lh+/TonLSOxYBo1UVFRUQBw4MABsQPhU00NmT//cWVnAOLrSxITyTN3wXasoqJizZIlNXZ2RCIhH374a2Gprjt37tyiRYvYxeghQ4bQHG1tba1SqbrdLBIdplET9V5MTMjo0ddecNh+70aTJp1pjR5NpFISGUlSUki3z8rS6YhCQaRSAkCmTiVdPNSutrZWqVTS+25B77ACjUbT1NT01ltv0ceXLl3a2NjYzQiRqDCNmqq+fQkA6e4xpoarXRHntDRSVMRNy6dPE2dnAkA8PMjFi516yc2bJC7u7pQpNFG6uLjI5fL8Z2pRqVQqejpiQECAMAf0IW5hGjVJJSUEgNjbix0HD/SX+zlf+n/wgEycSACIhQXpoPZqUxNRqUhIyONBsYVF7OzZarW6gwIEGRkZ9HKfg4PDiRMnOA4b8QzTqEm6eJEAkMBAsePgQbvRKOdaWsh//ufj/LhmTfvP3rnzuJA0fUK/fkQmIzdudKbhmpqaefPmAQDDMHK5XKPRcB884gdueDJJubkAAEa524kQYBh4UgGQe+bm8PnnsG8f2NpCSMjjvhgGCIG5c2HECEhIgPJyCA6GXbugqAiUSnhSQaBjffv2PXTokEKhkEgkCQkJERERJSUlfH0ViFOYRk0STaPGuvGbltW7eZPHLhYtgjt3YNkyIATo7dQMA/37g4UFREfDjz9CWhqsWAHW1l1qlY5DU1NTXVxcTp8+HRQUdPHiRV7iR5zCNGqS8vIAjHQ0qtVCQQEwDAwZwm9HLi6/fkyHwB9/DI8egVoNTyoIdE9YWFh6enpoaGhhYeHkyZMTEhJ6GiriGaZRk0TTqFGORh88gNZWcHMDKyvhOqWTend3sLfnpD13d/ezZ8/SFdJNmzYtXryY1sVChgnTqEky4rVRIb80PtdhzczMFArFkSNH+vXrt2/fvqCgoKysLD46Qj2HadT0EAK/+x1MmwaDBokdCg8EXvala6O8nTYZFRV1+fLl0aNHZ2dnT5w4Ua1W89QR6glMo6aHYSApCVJTQSoVOxQeGN16hY+Pz08//RQTE1NXV7dw4cJvvvlG7IhQe5hGTQ+7R8coGePVMzs7uwMHDnz++ee2tranT58WOxzUHqZRE0MvhhhxyQPjXfadMmVKXV3dmTNnxA4EtYdp1FTRyyNNTWLHwbGNAwZse/VVjRFN6lkmd0Rs74Fp1FQxDDQ2wiuvwLp10NYmdjTcqKys/H/ffx+fkWE2YIDYsXAPT8s3WJhGTYz+Hp2zZ+GXX+CzzyAiAoqLxY6MA8Y9XqNfHaZRA4Rp1PSwe3RmzIBTp8DNDc6ehbFj4YcfuO9L2MtZxp1o6GjUWP9I9GqYRk3bpElw7RpEREBpKcyYAQkJnF19ysoS/nKWcSca4x5r92qYRk3egAGQnAzx8aDTwaZNMHcuVFd3v7XmZjh4ECZNeupYI7qSUF4OGk3P4+2AEa8e6nS6e/fuAcAQvs8KQF2HaRQBSKWweTMcOQL29vDttzdWrrxx40aXG7l1C9atA1dXiImBCxfAyenXTzEMtLbC3LkwdSoUFXEYeDtGPF57+PBhS0uLi4uLra2t2LGg9jCNoif+4z8gPb1izpzQ48dDQkL27NnTqVdptXD8OEREwKhR8NlnUF0NgYGgVML9+09dzrp3D+7dg/PnITAQzp7l6Ssw4tHowPz8C2PG7I6IEDsQ9DxinxuNDEtTU9PKlSvpe6PjImv3799/7733voiMfHzSu60tWbWKXLv2wqbLyshrrxEAIpWS+PielNh8rvz8fIlEYmZm1kGtjl5s1y4CQJYuFTsO9ByYRtFzqFQqKysrAAgICMjLy9P/lFarTUlJiY6ONjMzAwAPNzeNnx/53/8lNTUvb1ejIfHxRCIhAGT27K7WOn6R8+fPR0dH9+nTx8HBQaFQcNKmwfmv/yIAJD5e7DjQc2AaRc+XkZFBZ8eOj
o7JycmEkKqqqsTERHbKbG5uHh0dnZKSoutq4eLvviP9+xMA4u1Nrl/vdoSVlZVbt2718fGh8fTp02fBggXl5eXdbtCgLVxIAMhXX4kdB3oOTKPohSoqKmbNmgUAEokkICDAwsKCJqyhQ4cqFIrS0tLuN52bS8aNIwB5YWEqlaqrr05PT5fJZDY2NjQeV1dXuVxeUFDQ/XgMX3AwASAXLogdB3oOTKOoIzqdTqFQSKXSgQMHSiSS8PBwtVrNTdHKpqba9esH2tgAgEwma25u7sQrmtRq9StPSnSw8bS1tXEQj4FzdCQA5NEjseNAz4FpFL2cg4MDAFy5coXzllUqlbW1NQCMGzcuNzf3RU/LycmRy+WOjo40gdrb28tksqysLM7jMVDV1QSAWFuTrq6fIEFgGkUvUVNTAwDW1tZdXgPtnMzMTLrTs1+/fkeOHNH/VEtLi1qtDg8PZ57cThoYGKhUKhsaGviIxHBdvUoAyJgxYseBng/3jaKXYG9UZ/i5Nd7f3z8jI2P+/Pk1NTXz58/ftGmTVqstKipKSEjw9vaOiYlJTU21tbWVyWSZmZl0VdS6i4WLez2jO9LfyDDEiE/wRVw4fPjwggUL5syZc+zYMf56IYQoFIo///nPWq3Ww8OjqKhIq9UCwJgxY9asWbNkyRI7Ozv+ejd0jY2QkwNSKfj5iR0Keg4zsQNAhk6YOywZhnn33XdDQ0PnzJkjkUgkEsm8efNkMtm0adN4GgX3DuzXTm8Jo4Me9gNkGDCNopcQ8g7LV1991dfXNy0t7dixY3PmzBGgR4OmnzeRAcM0il5C4BvV6TlGgYGBwnTXO9ChKGA+NVB4iQm9hJDHJtXX15eWllpaWrq6ugrQXa/BDkuNuxxhr4WjUdQRjUbz4MEDiUQizDGXdOTr5eUlkeAfeL1BKDJs+GZFHSkoKGhra3N3d2fvBOWVER8Y2k1sxRf6MfsgMiSYRlFHBM5rxl1MCRkrTKOoIwLnNeMupoSMFaZR1BGBL9Mb8fH1yIhhGkUdEXh4iGujqDfCNIo6ImRe02q1BQUFDMNg8UvUu2AaRR0RcpZdUFDQ2trq5uZG65cg1FtgGkUvVF5eXltb27dvX/agT17h9SXUS2EaRS9EZ/Te3t5CdodpFPU6mEbRC+FleoQ6A9Moej6dTpecnAwALi4uwvSIaRT1UphGUXvV1dV/+9vfhg8fvmfPnsGDB+/du/fUqVMC9IuTetRLYRpFv7pw4cKSJUtcXFzWr1+fl5c3ZMgQKyurqqqq6dOnb926le9CCTgaRb0UplEEUFd3c8+esWPHTpo0ae/evRqNZvbs2SdOnMjNzc3KyoqPj9fpdBs2bJg7d251dTVPIVRUVFRXV9vZ2Q0YMICnLhDiCdZiMm3Z2fCPf0BSktbc3Laiop+j4/Lly1evXt1uA/zx48djY2Orq6uHDx9++PDhMWPGcB7IlStXxo8f7+/vn5mZyXnjCPEKR6MmqaUF9u6FSZNg5EhISICqKqmv7xm1uqCgQKFQPHsT0ezZs69cueLn53fnzp0JEybs3r2b23BaW1v37t0LAP379+e2ZYSEIG59ZySC+Hji5EQACADp14+88w7JyurM65qamlauXEnfNjKZrKWlpeexPHz4UKFQeHh4AIC7u7tEIomPj9dqtT1vGSHBYBo1UuwfSPoBTZr04zfeIAAkIIAolaSurqsNq1QqerNmYGBgXl5e96LTarUnTpyYPXu2VCqleXnMmDEzZ86kRUCjoqKqq6u71zJCwsM0aqT002i7lHr7Nrl8uSdtX7161cvLCwAcHR2Tk5O79NqqqqrExET2cry5uXl0dHRKSopOpyOEHD9+nM7rvb29r1+/3pMgERIMplEjxQ4/9dMoIYSjZZzy8vKZM2cCAMMwcrm8M9Pw9PR0mUzGHjvi7u4eHx9fWlra7mn37t0LDg4GAEtLy507d3ISLUK8wjRqpDoYjXJEp9MpFAo6K586dWpJSclzn1ZbW
6tUKseOHUuzp0QiCQ8PV6vVGo3mRS03NjYuX76cPv/CBx+Q5mYOw0aIc5hGjVQHa6OcOnXqlLOzMwB4eHhcvHhR/1M3b96Mi4uztbWlCdHZ2Vkul+fn53eyZZVKJZ8wgZiZkYAA0t1FWIQEgGkU9dSDBw8mTpwIABYWFomJic3NzWq1Ojw8nN0NEhgYqFKpWltbu9z01avEy4sAEEdH0sVFWIQEg9vvEQdaWlr+8Ic/bN++HQBsbW3r6+sBwN7ePjY2dvXq1SNHjux+07W1sHw5HD0KDAN/+hN88gk8ubiPkIHANIo4s3fv3tWrV0skEm9v71WrVi1evNjGxoaDdgmBTz+F994DrRamTIH9+8HZmYNmEeIIplHEmaKiInd3d0dHx/Lycu5bT02F11+HsjKYORNOnOC+fYS6C28GRZyhJ90NHz6cl9bDw+HqVYiIgG3beGkfoe4yEzsAZDx4P+nOwwNOngQAYJjHjxACDAN0RsV+gJCwcDSKOCNQTTqaLjFjIoOBo1HEGTqpF+7cZToUBb3BKUJiwNEo4ozQaZSdxePgFIkKR6OIMwJN6tlBKEKGATc8IW7U19fb2dlZWlo2NDRIJDjLQSYER6OIG1b5+Y2+vqUjR2IORaYG3/GIG9K7d62ysz3b2sQOBCGhYRpFHMnNBQDAKvPI9GAaRRzJywMAwCrzyPRgGkUcoaNRTKPI9GAaRRyho1Gc1CPTgxueEBe0WrC2hrY2aGiAJ9WWEDIROBpFXCgogNZWcHPDHIpMEKZRxAWc0SMThpN6xIXWVsjNhdZWeFIBFCHTgXcxoZ7Boz+RycNJPeoBPPoTIRyNIm7g0Z/IhOFoFHEBj/5EJgxHo6gH8OhPhPBKPUII9RBO6hFCqEcwjSKEUI9gGkUIoR75/xt6grV5KDV5AAAAAElFTkSuQmCC\n", 82 | "text/plain": [ 83 | "" 84 | ] 85 | }, 86 | "execution_count": 5, 87 | "metadata": {}, 88 | "output_type": "execute_result" 89 | } 90 | ], 91 | "source": [ 92 | "df['mol'][0]" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 10, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "name": "stdout", 102 | "output_type": "stream", 103 | "text": [ 104 | "1個目の分子からRECAPにより再帰的に見つかるフラグメントの数 10\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "def recursive_retreive(list_fragments, leaf):\n", 110 | " if leaf.children:\n", 111 | " for leaf in leaf.children.values():\n", 112 | " list_fragments += leaf.smiles\n", 113 | " recursive_retreive(list_fragments, leaf)\n", 114 | "decomp = Recap.RecapDecompose(df['mol'][0])\n", 115 | "lst_frg = []\n", 116 | "recursive_retreive(lst_frg, decomp)\n", 117 | "print('1個目の分子からRECAPにより再帰的に見つかるフラグメントの数', len(set(lst_frg)))" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## BRICSを使ったフラグメントライブラリ作成" 125 | ] 126 | }, 127 | { 128 | "cell_type": 
"code", 129 | "execution_count": 18, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "fragments = [list(BRICS.BRICSDecompose(mol)) for mol in df['mol']]" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 22, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | "BRICSが取り出した全フラグメント 1028\n" 146 | ] 147 | } 148 | ], 149 | "source": [ 150 | "def unwrap(list_data):\n", 151 | " list_output = []\n", 152 | " for li in list_data: \n", 153 | " list_output += li\n", 154 | " return list_output \n", 155 | "fr_all = unwrap(fragments)\n", 156 | "print('BRICSが取り出した全フラグメント', len(set(fr_all)))" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 58, 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "name": "stdout", 166 | "output_type": "stream", 167 | "text": [ 168 | "2\n" 169 | ] 170 | } 171 | ], 172 | "source": [ 173 | "import random \n", 174 | "random.seed(42)\n", 175 | "list_fragments = [MolFromSmiles(smi)for smi in fr_all]\n", 176 | "random.shuffle(list_fragments,\n", 177 | " random=random.random)\n", 178 | "seed_structures = list_fragments[:2]\n", 179 | "print(len(seed_structures))" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 60, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stderr", 189 | "output_type": "stream", 190 | "text": [ 191 | "\n", 192 | "\n", 193 | " 0%| | 0/2 [00:00 15 | from cheminfo.metrics import t2_score 16 | ModuleNotFoundError: No module named 'cheminfo' 17 | ``` 18 | """ 19 | import sys 20 | sys.path.append('./') 21 | from cheminfo.metrics import t2_score 22 | 23 | if __name__=='__main__': 24 | print(t2_score.__name__) 25 | -------------------------------------------------------------------------------- /src/from_src.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pythonパスシステムの理解。 3 | このファイルが存在するフォルダ src/ で 4 | 
実行すると実行できて、プロジェクトルートへ移動すると実行できない。 5 | 6 | ```bash 7 | $ pwd 8 | ./support-page/src 9 | $ python from_src.py 10 | t2_score 11 | $ cd ../ 12 | $ python src/from_src.py 13 | Traceback (most recent call last): 14 | File "src/from_src.py", line 13, in 15 | from cheminfo.metrics import t2_score 16 | ModuleNotFoundError: No module named 'cheminfo' 17 | ``` 18 | """ 19 | import sys 20 | sys.path.append('../') 21 | from cheminfo.metrics import t2_score 22 | 23 | if __name__=='__main__': 24 | print(t2_score.__name__) 25 | -------------------------------------------------------------------------------- /src/models/9.3-screening.py: -------------------------------------------------------------------------------- 1 | from joblib import load as jl_load 2 | import numpy as np 3 | from rdkit.Chem import MolFromSmiles 4 | 5 | from cheminfo.descriptors import RDKitDescriptor 6 | from cheminfo.metrics import t2_score, q_value 7 | from multiprocessing import Pool, cpu_count 8 | 9 | def processor(argv): 10 | """予測値とSMILESを文字列で返す関数。multiprocessing.Pool.imap用。""" 11 | smiles, model = argv 12 | smi = smiles.strip() 13 | mol = MolFromSmiles(smi) 14 | mol.UpdatePropertyCache(strict=True) 15 | rdcalc = RDKitDescriptor() 16 | xnew = np.array(rdcalc.transform([mol])) 17 | # print(xnew,type(xnew)) 18 | ypred = model.predict(xnew) 19 | t2 = t2_score(xnew, model) 20 | q = q_value(xnew, model) 21 | return '%s %.8f %.8f %.8f'%(smi, ypred, t2, q) 22 | 23 | 24 | def count_lines(filename): 25 | """ファイルの行数を数える関数""" 26 | with open(filename, 'r')as f: 27 | c=0 28 | for _ in f: 29 | c+=1 30 | return c 31 | 32 | 33 | def main(argv): 34 | """メイン関数。入力を受け取り、並列処理を実行。""" 35 | assert len(argv)>2, "SYNTAX: python src/10.3-screening.py MODEL_FILE.joblib SMILES.smi" 36 | modelfile, smilesfile = argv[1:] 37 | model = jl_load(modelfile) 38 | n_counts = count_lines(smilesfile) 39 | cs = 1000 40 | model_sampler = (model for _ in range(n_counts)) 41 | outfile = open(smilesfile.replace('.smi', '.out'), 'w') 42 | with 
open(smilesfile, 'r') as f: 43 | with Pool(cpu_count()) as pool: 44 | "並列処理" 45 | for ret in pool.imap(processor, zip(f, model_sampler), chunksize=cs): 46 | outfile.write(ret+'\n') 47 | outfile.close()# 開いたファイルオブジェクトは必ず閉じる。 48 | 49 | if __name__ == '__main__': 50 | import sys 51 | main(sys.argv) 52 | -------------------------------------------------------------------------------- /src/parallel.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 2 | def f(x): 3 | return x*x 4 | if __name__=='__main__': 5 | with Pool(processes=4) as pool: 6 | print(pool.map(f, range(10))) 7 | for i in pool.imap(f, range(10)): 8 | print(i) 9 | -------------------------------------------------------------------------------- /src/parallel_wo_with.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 2 | def f(x): 3 | return x*x 4 | if __name__=='__main__': 5 | pool = Pool(processes=4) 6 | print(pool.map(f, range(10))) 7 | for i in pool.imap(f, range(10)): 8 | print(i) 9 | pool.close() 10 | --------------------------------------------------------------------------------