├── .github ├── ISSUE_TEMPLATE.md └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── Dockerfile ├── README.md ├── cheminfo ├── __init__.py ├── descriptors.py └── metrics.py ├── data ├── catalyst │ └── journal_data.csv ├── chembl │ └── CHEMBL26-chembl_26_activity-H1-receptor.csv ├── delaney-solubility │ └── delaney-processed.csv └── zinc │ └── ion_channel-fda.csv ├── environment.yml ├── models ├── 9.3_rdkit_pls.joblib ├── morgan_svm.joblib ├── rdfrags_svm.joblib └── rdkit_svm.joblib ├── notebooks ├── 1.2-eda-boston-data.ipynb ├── 1.3-tsne-tanimoto-distance.ipynb ├── 1.4-fragment-visualization.ipynb ├── 4.2-doe_orthogonal.ipynb ├── 4.4-deap_d_optimal_design.ipynb ├── 8.1.2-structure-decomposition.ipynb ├── 8.1.2-structure-generation-brics.ipynb ├── 8.2-bayes-optimization.ipynb ├── 9.2-catalyst-exhaustive.html ├── 9.2-catalyst-exhaustive.ipynb ├── 9.3-decsriptors.ipynb └── 9.4-histamine-antagonist-screening.ipynb └── src ├── data └── 9.3-brics.py ├── features ├── 5.2-3-fragmentor.py ├── 5.4-rdkit_desc.py └── 5.5-run_mordred.py ├── from_root.py ├── from_src.py ├── models └── 9.3-screening.py ├── parallel.py └── parallel_wo_with.py /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **開発環境:** 2 | - RDKitバージョン: 3 | - OSバージョン: 4 | - Pythonバージョン: 5 | - 使っている開発環境(Anaconda,Miniconda,Homebrew etc.): 6 | 7 | **説明:** 8 | 9 | ```python 10 | >>> Code example 11 | ``` 12 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **関連するissue:** 2 | 3 | **どういう変更ですか?** 4 | 5 | **その他** 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 
| # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM frolvlad/alpine-glibc:alpine-3.8_glibc-2.28 2 | 3 | LABEL maintainer 'sshojiro' 4 | 5 | RUN apk update && \ 6 | apk --no-cache add bash ca-certificates wget libxext libxrender libstdc++ && \ 7 | update-ca-certificates && \ 8 | apk --update add tzdata && \ 9 | cp /usr/share/zoneinfo/Asia/Tokyo /etc/localtime && \ 10 | apk del tzdata 11 | 12 | RUN echo 'export PATH=/opt/anaconda/bin:$PATH' > /etc/profile.d/anaconda.sh && \ 13 | wget --quiet https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/anaconda.sh && \ 14 | /bin/bash ~/anaconda.sh -b -p /opt/anaconda && \ 15 | rm ~/anaconda.sh 16 | 17 | ENV PATH /opt/anaconda/bin:$PATH 18 | 19 | RUN conda install joblib=0.14.1 networkx=2.4 numpy=1.18.1 \ 20 | pandas=0.25.3 python=3.7.0 scikit-learn=0.22.1 \ 21 | scipy=1.4.1 && \ 22 | conda install -c rdkit rdkit=2019.09.3.0 && \ 23 | conda install -c conda-forge pymatgen=2019.12.3 && \ 24 | pip install --upgrade pip && \ 25 | pip install jupyter==1.0.0 matplotlib==3.1.3 tqdm==4.42.1 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # サポートページ 2 | 3 | [『実践 
マテリアルズインフォマティクス』](https://amzn.to/3dSszJL)のサポートページです。 4 | 5 | ## プロジェクトの始め方 6 | 7 | 本リポジトリを使用する際には 8 | 9 | 1. このリポジトリをクローンする(手元へダウンロードする) 10 | 1. 必要なライブラリを揃える 11 | 12 | という2つのステップを踏む必要があります。 13 | 14 | ### このリポジトリをクローンする 15 | 16 | Git BashやMac OS Xのターミナルを開いて,以下のコマンドを実行してください. 17 | support-pageというフォルダが作成されます. 18 | 19 | ```bash 20 | $ git clone https://github.com/funatsu-lab/support-page.git 21 | ``` 22 | 23 | ### 必要なライブラリを揃える 24 | 25 | #### それぞれのライブラリをインストールする場合 26 | 27 | ```bash 28 | $ conda create -n regz python==3.7 numpy scipy pandas scikit-learn 29 | $ conda activate regz && conda install -c rdkit rdkit 30 | $ pip install jupyter 31 | ``` 32 | 33 | #### `environment.yml`ファイルを使ってインストールする場合 34 | 35 | Anaconda (Miniconda)の`conda env create`コマンドを使って
36 | (Pythonの)仮想環境を構築します。本フォルダには既に
37 | `environment.yml`という必要なライブラリのリストが
38 | 記載されているテキストファイルがありますので利用してください。 39 | 40 | ```bash 41 | $ conda env create -n [Python仮想環境名] -f environment.yml 42 | ``` 43 | 44 | ## プロジェクトの構成 45 | 46 | 以下のフォルダ構成は[Cookiecutter Data Science](https://github.com/drivendata/cookiecutter-data-science)が
47 | テンプレートとして作成するフォルダ構成を参考に作っています。
48 | 他の人も利用可能な状態にフォルダを維持しておくのは
49 | プログラミングの生産性の観点からも重要です。
50 | 可能な限り以下のフォルダ構成を維持するようにしてください。
51 | もちろん、必要に応じてフォルダが増減しても構いません。 52 | 53 | ``` 54 | support-page 55 | |-- README.md # プロジェクト概要を示すテキストファイル 56 | |-- environment.yml # Anacondaにインストールしたライブラリの情報を書き出す 57 | |-- setup.py # cheminfo ライブラリをコンパイルする場合は作成する 58 | |-- bin # シェルスクリプトを書いたら保存しておくフォルダ 59 | | `-- compile_package.sh 60 | |-- cheminfo # 自作ライブラリを保存するフォルダで、今回は`cheminfo`とした 61 | | |-- __init__.py # __init__.pyを必ず含む 62 | | |-- descriptors.py 63 | | `-- metrics.py 64 | |-- data # データを保存するフォルダ。データセットが多様ならデータソースごとに 65 | | |-- catalyst # サブフォルダを作っておくと良い 66 | | |-- chembl 67 | | |-- delaney-solubility 68 | | `-- zinc 69 | |-- models # 作った機械学習モデルを保存するフォルダ 70 | | |-- 9.3_rdkit_pls.joblib 71 | | |-- morgan_svm.joblib 72 | | |-- rdfrags_svm.joblib 73 | | `-- rdkit_svm.joblib 74 | |-- notebooks # 探索的な解析,可視化をする場合はJupyter Notebookを用いた解析をする 75 | | |-- 1.2-eda-boston-data.ipynb 76 | | |-- 1.3-tsne-tanimoto-distance.ipynb 77 | | |-- 1.4-fragment-visualization.ipynb 78 | | |-- 4.2-doe_orthogonal.ipynb 79 | | |-- 4.4-deap_d_optimal_design.ipynb 80 | | |-- 8.1.2-structure-decomposition.ipynb 81 | | |-- 8.1.2-structure-generation-brics.ipynb 82 | | |-- 8.2-bayes-optimization.ipynb 83 | | |-- 9.2-catalyst-exhaustive.ipynb 84 | | |-- 9.3-decsriptors.ipynb 85 | | `-- 9.4-histamine-antagonist-screening.ipynb 86 | |-- references # 文献を保存しておくフォルダ 87 | |-- results # 解析結果の図を保存しておくフォルダ。必要に応じてサブフォルダを作る 88 | `-- src # Jupyter Notebookで実行すべきではない重い処理や 89 | | # 何度も実行する処理をスクリプトにまとめて保存するフォルダ 90 | |-- from_root.py 91 | |-- from_src.py 92 | |-- parallel.py 93 | |-- parallel_wo_with.py 94 | |-- data 95 | | `-- 9.3-brics.py 96 | |-- features 97 | | |-- 5.2-3-fragmentor.py 98 | | |-- 5.4-rdkit_desc.py 99 | | `-- 5.5-run_mordred.py 100 | `-- models 101 | `-- 9.3-screening.py 102 | ``` 103 | 104 | ## コンテンツ 105 | 106 | 0. Pythonの基礎(テキスト本編にはありません) [gist](https://nbviewer.jupyter.org/gist/sshojiro/e437645bb071bcb6c072f9cc6dbb11fa) 107 | 1. 
CoLabでの演習 108 | - 1.2 Bostonデータ可視化 [gist](https://nbviewer.jupyter.org/gist/sshojiro/d614503df0db630ac8194e381a7e5588) 109 | - 1.3 tSNEでのタニモト距離基準の可視化 [gist](https://nbviewer.jupyter.org/gist/sshojiro/01579415335916620109f5c45e69826e) 110 | - 1.4 フラグメント可視化 [gist](https://nbviewer.jupyter.org/gist/sshojiro/946737ed021eae99b08e6b2cd0b4cc12/1-4-fragment-visualization.ipynb) 111 | 2. (環境構築,テキスト参照のこと) 112 | 3. (マテリアルズインフォマティクス概論,テキスト参照のこと) 113 | 4. 実験計画法 114 | - 4.2 直交計画法 [gist](https://nbviewer.jupyter.org/gist/sshojiro/975bd4c31e32fde35ddae14987510fa5/4-2-doe_orthogonal.ipynb) 115 | - 4.4 D最適化計画 [gist](https://nbviewer.jupyter.org/gist/sshojiro/1806ea69ce0b190a38a516bc050d36a9) 116 | 5. 記述子計算(スクリプト実行) 117 | - 5.2 フラグメントカウントの実装 118 | - 5.3 RDKit組み込みのフラグメントカウント 119 | - 5.4 RDKit記述子 120 | - 5.5 Mordred 121 | - 5.6 Pymatgen 122 | - 5.6.1 [元素物性 gist](https://nbviewer.jupyter.org/gist/sshojiro/decde333f82748c7df668374b571e75c) 123 | - 5.6.2 [組成比 gist](https://nbviewer.jupyter.org/gist/sshojiro/2868de251878d3f52a2e6521b430a968) 124 | 6. (機械学習,テキスト参照のこと) 125 | 7. (機械学習モデルの解釈,テキスト参照のこと) 126 | 8. 追加検討 127 | - 8.1.1 自作構造生成 128 | - 8.1.2 ReCAP,BRICSによる構造生成 129 | - 8.2 ベイズ最適化 130 | 9. 解析例 131 | - 9.2 触媒データを使った解析 132 | - 9.3 水溶解度データを使った構造生成、スクリーニング 133 | - 9.4 ChEMBLデータを用いた分類。カーネル法と分類問題 134 | 10. (Bashによるデータ加工,テキスト参照のこと) 135 | 136 | ## Dockerによる起動 137 | 138 | 本項はDockerの操作を分かっている方がご利用ください。 139 | 140 | **ローカルでイメージをビルドする場合** 141 | 142 | ```bash 143 | $ docker build -t example/matcheminfo . # Dockerfileからビルド 144 | $ docker run -i -v /$(pwd):/workspace -p 9999:8888 -t example/matcheminfo # 現在のフォルダとDockerコンテナ内を-vオプションで紐付ける(マウントする) 145 | ``` 146 | 147 | **イメージを取得する場合** 148 | 149 | ```bash 150 | $ docker pull sshojiro/alpine:latest 151 | $ docker run -i -v /$(pwd):/workspace -p 9999:8888 -t sshojiro/alpine:latest # 現在のフォルダとDockerコンテナ内を-vオプションで紐付ける(マウントする) 152 | ``` 153 | 154 | ## 誤字脱字等の報告 155 | 156 | 本書あるいは本リポジトリに誤りが見つかった場合は
157 | [Issueを立ててください](https://github.com/funatsu-lab/support-page/issues/new/choose)。 158 | -------------------------------------------------------------------------------- /cheminfo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/funatsu-lab/support-page/cb0adbbba5fad51af2e0c689b51d86f4b77559fa/cheminfo/__init__.py -------------------------------------------------------------------------------- /cheminfo/descriptors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.sparse import lil_matrix 3 | from sklearn.base import TransformerMixin, BaseEstimator 4 | from rdkit.Chem import Descriptors, RDKFingerprint 5 | from rdkit.Chem.AllChem import GetHashedMorganFingerprint, GetMorganFingerprintAsBitVect 6 | __all__ = ['RDKitDescriptor', 'HashedMorgan', 'BinaryHashedMorgan', 'RDKitFingerprint'] 7 | 8 | 9 | class RDKitDescriptor(TransformerMixin, BaseEstimator): 10 | """ 11 | Transformer that computes the RDKit descriptors listed in `Descriptors.descList`. 12 | """ 13 | def __init__(self): 14 | pass 15 | def fit(self, x, y=None): 16 | return self 17 | def transform(self, data): 18 | """Apply every function in `Descriptors.descList` to each element of `data` and return the values as an `np.matrix`, one row per element. NOTE(review): the original docstring called `data` an array of SMILES, but each element is handed directly to the descriptor functions - presumably RDKit Mol objects are expected; confirm against callers.""" 19 | return np.matrix(list(map(lambda m: 20 | list(map( 21 | lambda f:f(m), dict(Descriptors.descList).values() 22 | )) 23 | , data))) 24 | 25 | 26 | class HashedMorgan(TransformerMixin, BaseEstimator): 27 | """ 28 | Transformer that extracts hashed Morgan fingerprints as counts. 29 | """ 30 | def __init__(self, n_bits=512, radius=3): 31 | self.n_bits = n_bits 32 | self.radius = radius 33 | def fit(self, x, y=None): 34 | return self 35 | def transform(self, data): 36 | """Fill a `len(data)` x `n_bits` matrix from `GetHashedMorganFingerprint(...).GetNonzeroElements()` for each element of `data` and return it as a dense array. NOTE(review): the original docstring called `data` an array of SMILES, but each element is passed as `mol=` - presumably RDKit Mol objects are expected; confirm against callers.""" 37 | n_samples = len(data) 38 | D = lil_matrix((n_samples, self.n_bits)) 39 | for ix, mol in enumerate(data): 40 | morgan = GetHashedMorganFingerprint(mol=mol, 41 | radius=int(self.radius), 42 | nBits=int(self.n_bits)).GetNonzeroElements() 43 | for key, val in morgan.items(): 44 | D[ix, key]=val 45 | 
return D.toarray() 46 | 47 | 48 | class BinaryHashedMorgan(TransformerMixin, BaseEstimator): 49 | """ 50 | Transformer that extracts Morgan fingerprints as bit vectors (presence/absence). 51 | """ 52 | def __init__(self, n_bits=512, radius=3): 53 | self.n_bits = n_bits 54 | self.radius = radius 55 | def fit(self, x, y=None): 56 | return self 57 | def transform(self, data): 58 | """Write `GetMorganFingerprintAsBitVect(...)` for each element of `data` into one row of a `len(data)` x `n_bits` matrix and return it as a dense array. NOTE(review): the original docstring called `data` an array of SMILES, but each element is passed as `mol=` - presumably RDKit Mol objects are expected; confirm against callers.""" 59 | n_samples = len(data) 60 | D = lil_matrix((n_samples, self.n_bits)) 61 | for ix, mol in enumerate(data): 62 | D[ix, :] = GetMorganFingerprintAsBitVect(mol=mol, 63 | radius=int(self.radius), 64 | nBits=int(self.n_bits)) 65 | return D.toarray() 66 | 67 | 68 | class RDKitFingerprint(TransformerMixin, BaseEstimator): 69 | """ 70 | Transformer that extracts RDKit fingerprints (presence/absence). 71 | doc: http://rdkit.org/docs/source/rdkit.Chem.Fingerprints.FingerprintMols.html 72 | """ 73 | def __init__(self, n_bits=256, fraglen=7): 74 | self.n_bits = n_bits 75 | self.fraglen = fraglen 76 | def fit(self, x, y=None): 77 | return self 78 | def transform(self, data): 79 | """Write `RDKFingerprint(...)` (fpSize=`n_bits`, minPath=1, maxPath=`fraglen`) for each element of `data` into one row of a `len(data)` x `n_bits` matrix and return it as a dense array. NOTE(review): the original docstring said Hashed Morgan and array of SMILES; the code calls `RDKFingerprint` and passes each element as `mol=` - presumably RDKit Mol objects are expected; confirm against callers.""" 80 | n_samples = len(data) 81 | D = lil_matrix((n_samples, self.n_bits)) 82 | for ix, mol in enumerate(data): 83 | D[ix, :] = RDKFingerprint(mol=mol, # radius=int(self.radius), 84 | fpSize=int(self.n_bits), 85 | minPath=1, maxPath=int(self.fraglen)) 86 | return D.toarray() 87 | -------------------------------------------------------------------------------- /cheminfo/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | __all__ = ['t2_score', 'q_value'] 5 | 6 | def t2_score(data, model): 7 | "Compute the T2 value: the row-wise sum of squared `model.transform(data)` scores, each divided by the standard deviation of the matching column of `model.best_estimator_.x_scores_`." 8 | assert type(data)==pd.DataFrame or type(data)==np.ndarray, "input must be pandas.DataFrame or np.array" 9 | explained_std_ = np.sqrt(model.best_estimator_.x_scores_.var(axis=0)) 10 | scores_whiten = model.transform(data) / explained_std_ 11 | return (scores_whiten ** 2.).sum(axis=1) 12 | 13 | def q_value(data, model): 14 | 
"Compute the Q value: the row-wise sum of squared differences between `data` and its reconstruction from `model.transform(data)` via `x_loadings_`, `x_std_` and `x_mean_` of `model.best_estimator_`." 15 | assert type(data)==pd.DataFrame or type(data)==np.ndarray, "input must be pandas.DataFrame or np.array" 16 | x_reproduced_ = model.transform(data) \ 17 | @ model.best_estimator_.x_loadings_.T \ 18 | * model.best_estimator_.x_std_ + model.best_estimator_.x_mean_ 19 | return ((data - x_reproduced_)**2.).sum(axis=1) 20 | -------------------------------------------------------------------------------- /data/catalyst/journal_data.csv: -------------------------------------------------------------------------------- 1 | label,Ag,K,Mo,W,Re,Au,Cl,Mg,Fe,Temp,P,GHSV,C3,O2,NOx,EtCl,CO2,pNOx,pEtCl,pCO2,Conv,Sel,Yield 2 | 1,40,2,0,4.7,0,0,0,0,0,258,40,1200,10,5,0.02,0.005,10,0,0,0,12,48,5.8 3 | 2,40,2,0,4.7,0,0,0,0,0,258,40,1200,10,5,0.02,0.005,5,0,0,0,13,44,5.7 4 | 3,50,2,0.5,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,1,13,44,5.7 5 | 4,50,2,0.5,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,0,13,42,5.5 6 | 5,40,2,0,4.7,0,0,0,0,0,258,40,1200,10,5,0.02,0.005,25,0,0,0,10,54,5.4 7 | 6,51,2.1,0.51,0,0,0,0,0,0,250,30,1200,4,8,0,0,0,0,1,1,10,54,5.4 8 | 7,40,2,0,4.7,0,0,0,0,0,258,40,1200,10,5,0.02,0.005,0,0,0,1,13,41,5.3 9 | 8,51,1.6,1.8,0,0,0,0,0,0,250,30,1200,10,5,0.02,0.005,0,0,0,0,13,40,5.2 10 | 9,45,1.3,0,0,0.37,0,0,0,0,250,30,1200,10,5,0.02,0.005,0,0,0,0,14,37,5.2 11 | 10,45,1.3,0,0,0.37,0,0,0,0,250,30,1200,10,5,0.02,0.005,11.2,0,0,0,10,51,5.1 12 | 11,38,2,0,5,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,25,0,0,0,8.8,53,4.7 13 | 12,38,2,0,5,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,0,13.3,35,4.7 14 | 13,44,1.7,0.44,0,0,0,0.05,0,0,232,100,2400,4.15,8.15,0,0,0,0,1,0,9.1,51,4.6 15 | 14,50,2,0,0,0.5,0,0.6,0,0,250,30,1200,10,5,0.02,0,0,0,0,0,10.9,42.4,4.6 16 | 15,40,2,0,0,0,0,0,0.24,0.027,250,30,1200,10,5,0.02,0.005,0,0,0,0,12,38,4.6 17 | 16,40,2,0,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,1,13,35,4.6 18 | 17,40,2,0,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,25,0,0,0,8.7,52,4.5 19 | 18,44,1.7,0.44,0,0,0,0.05,0,0,250,30,1200,10.7,5.2,0.02,0.05,0,0,0,0,10.6,42.6,4.5 20 | 
19,52,1.8,0.55,0,0,0,0.05,0,0,250,30,1200,4,8,0,0,0,1,1,0,9,50,4.5 21 | 20,40,2,0,0.5,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,1,12,37,4.4 22 | 21,54,2,0.5,0,0,0,0,0,0,250,30,1200,10,5,0.02,0.005,0,0,0,0,11.2,39.5,4.4 23 | 22,40,2,0,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,0,13,34,4.4 24 | 23,39,2.3,0,0.6,0,0,0,0,0,232,100,2400,4,8,0,0,15,0,1,0,8,55,4.4 25 | 24,40,2,0,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,1,12.4,35,4.3 26 | 25,40,2,0,4.7,0,0,0,0,0,258,40,1200,10,5,0.02,0.005,0,0,0,0,12,36,4.3 27 | 26,37,2,0,0,0,0,0,0.79,0.067,250,30,1200,10,5,0.02,0.005,0,0,0,0,11,38,4.2 28 | 27,40,2,0,0.5,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,0,11,37,4.1 29 | 28,40,2,0,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,0,12.6,32,4 30 | 29,40,2,0,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,0,0,0,0,12.6,32,4 31 | 30,40,2,0,0,0,0,0,0.26,0.038,250,30,1200,10,5,0.02,0.005,0,0,0,0,11.5,35,4 32 | 31,52,1.8,0.55,0,0,0,0.05,0,0,240,30,1200,4,8,0,0,0,1,1,0,8,50,4 33 | 32,45,1.3,0,0,0.37,0,0,0,0,250,30,1200,10,5,0.02,0.005,52.1,0,0,0,7,57,4 34 | 33,50,2,0.5,0,0,0,0.6,0,0,250,30,1200,10,6,0,0,0,0,0,0,11,36,4 35 | 34,40,2,0,4.7,0,0,0,0,0,258,40,1200,10,5,0.02,0.005,50,0,0,0,7,56,3.9 36 | 35,37,2,0,0,0,0,0,0.41,0.023,250,30,1200,10,5,0.02,0.005,0,0,0,0,10,38,3.8 37 | 36,38,2,0,5,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,50,0,0,0,6.8,55,3.7 38 | 37,43,1.7,0,0,0,0,0,0,0,250,30,1200,10,5,0.02,0.005,0,0,0,0,11,33,3.6 39 | 38,51,2.1,0.51,0,0,0,0,0,0,232,30,1200,4,8,0,0,0,0,1,1,7.5,48,3.6 40 | 39,47,1.7,0,0,0,0,0,0,0,250,30,1200,10,5,0.02,0.005,0,0,0,0,11,32,3.5 41 | 40,39,2,0,0,0,0,0,0.3,0.0006,250,30,1200,10,5,0.02,0.005,0,0,0,0,10,35,3.5 42 | 41,40,2,0,0,0,0,0,0,0,260,40,1200,10,5,0.02,0.005,50,0,0,0,6.3,54,3.4 43 | 42,44,1.7,0.44,0,0,0,0.05,0,0,250,100,1200,8.15,6.35,0,0,14.8,0,1,0,6.5,52,3.4 44 | 43,47,1.7,0,0,0,0,0,0,0,250,30,1200,10,5,0.02,0.005,11.8,0,0,0,8,42,3.4 45 | 44,50,2,0,0,0,0,0.6,0,0,250,30,1200,10,5,0.02,0,0,0,0,0,11,30,3.3 46 | 
45,50,2,0.5,0,0,0,0.6,0,0,250,30,1200,10,6,0,0,0,0,1,0,9,35,3.2 47 | 46,54,2,0.5,0,0,0,0,0,0,240,30,1200,5,5,0.0075,0.02,20,0,0,0,4.5,61,2.7 48 | 47,38,2,0,0,0,0,0,0.001,0.07,250,30,1200,10,5,0.02,0.005,0,0,0,0,9,30,2.7 49 | 48,52,2.1,0,0,0,0,0,0,0,245,30,1200,10,5,0.0075,0.02,10,0,0,0,5.1,52,2.7 50 | 49,25,1,0,0,0,4.6,0,0,0,250,30,1200,10,5,0.02,0,0,0,0,0,6,44,2.6 51 | 50,47,1.7,0,0,0,0,0,0,0,250,30,1200,10,5,0.02,0.005,51.4,0,0,0,5,52,2.6 52 | 51,44,1.7,0.44,0,0,0,0.05,0,0,232,100,2400,4.09,7.97,0,0,14.48,0,1,0,3.9,64,2.5 53 | 52,50,2,0.5,0,0,0,0.6,0,0,250,30,1200,10,6,0,0,12,0,1,0,5,49,2.5 54 | 53,38,1.9,0,0,0,0,0,0.001,0.0006,250,30,1200,10,5,0.02,0.005,0,0,0,0,7,35,2.5 55 | 54,50,2,0.5,0,0.5,0,0,0,0,250,30,1200,10,5,0.02,0,0,0,1,0,5.4,44.3,2.4 56 | 55,50,2,0.5,0,0,0,0.6,0,0,250,30,1200,10,6,0,0,16,0,0,0,5,47,2.4 57 | 56,48,1.8,0.59,0,0,0,0.05,0,0,216,300,4800,4.6,7.6,0,0,0,0,1,0,3.8,59,2.2 58 | 57,50,2,0.5,0,0.5,0,0,0,0,250,30,1200,10,5,0.02,0,0,0,1,0,5,41.8,2.1 59 | 58,49,0.57,0.49,0,0,0,0.7,0,0,250,30,1200,8,8,0.0005,0.0005,10,0,0,0,4,52,2.1 60 | 59,49,0.57,0.49,0,0,0,0.7,0,0,250,30,2400,8,8,0,0,0,0,0,0,6,34,2 61 | 60,49,0.57,0.49,0,0,0,0.7,0,0,250,30,1200,8,8,0,0,10,0,0,0,3.5,58,2 62 | 61,49,0.57,0.49,0,0,0,0.7,0,0,250,30,1200,8,8,0,0.0005,10,0,0,0,3.5,58,2 63 | 62,44,1.7,0.44,0,0,0,0.05,0,0,232,100,2400,4.1,8.12,0,0,14.43,0,1,0,3.2,60,1.9 64 | 63,54,2,0.5,0,0,0,0,0,0,245,30,1200,10,5,0.0075,0.02,10,0,0,0,3.2,59,1.9 65 | 64,53,1.1,0.54,0,0,0,0,0,0,245,30,1200,10,5,0,0.02,10,0,0,0,2.8,58,1.6 66 | 65,36,2,0,0,0,0,0,0.0005,0.0005,250,30,1200,10,5,0.02,0.005,0,0,0,0,6,27,1.6 67 | 66,49,0.57,0.49,0,0,0,0.7,0,0,240,30,2400,8,8,0.0005,0.0005,10,0,0,0,2.5,60,1.5 68 | 67,44,1.7,0.44,0,0,0,0.05,0,0,232,100,2840,9.3,6.4,0.038,0,13.8,0,1,0,2.8,53.2,1.5 69 | 68,50,2,0.5,0,0.5,0,0,0,0,250,30,1200,10,5,0.02,0.005,9.6,0,1,0,2.2,63.8,1.4 70 | 69,52,1.8,0.55,0,0,0,0.05,0,0,250,30,1200,4,8,0,0,15,1,1,0,2,70,1.4 71 | 
70,50,2,0.5,0,0.5,0,0,0,0,250,30,1200,10,5,0.02,0,9.6,0,1,0,1.6,63.2,1 72 | 71,25,1,0.5,0,0,4.6,0,0,0,250,30,1200,10,5,0,0,0,0,0,0,2,50,1 73 | 72,49,0.57,0.49,0,0,0,0.7,0,0,250,30,2400,8,8,0.0005,0.0005,10,0,0,0,1.5,64,1 74 | 73,49,0.57,0.49,0,0,0,0.7,0,0,250,30,2400,8,8,0,0,10,0,0,0,1.5,58,0.9 75 | 74,25,2,0,0,0,4.6,0,0,0,250,30,1200,10,5,0,0,0,0,0,0,2,40,0.8 76 | 75,25,2,0,0,0,4.6,0,0,0,250,30,1200,10,5,0.02,0.005,0,0,0,0,2,40,0.8 77 | -------------------------------------------------------------------------------- /data/zinc/ion_channel-fda.csv: -------------------------------------------------------------------------------- 1 | zinc_id,smiles 2 | ZINC000003807804,Clc1ccccc1C(c1ccccc1)(c1ccccc1)n1ccnc1 3 | ZINC000095616601,COc1cc2nc(N3CCN(C(=O)c4ccco4)CC3)nc(N)c2cc1OC 4 | ZINC000003831405,C=C[C@H]1CN2CC[C@H]1C[C@@H]2[C@@H](O)c1ccnc2ccc(OC)cc12 5 | ZINC000085205448,COC(=O)C1=C(C)NC(C)=C(C(=O)OC)C1c1ccccc1[N+](=O)[O-] 6 | ZINC000000607939,COC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)C)[C@H]1c1cccc2nonc12 7 | ZINC000000607986,O[C@@H](CNC[C@@H](O)[C@H]1CCc2cc(F)ccc2O1)[C@H]1CCc2cc(F)ccc2O1 8 | ZINC000245204924,Cl[C@H]1[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H]1Cl 9 | ZINC000003806063,C[C@H](Cc1cc2c(c(C(N)=O)c1)N(CCCO)CC2)NCCOc1ccccc1OCC(F)(F)F 10 | ZINC000000004448,Cc1nccn1C[C@H]1CCc2c(c3ccccc3n2C)C1=O 11 | ZINC000019632718,COCCOC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)C)[C@H]1c1cccc([N+](=O)[O-])c1 12 | ZINC000000897251,COc1cc2c(cc1OC)C(=O)[C@H](CC1CCN(Cc3ccccc3)CC1)C2 13 | ZINC000019632713,COCCOC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)C)[C@@H]1c1cccc([N+](=O)[O-])c1 14 | ZINC000238809662,COc1ccc(C[C@H]2c3cc(OC)c(OC)cc3CC[N@@+]2(C)CCC(=O)OCCCCCOC(=O)CC[N@@+]2(C)CCc3cc(OC)c(OC)cc3[C@H]2Cc2ccc(OC)c(OC)c2)cc1OC 15 | ZINC000003794601,CN(C)CCC[C@]1(c2ccc(F)cc2)OCc2cc(C#N)ccc21 16 | ZINC000005844788,O[C@@H](CNC[C@@H](O)[C@H]1CCc2cc(F)ccc2O1)[C@@H]1CCc2cc(F)ccc2O1 17 | ZINC000001530575,COc1cc(CNC(=O)CCCC/C=C/C(C)C)ccc1O 18 | 
ZINC000004474460,C=C1CC[C@H](O)C/C1=C/C=C1\CCC[C@@]2(C)[C@H]1CC[C@@H]2[C@H](C)CCCC(C)C 19 | ZINC000003800706,CN(C)CCC[C@@]1(c2ccc(F)cc2)OCc2cc(C#N)ccc21 20 | ZINC000000001084,Cn1c(=O)c2c(ncn2C)n(C)c1=O 21 | ZINC000034676245,N[C@@H]1CON=C1O 22 | ZINC000000596731,CN(CCOc1ccc(NS(C)(=O)=O)cc1)CCc1ccc(NS(C)(=O)=O)cc1 23 | ZINC000084400879,COC(=O)C1=C(C)NC(C)=C(C(=O)OCCN(C)Cc2ccccc2)[C@H]1c1cccc([N+](=O)[O-])c1 24 | ZINC000019796087,COC(=O)C1=C(C)NC(C)=C(C(=O)OCCN(C)Cc2ccccc2)[C@@H]1c1cccc([N+](=O)[O-])c1 25 | ZINC000000596881,Clc1ccc(CO[C@H](Cn2ccnc2)c2ccc(Cl)cc2Cl)cc1 26 | ZINC000000003876,Cc1ccc(-c2nc3ccc(C)cn3c2CC(=O)N(C)C)cc1 27 | ZINC000003079342,C[N+](C)(C)CCOC(N)=O 28 | ZINC000000020228,C[C@H](NC(C)(C)C)C(=O)c1cccc(Cl)c1 29 | ZINC000001550477,CS(=O)(=O)CCNCc1ccc(-c2ccc3ncnc(Nc4ccc(OCc5cccc(F)c5)c(Cl)c4)c3c2)o1 30 | ZINC000000968310,CN1CCN2c3ncccc3Cc3ccccc3[C@H]2C1 31 | ZINC000006745272,CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)c(F)c2)ccn1 32 | ZINC000238809663,COc1ccc(C[C@@H]2c3cc(OC)c(OC)cc3CC[N@@+]2(C)CCC(=O)OCCCCCOC(=O)CC[N@@+]2(C)CCc3cc(OC)c(OC)cc3[C@H]2Cc2ccc(OC)c(OC)c2)cc1OC 33 | ZINC000019632706,COC(=O)C1=C(C)NC(C)=C(C(=O)OCC(C)C)[C@@H]1c1ccccc1[N+](=O)[O-] 34 | ZINC000000591993,COC(=O)C1=C(C)NC(C)=C(C(=O)OCC(C)C)[C@H]1c1ccccc1[N+](=O)[O-] 35 | ZINC000019796168,CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12 36 | ZINC000000075126,Cc1nccn1C[C@@H]1CCc2c(c3ccccc3n2C)C1=O 37 | ZINC000001530886,CCCc1nc2c(C)cc(-c3nc4ccccc4n3C)cc2n1Cc1ccc(-c2ccccc2C(=O)O)cc1 38 | ZINC000238809665,COc1ccc(C[C@H]2c3cc(OC)c(OC)cc3CC[N@+]2(C)CCC(=O)OCCCCCOC(=O)CC[N@@+]2(C)CCc3cc(OC)c(OC)cc3[C@H]2Cc2ccc(OC)c(OC)c2)cc1OC 39 | ZINC000000537822,O=C(CCCN1CCC(O)(c2ccc(Cl)cc2)CC1)c1ccc(F)cc1 40 | ZINC000001530611,CNCCCN1c2ccccc2CCc2ccccc21 41 | ZINC000238809664,COc1ccc(C[C@@H]2c3cc(OC)c(OC)cc3CC[N@+]2(C)CCC(=O)OCCCCCOC(=O)CC[N@@+]2(C)CCc3cc(OC)c(OC)cc3[C@H]2Cc2ccc(OC)c(OC)c2)cc1OC 42 | ZINC000000016154,CCOC(=O)Nc1ccc(NCc2ccc(F)cc2)cc1N 43 | 
ZINC000000006481,Nc1nc2ccc(OC(F)(F)F)cc2s1 44 | ZINC000011679756,CC1=NN(c2ccc(C)c(C)c2)C(=O)/C1=N\Nc1cccc(-c2cccc(C(=O)O)c2)c1O 45 | ZINC000001999441,O[C@@H](CNC[C@@H](O)[C@@H]1CCc2cc(F)ccc2O1)[C@H]1CCc2cc(F)ccc2O1 46 | ZINC000100018854,CN1[C@H]2CCC[C@@H]1C[C@H](NC(=O)c1nn(C)c3ccccc13)C2 47 | ZINC000103105084,O=C(O[C@@H]1C[C@@H]2C[C@H]3C[C@H](C1)N2CC3=O)c1c[nH]c2ccccc12 48 | ZINC000003922770,C[C@@H](O)[C@H]1C(=O)N2C(C(=O)O)=C(S[C@@H]3CN[C@H](CNS(N)(=O)=O)C3)[C@H](C)[C@H]12 49 | ZINC000003830842,O=C(NC[C@@H]1CCCCN1)c1cc(OCC(F)(F)F)ccc1OCC(F)(F)F 50 | ZINC000004658552,NCC(=O)O 51 | ZINC000000002212,Cc1nnc2n1-c1ccc(Cl)cc1C(c1ccccc1Cl)=NC2 52 | ZINC000003951740,Cc1cccc(C)c1OCC(=O)N[C@@H](Cc1ccccc1)[C@@H](O)C[C@H](Cc1ccccc1)NC(=O)[C@H](C(C)C)N1CCCNC1=O 53 | ZINC000036294079,CN[C@@]1(c2ccccc2Cl)CCCCC1=O 54 | ZINC000000000565,Cc1ccccc1[C@H](OCCN(C)C)c1ccccc1 55 | ZINC000001530697,CSc1ccc2c(c1)N(CC[C@H]1CCCCN1C)c1ccccc1S2 56 | ZINC000001530707,CCC(=O)C(C[C@H](C)N(C)C)(c1ccccc1)c1ccccc1 57 | ZINC000019796155,CN1CCN(C2=Nc3cc(Cl)ccc3Nc3ccccc32)CC1 58 | ZINC000019796080,O=C(CCCN1CC=C(n2c(=O)[nH]c3ccccc32)CC1)c1ccc(F)cc1 59 | ZINC000019632834,CN1CCN(C(=O)O[C@H]2c3nccnc3C(=O)N2c2ccc(Cl)cn2)CC1 60 | ZINC000019796018,CN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1 61 | ZINC000035999642,CN[C@]1(c2ccccc2Cl)CCCCC1=O 62 | ZINC000013537284,Cc1nc[nH]c1CN1CCc2c(c3ccccc3n2C)C1=O 63 | ZINC000019418959,CN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1 64 | ZINC000001530706,CCC(=O)C(C[C@@H](C)N(C)C)(c1ccccc1)c1ccccc1 65 | ZINC000001481956,Cc1nc2n(c(=O)c1CCN1CCC(c3noc4cc(F)ccc34)CC1)CCC[C@H]2O 66 | ZINC000001536779,CNCC[C@H](Oc1cccc2ccccc12)c1cccs1 67 | ZINC000001530760,CCCNC[C@@H](O)COc1ccccc1C(=O)CCc1ccccc1 68 | ZINC000000006427,CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21 69 | ZINC000000007673,COC[C@@H](NC(C)=O)C(=O)NCc1ccccc1 70 | ZINC000000538312,Cc1nc2n(c(=O)c1CCN1CCC(c3noc4cc(F)ccc34)CC1)CCCC2 71 | ZINC000000013156,Nc1nnc(-c2cccc(Cl)c2Cl)c(N)n1 72 | ZINC000000001464,CCOC(=O)c1ncn2c1CN(C)C(=O)c1cc(F)ccc1-2 73 
| ZINC000006716957,Cc1cn(-c2cc(NC(=O)c3ccc(C)c(Nc4nccc(-c5cccnc5)n4)c3)cc(C(F)(F)F)c2)cn1 74 | ZINC000004475353,CC1=C(CC(=O)O)c2cc(F)ccc2/C1=C\c1ccc([S@](C)=O)cc1 75 | ZINC000001530759,CCCNC[C@H](O)COc1ccccc1C(=O)CCc1ccccc1 76 | ZINC000001530811,CCCCNc1ccc(C(=O)OCCN(C)C)cc1 77 | ZINC000002522669,CCC(=O)N(c1ccccc1)C1CCN(CCc2ccccc2)CC1 78 | ZINC000001530571,O=C1CC2(CCCC2)CC(=O)N1CCCCN1CCN(c2ncccn2)CC1 79 | ZINC000000538550,O=C1Cc2cc(CCN3CCN(c4nsc5ccccc45)CC3)c(Cl)cc2N1 80 | ZINC000000000096,CN(C)CC[C@@H](c1ccc(Br)cc1)c1ccccn1 81 | ZINC000004175630,Oc1nc2ccccc2n1C1CCN(CCCC(c2ccc(F)cc2)c2ccc(F)cc2)CC1 82 | ZINC000004213946,O[C@@H](CNC[C@@H](O)[C@@H]1CCc2cc(F)ccc2O1)[C@@H]1CCc2cc(F)ccc2O1 83 | ZINC000001996117,NC(=O)C(c1ccccc1)(c1ccccc1)[C@@H]1CCN(CCc2ccc3c(c2)CCO3)C1 84 | ZINC000000057206,C[C@@H](NC(C)(C)C)C(=O)c1cccc(Cl)c1 85 | ZINC000004214700,Cc1nc2n(c(=O)c1CCN1CCC(c3noc4cc(F)ccc34)CC1)CCC[C@@H]2O 86 | ZINC000004340269,NC(N)=NC(=O)c1nc(Cl)c(N)nc1N 87 | ZINC000000527386,Fc1ccc([C@@H]2CCNC[C@H]2COc2ccc3c(c2)OCO3)cc1 88 | ZINC000000009073,CN1CCc2cccc3c2[C@H]1Cc1ccc(O)c(O)c1-3 89 | ZINC000001482113,N[C@@H](CCC(=O)O)C(=O)O 90 | ZINC000000020244,CN(C)CCOC(c1ccccc1)c1ccccc1 91 | ZINC000000391812,CN1CCC[C@H]1c1cccnc1 92 | ZINC000000896543,O=C(NC[C@H]1CCCCN1)c1cc(OCC(F)(F)F)ccc1OCC(F)(F)F 93 | ZINC000000896557,CN1[C@H]2CCC[C@H]1CC(NC(=O)c1nn(C)c3ccccc13)C2 94 | ZINC000001530579,COc1ccccc1OCCNC[C@H](O)COc1cccc2[nH]c3ccccc3c12 95 | ZINC000001530580,COc1ccccc1OCCNC[C@@H](O)COc1cccc2[nH]c3ccccc3c12 96 | ZINC000001530637,CNCC[C@H](Oc1ccc(C(F)(F)F)cc1)c1ccccc1 97 | ZINC000001530689,CC/C(=C(\c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1 98 | ZINC000000897085,O[C@@H](c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)[C@H]1CCCCN1 99 | ZINC000000402830,CN1CCC[C@@H]1CCO[C@](C)(c1ccccc1)c1ccc(Cl)cc1 100 | ZINC000052509463,CC(C)(C)c1cc(C(C)(C)C)c(NC(=O)c2c[nH]c3ccccc3c2=O)cc1O 101 | ZINC000052957434,Cc1cc2c(s1)Nc1ccccc1N=C2N1CCN(C)CC1 102 | ZINC000034051848,Cc1ccc(Sc2ccccc2N2CCNCC2)c(C)c1 103 | 
ZINC000118912393,C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2O 104 | ZINC000100001964,CCOC(=O)C1=C(COCCN)NC(C)=C(C(=O)OC)[C@@H]1c1ccccc1Cl 105 | ZINC000094566093,COc1cc2nc(N3CCN(C(=O)[C@H]4COc5ccccc5O4)CC3)nc(N)c2cc1OC 106 | ZINC000003830716,CCOC(=O)C1(c2ccccc2)CCN(CCC(C#N)(c2ccccc2)c2ccccc2)CC1 107 | ZINC000094566092,COc1cc2nc(N3CCN(C(=O)[C@@H]4COc5ccccc5O4)CC3)nc(N)c2cc1OC 108 | ZINC000100001918,COC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)C)[C@@H]1c1cccc2nonc12 109 | ZINC000100001965,CCOC(=O)C1=C(COCCN)NC(C)=C(C(=O)OC)[C@H]1c1ccccc1Cl 110 | ZINC000030691763,CCCC(=O)OCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)[C@H]1c1cccc(Cl)c1Cl 111 | ZINC000030691760,CCCC(=O)OCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)[C@@H]1c1cccc(Cl)c1Cl 112 | ZINC000000897089,O[C@H](c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)[C@@H]1CCCCN1 113 | ZINC000000968336,Cc1ccc(O)c([C@H](CCN(C(C)C)C(C)C)c2ccccc2)c1 114 | ZINC000000001148,CN(C)CC/C=C1/c2ccccc2COc2ccccc21 115 | ZINC000000897240,CN1CCC[C@H](n2nc(Cc3ccc(Cl)cc3)c3ccccc3c2=O)CC1 116 | ZINC000000537931,CCOC(=O)N1CCC(=C2c3ccc(Cl)cc3CCc3cccnc32)CC1 117 | ZINC000001842633,CNCC[C@@H](Oc1ccccc1C)c1ccccc1 118 | ZINC000003936683,O=C(O[C@H]1CN2CCC1CC2)N1CCc2ccccc2[C@@H]1c1ccccc1 119 | ZINC000012402836,CCN(CC)CCOc1ccc(/C(=C(\Cl)c2ccccc2)c2ccccc2)cc1 120 | ZINC000000896455,CN(C)CC[C@H](c1ccc(Br)cc1)c1ccccn1 121 | ZINC000003079336,CC(=O)OCC[N+](C)(C)C 122 | ZINC000000643055,Clc1ccc(CO[C@@H](Cn2ccnc2)c2ccc(Cl)cc2Cl)cc1 123 | ZINC000000044027,CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc21 124 | ZINC000003830212,CCCCc1oc2ccccc2c1C(=O)c1cc(I)c(OCCN(CC)CC)c(I)c1 125 | ZINC000000597013,COc1cc2c(cc1OC)C(=O)[C@@H](CC1CCN(Cc3ccccc3)CC1)C2 126 | ZINC000000020237,CCN(CC)CC(=O)Nc1c(C)cccc1C 127 | ZINC000000020240,CC(C)NC[C@@H](O)COc1cccc2ccccc12 128 | ZINC000000000509,CN1CCN2c3ncccc3Cc3ccccc3[C@@H]2C1 129 | ZINC000019228902,OCCN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1 130 | ZINC000019203912,OCCN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1 131 | ZINC000008220878,CCCCCCCN(CC)CCC[C@H](O)c1ccc(NS(C)(=O)=O)cc1 132 | 
ZINC000019144226,CCN(CC)CCC[C@@H](C)Nc1ccnc2cc(Cl)ccc12 133 | ZINC000008214402,CCCCCCCN(CC)CCC[C@@H](O)c1ccc(NS(C)(=O)=O)cc1 134 | ZINC000019144216,COc1ccc(CN(CCN(C)C)c2ccccn2)cc1 135 | ZINC000000014360,CNS(=O)(=O)Cc1ccc2[nH]cc(CCN(C)C)c2c1 136 | ZINC000000020257,Cc1cccc(C)c1OC[C@@H](C)N 137 | ZINC000000601229,CN1CCC[C@@H](n2nc(Cc3ccc(Cl)cc3)c3ccccc3c2=O)CC1 138 | ZINC000001530638,CNCC[C@@H](Oc1ccc(C(F)(F)F)cc1)c1ccccc1 139 | ZINC000001530695,CSc1ccc2c(c1)N(CC[C@@H]1CCCCN1C)c1ccccc1S2 140 | ZINC000001530769,CCC(=O)O[C@](Cc1ccccc1)(c1ccccc1)[C@H](C)CN(C)C 141 | ZINC000043763856,CN[C@@]1(C)[C@@H]2CC[C@@H](C2)C1(C)C 142 | ZINC000000001681,CCOC(=O)C1(c2ccccc2)CCN(C)CC1 143 | ZINC000003812888,COc1ccc(CCN(C)CCC[C@](C#N)(c2ccc(OC)c(OC)c2)C(C)C)cc1OC 144 | ZINC000003786192,CC1=C(CC(=O)O)c2cc(F)ccc2/C1=C\c1ccc([S@@](C)=O)cc1 145 | ZINC000001853550,CN[C@H]1CC[C@@H](c2ccc(Cl)c(Cl)c2)c2ccccc21 146 | ZINC000003784182,COc1ccc(-c2ccc3cc(C(=O)O)ccc3c2)cc1C12CC3CC(CC(C3)C1)C2 147 | ZINC000003812933,C[C@@]12CC3CC(N)(C1)C[C@](C)(C3)C2 148 | ZINC000000056556,CC(C)NC[C@H](O)COc1cccc2ccccc12 149 | ZINC000003871832,COc1ccc(CCN(C)CCC[C@@](C#N)(c2ccc(OC)c(OC)c2)C(C)C)cc1OC 150 | ZINC000003872605,COCCCC/C(=N\OCCN)c1ccc(C(F)(F)F)cc1 151 | ZINC000003805768,COc1cc2c(cc1OC)CC(=O)N(CCCN(C)C[C@H]1Cc3cc(OC)c(OC)cc31)CC2 152 | ZINC000000968257,CN(C)CCC=C1c2ccccc2CCc2ccccc21 153 | ZINC000003932831,C[C@]12CC[C@H]3[C@@H](CC[C@H]4NC(=O)C=C[C@]34C)[C@@H]1CC[C@@H]2C(=O)Nc1cc(C(F)(F)F)ccc1C(F)(F)F 154 | ZINC000003964325,CCN(CC)CCNC(=O)c1c(C)[nH]c(/C=C2\C(=O)Nc3ccc(F)cc32)c1C 155 | ZINC000000057062,Cc1cccc(C)c1OC[C@H](C)N 156 | ZINC000000113404,CN(C)CC[C@@H](c1ccc(Cl)cc1)c1ccccn1 157 | ZINC000000113410,CN(C)CC[C@H](c1ccc(Cl)cc1)c1ccccn1 158 | ZINC000001851149,O=C1CCc2ccc(OCCCCN3CCN(c4cccc(Cl)c4Cl)CC3)cc2N1 159 | ZINC000001530625,CCCCCc1cc(O)c2c(c1)OC(C)(C)[C@@H]1CCC(C)=C[C@@H]21 160 | ZINC000000020245,CN(C)CCCN1c2ccccc2CCc2ccccc21 161 | 
ZINC000000643153,CC(=O)N1CCN(c2ccc(OC[C@@H]3CO[C@@](Cn4ccnc4)(c4ccc(Cl)cc4Cl)O3)cc2)CC1 162 | ZINC000000155269,Cc1ccccc1[C@@H](OCCN(C)C)c1ccccc1 163 | ZINC000000000903,Cc1nnc2n1-c1ccc(Cl)cc1C(c1ccccc1)=NC2 164 | ZINC000001550499,C[C@@H](NCCCc1cccc(C(F)(F)F)c1)c1cccc2ccccc12 165 | ZINC000019144231,CCN(CC)CCC[C@H](C)Nc1ccnc2cc(Cl)ccc12 166 | ZINC000000621893,COc1ccc([C@@H]2Sc3ccccc3N(CCN(C)C)C(=O)[C@@H]2OC(C)=O)cc1 167 | ZINC000003795819,O=C1c2cccc3c2[C@H](CCC3)CN1[C@@H]1CN2CCC1CC2 168 | ZINC000019632628,OCCOCCN1CCN(C2=Nc3ccccc3Sc3ccccc32)CC1 169 | ZINC000049933061,CCCCc1oc2ccc(NS(C)(=O)=O)cc2c1C(=O)c1ccc(OCCCN(CCCC)CCCC)cc1 170 | ZINC000085534336,CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@H](N(C)C)[C@H]2O)[C@](C)(O)C[C@@H](C)C(=O)[C@H](C)[C@@H](O)[C@]1(C)O 171 | ZINC000096272772,Cc1cc(Nc2ncc(Cl)c(Nc3ccccc3S(=O)(=O)C(C)C)n2)c(OC(C)C)cc1C1CCNCC1 172 | ZINC000001571009,CC(C)(C)CC(C)(C)c1ccc(OCCOCC[N+](C)(C)Cc2ccccc2)cc1 173 | ZINC000003874185,O[C@@H](c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)[C@@H]1CCCCN1 174 | ZINC000030691754,COc1ccc(NC(=O)c2ccc(C(=N)N(C)C)cc2)c(C(=O)Nc2ccc(Cl)cn2)c1 175 | ZINC000038140873,CN[C@]1(C)[C@@H]2CC[C@@H](C2)C1(C)C 176 | ZINC000003794794,O=C1c2c(O)ccc(O)c2C(=O)c2c(NCCNCCO)ccc(NCCNCCO)c21 177 | ZINC000003802417,C[C@H]1CN(C[C@H](Cc2ccccc2)C(=O)NCC(=O)O)CC[C@@]1(C)c1cccc(O)c1 178 | ZINC000000537964,O[C@H](c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)[C@H]1CCCCN1 179 | ZINC000000537964,O[C@H](c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)[C@H]1CCCCN1 180 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - rdkit 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - joblib=0.14.1 7 | - networkx=2.4 8 | - numpy=1.18.1 9 | - pandas=0.25.3 10 | - pymatgen=2019.12.3 11 | - python=3.7.0 12 | - rdkit=2019.09.3.0 13 | - scikit-learn=0.22.1 14 | - scipy=1.4.1 15 | - pip: 
16 | - jupyter==1.0.0 17 | - matplotlib==3.1.3 18 | - tqdm==4.42.1 19 | -------------------------------------------------------------------------------- /models/9.3_rdkit_pls.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/funatsu-lab/support-page/cb0adbbba5fad51af2e0c689b51d86f4b77559fa/models/9.3_rdkit_pls.joblib -------------------------------------------------------------------------------- /models/morgan_svm.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/funatsu-lab/support-page/cb0adbbba5fad51af2e0c689b51d86f4b77559fa/models/morgan_svm.joblib -------------------------------------------------------------------------------- /models/rdfrags_svm.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/funatsu-lab/support-page/cb0adbbba5fad51af2e0c689b51d86f4b77559fa/models/rdfrags_svm.joblib -------------------------------------------------------------------------------- /models/rdkit_svm.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/funatsu-lab/support-page/cb0adbbba5fad51af2e0c689b51d86f4b77559fa/models/rdkit_svm.joblib -------------------------------------------------------------------------------- /notebooks/1.4-fragment-visualization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# RDKitを使った分子構造可視化" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stderr", 17 | "output_type": "stream", 18 | "text": [ 19 | "RDKit WARNING: [14:04:23] Enabling RDKit 2019.09.3 jupyter extensions\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "from rdkit import Chem\n", 25 
| "from rdkit.Chem import rdMolDescriptors\n", 26 | "from rdkit.Chem.Draw import IPythonConsole\n", 27 | "from rdkit.Chem import Draw" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | "'2019.09.3'" 39 | ] 40 | }, 41 | "execution_count": 2, 42 | "metadata": {}, 43 | "output_type": "execute_result" 44 | } 45 | ], 46 | "source": [ 47 | "import rdkit \n", 48 | "rdkit.__version__" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "## IbuprofenからMorgan fingerprintを取り出す" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "[1, 79, 80, 283, 310, 389, 650, 807, 854, 857]\n" 68 | ] 69 | }, 70 | { 71 | "data": { 72 | "image/png": "\n", 73 | "text/plain": [ 74 | "" 75 | ] 76 | }, 77 | "execution_count": 3, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "ibuprofen = Chem.MolFromSmiles('CC(C)CC1=CC=C(C=C1)C(C)C(=O)O')\n", 84 | "\n", 85 | "bi = {}\n", 86 | "fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(ibuprofen,\n", 87 | " radius=2,\n", 88 | " bitInfo=bi)\n", 89 | "print(list(fp.GetOnBits())[:10])\n", 90 | "ibuprofen" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "## Morgan fingerprintを色付けして表示\n", 98 | "\n", 99 | "RDKit 2019.09 以上のバージョンでは、`Draw.DrawMorganBit`を利用しましょう\n", 100 | "\n", 101 | "ID: 854の以下の例では、ibuprofenの左端が取得されています。" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 4, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAJYAAACWCAIAAACzY+a1AAAOZElEQVR4nO2deWwTVx7Hf+OxYzsen4nNJlCS0k0aSIGqDdLSqmK7FarCamHZqG0ghLSACohDS4T4p9qGKMduhXpu0WpTzipsVK5CqxZadlXUwnZBLO0WQjkaIARoYic+4tueY/+YdDL1kTiO4/Fz3keR5fHzzPzsb76/d8y8Z4LjOMCgjEzqADDjBUuIPFhC5MESIg+WEHmwhMiDJUQeLCHyYAmRB0uIPFhC5MESIg+WEHmwhMiDJUQeLCHyYAmRB0uIPFhC5MkqCY8fP+52u6WOIt0QWXP70/Xr12fNmmWxWLq6utRqtdThpI/scWFzczPDMIsXL55U+kHWuPCHH36YOXMmQRDXrl178MEHpQ4nrWSJC5uammiafumllyabfpAdLpzMFoTscOFktiBkgQu7urrKysomrQUhC1zIW/DFF1+cnPoB6i7EFgTUXdjc3EzTdF1d3aTVD5B2oWDBq1evzpgxQ+pwJANhF7a0tNA0vXLlysmsH6Drwps3b5aVlbEs+/3335eUlEgdjpSg6sKWlpZwOFxXVzfJ9QNEXdjd3V1aWsowDLYgIOrCpqamUCi0cuVKrB+g6ELBgleuXCktLZU6HOlBz4XNzc2hUKi2thbrx4OYC+/cuVNSUoItKAYxF/IWXLFiBdZPACUXChbs7Ox8+OGHpQ4nU0DJhbwFa2pqsH5ikHEhtmA8kHFhS0sLtmBM0HAhtuAIoOHC1tbWUCi0fPlyrF80CLiwp6enpKSEpmlswZgg4MLW1tZgMLhs2TKsX0wy3YWCBS9fvlxWViZ1OJmIXOoAhmAYrqsrcOtWwGoN+f0sAOTmyiyWnP37m4LBYE1NzWTTr7GxsaGhIZF3ZkQivX078MEHttOnnd3dAV4/APD52G++uXnw4H6ZjFy9epu0EaaNxsbGUV+JQHoJL1xwf/65w+NhootOnnyXpkMVFYtv3DBfvOhJf2zpp6GhQaxZIl6UuC789lvP+fOxJ3W6XH2vvPIrmg6/+uo/CwvLAGD+fN3s2Zr0BigNgoqJ5FIpXdjfH75wIe6k3E8/fSccDlZULOb1A4Bz5wbtdjpd0UmG4LwIR8ZDSgnPnXOzbOwil6vv7NkOgpAtWrRZeJFl4fz5wTQFJxERmTMRFSWT0OWi790Lxis9ceKdcDhQUfE7wYIAQBDQ0xOMWWtmDdGZc9RcKpmEPT1x9XO5+s6c6SAIWWXlZvHrHAccB3fvxt0RdWh6uJpIsEcBEvYLHY7hcINBr8836PcP/X3xxd5wOFBRsXjq1JnRO2Zrdchx3MDAgEwmoyhqTMsFTIiEfr/f8XMCgUDEi7du2ex2h8/n8nrtNB0W706SpFKZO3/+8zEPHgrFqT8zHpqm7XY7RVG5ubnRpV6vl2VZlmWdTqfP58vLy0vwsKNLGA6HnU6ny+VyiuA3hRfFmy6Xa2yfDCAnR61W63JzdWq1Tq3WWa23bLbbJ0++W17+a4KITPVyOTHW42cIHo+HYRiXy+X1enU6nVKpFIo4jvN6vcKmuGhUYkh45cqVNWvWCJL4fL6xxqrX6w0GwwiPRqOxry/n1i05r5lcrhDv7vcPNjY+fePGf06f3vf006siDq7TZcqg4JjgOC4YHKrFaZqO6I7zFuSfy2QyjWYM3d8YXwfHcV9//bX4FZVKZYyFWq2OLjKbzQqFIvqwEVit4WPH+mMWqdW65cv/snNn3dGjLeXlv7FYisWlhYU5iX+8zIEgCIvF4vP5PB4PSZIqlUooirCgRqMhiDFkmhijM36//+LFi4JpKIoaZ/TxOHjQ5nTGbZvs2bPx3LmjpaVP1NcfEj5SXp6iqip/guJJDxzHMQwjlw+bx+PxCOuOyWQyi8UyJgljdCrUavWTTz5ZXl4+bdq0idMPACoqtAAQL9oXXmjS6SzXr//7yy/fF+0ygfGkB4IgxPqN04Ig7ejMjBmqhx5Sxxu
j1WiMtbU7AODIkSab7TYAlJaqi4pUsd+NLOOpBYf2SnVIY2PBAv3UqUqI48U5cxbOm7ckGPS1t2974IGcp57Spzu+CWb8FgTJJZTLicpK45w5cf/1li1r1enMV6+euXPnCEmi152gabq/vz8QCMQsjbBgzP7iqGTKjRcOB/3dd97btwPB4HDPXaWSFRer7t79V01NlU6nu3Tp0vTp0yUMMgmcTqff7wcAhUKh0+lycoab0xzHWa1WQUKtVptcyyNTJOThOBgcpL1eFgA0GplOJ+fzynPPPXf48OFnnnnm1KlTSaQaqaBp2mazCZtGo1Hcl4hoiJrNZpksmaSYWRLGo7+/v7y83Gq17t69e9WqyM5+xiJYEADkcrnZbBaKUmVBkLwuTJD8/Py3334bAOrr63t6eqQOJyFomhb0AwCtVisuFdeCBEEkVwvyoCEhAFRXV1dVVblcrnXr1kkdS0KwLEuSJP9cLpePMBxDUVRyKZQHjUTKY7PZysvLbTbbvn376urqpA4nIfx+v8fj0Wq18WpBfuBtskgIAO3t7bW1tXq9/vLly9OmTZM6nGRIYS3Ig0wi5VmxYsXSpUsRSqfRpLAW5EFMQgDYuXOnyWT65JNP2tvbpY5lzKS2FuRBT8KCgoI33ngDADZt2nTv3j2pw4mEjXdTHgBMgAUBRQkBoK6ubsmSJU6nc/369VLHEsng4KDVavV6vdGNjOgR0fFbEJBrzgjcv3//kUcecTgcHR0d1dXVUoczhHg4hiTJvLw8oV8BqW6ICiDpQgAoLCzcsWMHAGzcuLGvr0/qcIbweIYnfhAEIdZvgiwI6EoIAKtXr66srBwYGFi7dq3UsQAAMAwjHo6J6Cr4fD5xLZjEdcF4ICwhALz33nsGg+H48eOHDh2SOhYQ/2ibXC4X3wvKcZzYoCm0IKBbFwq0tbWtXbs2Pz+/s7PTYrFIGEkgEPB4POFwGAA0GptMdp5hrAAgk+Wz7BM+39B9oSmsBYcOiLqEHMdVVlZ+9tln1dXVHR0dUgcTdLlO+f00QCuA0LtQAuwCMPAbFEVFDHmPE+QlBIDu7u7Zs2e73e7Dhw9XVVVJFQZN33E4ttL0zaiSpQDCBTK/ycQolalcCRftupCnqKjotddeA4D169eLL7GmE5q+MzCwJpZ+CoDfizY/cjpfDoevp/DU2SAhAKxbt27hwoU2m23Lli3pPzvHBR2OrSxrj1X4WwDTT8/9AB+xrNvh2Mpx3lhvToYskZAgiLa2NoqiDhw48OGHH6b57F7vgVj+AwAFwFLR5scAgwDAMPfd7l2pOnuWSAgAxcXFra2tALBhwwa7PaYhJgSOC3u9/4hTuCjCgkKBz3ckVUbMHgkBYMOGDQsWLPjxxx/r6+vTdtJQ6ALLOmOVKAD+INr8GGB4zhfH+QKBsykJIKsklMlke/fupShq//79x44dS89Jw+HLcUrEFgyILTjajmMjGzoVEbz11ltbtmwpKCjo7Ow0Go0pP37EXNf799+3Ws+5XLTTyQSDbCDA8c+Lix9btWrr3LkVAABwEmBnxHFUqoVG45/HHw+Sc/VGZvPmzUePHv3qq6+2bt26e/fukd/s9/ujZyBHI7ynt7c3wX/6a9e+nTfv6ty5jwMQANMBCICIHVNjniyUUCaT7dq169FHH92zZ4/JZCoqKhLPQHY4HOLNUCg0poOTJMnP2TMajfyT3Nze3NwurZbU60nxY0FBZV7eip/2mwXwLMDJnx/KHH38JMjCRMrz+uuvv/nmm4lc1o83ATbmTNgpU6aILyEBQDB4xm7/Y6wDEwDbAR77aTMAsAmgVyg2GJrU6spkP5/oNNkqIcMwvb2927ZtE+a6iq0jPIrvDUwOjgtZrc+ybMxlrCwA7wIIlyz+B/AnAA6AIIgci+WETKYb59khiyVMJ2733zyeeJXuIgDx3SF/BfgcADSa53W61CzvmFWdCqmgqDqSjHdT6wmAb0SbqwHySdJMUSm7TI1dmBrC4Rt
2+8tx0ukUgL8K6ZQgLplMv8jJmZOqU2MXpgaFosRk+jtJFsYq7APYL2xw3Gya/mUKT40lTBkKRanZ3KHR1BKEBuDxnxeeAOgUNgYHBxkmZYsB4kSaYhiGcToHQiFGpToL8F8AjiQtCsUsufyJgQGf8G0rlUqTyTTyoRIES5hKAoGA0+nkv9Loibter3dwcHg9VYPBMKbl8uKBE2kqES8ow7KsWDAA0Gg04rn2qUqnWMJUIpfLxbeP8gOw4jcYDAZhrQCWZZNYcjAaLGGKoShKvASd+OZgACBJUnz7WjAYjHhDEmAJU49erycIgiAIrVYbfbUrOp2OPBlqVHBzZkLw+/0KhUJcNYrh1xMSvnl+DD3pc2EXTghqtTqefhBVZQYCgXjLQyUCllAaKIoSp1OXy5V0OsUSpgmWZfnpFgJ8lSmUJt06xRKmg0AgYLPZHA6H2GqpSqdYwgnH6XTy4jEMI57ABlE9kOTSKZZwwhHfqOHz+YRF1nkiOvsRAzqJgCWccCiKErdOI6w26oDOqGAJJxyCIAwGg7DJMEzEbXPjTKdYwnSgUCh4q/HLYETfcxXROh1TOsWjM2mCX/FihNW33W63eD5+xPqzI4BdmCYIgqAoaoQljbVabXLpFEuYQej1wz8BkHg6xRJKSSgUEldkQpXJk2DrFEsoDRzHud3ugYGB6M6+uAeSyKUoLKEE8Eu18Y0Xr9crHjuN7oGI2zgxwRJKAEmSfLuGIAi9Xh/xQ3PidKpSqSiKGvkXmbGEEsArp1QqzWZzzCVJKYpSKpVtbW1Go1F8D1xsLTlMBrN9+/aIx2hw1z7TEZwX7ze2cSLNaBobG3nlGhoa4tWIWMLMRdCPJ56KWMLMJTpzxsyluC5EHuxC5MESIg+WEHmwhMiDJUQeLCHyYAmRB0uIPFhC5MESIg+WEHmwhMiDJUQeLCHyYAmRB0uIPFhC5MESIs//AUQF4Abdh4VgAAAAAElFTkSuQmCC\n", 112 | "text/plain": [ 113 | "" 114 | ] 115 | }, 116 | "execution_count": 4, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "Draw.DrawMorganBit(ibuprofen, 854,bi)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 5, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "[(1, ((1, 0), (10, 0))),\n", 134 | " (79, ((3, 1),)),\n", 135 | " (80, ((3, 0),)),\n", 136 | " (283, ((0, 1), (2, 1), (11, 1))),\n", 137 | " (310, ((5, 2), (9, 2))),\n", 138 | " (389, ((14, 1),)),\n", 139 | " (650, ((13, 0),)),\n", 140 | " (807, ((12, 0), (14, 0))),\n", 141 | " (854, ((1, 2),)),\n", 142 | " (857, ((4, 2),))]" 143 | ] 144 | }, 145 | "execution_count": 5, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "list(bi.items())[:10]" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 6, 157 | "metadata": {}, 158 | 
"outputs": [ 159 | { 160 | "data": { 161 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAXFElEQVR4nO3de1QTZ/oH8AkhBMQVkOoiWqtcFCXAgrIWFXS9dK3dtqtuRUEuChWhiqKiuFARlJuyuFr3iOKlrVZ7ZNvttqvU2sUL1S6CcjFcBBRFqyhKkoIkIbffHzmb5YegM8lMJjTfzx85GOdJvvF4Hp93zLzD0Wg0BAAA6MuC7QAAAAMb2igAgEHQRgEADII2CgBgELRRAKOSSqWtra2NjY1sBwHaWLIdAGBgk0qlov+SyWQ9f9knmUxGEISVldWHH34YGxvr6OjI9icAQ3HwhScAHbVaLZFIRCKRWCyWSCR9Pup+EIlEEolErVZTegs+n29nZyeXyyUSSWho6PHjxxn6LGA0aKPwC/fi8bDX/Pj48WOVSkXp9a2trR0cHGxsbLQ/vJiNjY2DgwNBEHfu3PHy8urs7Pzyyy8XLFjAzEcHI0EbhYFE2/LIrJ21h4nFYqp/w3t1wxf3R0dHRz6fr99n+eijj+Lj40eMGCEUCocOHarfi4ApQBsFk9PZ2ZmamqpbR/dcYiuVSkovZWlpaW9vb2dnZ29v7+DgoPu5v0d7e3uGPtTz1Gr1rFmzLl68GBER8fHHHxvtfYF2aKNgWoRC4Zo1ay5cuNDn71pbW5NZPuuOcXJysrAw3a+jNDc3e3t7d3Z2fvXVV++++y7bcUBPaKNgWhYvXlxYWBgYGBgWFqYdIXtOi1ZWVmwHpNnu3bvXr1/v7OwsFAq1p01hwEEbBRNSW1vr5eVlZWXV1NQ0cuRItuMYg1qtnjlzZklJyYoVKw4fPsx2HNCH6a53YKD75z//2dXVRakkLS1NrVavXLnSTHooQRAWFhaHDh2ysbE5cuRIUVER23FAH5hGgRFVVVW+vr5jxoy5efMmj8cjU2KGo6hObm5uYmLiyJEjhUKhMf+bC2iBaRQYkZ6ertFoFixYQLKHakvUavX7779vbj2UIIj169dPnz79p59+2rx5M9tZgDJMo0A/oVDo4+NjZWV169YtZ2dnMiV1dXUCgYDH4zU1NY0aNYrphCbo5s2bv/nNb+RyeVFR0e9//3u24wAFmEaBfqmpqWq1OjY2lmQPJXqMoubZQwmCGD9+fGpqqkajiYmJ6ejoYDsOUIA2CjTTaDReXl5OTk6bNm0iWVJXV3fq1Ck+n2/mS9rExER/f/+7d++a+Z/DgINFPTBCoVCQPysaEhJy8uTJDz74YN++fYymMn11dXV+fn5yufzs2bNz585lOw6QgjYKLNOeFbW0tGxoaHjttdfYjmMQ7VX8EolEIpEMHz58zJgxerxIRkZGSkrKmDFjbty4MXjwYLozAv3QRoFlA2IUvX79emtra89d8uLj43t9o8DPz6+iokL3y61bt6alpenxXkqlMiAgoLy8fM2aNXv37jU0OjAPbRTY1NjYOGHCBC6Xa4RRVKPRiMViW1vbXleUHjx4sL6+vueOokePHvXy8up5zJQpU65evdrzmStXrgQEBPR8JjAw8OrVq7pLV1esWBEbG6tf1Orqan9/f6VSWVxcPGPGDP1eBIwGu98DbSoqKnx9fSmVbNu2TaVSxcTE6NFDu7u7e/Y+kUg0Y8aM4cOH9zwmLi7u8uXLuoU2QRAlJSXTp0/vecyJEycuXrzY85nHjx/3eq+pU6cOHTq052ZRz3+joLi4uL/TwdevXz99+vSHH35I8qN5e3snJyenpqYuX768uroaS3tTpwGgg1AotLCwmDlzpkqlIlnS0NBgaWnJ4/Hu3LmjxzvOmTOn11/mb7/9ttcx8+fP73mAvb39uXPneh1z/Pjx3NzcgoKCwsLCc+fOlZWVdXR06JGnP2KxeMiQIQRBnDlzhnyVQqGYNGkSQRDr1q2jMQwwAW0U6LF48WKCIOL
j48mXhIaGEgQRGxur3zsuWbLE0dHR1dV10qRJs2fPXrRoUWlpaa9jamtrKyoqmpub29vb9XsXWvzlL38hCMLZ2ZlSjMrKSh6PZ2FhcenSJeaygeHQRoEGNTU1FhYWfD7//v37JEt0o2hzczOT0UyCSqXSnkmIjo6mVLh161aCIMaNG9fV1cVQNjAc2ijQIDg4mCCINWvWkC9ZtmwZQRCrVq1iLpVJqa+vt7GxIQiiqKiIfJVCodCebt64cSNz2cBAaKNgqNraWu0oeu/ePZIljY2N5jOK6uzcudPBweGLL76gVFVRUaFd2peUlDAUDAyELzyBoWQyWUFBgUgk0q5AyQgPDz927FhMTEx+fj6j2UyKSqVqa2tzcnKiWpicnJyZmTl+/PiKigrtSAsmBW0UjK2pqWnChAkcDufmzZtjx45lO84AIJfLJ0+eLBQKN2/enJ2dzXYc6A1bk4Cxbd++XalULl++HD2UJD6f/+mnn/J4vNzc3F5XAYApwDQKRnXr1i0PDw+MonpISkrKycnx8PCoqKiwtrZmOw78D6ZRMCrtKBoZGWnmPVSj0ezbt6/X1VMvlpaW5unpWV9fv2PHDuaCgR4wjYKeGhoa/vOf/4SEhFhakr2kGKOoztGjR1esWOHi4lJdXW1ra0uyqrS0dNq0aRwO58cff5w8eTKjCYECVr8nAANYSEgIQRDJycnkSyIjIwnqX0H/RVIoFNo+SOm6L41Gs2HDBoIgJk6cKJPJGMoGVGEaBX3osTOTbhStr693cXFhOqHp023jdP78+aCgIJJVcrncz8+vtrZW7434gHY4Nwr6SEtLU6lUUVFR5HdmysjIUCqV4eHh6KFa3t7eW7ZsUavVkZGRnZ2dJKv4fP6hQ4e4XG5mZua1a9cYTQgkYRoFyhobGydOnMjhcBoaGkhu8I5RtE8KhcLf33/QoEGnTp2idC+/hISEv/71rz4+PlevXu21fSoYH6ZRoKygoECpVEZFRZG/SYZ2FA0LC0MP7YnH4xUVFZWUlFC9H2pGRoa7u3tVVRW+jW8KMI0CZWq1urCwMCAgYPTo0WSOv3v37rhx41QqVV1dnbu7O9PxzMSVK1cCAwMtLCxKS0v9/PzYjmPWMI0CZRYWFsHBwSR7KEEQ6enp3d3dERER6KE0mjp1alxcnHZZoFAo2I5j1jCNArMwijKnq6vLx8enqalpx44dycnJbMcxX5hGgVnbt2/v7u4ODw9HD32ptra2jRs3yuVykscPGjSooKCAw+Gkp6cLhUJGs8ELYBoFBulG0dra2nHjxrEdx9TNnDnz4sWLSUlJWVlZ5Kvi4uL279/v6+tbWlra3z31gFGYRoGsjRs35uXldXV1kS/ZsWNHd3d3WFgYeigZOTk5XC43Nze3rKyMfNWuXbtcXFwqKiry8vKYywYvgGkUSNHjcviWlhZ3d3eMopRs2rRp165dEyZMuH79OvltnIqLi+fMmWNlZXXt2jVPT09GE8LzMI0CKXrszKQdRZctW4YeSt727dsnTpxYV1eXkZFBvmrWrFnR0dFyuTwqKkqlUjEXD/qENgovd+vWrc8++4zH423ZsoVkSUtLyyeffMLlcsmXAPHfaz15PB7Vbpibmzt69OjS0tLdu3czlA36xea+KDBA6LEz08qVKwmCCA8PZy7VLxj5mwP29P3333M4HD6fX1NTQ3skeAGcG4WXUKlUc+fO/eGHH/Q4K1pTUzN+/HimE4JOVFTUkSNHAgICSkpKuFwu23HMBRb18BJcLre4uLi6upr8WdHMzMzu7u6QkBD0UCPLy8t79dVXf/zxx48++ojtLGYE0yjQ7N69e25ubhhF2XLmzJm33npr0KBBlZWVuOTBODCNAs20o+jSpUvRQ2lx4cKF/Px88sfPnz8/IiKiq6srMjJSrVYzFwx0MI0Cne7du+fu7q5UKoVCoYeHB9txBrympiYPDw8ul1teXu7l5UWySiKRCASC+/fv7927d82aNYwmBALTKLy
AVCqlWpKVlSWXy5csWYIeSgs3N7dVq1Zp98civ42TnZ3d/v37CYJISkpqampiMiAQBIEvPEE/bt++7eDgQOmOdS0tLXw+n8vl1tXVMRfM3HR2drq6uhIEkZmZSakwNDSUIIiZM2eq1WqGsoEWplHoW0ZGhkgkevDgAfmS7OxsuVweHByMUZRGtra22m2c0tLSKG3jtGfPHicnJ6qnVkEPODcKfdBjZybtWVGFQlFVVSUQCJhOaG5WrVr19ddfnzx5csaMGeSrvvnmm3feecfW1raqqko70gIj2B6HwRRFR0cTBBEZGUm+JC4ujiCIkJAQ5lKZs59//rm9vV2PwiVLlhAEMWvWLCztmYNpFHrTY2emhw8furq6yuVyjKKm5unTp56eno8ePTpw4ID2Cl2gHc6NQm8qlertt9+mtEloRkaGVCoNDg5GDzU1jo6OBw4cIAgiMTGxpaWF7Ti/TJhGoW9qtdrCgtS/shhFTd97773397//ffbs2efOneNwOGzH+aXBNAp9I9lDCYLIzMyUSqWLFy9GDzUOhUKRkZHR0NBAvmT//v3Dhw//97//ffToUeaCmS1Mo2AQ3ShaWVlJ/jIbMMS2bdvS0tKmT59+8eJF8v/aff7550uXLrWzs7tx48arr77KaEJzg2kUDJKVlSWVSt977z30UKNJSEgYNWrUDz/8QGkbpyVLlixatEgikaxatYq5bOYJ0yjoTzuKymSyqqoqtFFjOn369B/+8Aeq2zi1tbV5enq2tbV9/PHHERERjCY0K5hGQX/Z2dkYRVnx1ltvhYWFdXV1paenk68aNmyY9u6ha9euvX//PmPpzA6mUdBTa2uri4uLTCarrKz09vZmO47ZaW9v37VrV0pKiq2tLaXCd95555tvvgkNDT1+/DhD2cyNJdsBYKDSjaLooawYOnRoVlaWHoX5+fm/+tWvdu7cSXsks4VpFPSBURRAB+dGQR85OTlSqXTRokXooQCYRoGy1tZWV1dXqVRaUVHh4+PDdhwAlmEaBcp27tzZ1dW1cOFC9FAAAtMoUPXo0SMXFxeMogA6mEaBGu0oumDBAvRQAC1Mo0ABRlGA5+F7o/ASYrFYLBZLJBKxWLxnzx6MogC9oI2aHalUKuqHTCbr9bttbW1KpVJXa2lpOWjQoPfff5/F/ACmBov6gU0qlWrnRO2j9geRSKR7ptdjZ2cn1bcYMmSInZ2dvb29vb393bt3W1pa5s6de/bsWez+C6CFNmpaeg6Dz8+Gfc6PVN/C2tra4Tk2NjZ9Pj9s2DAej6erffLkiUAgePTo0cGDBzGTAmihjTKrzz7YX3988uSJQqGg+hZ9tr/++uOvf/1rLpdryCc6depUcHDwkCFDbty4MXr0aENeCuCXAW2UNmKxOCwsrOcK+ueff6b6IoMHD9auoJ9/dHBw0C2udc/b2Ngw8VleTHtjnzlz5nz33XdY2gOgjdKmq6vr+S3Lnh8V+1s+Ozg4ODo68vl8VsJT8uTJE09Pz8ePHx85cmT58uVsxwFgGdoonb7++uue86O9vT3biZhy8uTJkJAQ3NgHgEAbBb0tWrToyy+/nD9//unTp9nOAsAmtFHQU2trq6enZ3t7+yeffBIeHs52HADWoI2C/o4dOxYeHm5nZ1dTUzNy5Ei24wCwA1uTgP7CwsL++Mc/4p69YOYwjYJBHj586OnpKRKJPvvss5CQELbjALAA0ygYZMSIEdp79sbHx7e2trIdB4AFaKNgqMjIyDfffPPp06cxMTFsZwFgARb1QIMHDx4IBAKRSPT5558HBwezHQfAqDCNAg2cnZ219z1fvXr1o0eP2I4DYFRoo0CP6OjoefPmPXnyJD4+nu0sAEaFRT3Q5qeffhIIBGKxuLCw8E9/+hPbcQCMBNMo9KuwsJDS5UkjR47Mzs4mCCI2Nvbx48eM5QIwLZhGoW9isdjNze3p06eUtnHSaDTz5s377rvvli5deuLECUYTApgItFHo14kTJ0JDQ+3
s7IRC4ahRo0hW3b1718vLq6Oj44svvli4cCGjCQFMARb10K+QkJCFCxdSvdbztddey8rKIgjigw8+aG9vZywdgKnANAovotvG6dNPPw0LCyNZpdFo3njjje+//37ZsmXHjh1jNCEA6zCNwos4OTnt3r3bw8PD3d2dfBWHwzl48ODgwYOPHz/+j3/8g7l4AKYA0yi8nFwu1+PuJnv37l27du2IESNqamocHByYCAZgCjCNwsvpd4eo1atXBwUFPXz4cP369bRHAjAdmEaBQc3NzV5eXs+ePfvqq6/effddtuMAMALTKDBo7Nix6enpBEHExcWJRCK24wAwAm0UKOjs7Fy9evWpU6fIl6xbt2769OkPHjxITExkLhgAi7CoBwq0N18aNmxYTU3NsGHDSFbdvHnT19dXKpUWFRXNmzeP0YQAxodpFChYtmzZvHnz2traVq9eTb5q/PjxaWlpBEFER0eLxWLG0gGwA9MoUNPS0iIQCDo6Oiht46RWq4OCgi5fvhwTE5Ofn89oQgAjQxsFyvLz82NjY1955ZWamprhw4eTrKqvr/f19ZXL5d9+++0bb7zBaEIAY8KiHiiLiYmZNWvW/PnzraysyFd5eHhs3bpVo9GsXLmyo6ODuXgARoZpFPSh33VNSqVy6tSpZWVlcXFxf/vb35gIBmB8aKNgVHV1dX5+fnK5/OzZs3PnzmU7DgANsKgHo5owYUJycrJ2ad/Z2cl2HAAaoI2CsSUlJU2ePPnOnTvJyclsZwGgAdooGKq6uvrtt98m/4VQS0vLw4cPW1lZ7du379KlS4xmAzACtFEwVEJCwr/+9a+EhATyJd7e3n/+85/VanVkZCSW9jDQ4b+YwFC3b9/29vZ+9uzZmTNn3nzzTZJVSqXy9ddfv3btWkJCQl5eHqMJARiFNgo0yMvL27Bhg7Ozs1AoJL9Dc1VVlb+/v0qlunDhQmBgIKMJAZiDRT3QQLeN06ZNm8hX+fj4JCUlqdXq6OhoqVTKXDwARqGNAg0sLCwKCgqcnJwCAgIoFaakpHh5eTU0NKSmpjKUDYBpWNQDbWQymbW1NdWqioqKKVOmqFSqS5cuTZs2jYlgAIzCNAq00aOHEgTh6+u7ceNG7dJeJpPRngqAaWijwL7U1FSBQFBfX6/dlhRgYMGiHkzC1atXp06dyuFwrly54u/vz3YcAAowjQL9NBrNgQMHkpKSyJf89re/3bBhg1KpjIiIwNIeBhZMo0C/xsZGgUCgVCqLi4tnzJhBskoul/v5+dXW1qakpGzfvp3RhAA0wjQK9HN3d09JSVGr1cuXLyd/rSefzz98+DCXy83Ozi4vL2c0IQCN0EaBEVu2bJk0aVJzc3NKSgr5qtdff33t2rVKpTIqKqq7u5u5eAA0wqIemFJdXe3v769UKs+fPx8UFESySiaT+fn51dXVpaambtu2jcmAAPTANApM8fb21s6kr7zyCvkqa2tr7dI+IyPj2rVrzMUDoAumUWCQQqHgcDiWlpZUC9etW7dnzx4fH5+ysjIej8dENgC6YBoFBvF4PD16KEEQmZmZbm5uVVVV2dnZtKcCoBemUTBRly9fDgoKsrS0LC8v9/LyYjsOQL8wjYKJmjZtWmxsbHd3d3h4uEKhYDsOQL/QRsFInj59GhYWVlpaSr4kJyfH1dW1srJy165dzAUDMBAW9WAk6enpqampEydOvH79Op/PJ1l1/vz52bNnW1lZlZeXCwQCRhMC6AfTKBjJ5s2bPT09a2trKW3j9Lvf/S4mJkYul0dERGBpD6YJ0ygYT2lp6bRp06hu4/Ts2TNvb+/bt2/n5ORQukkJgHFgGgXjmTJlyvr165VK5aFDh8hX2draFhQUcDicrVu31tTUMBcPQD9cXG8HxhQYGOjm5paSksLhcMhXjR079v79+2VlZRKJZOHChczFA9ADFvUwMEgkkj179iQmJtrY2LCdBeD/QRsFADAIzo0CABgEbRQAwCBoowAABkEbBQAwCNo
oAIBB/g8JdlbqUJBoawAAAABJRU5ErkJggg==\n", 162 | "text/plain": [ 163 | "" 164 | ] 165 | }, 166 | "execution_count": 6, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "atom_id, radius = bi[310][0]\n", 173 | "path_ids = Chem.FindAtomEnvironmentOfRadiusN(ibuprofen, radius, atom_id)\n", 174 | "substructure = Chem.PathToSubmol(ibuprofen, path_ids)\n", 175 | "substructure" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "## Morgan fingerprintが分子の中のどこを指しているか確認する\n", 183 | "\n", 184 | "`Mol.GetSubstructMatch`を利用する" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 7, 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "name": "stdout", 194 | "output_type": "stream", 195 | "text": [ 196 | "substructure cccc(c)C\n" 197 | ] 198 | }, 199 | { 200 | "data": { 201 | "image/png": "\n", 202 | "text/plain": [ 203 | "" 204 | ] 205 | }, 206 | "execution_count": 7, 207 | "metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [ 212 | "print('substructure', Chem.MolToSmiles(substructure))\n", 213 | "ibuprofen.GetSubstructMatch(substructure)\n", 214 | "ibuprofen" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "`bi`変数には、`(atom_index, radius)`が格納されている" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 8, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "name": "stdout", 231 | "output_type": "stream", 232 | "text": [ 233 | "atom_index 1 radius 2\n" 234 | ] 235 | } 236 | ], 237 | "source": [ 238 | "atom_index, radius = bi[854][0]\n", 239 | "print('atom_index', atom_index, 'radius', radius)" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "## RDKit fingerprintの取得と可視化" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 9, 252 | "metadata": {}, 253 | "outputs": [], 
254 | "source": [ 255 | "from pprint import pprint " 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 10, 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "name": "stdout", 265 | "output_type": "stream", 266 | "text": [ 267 | "[(19, [[0, 2, 3, 14, 1], [0, 2, 3, 4, 1]]),\n", 268 | " (103,\n", 269 | " [[4, 5, 6, 7],\n", 270 | " [4, 5, 6, 14],\n", 271 | " [4, 5, 14, 8],\n", 272 | " [4, 14, 8, 7],\n", 273 | " [5, 6, 7, 8],\n", 274 | " [6, 7, 8, 14]]),\n", 275 | " (105, [[5, 6, 9, 11, 13], [7, 8, 9, 11, 13]]),\n", 276 | " (110, [[9, 11, 12, 10]]),\n", 277 | " (161, [[4, 5, 6], [4, 5, 14], [4, 14, 8], [5, 6, 7], [6, 7, 8], [7, 8, 14]]),\n", 278 | " (173, [[0, 2, 1]]),\n", 279 | " (194,\n", 280 | " [[4, 5, 6, 7],\n", 281 | " [4, 5, 6, 14],\n", 282 | " [4, 5, 14, 8],\n", 283 | " [4, 14, 8, 7],\n", 284 | " [5, 6, 7, 8],\n", 285 | " [6, 7, 8, 14]]),\n", 286 | " (254, [[9, 11, 13]]),\n", 287 | " (261, [[9, 11, 12]]),\n", 288 | " (294, [[4, 5], [4, 14], [5, 6], [6, 7], [7, 8], [8, 14]])]\n" 289 | ] 290 | }, 291 | { 292 | "data": { 293 | "text/plain": [ 294 | "[19, 103, 105, 110, 161, 173, 194, 254, 261, 294]" 295 | ] 296 | }, 297 | "execution_count": 10, 298 | "metadata": {}, 299 | "output_type": "execute_result" 300 | } 301 | ], 302 | "source": [ 303 | "rdkbi = {}\n", 304 | "rdkfp = Chem.RDKFingerprint(ibuprofen, maxPath=5, bitInfo=rdkbi)\n", 305 | "pprint(list(rdkbi.items())[:10])\n", 306 | "list(rdkfp.GetOnBits())[:10]" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 11, 312 | "metadata": {}, 313 | "outputs": [ 314 | { 315 | "data": { 316 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAJYAAACWCAIAAACzY+a1AAAKV0lEQVR4nO2dfWwT5xnAnzv7/JWcP4AkWlKBgLCMDZoGMhjSmLpuEqhAIWgSqfjcFAwR4mOiLRWtSmmL2ioVqbqVELXqh0BqKTC15Q+6NhFlEp2UQcdEIU2cBDoGSRzAPjvGH+e7d39YM+Zwvojx3aM9P1mn+PF7z/tcfn7vXjvvKRxjDAjM8HoXQIwXUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUogeUqgfHHfnkY5oGowCc67rGi2qGpLl7xkL8XyRIFRwnE2vSvSB4yDz30tono4FHRQmk/8Oh/8ci50GUFIRjrPZ7Y+LYj3Pe/JfzwMnGARJumsrCDlMzw3/r0YU5Xo8flZR+njebjaXWyxzOc4ynv5isb8Fg88xFtWUAcB4ftKECW8Jwo/Hk/9Bk0wmg8GgJEnBYDAQCKR+yNwGg8F3CwuLLl4ESQJJgkAgS5apU+Hy5Syj8N4z5yiG5pAKFaUvFHojFjsNcKcBz08Qxc0ORw3AqE7TGmT5+5s3/8BYYqgGPD+xqOgjnp9wH8nzQE9PzxdffLFly5bhm12fN+9HbW13njud4HKB231nW1oKDQ3ZFY797JpdoSx33rq1RVWzvYMAHI4nXK7n72MqdOPG72X5wvBtHI4VLtfzY82cH1atWnXs2DG73V5SUuJ2u91ut8vl0mzdbvcvJ06cVFAAbjd4POByAZ/tF5XVVq4Uqmr4xo1aRekfahcAJopbCwvXj5g9E1nuuHFj9Yj1cJxQUtLCcY4xJc8Dly5dmj17tiAI3d3dZWVlOciYedpMWbgvhVmmM5HIoaH9Qeq8Ojj4jsPxxJhmH4nEPxMJFgopoVBSkpRwWJEkZdYsx9Sp1szkjCVk+aLF8vPRZ84PL730kqqqXq83N/4g23VOExndHPVehSwaPTGK3mOx2FcctywQCMRisWg0GhiJaFQKBm9r8ixd6qmosO3cWZoZVJRboyk9n7S3tx89etRqtT7zzDN616JFq1BR/IoyoAl2dESbm/3hsCJJyVBICYUUSVLC4SeTSXVMnQkCJ4omp9PkdJpcLjPHwcmTwZMn4bHHXFVVBelmPF94fwfz4EgNwY0bNz700EN616JFey1MJrsGBmo1jdraBles6Lh3Z5vNZrfbbTabZ1hSbQoKfrBYXtBMm/ftu/b2233l5bavvppptaYu+3xJyUmen5jTwxwX7e3ts2bNEgShq6vLgAq1ozDr5W3GDFtDwxSn0+R2m5xOc2oYlZQsLyraO5a+fuL3NylKb2bo6adLW1qkjo5oY2Pvs8+WAYDV+gtD+QOAl19+WVXVuro6A/qDrDNSv3+FovxnxD1drhccjifG1Fks1hoI7NIEv/02snx5B8fB559XVFV5Jk78wFCf7n0+38yZM00mU2dn55QpU/QuJwtZPrIUFKwceTfeabf/dqyd2Wy/KSz0aoJz5hR4vcXJJNu+/Qeb7TlD+QOAF198UVGUuro6Y/qDrAodjlqzuXz43ZzOPyYSpv7+/s7Ozra2tp6eHk2DY8eOLVu2bOHChQ8//PDkyZNdLteePXsAQBS9Hs+rJlNJZuNdu8oqKpw+X7SxsQ2MhM/nO3LkiMViMeBE9A4sG8lkr99fc/363GyP6tdfr7XZ7vrDwlNPPaXJsH//fk1H27ZtS7/a0XFp8eIFHR0NodBb4fAH8fi/vvnmjMlkMpvNZ8+ezVqSLqxevRoA6uvr9S5kOLIrZIyp6qAk7e/tXZjpb2BgdSz294MHDwKAIAhFRUXl5eXV1dWNjY2a3bu7uz/
77LPTp0+fP3/+ypUrgUAg89VVq1YBwJIlSzKDO3bsAIDKysp4PJ6z4xsHnZ2dZrNZEITLly/rXctwDKkwhapG4/F/3L59IhptkeUfUsF4PB6JRMbTq9/vLyoqAoAPP/wwHYxEIjNmzACAvXv3jid5rlizZg0AbN68We9CRmAEhQ+OQ4cOAYDL5bp69Wo6eObMGZ7nzWbzuXPn9Coshc/nQzEEmY4KGWM1NTUAsHTp0szg1q1bAeCRRx5JJBJ6FcYYW7t2LQBs2rRJxxpGiZ4Kr127VlZW9uabb6qqmg5GIpHy8nIAeOWVV/QqLD0Ee3p69Kph9OipkDEWi8XuDZ46dYrjOIvFcuHChfyXxFANQaa7wqGor68HgKqqqvyfTnENQWZYhYODg9OnTweA1157Lc9dr1u3DgC8Xm+e+71vDKqQMdba2spxnNVq/e677/LWaVdXF64hyAyl8PDhw5r3vtfrBYD58+cnk8n81LB+/XoA2LhxY366ywlGUdjX1+d0OgHg6NGj6aAkSZMnTwaAhoYGWb4yOHgkFHorFDpw+/ZfVTWc8xowDkFmHIWMsebmZgCYNGlSf39/OtjS0sJxnNVq+vrrn2V+1dfbuzAcfoexXI7ODRs2AEBdXV0Oc+YBAylUVXXRokUAUFtbmw4mEp1PPlkKAHPnFly9OkfznfvNm9tUNTdT1vQQ7O7uzknCvGGg22I4jmtubhZF8eOPPz5+/DgAMHY7ENi5Z09xaanl3LnIe+9pFvVw8fiZcPhPOel93759yWRy3bp106ZNy0nCvDHCgvz8c+DAgaampvfff7+6unpw8N1w+CAAtLZKa9d22e18S8tP7160CACm4uK/mExjWBioqqokSYFAILV+XpIkn8+3e/duAGhvb0991Y4IwylkjMmybLFYAGBgoCaZvJqK79hx5ZNPblZXF376aYVmebQobjaZVg+/BDJzpaTf71cURdPvggULKisrm5qa8nKUucRwCtOoari//9fpp5KkPProxf5+ubLS4fGYU2shU49YbGxrIQHA4/Fo1tKvXLly+fLlOT2CPGFchYrS5/cvzYx8+WXw+PFbJ05kudNjmIWQ9y6TLC4uNpt1u7Ey5xhXIWOxvr5fAdw1wsJhpbVVSq0kFkWTy2USRdOECYs8nlf1qlN3jPtm5DibIMyU5YuZQVE0rVihvXXNYqnKY12Gw0AfKu6loOB3I7bhuEK7fXEeijEshlZoty+xWOYM38bp3M7zzvzUY0wMrRCA93jeEIRZQ7zKieImh6MmrxUZD+NOZ9IwJkcihyORj1T1zk1rgjBbFOut1nk6FmYQECj8H6osdypKL8cJZvMMzXrw/2cQKSSyY/BrITEypBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9pBA9/wWgphVBBOcPowAAAABJRU5ErkJggg==\n", 317 | "text/plain": [ 318 | "" 319 | ] 320 | }, 321 | "execution_count": 11, 322 | "metadata": {}, 323 | "output_type": "execute_result" 324 | } 325 | ], 326 | "source": [ 327 | "Draw.DrawRDKitBit(ibuprofen, 105, rdkbi)" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 12, 333 | "metadata": {}, 334 | "outputs": [ 335 | { 336 | "data": { 337 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAJYAAACWCAIAAACzY+a1AAAHZUlEQVR4nO2c22/TVhzHj+0QN1lD4m5jEi8gGlJCYYNWAoT2D0C5tOUmxGNfpr3saUKbaMUbfdj2NK0F2kD7gCgFSrlJSGOPMO1lk9aJqUxNOzVVC4jc2sSJsb0HF1PSXJzUifc7/n0UVUlqH/+iT77H5yTOYVRVJQhkWKsLQNYKKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKgQPKrQChlm+5f2XwSff4jCpKMQYmgx90bSchxWBKaw5K4WZsQAeKqwV2SxhmDzOVLV4P1kS7EgrJZ0m0ejyTRTfe5j3tmlTlQpBhYQQoqpqLBaLRqOxWCwej+t/8z78PhBof/So7D4wlTK6ZZmhtK/Cly9f9vT0PHz4MBaLJRIJ4zu+2riRqCqpqyOCQFyu5TvFby4XEQSjbla/OYruyNhzSdlUKrVlyxaGYebn57VnfG/xer05f1fe8fl8G3w+j89HOK7so+Y9F+Y8n3ebQjsSQmybwv7+/oWFhZ07dz558kQQBJ/PV4ujaiOXHBlF9RjBjikURbGxsXFubu7BgwcHDx6s6bFLzgsxhUbo6+ubm5trbW09cOBArY+tmdDPbWbkx3YptDKC1cF2U/v+/n7LIlgd7JVCURT9fn8kErl//35bW5vV5ZiDvVJ48eLFSCTS0tJCRxeqYaMU6hG8d+/eoUOHrC7HNGyUQj2C1HShGnZJoR7Bu3fvHj582OpyzMQuKbx06VIkEtm9ezdNXaiGLVKYyWQaGxsjkcj4+PiRI0esLsdkbJFCPYKUdaEa9Kcwk8n4/f7Z2dk7d+4cPXrU6nLMh/4UXr58eXZ2dteuXfR1oRqUp1CP4NjYWHt7u9XlVAXKUzgwMKBFkMouVIPmFGaz2UAgMDMzc/v27Y6ODqvLqRY0p3BgYGBmZmbHjh0UR5BQnEKbRJBQnMLBwUE7RJDQmkJJkgKBwPT09K1btzo7O60up7rQmcLBwcHp6enm5mZaJxIroTCFegRv3rx57Ngxq8upOhSmMBQKaRGkexSjQ1sKJUlqamoKh8Ojo6PHjx+3upxaQFsKQ6FQOBxubm6mfhSjQ1UK9QjeuHHjxIkTVpdTI6hK4ZUrV8Lh8Pbt2+0witGhJ4V6BEdGRk6ePGl1ObWDnhRevXpVi6BNRjE6lKRQj+D169dPnTpldTk1hZIUDg0NhcPhYDBon1GMThk/TlNVSVFeEaIwzAcs66taSWUjSdKFCxcIIT09PSxLyZvSOKUVKkoinR5Ppx9L0t+EvNGeZNkPeX6v293udLZUucLSDA8PT01NBYNBW41idIqfC9WlpdFk8idVXSy0Bc/v8XrPcdzGahRnBEmStm3bNjU1de3atdOnT1tVhoUUVKiq2VisWxQfl2yCZT2C8J3T2Wp2bYYIhUJdXV3BYHBiYsKGvSgpPJxRY7FzoviYkNLrbChK8vXrryTpL3MrM4Isy729vYSQ7u5ue/ojhRQuLY2I4i+EMIQYmnKoqhiNfqOqhlfHMYnh4eHnz59v3brVnmdBjTwKFSWeTPYRQgz605DlucXFIZOqMnjE5QieP3+eq2AVGFrIMyJNpcZUdamCtlKp0fr6LoZxrrmqd0iSpC+hlbPA1tOnTycnJ5uamuw2l88hj0JR/LmythQlkc3+xvOfF9kmnU6LophOp6NF0beZn58vMmY+c+ZMZ2ennSNIVitU1YwkTRrfP5GQk0k5kZDjcTmZlDOZgUzmj5VZyYlONpstqz6O47TFswRBWL20Vltbm9/vL6tB+shVKMsLhChFdjh79t/ff19KJOR4/E0iIa9KyD+EFDsj1tXV5ZjQ1s8qtPKZx+Op7IX
Zh9UdaYmUhMPixMS7kSfPsz4f5/VydXUsz7MNDZ9s2LBfKIDL5RIEweyXYHdyp/ay/OLFi2ILejx7lpYk1evlvF7O4+E47r2Jo9vd6fV+W5VKkQLkppDjPmbZ9YpScH3OYNBVrDlHozl1IYZZPS9knM49FTXFEEJ4fu8aC0LKJc/U3u2u7EcIqtP5qcOxeW31IGWTRyHP73M6P6ugrfr6L9ZcD1I2eT8jZbzeboZxl9WQ293B85X1wMiayP8xt8OxWRB6jX9UxvP71q//2ryqkDIo+AUNz+9vaPiRZT8q2YTb3S4IP5j70ShinBJXsClKYnFxMJUay/tF0rp1zR7PlzgKtRZDFyGqaiqT+TWb/VNRFlRVYVm3w9HI8/twFvh/gJLrSO2MTS9WoAlUCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCB5UCJ7/AA9JQm+DyaDOAAAAAElFTkSuQmCC\n", 338 | "text/plain": [ 339 | "" 340 | ] 341 | }, 342 | "execution_count": 12, 343 | "metadata": {}, 344 | "output_type": "execute_result" 345 | } 346 | ], 347 | "source": [ 348 | "Draw.DrawRDKitBit(ibuprofen, 254, rdkbi)" 349 | ] 350 | }, 351 | { 352 | "cell_type": "markdown", 353 | "metadata": {}, 354 | "source": [ 355 | "- Morgan fingerprintは、原子から同心円状に結合をたどって行きます。\n", 356 | "- 一方でRDKit fingerprintでは、直線的な部分構造を取り出すようです。\n", 357 | "- `rdkbi`では、部分構造に含まれる結合のidが取り出されています。" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 13, 363 | "metadata": {}, 364 | "outputs": [], 365 | "source": [ 366 | "epinephrine = Chem.MolFromSmiles('CNC[C@H](O)c1ccc(O)c(O)c1')" 367 | ] 368 | }, 369 | { 370 | "cell_type": "markdown", 371 | "metadata": {}, 372 | "source": [ 373 | "## 分子をまとめて表示" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 14, 379 | "metadata": {}, 380 | "outputs": [ 381 | { 382 | "data": { 383 | "image/png": "\n", 384 | "text/plain": [ 385 | "" 386 | ] 387 | }, 388 | "execution_count": 14, 389 | "metadata": {}, 390 | "output_type": "execute_result" 391 | } 392 | ], 393 | "source": [ 394 | "Draw.MolsToGridImage([epinephrine,\n", 395 | " ibuprofen], molsPerRow=2)" 396 | ] 397 | }, 398 | { 399 | "cell_type": "markdown", 400 | "metadata": {}, 401 | "source": [ 402 | "### 図の保存" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 
407 | "execution_count": 15, 408 | "metadata": {}, 409 | "outputs": [], 410 | "source": [ 411 | "img = Draw.MolsToGridImage([epinephrine,\n", 412 | " ibuprofen], molsPerRow=2)\n", 413 | "img.save('./molecules.jpg')" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 16, 419 | "metadata": {}, 420 | "outputs": [ 421 | { 422 | "name": "stdout", 423 | "output_type": "stream", 424 | "text": [ 425 | "molecules.jpg\n" 426 | ] 427 | } 428 | ], 429 | "source": [ 430 | "!ls molecules.jpg" 431 | ] 432 | } 433 | ], 434 | "metadata": { 435 | "kernelspec": { 436 | "display_name": "Python 3", 437 | "language": "python", 438 | "name": "python3" 439 | }, 440 | "language_info": { 441 | "codemirror_mode": { 442 | "name": "ipython", 443 | "version": 3 444 | }, 445 | "file_extension": ".py", 446 | "mimetype": "text/x-python", 447 | "name": "python", 448 | "nbconvert_exporter": "python", 449 | "pygments_lexer": "ipython3", 450 | "version": "3.7.0" 451 | } 452 | }, 453 | "nbformat": 4, 454 | "nbformat_minor": 4 455 | } 456 | -------------------------------------------------------------------------------- /notebooks/4.2-doe_orthogonal.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"4.2-doe_orthogonal.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyN7Xwmki2kvYSR6WBiJRQEB"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"h7TTRpRxBnJx","colab_type":"text"},"source":["# 実験計画法 (Design of Experiments, DoE)\n"]},{"cell_type":"code","metadata":{"id":"zfLPOz07sOUq","colab_type":"code","outputId":"dfbeb457-36c2-4945-8844-a290538d7657","executionInfo":{"status":"ok","timestamp":1581085543339,"user_tz":-540,"elapsed":6733,"user":{"displayName":"Shojiro 
Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":235}},"source":["import numpy as np \n","import pandas as pd"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Collecting pyDOE2\n"," Downloading https://files.pythonhosted.org/packages/70/1f/8a2e1b57f6dd22cd2002e4332e229e87a3858d560c516b50ab7fe5bb075c/pyDOE2-1.3.0.tar.gz\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from pyDOE2) (1.17.5)\n","Requirement already satisfied: scipy in /usr/local/lib/python3.6/dist-packages (from pyDOE2) (1.4.1)\n","Building wheels for collected packages: pyDOE2\n"," Building wheel for pyDOE2 (setup.py) ... \u001b[?25l\u001b[?25hdone\n"," Created wheel for pyDOE2: filename=pyDOE2-1.3.0-cp36-none-any.whl size=25519 sha256=59ddb72ba21f0659cee848d43b576914d01c0b4afb40d9695ea2f0e57b472f1b\n"," Stored in directory: /root/.cache/pip/wheels/60/88/1d/b334ee00dd83e82d111ec56c3be91573d335c93870698037f1\n","Successfully built pyDOE2\n","Installing collected packages: pyDOE2\n","Successfully installed pyDOE2-1.3.0\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"Nv_ovqPcjmNX","colab_type":"text"},"source":["## pyDOEによるデモ \n","\n","pyDOEというパッケージにいくつか実験計画法の手法が登録されている。
\n","所望の手法があれば利用すると良い。
\n","以下はfull factorial手法の例(2因子、水準数はそれぞれ2と3)。"]},{"cell_type":"code","metadata":{"id":"Grr-TKrRuZcR","colab_type":"code","outputId":"5e559eef-28ff-48ff-b19a-2f3c304d8687","executionInfo":{"status":"ok","timestamp":1581085543758,"user_tz":-540,"elapsed":6906,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":125}},"source":["!pip install pyDOE2\n","import pyDOE2 as pyDOE\n","pyDOE.fullfact([2, 3])# 2因子(水準数は2と3)"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[0., 0.],\n"," [1., 0.],\n"," [0., 1.],\n"," [1., 1.],\n"," [0., 2.],\n"," [1., 2.]])"]},"metadata":{"tags":[]},"execution_count":24}]},{"cell_type":"markdown","metadata":{"id":"nZsLUHT-j5vk","colab_type":"text"},"source":["## 直交計画表の関数を作る \n","\n","直交計画表を作る関数を定義する。
\n","因子数を入力したら直交表が出力される。
"]},{"cell_type":"code","metadata":{"id":"yZQ7dWnNBz_9","colab_type":"code","colab":{}},"source":["def orthogonal_table(n_factors=7):\n"," \"\"\" Orthogonal table \"\"\"\n"," n_pow = np.ceil(np.log2(n_factors))\n"," n_bases = 2**int(n_pow)\n"," bases = np.array([list(map(int, '{:09b}'.format(val)))for val in range(n_bases)])\n"," kernels = [bases[:,np.where(row>0)[0]] \n"," for row in bases if len(np.where(row>0)[0])>0]\n"," indexes = [np.where(row>0)[0].tolist()\n"," for row in bases if len(np.where(row>0)[0])>0]\n"," indexes = sorted(indexes,key=lambda x:len(x))\n"," D = np.zeros((bases.shape[0], len(indexes)),dtype=np.bool)\n"," for enum, ix in enumerate(indexes):\n"," for i_ in ix:\n"," if ix[0]==i_:\n"," D[:, enum] = bases[:, i_]\n"," else:\n"," D[:, enum] = D[:, enum] ^ bases[:, i_]\n"," return np.array([[2 if v else 1 for v in row] for row in D])"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"U2-xNYwhDv1t","colab_type":"code","outputId":"8904f803-b1fb-473a-f311-9bdf2b3c028d","executionInfo":{"status":"ok","timestamp":1581085543761,"user_tz":-540,"elapsed":6456,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":71}},"source":["for i in [7, 14, 21]:\n"," X = orthogonal_table(i)\n"," print(len(X), len(X[0]))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["8 7\n","16 15\n","32 31\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"L4P3cWoQA6PK","colab_type":"code","outputId":"692836a8-1851-447d-aab5-1ab71810a9d8","executionInfo":{"status":"ok","timestamp":1581085543762,"user_tz":-540,"elapsed":6232,"user":{"displayName":"Shojiro 
Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":294}},"source":["D = orthogonal_table(7)\n","columns=[''] * D.shape[1]\n","columns[0]='A'\n","columns[2]='B'\n","columns[4]='C'\n","columns[6]='D'\n","#割付\n","df=pd.DataFrame(D,columns=columns)\n","df['measure'] = [40,46,28,18,32,26,32,58]\n","df"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ABCDmeasure
0111111140
1211221246
2121212228
3221122118
4112122232
5212212126
6122221132
7222111258
\n","
"],"text/plain":[" A B C D measure\n","0 1 1 1 1 1 1 1 40\n","1 2 1 1 2 2 1 2 46\n","2 1 2 1 2 1 2 2 28\n","3 2 2 1 1 2 2 1 18\n","4 1 1 2 1 2 2 2 32\n","5 2 1 2 2 1 2 1 26\n","6 1 2 2 2 2 1 1 32\n","7 2 2 2 1 1 1 2 58"]},"metadata":{"tags":[]},"execution_count":27}]},{"cell_type":"code","metadata":{"id":"zfuFbC-ZBS6X","colab_type":"code","colab":{}},"source":["\n","df_alloc = df[['A','B','C','D','measure']]"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"RcODuW_pEmBw","colab_type":"code","outputId":"44c22721-c592-4496-820c-1c14abfa730b","executionInfo":{"status":"ok","timestamp":1581085543763,"user_tz":-540,"elapsed":5749,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":294}},"source":["df_alloc"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ABCDmeasure
0111140
1212246
2111228
3212118
4122232
5221126
6122132
7221258
\n","
"],"text/plain":[" A B C D measure\n","0 1 1 1 1 40\n","1 2 1 2 2 46\n","2 1 1 1 2 28\n","3 2 1 2 1 18\n","4 1 2 2 2 32\n","5 2 2 1 1 26\n","6 1 2 2 1 32\n","7 2 2 1 2 58"]},"metadata":{"tags":[]},"execution_count":29}]},{"cell_type":"code","metadata":{"id":"K5d9r4diEnSD","colab_type":"code","outputId":"6b036a29-2e51-4ffc-d581-0861898c80e8","executionInfo":{"status":"ok","timestamp":1581085543764,"user_tz":-540,"elapsed":5123,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["print(df_alloc['measure'].sum()/8)\n","m = df_alloc['measure'].mean()"],"execution_count":0,"outputs":[{"output_type":"stream","text":["35.0\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"5-YF9QYcQuP7","colab_type":"text"},"source":["## ANOVA\n","\n","分散分析(ANalysis Of VAriance, ANOVA)を実施する。
\n","直交計画法などは、それぞれの因子が直交
\n","(無相関)していると仮定しているので、
\n","応答(目的変数のこと)の分散が
\n","因子ごとの分散の和で表せる。"]},{"cell_type":"code","metadata":{"id":"6GW7JYqGQdFj","colab_type":"code","colab":{}},"source":["INPUT=df_alloc.columns[df_alloc.columns!='measure']"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"cfw7ZFMXFx8d","colab_type":"code","colab":{}},"source":["effects_factors = [np.array([df_alloc.loc[df_alloc[factor]==level]['measure'].mean() - m for level in [1, 2]])\n"," for factor in INPUT]"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"oTCP4QmZGLZH","colab_type":"code","outputId":"3cb47018-829e-4724-ebe6-786bfac51c08","executionInfo":{"status":"ok","timestamp":1581072454408,"user_tz":-540,"elapsed":718,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["print(*effects_factors)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[-2. 2.] [-2. 2.] [ 3. -3.] [-6. 
6.]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"raKD7OaDOnvF","colab_type":"code","colab":{}},"source":["ss_total = np.sum(df_alloc['measure']**2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"zFUJWUvmOs_3","colab_type":"code","colab":{}},"source":["CT = m ** 2 * df_alloc.shape[0]"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"7HJbRKPbO0E8","colab_type":"code","outputId":"feeb02d4-9e11-44f8-cc80-3078806eccb9","executionInfo":{"status":"ok","timestamp":1581072570142,"user_tz":-540,"elapsed":616,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["squaresum = np.array([np.sum(effect**2.)*(df_alloc.shape[0]/2.)for effect in effects_factors])\n","squaresum"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([ 32., 32., 72., 288.])"]},"metadata":{"tags":[]},"execution_count":70}]},{"cell_type":"code","metadata":{"id":"gjPBdnETOXQg","colab_type":"code","colab":{}},"source":["diff_factors = np.array([np.array([effects_factors[ix_factor][val - 1] for val in df_alloc[factor]])\n"," for ix_factor, factor in enumerate(INPUT)])"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"CnoqG3_1RnnB","colab_type":"code","outputId":"7f49e222-4144-4bcc-cd54-67d6525300cb","executionInfo":{"status":"ok","timestamp":1581072761081,"user_tz":-540,"elapsed":625,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":179}},"source":["e = df_alloc['measure'] - diff_factors.T.sum(axis=1)\n","e**2."],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0 2209.0\n","1 
1849.0\n","2 529.0\n","3 729.0\n","4 841.0\n","5 625.0\n","6 1681.0\n","7 2025.0\n","Name: measure, dtype: float64"]},"metadata":{"tags":[]},"execution_count":80}]},{"cell_type":"code","metadata":{"id":"zRgQbAB7RwLj","colab_type":"code","outputId":"bd748549-5cf0-4893-aa7c-b876c55bc9b2","executionInfo":{"status":"ok","timestamp":1581072954824,"user_tz":-540,"elapsed":680,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":53}},"source":["if squaresum.sum()+np.sum(e**2.)==ss_total:\n"," print('SS_total', ss_total, ':', 'SS_factors', squaresum.sum()+np.sum(e**2.))\n"," print('It is true that: SS = S_a + S_b + S_c + S_d + S_e')"],"execution_count":0,"outputs":[{"output_type":"stream","text":["SS_total 10912 : SS_factors 10912.0\n","It is true that: SS = S_a + S_b + S_c + S_d + S_e\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"Xs5W03tJSH7L","colab_type":"code","outputId":"9f10af74-5f93-40e7-e00a-1c34c7b677a1","executionInfo":{"status":"ok","timestamp":1581073126460,"user_tz":-540,"elapsed":567,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["CT/1, squaresum/1\n","np.sum(e**2.) / (df_alloc.shape[0]-1-len(INPUT))"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["3496.0"]},"metadata":{"tags":[]},"execution_count":103}]},{"cell_type":"code","metadata":{"id":"KCw4R7yDSNcP","colab_type":"code","colab":{}},"source":["v_factors = squaresum/1\n","v_error = np.sum(e**2.) 
/ (df_alloc.shape[0]-1-len(INPUT))"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"6NlI74HlTdLh","colab_type":"code","outputId":"936de711-31f9-42b6-a284-0d1ccc08ffc7","executionInfo":{"status":"ok","timestamp":1581073173546,"user_tz":-540,"elapsed":496,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["v_factors / v_error"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([0.00915332, 0.00915332, 0.02059497, 0.08237986])"]},"metadata":{"tags":[]},"execution_count":106}]},{"cell_type":"markdown","metadata":{"id":"FFuXUsu4TlhP","colab_type":"text"},"source":["## 結論\n","\n","F値(自由度1, 3)の棄却限界値について、以下が成り立つ(上段は有意水準5%、下段は有意水準1%)。\n","\n","$$\n","F_3^1=\\left\\{ \\begin{array}{l} 10.13 \\\\ 34.12 \\end{array} \\right.\n","$$\n","\n","これと上記の結果から、
\n","「**今回の割付では因子A, B, C, Dの分散は
\n","誤差分散との有意差があるとは言えない**」
\n","と言える。\n","\n","もう少しわかりやすく言い換えると、
\n","今回のような実験結果が得られたときに、
\n","因子A~Dは誤差項と同じ程度の分散しか持っていない
\n","ということになる。応答(目的変数)を
\n","重回帰モデルで予測する際に有効な変数は
\n","今回は取得できていないということになる。\n","\n","実際の実験に利用する際には、実験結果である物性値に
\n","影響を与えている因子が特定できなかったことを意味する。"]},{"cell_type":"code","metadata":{"id":"9knTbPZqTqeo","colab_type":"code","outputId":"a9f0d8a1-e758-414e-b892-695d0d92ea54","executionInfo":{"status":"ok","timestamp":1581085734655,"user_tz":-540,"elapsed":1703,"user":{"displayName":"Shojiro Shibayama","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDB_A1C4K1EQSoiyTWON_OEPGUUAMhFNuZqJIN72A=s64","userId":"02174551276126794392"}},"colab":{"base_uri":"https://localhost:8080/","height":281}},"source":["import numpy as np \n","from scipy.stats import f\n","import matplotlib.pyplot as plt\n","dfn = 1 # degree of freedom, nominator\n","dfd = 3 # degree of freedom, denominator\n","rv = f(dfn, dfd)\n","x = np.linspace(f.ppf(0., dfn, dfd),\n"," f.ppf(0.99, dfn, dfd), 100)\n","fig , ax = plt.subplots(1,1)\n","ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')\n","plt.vlines(10.13,ymin=0,ymax=.5, colors='blue', linestyles='dashed')\n","plt.vlines(34.12,ymin=0,ymax=.5, colors='blue', linestyles='dashed')\n","plt.xlabel('$F$ 
value');\n","plt.ylabel('probability');"],"execution_count":0,"outputs":[{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAYIAAAEICAYAAABS0fM3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3deZRV1Zn38e9TVcxjgIIglimCJYYY\nx4pDcKA7+EYTBU0c3zZv6DdZRI3LuDKKsTWtGYhDYicxvtLROKRtx8SuRGw1RJMoUQFB04BgISQU\nKBRB5kGq6nn/OKfwUtyqukXdXefee36ftfY60773Ppzlrse99xnM3RERkfQqSzoAERFJlhKBiEjK\nKRGIiKScEoGISMopEYiIpFxF0gF01YgRI7y6ujrpMEREisqCBQs2uHtltmNFlwiqq6uZP39+0mGI\niBQVM/tre8c0NCQiknJKBCIiKadEICKSckoEIiIpp0QgIpJySgQiIimnRCAiknJBE4GZnWFmy8ys\n3syuznJ8mpk1mtmiuHwhVCzf+MY3mDhxIi+99FKonxARKUrBbigzs3LgduB0oAGYZ2Z17r6kTdWH\n3P2KUHG0Wrp0KXPnzmX9+vWhf0pEpKiE7BEcD9S7+5vu/i7wIDA14O91qH///gDs2LEjqRBERApS\nyEQwBlidsd0Q72vrM2b2mpk9amZV2b7IzKab2Xwzm9/Y2HhAwfTr1w+AnTt3HtDnpWOTJkVFRMII\n2caSniz+DVDt7kcCzwD3Zqvk7rPcvdbdaysrsz4zqVPqEYiIZBcyEawBMv8P/+B4317u/nd33x1v\n/hw4LlQwrT0CJQIRkX2FTATzgBozG2tmvYGLgLrMCmY2OmNzCrA0VDCtPQINDYmI7CvYVUPu3mRm\nVwBPAeXA3e6+2MxuAOa7ex1wpZlNAZqAjcC0UPFoaEhEJLug7yNw99nA7Db7rstYnwHMCBlDK00W\nh3XnnUlHIFLaQraxonsxzYFSjyCs8eOTjkCktIVsY0lfNdRjNFkc1m9+ExURCSNkG0tdj0BDQ2Hc\nemu0PPvsZOMQKVUh21hqegQaGhIRyS41iUCTxSIi2aUmEahHICKSXWoSgSaLRUSy02Sx5MX99ycd\ngUhpC9nGUpMI1CMIqyrrc2NFJF9CtrHUDA2pRxDWQw9FRUTCCNnGUtMj0GRxWHfcES0vvDDZOERK\nVcg2lpoeQa9evSgrK2PPnj00NTUlHY6ISMFITSIwMw0PiYhkkZpEAJowFhHJJlWJQPMEIiL7S81k\nMejKoZAefTTpCERKW8g2lqpEoKGhcEaMSDoCkdIWso2lcmhIPYL8u+eeqIhIGCHbWKoSgXoE4SgR\niISlRJAnmiwWEdlfKhOBhoZERN6TqkSgoSERkf2lKhGoRyAisj9dPip5MXt20hGIlLaQbSxViUCT\nxeHEp1ZEAgnZxjQ0JHnxs59FRUTCCNnGUpUINDQUzsMPR0VEwgjZxlKVCNQjEBHZX6oSgXoEIiL7\nS1Ui0GSxiMj+giYCMzvDzJaZWb2ZXd1Bvc+YmZtZbch4NDQkIrK/YJePmlk5cDtwOtAAzDOzOndf\n0qbeIODLwEuhYmmloaFwnnsu6QhESlvINhayR3A8UO/ub7r7u8CDwNQs9W4EfgDsChgLoKEhEZFs\nQiaCMcDqjO2GeN9eZnYsUOXuT3T0RWY23czmm9n8xsbGAw6otUegoaH8u+WWqIhIGCHbWGKTxWZW\nBvwQ+Gpndd19lrvXunttZWXlAf+megTh/Pa3URGRMEK2sZCJYA1QlbF9cLyv1SDgCOA5M1sFnAjU\nhZww
Vo9ARGR/IRPBPKDGzMaaWW/gIqCu9aC7b3b3Ee5e7e7VwIvAFHefHyog9QhERPYXLBG4exNw\nBfAUsBR42N0Xm9kNZjYl1O92JPOqIXdPIgQRkYIT9Omj7j4bmN1m33Xt1J0UMhaAiooKevXqxZ49\ne3j33Xfp06dP6J9MjTjHikggIdtYqh5DDdHw0ObNm9mxY4cSQR49+WTSEYiUtpBtLFWPmABNGIuI\ntJW6RKAJ4zBuvDEqIhJGyDamRCB5MWdOVEQkjJBtLHWJQENDIiL7Sl0iUI9ARGRfqUsE6hGIiOwr\nlZePgnoE+TZ8eNIRiJS2kG1MiUDy4rHHko5ApLSFbGMaGhIRSbnUJQL1CMKYMSMqIhJGyDaWuqEh\nva4yjD//OekIREpbyDaW2h6BhoZERCKpTQTqEYiIRFKXCDRZLCKyr9TNEahHEMbBBycdgUhpC9nG\nUpcINFkcxi9/mXQEIqUtZBtL3dCQJotFRPaV2kSgHkF+XXVVVEQkjJBtLLVDQ+oR5NeiRUlHIFLa\nQrYx9QhERFIudYlAk8UiIvtKXSLQZLGIyL5SO0egHkF+HXZY0hGIlLaQbSx1iUA9gjBmzUo6ApHS\nFrKNpW5oqG/fvgDs2rWLlpaWhKMREUle6hJBWVnZ3mSgXkH+TJ8eFREJI2QbS93QEETDQ7t27WLn\nzp0MGDAg6XBKwvLlSUcgUtpCtrGcegRmVh4uhJ6nCWMRkffkOjT0hpndbGYTgkbTQzRhLCLynlwT\nwVHAcuDnZvaimU03s8GdfcjMzjCzZWZWb2ZXZzl+qZn9xcwWmdnzPZVodHexiMh7ckoE7r7V3f/d\n3T8GfBO4HnjLzO41s0OzfSYeTrodOBOYAFyc5Q/9A+7+EXc/GrgJ+OGB/kO6QkND+Xf00VERkTBC\ntrGcJovjP+qfAv4ZqAZuBf4DOAWYDWS71eF4oN7d34y/40FgKrCktYK7b8moPwDwLv8LDoCGhvLv\nttuSjkCktIVsY7leNfQG8Cxws7vPzdj/qJmd2s5nxgCrM7YbgBPaVjKzLwFfAXoD/5jti8xsOjAd\n4JBDDskx5PapRyAi8p5c5wj+j7t/PjMJmNlEAHe/sjsBuPvt7j6OaMjp2nbqzHL3Wnevrays7M7P\nAZojCOGSS6IiImGEbGO5JoIfZ9n3k04+swaoytg+ON7XngeBc3KMp1uUCPKvoSEqIhJGyDbW4dCQ\nmZ0EfAyoNLOvZBwaDHR2b8E8oMbMxhIlgIuA/93m+2vc/Y1481NEQ1DBDR4cXfC0efPmnvg5EZGC\n1tkcQW9gYFxvUMb+LcB5HX3Q3ZvM7ArgKaKkcbe7LzazG4D57l4HXGFmk4E9wDvA5w7sn9E1I0eO\nBGD9+vU98XMiIgWtw0Tg7n8A/mBm97j7X7v65e4+m+iqosx912Wsf7mr35kPSgQiIu/pbGjoNne/\nCvipme13aae7TwkWWUBKBPl30klJRyBS2kK2sc6Ghu6Pl7eEC6HntSaCxsbGhCMpHd//ftIRiJS2\nkG2ss6GhBfHyD+FC6HnqEYiIvKezoaG/0MHdvu5+ZN4j6gGt9yIoEeTPZz4TLR97LNk4REpVyDbW\n2dDQWfn/yeQNHjyY3r17s337drZv3653EuTB3/+edAQipS1kG+tsaKjLVwoVAzNj5MiRNDQ00NjY\nqEQgIqnW4Z3FZvZ8vNxqZlvaLnsmxDA0YSwiEumsR3ByvBzUUb1ipAljEZFIzu8sNrNjgZOJJo+f\nd/eFwaLqAUoE+fXxjycdgUhpC9nGcn0fwXXA+cCv4l33mNkj7v6dYJEFpiuH8utf/iXpCERKW8g2\nlmuP4J+Ao9x9F4CZzQQWAUWbCNQjEBGJ5PoY6rVA34ztPnT8SOmCp8ni/DrzzKiISBgh21hnN5T9\nhGhOYDOw2MyeibdPB14OE1LPUI8gv/TWT5GwQraxzoaG5sfLBcCvM/
Y/FySaHqREICIS6ezy0Xt7\nKpCepkQgIhLJ9aqhGuD7wAQy5grc/YOB4gou86ohd8fMEo5IRCQZuV419AvgeuBHwD8A/0zuE80F\nqV+/fgwcOJBt27axZcsWhgwZknRIRe2sknwqlUjhCNnGck0E/dx9jplZ/Pyhb5vZAuC6zj5YyEaO\nHMm2bdtYv369EkE3fe1rSUcgUtpCtrFc/69+t5mVAW+Y2RVmdi7Ru4yLmuYJRERyTwRfBvoDVwLH\nAZ+lh140H5ISQf5MmhQVEQkjZBvLaWjI3ecBxL2CK919a5hwepYSgYhIjj0CM6uN31b2GvAXM3vV\nzI4LG1p4urtYRCT3yeK7gcvd/U8AZnYy0ZVERfmqylZ68JyISO5zBM2tSQDA3Z8HmsKE1HM0NCQi\n0vmzho6NV/9gZncC/0n0rKEL0WMmJMMFFyQdgUhpC9nGOhsaurXN9vUZ657nWHqcEkH+XH550hGI\nlLaQbayzZw39Q7ifTp4mi/Nnx45o2b9/snGIlKqQbSzXq4aGmNkPzWx+XG41s6K/FXfEiBEAbNiw\ngebm5oSjKW6f/GRURCSMkG0s18niu4GtwAVx2UJ01VBRq6ioYPjw4bS0tLBx48akwxERSUSuiWCc\nu1/v7m/G5V+Bon3yaCZdQioiaZdrItgZ3zsAgJlNBDp9X46ZnWFmy8ys3syuznL8K2a2xMxeM7M5\nZvaB3EPPD00Yi0ja5XpD2aXAfRnzAu/QybOGzKwcuJ3otZYNwDwzq3P3JRnVFgK17r7DzC4DbiK6\nNLXHaMJYRNKu00QQP19ovLsfZWaDAdx9Sw7ffTxQ7+5vxt/zIDAV2JsI3P3ZjPovApd0Ifa8UI8g\nP6ZNSzoCkdIWso11mgjcvcXMvgE8nGMCaDUGWJ2x3QCc0EH9zwNPZjtgZtOB6QCHHHJIF0Lo3KhR\nowBYu3ZtXr83bZQIRMIK2cZynSP4nZl9zcyqzGxYa8lXEGZ2CVAL3JztuLvPcvdad69tndzNl0MP\nPRSAN954I6/fmzYbNkRFRMII2cZynSO4kOhO4rb3tnV05dAaoCpj++B43z7MbDLwLeA0d9+dYzx5\nc9hhhwGwfPnynv7pknLeedHyuecSDUOkZIVsY7n2CCYQTfy+CiwCfgJ8uJPPzANqzGysmfUGLgLq\nMiuY2THAncAUd09kkL6mpgaIegQtLS1JhCAikqhcE8G9wIeAHxMlgQnxvna5exNwBfAUsJRojmGx\nmd1gZlPiajcTvfLyETNbZGZ17XxdMEOGDGHUqFHs3LmThoaGnv55EZHE5To0dIS7T8jYftbMlrRb\nO+bus4HZbfZdl7E+OcffD2r8+PGsW7eO5cuX530yWkSk0OXaI3jFzE5s3TCzE4D5YULqeZonEJE0\ny7VHcBww18z+Fm8fAiyLX1/p7l7UbyprTQTLli1LOJLiddllSUcgUtpCtrFcE8EZ4UJInnoE3Xdh\nj94PLpI+IdtYTonA3f8aLoTkKRF03+r41sGqqo7riciBCdnGcu0RlLRx48ZRVlbGqlWr2L17N336\n9Ek6pKLz2c9GS91HIBJGyDaW62RxSevduzdjx46lpaWFFStWJB2OiEiPUiKIaXhIRNJKiSCmRCAi\naaVEEBs/fjygS0hFJH00WRxTj6B7vvrVpCMQKW0h25gSQUyJoHvOPjvpCERKW8g2pqGh2JgxY+jX\nrx/r169n06ZNSYdTdJYti4qIhBGyjSkRxMrKyvY+klq9gq774hejIiJhhGxjSgQZWieMlQhEJE2U\nCDJonkBE0kiJIMOECdErFxYuXJhwJCIiPUeJIMNJJ50EwNy5c/XaShFJDV0+mqG6upqDDjqItWvX\nsmzZMj70oQ8lHVLRuPbapCMQKW0h25h6BBnMjIkTJwLw/PPPJxxNcZk8OSoiEkbINqZE0MbJJ58M\nwAsvvJBwJMVl0aKoiEgYIduYho
baaE0E6hF0zVVXRUu9j0AkjJBtTD2CNo488kgGDBjAihUrePvt\nt5MOR0QkOCWCNioqKvZePaThIRFJAyWCLDRPICJpokSQha4cEpE00WRxFieccALl5eUsXLiQ7du3\nM2DAgKRDKnjf+17SEYiUtpBtTD2CLAYNGsRRRx1FU1MTL7/8ctLhFIWPfSwqIhJGyDamRNAOzRN0\nzdy5URGRMEK2MSWCdrQmgjlz5iQcSXG45pqoiEgYIduYEkE7Tj/9dHr16sUf//hH1q1bl3Q4IiLB\nKBG0Y+jQoXziE5+gpaWFxx57LOlwRESCCZoIzOwMM1tmZvVmdnWW46ea2Stm1mRm54WM5UBceOGF\nADz00EMJRyIiEk6wRGBm5cDtwJnABOBiM5vQptrfgGnAA6Hi6I4pU6bQp08f/vSnP7F27dqkwxER\nCSJkj+B4oN7d33T3d4EHgamZFdx9lbu/BhTkW2AGDx7MmWeeibvzyCOPJB1OQbvttqiISBgh21jI\nRDAGWJ2x3RDv6zIzm25m881sfmNjY16Cy1Xr8NDDDz/co79bbI4+OioiEkbINlYUk8XuPsvda929\ntrKyskd/+6yzzqJfv37MnTuX1atXd/6BlPrd76IiImGEbGMhE8EaoCpj++B4X1EZOHAgn/rUpwD1\nCjryne9ERUTCCNnGQiaCeUCNmY01s97ARUBdwN8L5oILLgDg/vvvx90TjkZEJL+CJQJ3bwKuAJ4C\nlgIPu/tiM7vBzKYAmNlHzawBOB+408wWh4qnO84++2xGjhzJq6++yu80/iEiJSboHIG7z3b3w9x9\nnLt/N953nbvXxevz3P1gdx/g7sPd/cMh4zlQffv25ar4PXEzZ85MOBoRkfwqisniQnDZZZcxaNAg\nfv/73+uJpCJSUpQIcjR06FAuu+wyAH7wgx8kHE3hufPOqIhIGCHbmBXb5Gdtba3Pnz8/kd9+6623\nqK6uZs+ePSxZsoTDDz88kThERLrKzBa4e222Y+oRdMHo0aOZNm0a7s5NN92UdDgF5Te/iYqIhBGy\njalH0EX19fWMHz8egHnz5nHssccmFkshmTQpWj73XJJRiJSu7rYx9Qjy6NBDD+XKK6+kpaWF6dOn\n09TUlHRIIiLdokRwAG688UaqqqpYsGABt99+e9LhiIh0ixLBARg4cCA//elPAbj22mv1DCIRKWpK\nBAdoypQpfPrTn2bbtm1cfvnlevSEiBQtTRZ3w5o1a5gwYQJbtmxh5syZfPOb30w6pMS0doqqqjqu\nJyIHprttTJPFgYwZM4b77rsPgBkzZvDEE08kHFFyqqqUBERCCtnGlAi6aerUqdx44424OxdffDFL\nly5NOqREPPRQVEQkjJBtTIkgD771rW9x/vnns3XrVs4+++xUvt/4jjuiIiJhhGxjSgR5YGbcc889\nHHvssaxYsYJTTjmFVatWJR2WiEhOlAjypH///jz99NMcd9xxvPnmm5x66qksX7486bBERDqlRJBH\nw4cPZ86cOUycOJHVq1dz6qmn8sILLyQdlohIh5QI8mzIkCE89dRTTJ48mXXr1nHaaadx66236j4D\nESlYuo8gkD179nDNNddwyy23AHDOOecwa9YsKisrE44sjA0bouWIEcnGIVKqutvGdB9BAnr16sXN\nN9/Mr3/9a4YMGcLjjz/O4Ycfzl133UVLS0vS4eXdiBFKAiIhhWxjSgSBnXPOObzyyitMnjyZjRs3\n8oUvfIHTTjuNl156KenQ8uqee6IiImGEbGNKBD3ggx/8IE8//TQPPPAAo0aN4vnnn+fEE0/krLPO\nYsGCBUmHlxdKBCJhKRGUADPj4osv5vXXX2fGjBkMGDCAJ554gtraWiZPnszjjz+udxuISCKUCHrY\n0KFD+d73vsfKlSv5+te/Tv/+/ZkzZw7nnnsu48aN4/rrr9f9ByLSo5QIElJZWclNN93EmjVr+NGP
\nfkRNTQ1/+9vfuOGGGxg/fjy1tbXMnDmTxYsX69JTEQlKiSBhQ4cO5aqrruL111/nmWeeYdq0aQwa\nNIgFCxYwY8YMjjjiCMaNG8fll1/OI488QmNjY9Ihi0iJ0X0EBWjnzp08+eST1NXV8cQTT7Ch9QLi\n2IQJEzjppJM48cQT+ehHP8qECRPo1atXQtFGduyIlv37JxqGSMnqbhvr6D4CJYIC19zczLx585gz\nZw7PPvssL7zwArt27dqnTu/evTniiCM4+uijmTBhwt5SVVVFWZk6fSKiRFBSdu/ezcKFC3nxxRf5\n85//zIIFC1ixYkXWun369GHcuHHU1NQwduxYqqurqa6upqqqiqqqKkaMGIGZ5SWun/0sWl5+eV6+\nTkTa6G4bUyIocVu2bOHVV1/ltddeY+nSpSxZsoSlS5fy9ttvd/i5vn37Mnr0aA466CBGjx7N+9//\nfkaOHMmoUaOorKyksrKSESNGMHz4cN73vvdRUVHR7ndNmhQtn3suf/8uEXlPd9tYR4mg/ZYtRWPw\n4MGccsopnHLKKfvs37p1K/X19dTX17Nq1SpWrlzJqlWrWL16NQ0NDWzatImVK1eycuXKnH5nyJAh\nDBs2jKFDh+4tgwcPZsiQIaxcOYSKisHceecgBg2KysCBAxk4cCADBgzYp/Tt2zdvPRER6b6gicDM\nzgD+DSgHfu7uM9sc7wPcBxwH/B240N1XhYwpTQYNGsQxxxzDMccck/X4tm3beOutt1i7di1r165l\n3bp1e8uGDRvYsGEDjY2NbNy4kXfeeYfNmzezefPmDn/z0ktzi61///7069cva+nbty99+/alT58+\n+y179+69d5mt9OrVa++yvVJRUbF3ma2Ul5dTUVFBWVmZEpakQrBEYGblwO3A6UADMM/M6tx9SUa1\nzwPvuPuhZnYR8APgwlAxyb4GDhxITU0NNTU1ndZtaWlh06ZNbNq0iXfeeWfv+ubNm9m0aRM//vEW\nmpu3cuaZW9myZQvbt29n27ZtbN26le3bt+/d3rlzJ7t27WLHjh3saL0MooC1JoXy8vL91svLyykr\nK9tnu+2+1vVsy7brZrZ3vaPSWi+zftt9HS3brrctB3osswA518ms297nstVp73Nt19vWb++7Otvf\n0ffmWqe9z+SyvmmT0bv3+4Hx5FvIHsHxQL27vwlgZg8CU4HMRDAV+Ha8/ijwUzMzL7aJixQoKytj\n2LBhDBs2LOvxurpoOWtW59/V3NzMzp0795YdO3awa9euvdu7du1i9+7d7Ny5k927d+9X3n333b3L\n1vU9e/awZ8+efdbblqampn3WW7ebm5v3W29qasLdaW5uprm5OY9nUuTAjRo1DfhF3r83ZCIYA6zO\n2G4ATmivjrs3mdlmYDiwz4XzZjYdmA5wyCGHhIpXuqErE1jl5eV75w8KWUtLy95E0NTUlHW9ubl5\nn3qZ25n7M9czE4y77z3m7vttt7feut26zLbeXp3M/dlKR8dyOQ50eDyzTmbd9j6XrU57n2u73rZ+\ne9/V2f6OvjfXOu19Jpf11uUZZxxGCEUxWezus4BZEF01lHA4khKtwy5J36wnElrIu43WAFUZ2wfH\n+7LWMbMKYAjRpLGIiPSQkIlgHlBjZmPNrDdwEVDXpk4d8Ll4/Tzg95ofEBHpWcGGhuIx/yuAp4gu\nH73b3Reb2Q3AfHevA+4C7jezemAjUbIQEZEeFHSOwN1nA7Pb7LsuY30XcH7IGEREpGN6IpmISMop\nEYiIpJwSgYhIyikRiIikXNE9htrMGoG/duEjI2hzp3IRUMw9QzH3jGKMGYoz7o5i/oC7V2Y7UHSJ\noKvMbH57z+AuVIq5ZyjmnlGMMUNxxn2gMWtoSEQk5ZQIRERSLg2JIIcHIxccxdwzFHPPKMaYoTjj\nPqCYS36OQEREOpaGHoGIiHRAiUBEJOVKOhGY2RlmtszM6s3s
6qTjyYWZrTKzv5jZIjObn3Q82ZjZ\n3Wa23sz+J2PfMDN7xszeiJfvSzLGttqJ+dtmtiY+14vM7JNJxtiWmVWZ2bNmtsTMFpvZl+P9BXuu\nO4i5YM+1mfU1s5fN7NU45n+N9481s5fivx8PxY/TLwgdxHyPma3MOM9H5/R9pTpHYGblwHLgdKLX\nZM4DLnb3JR1+MGFmtgqodfeCvZHFzE4FtgH3ufsR8b6bgI3uPjNOuu9z928mGWemdmL+NrDN3W9J\nMrb2mNloYLS7v2Jmg4AFwDnANAr0XHcQ8wUU6Lm26O3wA9x9m5n1Ap4Hvgx8BfiVuz9oZv8PeNXd\n70gy1lYdxHwp8Ft3f7Qr31fKPYLjgXp3f9Pd3wUeBKYmHFNJcPc/Er0/ItNU4N54/V6ixl8w2om5\noLn7W+7+Sry+FVhK9J7vgj3XHcRcsDyyLd7sFRcH/hFo/YNaaOe5vZgPSCkngjHA6oztBgr8P8iY\nA0+b2QIzm550MF0wyt3fitffBkYlGUwXXGFmr8VDRwUzxNKWmVUDxwAvUSTnuk3MUMDn2szKzWwR\nsB54BlgBbHL3prhKwf39aBuzu7ee5+/G5/lHZtYnl+8q5URQrE5292OBM4EvxUMaRSV+3WgxjDne\nAYwDjgbeAm5NNpzszGwg8BhwlbtvyTxWqOc6S8wFfa7dvdndjyZ6t/rxwOEJh9SptjGb2RHADKLY\nPwoMA3IaMizlRLAGqMrYPjjeV9DcfU28XA/8mug/ymKwLh4fbh0nXp9wPJ1y93VxY2oB/p0CPNfx\n+O9jwH+4+6/i3QV9rrPFXAznGsDdNwHPAicBQ82s9S2OBfv3IyPmM+KhOXf33cAvyPE8l3IimAfU\nxDP/vYneh1yXcEwdMrMB8QQbZjYA+F/A/3T8qYJRB3wuXv8c8F8JxpKT1j+msXMpsHMdTwjeBSx1\n9x9mHCrYc91ezIV8rs2s0syGxuv9iC4wWUr0x/W8uFqhnedsMb+e8T8IRjSnkdN5LtmrhgDiS9Ru\nA8qBu939uwmH1CEz+yBRLwCi90k/UIgxm9l/ApOIHnm7DrgeeBx4GDiE6DHhF7h7wUzOthPzJKKh\nCgdWAV/MGHtPnJmdDPwJ+AvQEu++hmjMvSDPdQcxX0yBnmszO5JoMric6H+OH3b3G+L2+CDREMtC\n4JL4/7QT10HMvwcqAQMWAZdmTCq3/32lnAhERKRzpTw0JCIiOVAiEBFJOSUCEZGUUyIQEUk5JQIR\nkZRTIhARSTklAhGRlFMiEImZ2RfN7K2MZ7n/spvf1+mNPCKFoKLzKiKp8RHgWne/K+lARHqSegQi\n7zmS6Lb8/ZjZTDP7Usb2t83sa/H64/Fjwxdne3S4mVXbvm9G+1r8UpzW7Uvit00tMrM745cqifQY\nJQKR93wY+EX8B/l3bY49RPSWrVYXxPsA/q+7HwfUAlea2fBcf9DMPgRcCEyMHyncDPzTgf4DRA6E\nhoZEiN61C7zt7kdmO+7uCyS1SMoAAAELSURBVM1spJkdRPRQr3fcvfXFR1ea2bnxehVQA/w9x5/+\nOHAcMC96YCT9KLDHSkvpUyIQiXwEWNxJnUeIHkv8fuLegJlNAiYDJ7n7DjN7Dujb5nNN7Nv7zjxu\nwL3uPuOAIxfpJg0NiUSOpPNE8BDRey3OI0oKAEOIegc7zOxw4MQsn1sHjDSz4fGrA8/KODYHOM/M\nRgKY2TAz+0A3/h0iXaZEIBL5CLCkowruvhgYBKzJeJb+fwMVZrYUmAm8mOVze4AbgJeJ3of7esax\nJcC1RO+pfi0+Prrtd4iEpPcRiIiknHoEIiIpp0QgIpJySgQiIimnRCAiknJKBCIiKadEICKSckoE\nIiIp9/8B5CRCQKC+O6oAAAAASUVORK5CYII=\n","text/plain":["
"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"rk3eUKQmBa6t","colab_type":"code","colab":{}},"source":[""],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /notebooks/4.4-deap_d_optimal_design.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"4.4-deap_d_optimal_design.ipynb のコピー","provenance":[{"file_id":"https://gist.github.com/sshojiro/1806ea69ce0b190a38a516bc050d36a9#file-4-4-deap_d_optimal_design-ipynb","timestamp":1587451718335}],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"21oYBLXofBGZ","colab_type":"text"},"source":["# 遺伝的アルゴリズムによるD最適化計画法\n","\n","`deap`という遺伝的アルゴリズムのパッケージを利用する。\n","\n","D最適化基準はデータ行列$X$に対して行列式$|X^TX|$で与えられる。\n","\n","触媒データを利用してサンプル選定を行う。\n"]},{"cell_type":"code","metadata":{"id":"pbZ_ueuCfAFQ","colab_type":"code","outputId":"601b0d83-a942-469a-c63c-c89cf8adb391","colab":{"base_uri":"https://localhost:8080/","height":233}},"source":["!wget https://raw.githubusercontent.com/funatsu-lab/support-page/master/data/catalyst/journal_data.csv\n","!ls journal_data.csv"],"execution_count":0,"outputs":[{"output_type":"stream","text":["--2020-01-11 13:24:44-- https://raw.githubusercontent.com/funatsu-lab/support-page/master/data/catalyst/journal_data.csv\n","Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n","Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 5502 (5.4K) [text/plain]\n","Saving to: ‘journal_data.csv’\n","\n","journal_data.csv 100%[===================>] 5.37K --.-KB/s in 0s \n","\n","2020-01-11 13:24:49 (81.6 MB/s) - ‘journal_data.csv’ saved [5502/5502]\n","\n","journal_data.csv\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"9tpOfCCFgDBs","colab_type":"code","outputId":"f6c9f057-1397-4125-8f53-0e39d1a4667b","colab":{"base_uri":"https://localhost:8080/","height":145}},"source":["# deapのインストール \n","!pip install deap"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Collecting deap\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/81/98/3166fb5cfa47bf516e73575a1515734fe3ce05292160db403ae542626b32/deap-1.3.0-cp36-cp36m-manylinux2010_x86_64.whl (151kB)\n","\r\u001b[K |██▏ | 10kB 20.5MB/s eta 0:00:01\r\u001b[K |████▎ | 20kB 7.3MB/s eta 0:00:01\r\u001b[K |██████▌ | 30kB 8.4MB/s eta 0:00:01\r\u001b[K |████████▋ | 40kB 6.5MB/s eta 0:00:01\r\u001b[K |██████████▉ | 51kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████ | 61kB 7.9MB/s eta 0:00:01\r\u001b[K |███████████████ | 71kB 8.8MB/s eta 0:00:01\r\u001b[K |█████████████████▎ | 81kB 8.3MB/s eta 0:00:01\r\u001b[K |███████████████████▍ | 92kB 9.2MB/s eta 0:00:01\r\u001b[K |█████████████████████▋ | 102kB 9.5MB/s eta 0:00:01\r\u001b[K |███████████████████████▊ | 112kB 9.5MB/s eta 0:00:01\r\u001b[K |█████████████████████████▉ | 122kB 9.5MB/s eta 0:00:01\r\u001b[K |████████████████████████████ | 133kB 9.5MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▏ | 143kB 9.5MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 153kB 9.5MB/s \n","\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from deap) (1.17.5)\n","Installing collected packages: deap\n","Successfully installed deap-1.3.0\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"udlussIVfKgP","colab_type":"code","colab":{}},"source":["from pandas import read_csv\n","from pandas import 
DataFrame\n","from sklearn.preprocessing import scale\n","from sklearn.model_selection import train_test_split\n","from random import randint\n","from deap import creator, base, tools, algorithms\n","from numpy import log as np_log, where as np_where, array as np_array, arange as np_arange, exp as np_exp, unique as np_uniq, power as np_pow\n","from numpy.linalg import det as np_det\n","from numpy.random import permutation as np_perm\n","from tqdm import tqdm\n","from pprint import pprint "],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"dtd7pDhAfNBW","colab_type":"code","colab":{}},"source":["df = read_csv('./journal_data.csv', header=0, index_col=0)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"ATUFlglQfZH5","colab_type":"code","outputId":"4dc8921b-d5ec-42b3-a8f1-10e5d2cfa647","colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["df_train, df_test = train_test_split(df, test_size=.33, random_state=66)\n","print(df_train.shape, df_test.shape)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["(50, 23) (25, 23)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"wXhgWaahvwVp","colab_type":"code","colab":{}},"source":["INPUT=df_train.columns[:-3].tolist()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"dpNxbzZVuYIN","colab_type":"code","outputId":"19eeec62-380c-4b71-e6da-d88a8d24f87b","colab":{"base_uri":"https://localhost:8080/","height":53}},"source":["Xtrain = scale(df_train[INPUT])\n","print('determinant', np_det(Xtrain.T @ Xtrain))\n","print('scaled determinant', np_det(Xtrain.T @ Xtrain)**(1/len(INPUT)))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["determinant 1.0109311742906108e+27\n","scaled determinant 22.399384226277576\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"OC7n49ZmfkzB","colab_type":"text"},"source":["## トレーニングデータからD最適化計画を取得する\n","\n","50個から17サンプルを取り出す事を考える。
\n","すると${}_{50}C_{17}$ 通りも調べないといけなくなり、効率が悪い。
\n","そこで、最適化手法の1つである遺伝的アルゴリズムを利用する。"]},{"cell_type":"code","metadata":{"id":"ODAAhenogGil","colab_type":"code","colab":{}},"source":["def d_criterion(X):\n"," return np_log(np_pow(np_det(X.T @ X), (1/X.shape[1])) )"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"iu6qEIv6hFBC","colab_type":"code","outputId":"60dd9ac1-07f5-4a1d-8c5f-b5510be5753c","colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["d_criterion(Xtrain)"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["3.1090334685848173"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"markdown","metadata":{"id":"zNo8xo3mjSwC","colab_type":"text"},"source":["## Deapを用いた実装\n","[公式サンプルコード](https://github.com/DEAP/deap)"]},{"cell_type":"code","metadata":{"id":"0Y52w6SlxP1a","colab_type":"code","outputId":"6d3f7e2f-94a8-4c05-dbf3-23f46d200e48","colab":{"base_uri":"https://localhost:8080/","height":53}},"source":["def perm(n_max, n_out):\n"," return np_perm(np_arange(n_max)).tolist()[:n_out]\n","print(len(perm(50, 17)))\n","print(perm(50, 17))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["17\n","[24, 16, 20, 18, 37, 45, 2, 31, 25, 22, 38, 9, 44, 6, 19, 27, 7]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"EvPylurohoeg","colab_type":"code","colab":{}},"source":["n_pop = 300\n","n_dim = 20\n","n_gen= 100\n","n_samples=Xtrain.shape[0]"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"kN-7OyE-mc4I","colab_type":"code","colab":{}},"source":["def d_opt01(individual):\n"," \"Deap evaluation function. 
\"\n"," if sum(individual)!=n_dim:\n"," return -9999.99,\n"," x_in = np_array(individual)\n"," x_sc = Xtrain[np_where(x_in==1)[0], :]\n"," return d_criterion(x_sc),"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"45CKGDsWhhe0","colab_type":"code","colab":{}},"source":["creator.create(\"FitnessMax\", base.Fitness, weights=(1.0,))\n","creator.create(\"Individual\", list, fitness=creator.FitnessMax)\n","\n","toolbox = base.Toolbox()\n","toolbox.register(\"attr_index\", randint, 0, 1)\n","toolbox.register(\"individual\", tools.initRepeat, creator.Individual, \n"," toolbox.attr_index, n=n_samples)\n","toolbox.register(\"population\", tools.initRepeat, \n"," list, toolbox.individual)\n","toolbox.register(\"evaluate\", d_opt01)\n","toolbox.register(\"mate\", tools.cxTwoPoint)\n","toolbox.register(\"mutate\", tools.mutFlipBit, indpb=0.05)\n","toolbox.register(\"select\", tools.selTournament, tournsize=3)\n","population = toolbox.population(n=n_pop)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"wVRBVnE8hqKF","colab_type":"code","outputId":"07f2cafa-5f9c-4d4f-9157-99531274bb3b","colab":{"base_uri":"https://localhost:8080/","height":71}},"source":["for gen in tqdm(range(n_gen)):\n"," offspring = algorithms.varAnd(population, toolbox, cxpb=0.5, mutpb=0.1)\n"," fits = toolbox.map(toolbox.evaluate, offspring)\n"," for fit, ind in zip(fits, offspring):\n"," ind.fitness.values = fit\n"," population = toolbox.select(offspring, k=len(population))\n","top10 = tools.selBest(population, k=10)"],"execution_count":0,"outputs":[{"output_type":"stream","text":[" 0%| | 0/100 [00:00\n","全て揃っていることが分かり、最適化に成功している。"]},{"cell_type":"code","metadata":{"id":"Ixvgu_IbyTea","colab_type":"code","outputId":"d52dfac7-5a6d-4a76-fd6b-e78d070341ac","colab":{"base_uri":"https://localhost:8080/","height":197}},"source":["for top in top10:\n"," print(top)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 
0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n","[0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"f7uE3QYOd2x4","colab_type":"text"},"source":["**D最適化基準を標準化する場合**\n","\n","$d$次元のデータ行列$X$に対して以下のように定めることもある。\n","\n","\\begin{equation}\n","D = \\det|X^TX|^{1/d}\n","\\end{equation}\n","\n"]},{"cell_type":"code","metadata":{"id":"IMdwR00FMVpy","colab_type":"code","outputId":"09e72fd5-8c47-4b3c-d01f-bad056377897","colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["xin = np_array(top10[0])\n","mat_ = 
Xtrain[np_where(xin==1)[0], :]\n","print('D criterion', np_pow(np_det(mat_.T@mat_), 1./mat_.shape[1]))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["D criterion 8.602635504346852\n"],"name":"stdout"}]}]} -------------------------------------------------------------------------------- /notebooks/8.1.2-structure-generation-brics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# BRICSを使った構造生成" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "df = pd.read_csv('../data/delaney-solubility/delaney-processed.csv')" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "2019.09.3\n" 30 | ] 31 | }, 32 | { 33 | "name": "stderr", 34 | "output_type": "stream", 35 | "text": [ 36 | "RDKit WARNING: [20:33:06] Enabling RDKit 2019.09.3 jupyter extensions\n" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "from rdkit.Chem import MolFromSmiles\n", 42 | "from rdkit import Chem \n", 43 | "import rdkit \n", 44 | "print(rdkit.__version__)\n", 45 | "df['mol'] = df['smiles'].apply(MolFromSmiles)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 6, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "from rdkit.Chem import BRICS, Recap" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 9, 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "name": "stdout", 64 | "output_type": "stream", 65 | "text": [ 66 | "1個目の分子からBRICSにより見つかるフラグメントの数 6\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "print('1個目の分子からBRICSにより見つかるフラグメントの数', len(BRICS.BRICSDecompose(df['mol'][0])))" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 
5, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "image/png": "\n", 82 | "text/plain": [ 83 | "" 84 | ] 85 | }, 86 | "execution_count": 5, 87 | "metadata": {}, 88 | "output_type": "execute_result" 89 | } 90 | ], 91 | "source": [ 92 | "df['mol'][0]" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 10, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "name": "stdout", 102 | "output_type": "stream", 103 | "text": [ 104 | "1個目の分子からRECAPにより再帰的に見つかるフラグメントの数 10\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "def recursive_retreive(list_fragments, leaf):\n", 110 | " if leaf.children:\n", 111 | " for leaf in leaf.children.values():\n", 112 | " list_fragments += leaf.smiles\n", 113 | " recursive_retreive(list_fragments, leaf)\n", 114 | "decomp = Recap.RecapDecompose(df['mol'][0])\n", 115 | "lst_frg = []\n", 116 | "recursive_retreive(lst_frg, decomp)\n", 117 | "print('1個目の分子からRECAPにより再帰的に見つかるフラグメントの数', len(set(lst_frg)))" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## BRICSを使ったフラグメントライブラリ作成" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 18, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "fragments = [list(BRICS.BRICSDecompose(mol)) for mol in df['mol']]" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 22, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | "BRICSが取り出した全フラグメント 1028\n" 146 | ] 147 | } 148 | ], 149 | "source": [ 150 | "def unwrap(list_data):\n", 151 | " list_output = []\n", 152 | " for li in list_data: \n", 153 | " list_output += li\n", 154 | " return list_output \n", 155 | "fr_all = unwrap(fragments)\n", 156 | "print('BRICSが取り出した全フラグメント', len(set(fr_all)))" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 58, 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "name": 
"stdout", 166 | "output_type": "stream", 167 | "text": [ 168 | "2\n" 169 | ] 170 | } 171 | ], 172 | "source": [ 173 | "import random \n", 174 | "random.seed(42)\n", 175 | "list_fragments = [MolFromSmiles(smi)for smi in fr_all]\n", 176 | "random.shuffle(list_fragments,\n", 177 | " random=random.random)\n", 178 | "seed_structures = list_fragments[:2]\n", 179 | "print(len(seed_structures))" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 60, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stderr", 189 | "output_type": "stream", 190 | "text": [ 191 | "\n", 192 | "\n", 193 | " 0%| | 0/2 [00:00 15 | from cheminfo.metrics import t2_score 16 | ModuleNotFoundError: No module named 'cheminfo' 17 | ``` 18 | """ 19 | import sys 20 | sys.path.append('./') 21 | from cheminfo.metrics import t2_score 22 | 23 | if __name__=='__main__': 24 | print(t2_score.__name__) 25 | -------------------------------------------------------------------------------- /src/from_src.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pythonパスシステムの理解。 3 | このファイルが存在するフォルダ src/ で 4 | 実行すると実行できて、プロジェクトルートへ移動すると実行できない。 5 | 6 | ```bash 7 | $ pwd 8 | ./support-page/src 9 | $ python from_src.py 10 | t2_score 11 | $ cd ../ 12 | $ python src/from_src.py 13 | Traceback (most recent call last): 14 | File "src/from_src.py", line 13, in 15 | from cheminfo.metrics import t2_score 16 | ModuleNotFoundError: No module named 'cheminfo' 17 | ``` 18 | """ 19 | import sys 20 | sys.path.append('../') 21 | from cheminfo.metrics import t2_score 22 | 23 | if __name__=='__main__': 24 | print(t2_score.__name__) 25 | -------------------------------------------------------------------------------- /src/models/9.3-screening.py: -------------------------------------------------------------------------------- 1 | from joblib import load as jl_load 2 | import numpy as np 3 | from rdkit.Chem import MolFromSmiles 4 | 5 | from cheminfo.descriptors 
import RDKitDescriptor  # tail of the `from cheminfo.descriptors ...` statement begun on the previous line
from cheminfo.metrics import t2_score, q_value
from multiprocessing import Pool, cpu_count


def _as_scalar(value):
    """Return *value* as a Python scalar.

    Accepts a plain number or a one-element array of any rank, so the
    result can be formatted with ``%f`` without relying on NumPy's implicit
    array-to-scalar conversion.  NumPy raises ``ValueError`` when *value*
    holds more than one element.
    """
    return np.asarray(value).item()


def processor(argv):
    """Score one SMILES string; written for ``multiprocessing.Pool.imap``.

    Args:
        argv: ``(smiles, model)`` pair.  ``smiles`` is one raw line of the
            input file (surrounding whitespace is stripped); ``model`` is
            the object loaded by the caller, used through ``model.predict``
            and handed on to ``t2_score`` and ``q_value``.

    Returns:
        The string ``"<smiles> <prediction> <t2_score> <q_value>"`` with the
        three numbers printed to eight decimals.

    Raises:
        ValueError: if the SMILES string cannot be parsed.
    """
    smiles, model = argv
    smi = smiles.strip()
    mol = MolFromSmiles(smi)
    if mol is None:
        # A parse failure comes back as None; report the offending input
        # instead of failing later with an opaque AttributeError.
        raise ValueError('invalid SMILES: %r' % smi)
    mol.UpdatePropertyCache(strict=True)
    xnew = np.array(RDKitDescriptor().transform([mol]))
    ypred = model.predict(xnew)
    t2 = t2_score(xnew, model)
    q = q_value(xnew, model)
    return '%s %.8f %.8f %.8f' % (
        smi, _as_scalar(ypred), _as_scalar(t2), _as_scalar(q))


def count_lines(filename):
    """Return the number of lines in the text file *filename*."""
    with open(filename, 'r') as f:
        return sum(1 for _ in f)


def main(argv):
    """Load a model and score every SMILES line of a file in parallel.

    Args:
        argv: ``[script, MODEL_FILE.joblib, SMILES.smi]``, as in ``sys.argv``.

    One result line per input line is written next to the input file: a
    trailing ``.smi`` extension is replaced by ``.out``; for any other name
    ``.out`` is appended, so the input file is never overwritten.

    Raises:
        SystemExit: with the usage text when the argument count is wrong.
    """
    import os
    from itertools import repeat

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if len(argv) != 3:
        raise SystemExit(
            "SYNTAX: python src/models/9.3-screening.py "
            "MODEL_FILE.joblib SMILES.smi")
    modelfile, smilesfile = argv[1:]

    root, ext = os.path.splitext(smilesfile)
    if ext == '.smi':
        outpath = root + '.out'
    else:
        outpath = smilesfile + '.out'

    # NOTE: joblib.load unpickles its input; only load trusted model files.
    model = jl_load(modelfile)
    cs = 1000
    with open(smilesfile, 'r') as f, \
            open(outpath, 'w') as outfile, \
            Pool(cpu_count()) as pool:
        # Parallel section: zip() stops when the file is exhausted, so an
        # endless repeat(model) pairs the model with every line without a
        # separate line-counting pass.
        for ret in pool.imap(processor, zip(f, repeat(model)), chunksize=cs):
            outfile.write(ret + '\n')


if __name__ == '__main__':
    import sys
    main(sys.argv)
# --------------------------------------------------------------------------------
# /src/parallel.py:
# --------------------------------------------------------------------------------
from multiprocessing import Pool


def f(x):
    """Return the square of *x*."""
    return x*x


if __name__=='__main__':
    # The with-block releases the worker pool automatically on exit.
    with Pool(processes=4) as pool:
        print(pool.map(f, range(10)))
        for i in pool.imap(f, range(10)):
            print(i)
# --------------------------------------------------------------------------------
# /src/parallel_wo_with.py:
# --------------------------------------------------------------------------------
from multiprocessing import Pool


def f(x):
    """Return the square of *x*."""
    return x*x


if __name__=='__main__':
    # Manual pool management (the variant without a `with` block): the
    # try/finally guarantees the workers are released even if a task raises.
    pool = Pool(processes=4)
    try:
        print(pool.map(f, range(10)))
        for i in pool.imap(f, range(10)):
            print(i)
    finally:
        pool.close()  # no more tasks will be submitted
        pool.join()   # wait for the worker processes to exit
# --------------------------------------------------------------------------------