├── Goal.jpg ├── NeuronMotif ├── idx.sh ├── decouple.sh ├── sampling.sh ├── redict_redict.sh ├── ref_dict.sh ├── tree.sh ├── dict_dict.sh ├── run.layer.sh ├── visualize.sh ├── script.sh ├── build_segment.py ├── decouple.py ├── merge.py ├── chen2html.py └── segment.py ├── demos ├── demo1 │ ├── idx.sh │ ├── weight.hdf5 │ ├── decouple.sh │ ├── script.sh │ ├── sampling.sh │ ├── redict_redict.sh │ ├── ref_dict.sh │ ├── tree.sh │ ├── dict_dict.sh │ ├── run.layer.sh │ ├── visualize.sh │ ├── build_segment.py │ ├── decouple.py │ ├── README.md │ ├── merge.py │ ├── modeldef.py │ ├── chen2html.py │ └── segment.py ├── demo2 │ ├── idx.sh │ ├── weight.hdf5 │ ├── decouple.sh │ ├── sampling.sh │ ├── redict_redict.sh │ ├── ref_dict.sh │ ├── script.sh │ ├── tree.sh │ ├── dict_dict.sh │ ├── run.layer.sh │ ├── visualize.sh │ ├── build_segment.py │ ├── decouple.py │ ├── merge.py │ ├── chen2html.py │ ├── README.md │ └── segment.py ├── Basset │ ├── BD-10 │ │ ├── idx.sh │ │ ├── decouple.sh │ │ ├── sampling.sh │ │ ├── redict_redict.sh │ │ ├── ref_dict.sh │ │ ├── tree.sh │ │ ├── script.sh │ │ ├── dict_dict.sh │ │ ├── run.layer.sh │ │ ├── visualize.sh │ │ ├── build_segment.py │ │ ├── decouple.py │ │ ├── merge.py │ │ ├── chen2html.py │ │ ├── README.md │ │ └── segment.py │ └── Basset │ │ ├── idx.sh │ │ ├── decouple.sh │ │ ├── script.sh │ │ ├── sampling.sh │ │ ├── redict_redict.sh │ │ ├── ref_dict.sh │ │ ├── tree.sh │ │ ├── dict_dict.sh │ │ ├── run.layer.sh │ │ ├── visualize.sh │ │ ├── build_segment.py │ │ ├── decouple.py │ │ ├── merge.py │ │ ├── README.md │ │ ├── chen2html.py │ │ └── segment.py ├── DeepSEA │ ├── DD-10 │ │ ├── idx.sh │ │ ├── decouple.sh │ │ ├── sampling.sh │ │ ├── redict_redict.sh │ │ ├── ref_dict.sh │ │ ├── tree.sh │ │ ├── script.sh │ │ ├── dict_dict.sh │ │ ├── run.layer.sh │ │ ├── visualize.sh │ │ ├── build_segment.py │ │ ├── decouple.py │ │ ├── merge.py │ │ ├── README.md │ │ ├── chen2html.py │ │ └── segment.py │ └── DeepSEA │ │ ├── idx.sh │ │ ├── decouple.sh │ │ ├── script.sh │ │ ├── sampling.sh │ │ ├── redict_redict.sh │ │ ├── ref_dict.sh │ │ ├── tree.sh │ │ ├── dict_dict.sh │ │ ├── run.layer.sh │ │ ├── visualize.sh │ │ ├── build_segment.py │ │ ├── decouple.py │ │ ├── merge.py │ │ ├── README.md │ │ ├── chen2html.py │ │ └── segment.py └── README.md ├── dcnn ├── demo │ ├── demo1 │ │ ├── PWM0 │ │ ├── PWM1 │ │ └── simu.py │ └── demo2 │ │ ├── PWM │ │ └── simu.py ├── README.md ├── DeepSEA │ ├── data │ │ └── README.md │ ├── DD-10 │ │ ├── data │ │ │ └── README.md │ │ └── deepsea.pred.py │ └── DeepSEA │ │ ├── data │ │ └── README.md │ │ ├── deepsea.pred.py │ │ └── deepsea.py └── Basset │ ├── data │ └── README.md │ ├── BD-10 │ ├── data │ │ └── README.md │ └── BD-10.pred.py │ ├── BD-5 │ ├── data │ │ └── README.md │ └── BD-5.pred.py │ ├── Basset │ ├── data │ │ └── README.md │ └── basset.pred.py │ └── Basenji │ ├── data │ └── README.md │ └── basenji.pred.py ├── .gitignore └── LICENSE /Goal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XWangLabTHU/NeuronMotif/HEAD/Goal.jpg -------------------------------------------------------------------------------- /NeuronMotif/idx.sh: -------------------------------------------------------------------------------- 1 | ct=0 2 | let ct=${1}-1 3 | for i in `seq 0 ${ct}` 4 | do 5 | echo $i 6 | done 7 | -------------------------------------------------------------------------------- /demos/demo1/idx.sh: -------------------------------------------------------------------------------- 1 | ct=0 2 | let 
ct=${1}-1 3 | for i in `seq 0 ${ct}` 4 | do 5 | echo $i 6 | done 7 | -------------------------------------------------------------------------------- /demos/demo1/weight.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XWangLabTHU/NeuronMotif/HEAD/demos/demo1/weight.hdf5 -------------------------------------------------------------------------------- /demos/demo2/idx.sh: -------------------------------------------------------------------------------- 1 | ct=0 2 | let ct=${1}-1 3 | for i in `seq 0 ${ct}` 4 | do 5 | echo $i 6 | done 7 | -------------------------------------------------------------------------------- /demos/demo2/weight.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XWangLabTHU/NeuronMotif/HEAD/demos/demo2/weight.hdf5 -------------------------------------------------------------------------------- /demos/Basset/BD-10/idx.sh: -------------------------------------------------------------------------------- 1 | ct=0 2 | let ct=${1}-1 3 | for i in `seq 0 ${ct}` 4 | do 5 | echo $i 6 | done 7 | -------------------------------------------------------------------------------- /demos/Basset/Basset/idx.sh: -------------------------------------------------------------------------------- 1 | ct=0 2 | let ct=${1}-1 3 | for i in `seq 0 ${ct}` 4 | do 5 | echo $i 6 | done 7 | -------------------------------------------------------------------------------- /demos/DeepSEA/DD-10/idx.sh: -------------------------------------------------------------------------------- 1 | ct=0 2 | let ct=${1}-1 3 | for i in `seq 0 ${ct}` 4 | do 5 | echo $i 6 | done 7 | -------------------------------------------------------------------------------- /demos/DeepSEA/DeepSEA/idx.sh: -------------------------------------------------------------------------------- 1 | ct=0 2 | let ct=${1}-1 3 | for i in `seq 0 ${ct}` 4 | do 5 | echo $i 6 | done 7 | -------------------------------------------------------------------------------- /NeuronMotif/decouple.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/lock${3} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/lock${3} 5 | python kmeans.py $1 $3 $2 6 | fi 7 | -------------------------------------------------------------------------------- /demos/demo1/decouple.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/lock${3} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/lock${3} 5 | python kmeans.py $1 $3 $2 6 | fi 7 | -------------------------------------------------------------------------------- /demos/demo2/decouple.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/lock${3} 3 | if [[ ! 
-e $lk ]]; then 4 | mkdir layer${1}/lock${3} 5 | python kmeans.py $1 $3 $2 6 | fi 7 | -------------------------------------------------------------------------------- /dcnn/demo/demo1/PWM0: -------------------------------------------------------------------------------- 1 | 3211 4685 114 19905 32 193 4 441 2 | 7998 5550 19195 0 19878 19257 501 178 3 | 5228 5179 158 223 97 649 355 19381 4 | 3692 4715 662 1 122 30 19269 129 5 | -------------------------------------------------------------------------------- /dcnn/demo/demo1/PWM1: -------------------------------------------------------------------------------- 1 | 4685 114 19905 32 193 4 441 1982 2 | 5550 19195 0 19878 19257 501 178 9561 3 | 5179 158 223 97 649 355 19381 5178 4 | 4715 662 1 122 30 19269 129 3408 5 | -------------------------------------------------------------------------------- /demos/Basset/BD-10/decouple.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/lock${3} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/lock${3} 5 | python kmeans.py $1 $3 $2 6 | fi 7 | -------------------------------------------------------------------------------- /demos/Basset/Basset/decouple.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/lock${3} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/lock${3} 5 | python kmeans.py $1 $3 $2 6 | fi 7 | -------------------------------------------------------------------------------- /demos/DeepSEA/DD-10/decouple.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/lock${3} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/lock${3} 5 | python kmeans.py $1 $3 $2 6 | fi 7 | -------------------------------------------------------------------------------- /demos/DeepSEA/DeepSEA/decouple.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/lock${3} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/lock${3} 5 | python kmeans.py $1 $3 $2 6 | fi 7 | -------------------------------------------------------------------------------- /demos/demo1/script.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # bash run.layer.sh &> .layer.log 5 | 6 | 7 | bash run.layer.sh 1 3 3 &> .layer1.log 8 | bash run.layer.sh 2 1 1 &> .layer2.log 9 | -------------------------------------------------------------------------------- /demos/Basset/Basset/script.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # bash run.layer.sh 5 | 6 | 7 | bash run.layer.sh 1 300 20 8 | bash run.layer.sh 2 200 20 9 | bash run.layer.sh 3 200 15 10 | -------------------------------------------------------------------------------- /demos/DeepSEA/DeepSEA/script.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # bash run.layer.sh 5 | 6 | 7 | bash run.layer.sh 1 320 20 8 | bash run.layer.sh 2 480 20 9 | bash run.layer.sh 3 960 10 10 | -------------------------------------------------------------------------------- /NeuronMotif/sampling.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/samplinglock${2} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/samplinglock${2} 5 | sp=layer${1}/kernel-${2}.h5 6 | if [[ ! 
-e $sp ]]; then 7 | python vis.py $1 $2 8 | fi 9 | fi 10 | -------------------------------------------------------------------------------- /demos/demo1/sampling.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/samplinglock${2} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/samplinglock${2} 5 | sp=layer${1}/kernel-${2}.h5 6 | if [[ ! -e $sp ]]; then 7 | python vis.py $1 $2 8 | fi 9 | fi 10 | -------------------------------------------------------------------------------- /demos/demo2/sampling.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/samplinglock${2} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/samplinglock${2} 5 | sp=layer${1}/kernel-${2}.h5 6 | if [[ ! -e $sp ]]; then 7 | python vis.py $1 $2 8 | fi 9 | fi 10 | -------------------------------------------------------------------------------- /demos/Basset/BD-10/sampling.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/samplinglock${2} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/samplinglock${2} 5 | sp=layer${1}/kernel-${2}.h5 6 | if [[ ! -e $sp ]]; then 7 | python vis.py $1 $2 8 | fi 9 | fi 10 | -------------------------------------------------------------------------------- /demos/Basset/Basset/sampling.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/samplinglock${2} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/samplinglock${2} 5 | sp=layer${1}/kernel-${2}.h5 6 | if [[ ! -e $sp ]]; then 7 | python vis.py $1 $2 8 | fi 9 | fi 10 | -------------------------------------------------------------------------------- /demos/DeepSEA/DD-10/sampling.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/samplinglock${2} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/samplinglock${2} 5 | sp=layer${1}/kernel-${2}.h5 6 | if [[ ! -e $sp ]]; then 7 | python vis.py $1 $2 8 | fi 9 | fi 10 | -------------------------------------------------------------------------------- /demos/DeepSEA/DeepSEA/sampling.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/samplinglock${2} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/samplinglock${2} 5 | sp=layer${1}/kernel-${2}.h5 6 | if [[ ! 
-e $sp ]]; then 7 | python vis.py $1 $2 8 | fi 9 | fi 10 | -------------------------------------------------------------------------------- /NeuronMotif/redict_redict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-redict.chen > layer$layer/kernel-$kernel-redict.meme 5 | tomtom --norc layer$layer/kernel-$kernel-redict.meme layer$layer/kernel-$kernel-redict.meme -oc layer$layer/kernel-$kernel-redict-redict 6 | -------------------------------------------------------------------------------- /demos/demo1/redict_redict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-redict.chen > layer$layer/kernel-$kernel-redict.meme 5 | tomtom --norc layer$layer/kernel-$kernel-redict.meme layer$layer/kernel-$kernel-redict.meme -oc layer$layer/kernel-$kernel-redict-redict 6 | -------------------------------------------------------------------------------- /demos/demo2/redict_redict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-redict.chen > layer$layer/kernel-$kernel-redict.meme 5 | tomtom --norc layer$layer/kernel-$kernel-redict.meme layer$layer/kernel-$kernel-redict.meme -oc layer$layer/kernel-$kernel-redict-redict 6 | -------------------------------------------------------------------------------- /demos/Basset/BD-10/redict_redict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-redict.chen > layer$layer/kernel-$kernel-redict.meme 5 | tomtom --norc layer$layer/kernel-$kernel-redict.meme layer$layer/kernel-$kernel-redict.meme -oc layer$layer/kernel-$kernel-redict-redict 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | demos/demo1/__pycache__ 2 | demos/demo2/__pycache__ 3 | */*/*/layer* 4 | */*/layer* 5 | */layer* 6 | */*/*/HTML 7 | */*/HTML 8 | */HTML 9 | */*/*/nohup* 10 | */*/nohup* 11 | */*/*/motifDB.txt 12 | */*/*/weight.hdf5 13 | */*/*/__pycache__ 14 | */*/__pycache__ 15 | */__pycache__ 16 | 17 | -------------------------------------------------------------------------------- /demos/Basset/Basset/redict_redict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-redict.chen > layer$layer/kernel-$kernel-redict.meme 5 | tomtom --norc layer$layer/kernel-$kernel-redict.meme layer$layer/kernel-$kernel-redict.meme -oc layer$layer/kernel-$kernel-redict-redict 6 | -------------------------------------------------------------------------------- /demos/DeepSEA/DD-10/redict_redict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-redict.chen > layer$layer/kernel-$kernel-redict.meme 5 | tomtom --norc layer$layer/kernel-$kernel-redict.meme layer$layer/kernel-$kernel-redict.meme -oc layer$layer/kernel-$kernel-redict-redict 6 | -------------------------------------------------------------------------------- /demos/DeepSEA/DeepSEA/redict_redict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | 
kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-redict.chen > layer$layer/kernel-$kernel-redict.meme 5 | tomtom --norc layer$layer/kernel-$kernel-redict.meme layer$layer/kernel-$kernel-redict.meme -oc layer$layer/kernel-$kernel-redict-redict 6 | -------------------------------------------------------------------------------- /NeuronMotif/ref_dict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-unified-dict.chen > layer$layer/kernel-$kernel-unified-dict.meme 5 | chen2meme layer$layer/kernel-$kernel-segs.chen > layer$layer/kernel-$kernel-segs.meme 6 | tomtom --norc layer$layer/kernel-$kernel-segs.meme layer$layer/kernel-$kernel-unified-dict.meme -oc layer$layer/kernel-$kernel-segs-dict 7 | -------------------------------------------------------------------------------- /demos/demo1/ref_dict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-unified-dict.chen > layer$layer/kernel-$kernel-unified-dict.meme 5 | chen2meme layer$layer/kernel-$kernel-segs.chen > layer$layer/kernel-$kernel-segs.meme 6 | tomtom --norc layer$layer/kernel-$kernel-segs.meme layer$layer/kernel-$kernel-unified-dict.meme -oc layer$layer/kernel-$kernel-segs-dict 7 | -------------------------------------------------------------------------------- /demos/demo2/ref_dict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-unified-dict.chen > layer$layer/kernel-$kernel-unified-dict.meme 5 | chen2meme layer$layer/kernel-$kernel-segs.chen > layer$layer/kernel-$kernel-segs.meme 6 | tomtom --norc layer$layer/kernel-$kernel-segs.meme layer$layer/kernel-$kernel-unified-dict.meme -oc layer$layer/kernel-$kernel-segs-dict 7 | -------------------------------------------------------------------------------- /demos/Basset/BD-10/ref_dict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-unified-dict.chen > layer$layer/kernel-$kernel-unified-dict.meme 5 | chen2meme layer$layer/kernel-$kernel-segs.chen > layer$layer/kernel-$kernel-segs.meme 6 | tomtom --norc layer$layer/kernel-$kernel-segs.meme layer$layer/kernel-$kernel-unified-dict.meme -oc layer$layer/kernel-$kernel-segs-dict 7 | -------------------------------------------------------------------------------- /demos/Basset/Basset/ref_dict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-unified-dict.chen > layer$layer/kernel-$kernel-unified-dict.meme 5 | chen2meme layer$layer/kernel-$kernel-segs.chen > layer$layer/kernel-$kernel-segs.meme 6 | tomtom --norc layer$layer/kernel-$kernel-segs.meme layer$layer/kernel-$kernel-unified-dict.meme -oc layer$layer/kernel-$kernel-segs-dict 7 | -------------------------------------------------------------------------------- /demos/DeepSEA/DD-10/ref_dict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-unified-dict.chen > layer$layer/kernel-$kernel-unified-dict.meme 5 | chen2meme layer$layer/kernel-$kernel-segs.chen > layer$layer/kernel-$kernel-segs.meme 6 | tomtom --norc layer$layer/kernel-$kernel-segs.meme 
layer$layer/kernel-$kernel-unified-dict.meme -oc layer$layer/kernel-$kernel-segs-dict 7 | -------------------------------------------------------------------------------- /demos/DeepSEA/DeepSEA/ref_dict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-unified-dict.chen > layer$layer/kernel-$kernel-unified-dict.meme 5 | chen2meme layer$layer/kernel-$kernel-segs.chen > layer$layer/kernel-$kernel-segs.meme 6 | tomtom --norc layer$layer/kernel-$kernel-segs.meme layer$layer/kernel-$kernel-unified-dict.meme -oc layer$layer/kernel-$kernel-segs-dict 7 | -------------------------------------------------------------------------------- /demos/demo2/script.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # bash run.layer.sh &> .layer.log 5 | 6 | 7 | bash run.layer.sh 1 5 28 &> .layer1.log 8 | bash run.layer.sh 2 5 28 &> .layer2.log 9 | bash run.layer.sh 3 6 28 &> .layer3.log 10 | bash run.layer.sh 4 6 28 &> .layer4.log 11 | bash run.layer.sh 5 1 28 2 &> .layer5.log 12 | 13 | -------------------------------------------------------------------------------- /demos/Basset/BD-10/tree.sh: -------------------------------------------------------------------------------- 1 | 2 | layer=$1 3 | kernel=$2 4 | mkdir layer$layer 5 | lk=layer$layer/treelock$kernel 6 | if [[ ! -e $lk ]]; then 7 | mkdir layer${1}/treelock${2} 8 | python build_segment.py $layer $kernel 9 | python build_dict.py $layer $kernel 10 | CUDA_VISIBLE_DEVICES=-1 python build_tree.py $layer $kernel 11 | tomtom layer$layer/kernel-$kernel-unified-dict.meme motifDB.txt -oc layer$layer/tomtom_dict_$kernel 12 | fi 13 | -------------------------------------------------------------------------------- /NeuronMotif/tree.sh: -------------------------------------------------------------------------------- 1 | 2 | layer=$1 3 | kernel=$2 4 | mkdir layer$layer 5 | lk=layer$layer/treelock$kernel 6 | if [[ ! -e $lk ]]; then 7 | mkdir layer${1}/treelock${2} 8 | python build_segment.py $layer $kernel 9 | python build_dict.py $layer $kernel 10 | CUDA_VISIBLE_DEVICES=-1 python build_tree.py $layer $kernel 11 | tomtom --norc layer$layer/kernel-$kernel-unified-dict.meme motifDB.txt -oc layer$layer/tomtom_dict_$kernel 12 | fi 13 | -------------------------------------------------------------------------------- /demos/DeepSEA/DD-10/tree.sh: -------------------------------------------------------------------------------- 1 | 2 | layer=$1 3 | kernel=$2 4 | mkdir layer$layer 5 | lk=layer$layer/treelock$kernel 6 | if [[ ! -e $lk ]]; then 7 | mkdir layer${1}/treelock${2} 8 | python build_segment.py $layer $kernel 9 | python build_dict.py $layer $kernel 10 | CUDA_VISIBLE_DEVICES=-1 python build_tree.py $layer $kernel 11 | tomtom layer$layer/kernel-$kernel-unified-dict.meme motifDB.txt -oc layer$layer/tomtom_dict_$kernel 12 | fi 13 | -------------------------------------------------------------------------------- /demos/DeepSEA/DeepSEA/tree.sh: -------------------------------------------------------------------------------- 1 | 2 | layer=$1 3 | kernel=$2 4 | mkdir layer$layer 5 | lk=layer$layer/treelock$kernel 6 | if [[ ! 
-e $lk ]]; then 7 | mkdir layer${1}/treelock${2} 8 | python build_segment.py $layer $kernel 9 | python build_dict.py $layer $kernel 10 | CUDA_VISIBLE_DEVICES=-1 python build_tree.py $layer $kernel 11 | tomtom layer$layer/kernel-$kernel-unified-dict.meme motifDB.txt -oc layer$layer/tomtom_dict_$kernel 12 | fi 13 | -------------------------------------------------------------------------------- /demos/demo1/tree.sh: -------------------------------------------------------------------------------- 1 | 2 | layer=$1 3 | kernel=$2 4 | mkdir layer$layer 5 | lk=layer$layer/treelock$kernel 6 | if [[ ! -e $lk ]]; then 7 | mkdir layer${1}/treelock${2} 8 | python build_segment.py $layer $kernel 9 | python build_dict.py $layer $kernel 10 | CUDA_VISIBLE_DEVICES=-1 python build_tree.py $layer $kernel 11 | tomtom --norc layer$layer/kernel-$kernel-unified-dict.meme motifDB.txt -oc layer$layer/tomtom_dict_$kernel 12 | fi 13 | -------------------------------------------------------------------------------- /demos/demo2/tree.sh: -------------------------------------------------------------------------------- 1 | 2 | layer=$1 3 | kernel=$2 4 | mkdir layer$layer 5 | lk=layer$layer/treelock$kernel 6 | if [[ ! -e $lk ]]; then 7 | mkdir layer${1}/treelock${2} 8 | python build_segment.py $layer $kernel 9 | python build_dict.py $layer $kernel 10 | CUDA_VISIBLE_DEVICES=-1 python build_tree.py $layer $kernel 11 | tomtom --norc layer$layer/kernel-$kernel-unified-dict.meme motifDB.txt -oc layer$layer/tomtom_dict_$kernel 12 | fi 13 | -------------------------------------------------------------------------------- /demos/Basset/Basset/tree.sh: -------------------------------------------------------------------------------- 1 | 2 | layer=$1 3 | kernel=$2 4 | mkdir layer$layer 5 | lk=layer$layer/treelock$kernel 6 | if [[ ! 
-e $lk ]]; then 7 | mkdir layer${1}/treelock${2} 8 | python build_segment.py $layer $kernel 9 | python build_dict.py $layer $kernel 10 | CUDA_VISIBLE_DEVICES=-1 python build_tree.py $layer $kernel 11 | tomtom --norc layer$layer/kernel-$kernel-unified-dict.meme motifDB.txt -oc layer$layer/tomtom_dict_$kernel 12 | fi 13 | -------------------------------------------------------------------------------- /demos/Basset/BD-10/script.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # bash run.layer.sh 5 | 6 | 7 | bash run.layer.sh 1 128 20 8 | bash run.layer.sh 2 128 20 9 | bash run.layer.sh 3 160 20 10 | bash run.layer.sh 4 160 20 11 | bash run.layer.sh 5 256 20 12 | bash run.layer.sh 6 256 20 13 | bash run.layer.sh 7 384 20 14 | bash run.layer.sh 8 384 20 15 | bash run.layer.sh 9 512 10 16 | bash run.layer.sh 10 512 10 17 | 18 | -------------------------------------------------------------------------------- /demos/DeepSEA/DD-10/script.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # bash run.layer.sh 5 | 6 | 7 | bash run.layer.sh 1 128 20 8 | bash run.layer.sh 2 128 20 9 | bash run.layer.sh 3 160 20 10 | bash run.layer.sh 4 160 20 11 | bash run.layer.sh 5 256 20 12 | bash run.layer.sh 6 320 20 13 | bash run.layer.sh 7 512 20 14 | bash run.layer.sh 8 640 20 15 | bash run.layer.sh 9 1024 20 16 | bash run.layer.sh 10 1280 10 17 | 18 | -------------------------------------------------------------------------------- /NeuronMotif/dict_dict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-dict.chen > layer$layer/kernel-$kernel-dict.meme 5 | #chen2meme layer$layer/kernel-$kernel-test-dict.chen > layer$layer/kernel-$kernel-test-dict.meme 6 | #tomtom --norc layer$layer/kernel-$kernel-test-dict.meme layer$layer/kernel-$kernel-test-dict.meme -oc layer$layer/kernel-$kernel-dict-dict 7 | tomtom --norc layer$layer/kernel-$kernel-dict.meme layer$layer/kernel-$kernel-dict.meme -oc layer$layer/kernel-$kernel-dict-dict 8 | -------------------------------------------------------------------------------- /demos/demo1/dict_dict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-dict.chen > layer$layer/kernel-$kernel-dict.meme 5 | #chen2meme layer$layer/kernel-$kernel-test-dict.chen > layer$layer/kernel-$kernel-test-dict.meme 6 | #tomtom --norc layer$layer/kernel-$kernel-test-dict.meme layer$layer/kernel-$kernel-test-dict.meme -oc layer$layer/kernel-$kernel-dict-dict 7 | tomtom --norc layer$layer/kernel-$kernel-dict.meme layer$layer/kernel-$kernel-dict.meme -oc layer$layer/kernel-$kernel-dict-dict 8 | -------------------------------------------------------------------------------- /demos/demo2/dict_dict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-dict.chen > layer$layer/kernel-$kernel-dict.meme 5 | #chen2meme layer$layer/kernel-$kernel-test-dict.chen > layer$layer/kernel-$kernel-test-dict.meme 6 | #tomtom --norc layer$layer/kernel-$kernel-test-dict.meme layer$layer/kernel-$kernel-test-dict.meme -oc layer$layer/kernel-$kernel-dict-dict 7 | tomtom --norc layer$layer/kernel-$kernel-dict.meme layer$layer/kernel-$kernel-dict.meme -oc layer$layer/kernel-$kernel-dict-dict 8 | 
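# Example usage: a minimal sketch, assuming the MEME suite utilities
# chen2meme and tomtom are on PATH. Self-comparing the motif dictionary of,
# e.g., layer 2, kernel 5 reveals redundant dictionary entries:
#   bash dict_dict.sh 2 5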
-------------------------------------------------------------------------------- /demos/Basset/BD-10/dict_dict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-dict.chen > layer$layer/kernel-$kernel-dict.meme 5 | #chen2meme layer$layer/kernel-$kernel-test-dict.chen > layer$layer/kernel-$kernel-test-dict.meme 6 | #tomtom --norc layer$layer/kernel-$kernel-test-dict.meme layer$layer/kernel-$kernel-test-dict.meme -oc layer$layer/kernel-$kernel-dict-dict 7 | tomtom --norc layer$layer/kernel-$kernel-dict.meme layer$layer/kernel-$kernel-dict.meme -oc layer$layer/kernel-$kernel-dict-dict 8 | -------------------------------------------------------------------------------- /demos/Basset/Basset/dict_dict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-dict.chen > layer$layer/kernel-$kernel-dict.meme 5 | #chen2meme layer$layer/kernel-$kernel-test-dict.chen > layer$layer/kernel-$kernel-test-dict.meme 6 | #tomtom --norc layer$layer/kernel-$kernel-test-dict.meme layer$layer/kernel-$kernel-test-dict.meme -oc layer$layer/kernel-$kernel-dict-dict 7 | tomtom --norc layer$layer/kernel-$kernel-dict.meme layer$layer/kernel-$kernel-dict.meme -oc layer$layer/kernel-$kernel-dict-dict 8 | -------------------------------------------------------------------------------- /demos/DeepSEA/DD-10/dict_dict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-dict.chen > layer$layer/kernel-$kernel-dict.meme 5 | #chen2meme layer$layer/kernel-$kernel-test-dict.chen > layer$layer/kernel-$kernel-test-dict.meme 6 | #tomtom --norc layer$layer/kernel-$kernel-test-dict.meme layer$layer/kernel-$kernel-test-dict.meme -oc layer$layer/kernel-$kernel-dict-dict 7 | tomtom --norc layer$layer/kernel-$kernel-dict.meme layer$layer/kernel-$kernel-dict.meme -oc layer$layer/kernel-$kernel-dict-dict 8 | -------------------------------------------------------------------------------- /demos/DeepSEA/DeepSEA/dict_dict.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernel=$2 3 | 4 | chen2meme layer$layer/kernel-$kernel-dict.chen > layer$layer/kernel-$kernel-dict.meme 5 | #chen2meme layer$layer/kernel-$kernel-test-dict.chen > layer$layer/kernel-$kernel-test-dict.meme 6 | #tomtom --norc layer$layer/kernel-$kernel-test-dict.meme layer$layer/kernel-$kernel-test-dict.meme -oc layer$layer/kernel-$kernel-dict-dict 7 | tomtom --norc layer$layer/kernel-$kernel-dict.meme layer$layer/kernel-$kernel-dict.meme -oc layer$layer/kernel-$kernel-dict-dict 8 | -------------------------------------------------------------------------------- /dcnn/demo/demo2/PWM: -------------------------------------------------------------------------------- 1 | 87.00 167.00 281.00 56.00 8.00 744.00 40.00 107.00 851.00 5.00 333.00 54.00 12.00 56.00 104.00 372.00 82.00 117.00 402.00 2 | 291.00 145.00 49.00 800.00 903.00 13.00 528.00 433.00 11.00 0.00 3.00 12.00 0.00 8.00 733.00 13.00 482.00 322.00 181.00 3 | 76.00 414.00 449.00 21.00 0.00 65.00 334.00 48.00 32.00 903.00 566.00 504.00 890.00 775.00 5.00 507.00 307.00 73.00 266.00 4 | 459.00 187.00 134.00 36.00 2.00 91.00 11.00 324.00 18.00 3.00 9.00 341.00 8.00 71.00 67.00 17.00 37.00 396.00 59.00 5 | 
-------------------------------------------------------------------------------- /NeuronMotif/run.layer.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernels=$2 3 | threads=$3 4 | motif_nb=$4 5 | bash idx.sh $kernels | xargs -n 1 -P $threads bash sampling.sh $layer 6 | python decouple.py $threads $layer $kernels $motif_nb 7 | bash idx.sh $kernels | xargs -n 1 -P $threads bash visualize.sh $layer $kernels 8 | bash idx.sh $kernels | xargs -n 1 -P $threads bash tree.sh $layer 9 | mkdir HTML 10 | rm -rf HTML/layer$layer 11 | mkdir HTML/layer$layer 12 | cp layer$layer/*.html HTML/layer$layer/ 13 | cp -r layer$layer/tomtom_*.sel.ppm.meme HTML/layer$layer/ 14 | cp -r layer$layer/tomtom_dict_* HTML/layer$layer/ 15 | -------------------------------------------------------------------------------- /demos/demo1/run.layer.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernels=$2 3 | threads=$3 4 | motif_nb=$4 5 | bash idx.sh $kernels | xargs -n 1 -P $threads bash sampling.sh $layer 6 | python decouple.py $threads $layer $kernels $motif_nb 7 | bash idx.sh $kernels | xargs -n 1 -P $threads bash visualize.sh $layer $kernels 8 | bash idx.sh $kernels | xargs -n 1 -P $threads bash tree.sh $layer 9 | mkdir HTML 10 | rm -rf HTML/layer$layer 11 | mkdir HTML/layer$layer 12 | cp layer$layer/*.html HTML/layer$layer/ 13 | cp -r layer$layer/tomtom_*.sel.ppm.meme HTML/layer$layer/ 14 | cp -r layer$layer/tomtom_dict_* HTML/layer$layer/ 15 | -------------------------------------------------------------------------------- /demos/demo2/run.layer.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernels=$2 3 | threads=$3 4 | motif_nb=$4 5 | bash idx.sh $kernels | xargs -n 1 -P $threads bash sampling.sh $layer 6 | python decouple.py $threads $layer $kernels $motif_nb 7 | bash idx.sh $kernels | xargs -n 1 -P $threads bash visualize.sh $layer $kernels 8 | bash idx.sh $kernels | xargs -n 1 -P $threads bash tree.sh $layer 9 | mkdir HTML 10 | rm -rf HTML/layer$layer 11 | mkdir HTML/layer$layer 12 | cp layer$layer/*.html HTML/layer$layer/ 13 | cp -r layer$layer/tomtom_*.sel.ppm.meme HTML/layer$layer/ 14 | cp -r layer$layer/tomtom_dict_* HTML/layer$layer/ 15 | -------------------------------------------------------------------------------- /demos/Basset/BD-10/run.layer.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernels=$2 3 | threads=$3 4 | motif_nb=$4 5 | bash idx.sh $kernels | xargs -n 1 -P $threads bash sampling.sh $layer 6 | python decouple.py $threads $layer $kernels $motif_nb 7 | bash idx.sh $kernels | xargs -n 1 -P $threads bash visualize.sh $layer $kernels 8 | bash idx.sh $kernels | xargs -n 1 -P $threads bash tree.sh $layer 9 | mkdir HTML 10 | rm -rf HTML/layer$layer 11 | mkdir HTML/layer$layer 12 | cp layer$layer/*.html HTML/layer$layer/ 13 | cp -r layer$layer/tomtom_*.sel.ppm.meme HTML/layer$layer/ 14 | cp -r layer$layer/tomtom_dict_* HTML/layer$layer/ 15 | -------------------------------------------------------------------------------- /demos/Basset/Basset/run.layer.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernels=$2 3 | threads=$3 4 | motif_nb=$4 5 | bash idx.sh $kernels | xargs -n 1 -P $threads bash sampling.sh $layer 6 | python decouple.py $threads $layer $kernels $motif_nb 7 | bash idx.sh $kernels | xargs -n 1 -P $threads bash 
visualize.sh $layer $kernels 8 | bash idx.sh $kernels | xargs -n 1 -P $threads bash tree.sh $layer 9 | mkdir HTML 10 | rm -rf HTML/layer$layer 11 | mkdir HTML/layer$layer 12 | cp layer$layer/*.html HTML/layer$layer/ 13 | cp -r layer$layer/tomtom_*.sel.ppm.meme HTML/layer$layer/ 14 | cp -r layer$layer/tomtom_dict_* HTML/layer$layer/ 15 | -------------------------------------------------------------------------------- /demos/DeepSEA/DD-10/run.layer.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernels=$2 3 | threads=$3 4 | motif_nb=$4 5 | bash idx.sh $kernels | xargs -n 1 -P $threads bash sampling.sh $layer 6 | python decouple.py $threads $layer $kernels $motif_nb 7 | bash idx.sh $kernels | xargs -n 1 -P $threads bash visualize.sh $layer $kernels 8 | bash idx.sh $kernels | xargs -n 1 -P $threads bash tree.sh $layer 9 | mkdir HTML 10 | rm -rf HTML/layer$layer 11 | mkdir HTML/layer$layer 12 | cp layer$layer/*.html HTML/layer$layer/ 13 | cp -r layer$layer/tomtom_*.sel.ppm.meme HTML/layer$layer/ 14 | cp -r layer$layer/tomtom_dict_* HTML/layer$layer/ 15 | -------------------------------------------------------------------------------- /demos/DeepSEA/DeepSEA/run.layer.sh: -------------------------------------------------------------------------------- 1 | layer=$1 2 | kernels=$2 3 | threads=$3 4 | motif_nb=$4 5 | bash idx.sh $kernels | xargs -n 1 -P $threads bash sampling.sh $layer 6 | python decouple.py $threads $layer $kernels $motif_nb 7 | bash idx.sh $kernels | xargs -n 1 -P $threads bash visualize.sh $layer $kernels 8 | bash idx.sh $kernels | xargs -n 1 -P $threads bash tree.sh $layer 9 | mkdir HTML 10 | rm -rf HTML/layer$layer 11 | mkdir HTML/layer$layer 12 | cp layer$layer/*.html HTML/layer$layer/ 13 | cp -r layer$layer/tomtom_*.sel.ppm.meme HTML/layer$layer/ 14 | cp -r layer$layer/tomtom_dict_* HTML/layer$layer/ 15 | -------------------------------------------------------------------------------- /NeuronMotif/visualize.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/vislock${3} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/vislock${3} 5 | python chen2html.py layer${1}/kernel-${3}.ppm.h5 ${3} layer${1}/${3} ${2} 6 | python tochen.py $1 $3 7 | chen2meme layer${1}/kernel-${3}.sel.ppm.chen > layer${1}/kernel-${3}.sel.ppm.meme 8 | tomtom layer${1}/kernel-${3}.sel.ppm.meme motifDB.txt -o layer${1}/tomtom_${3}.sel.ppm.meme 9 | chen2meme layer${1}/kernel-${3}.all.ppm.chen > layer${1}/kernel-${3}.all.ppm.meme 10 | # tomtom layer${1}/kernel-${3}.all.ppm.meme motifDB.txt -o layer${1}/tomtom_${3}.all.ppm.meme 11 | fi 12 | -------------------------------------------------------------------------------- /demos/demo1/visualize.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/vislock${3} 3 | if [[ ! 
-e $lk ]]; then 4 | mkdir layer${1}/vislock${3} 5 | python chen2html.py layer${1}/kernel-${3}.ppm.h5 ${3} layer${1}/${3} ${2} 6 | python tochen.py $1 $3 7 | chen2meme layer${1}/kernel-${3}.sel.ppm.chen > layer${1}/kernel-${3}.sel.ppm.meme 8 | tomtom layer${1}/kernel-${3}.sel.ppm.meme motifDB.txt -o layer${1}/tomtom_${3}.sel.ppm.meme 9 | chen2meme layer${1}/kernel-${3}.all.ppm.chen > layer${1}/kernel-${3}.all.ppm.meme 10 | # tomtom layer${1}/kernel-${3}.all.ppm.meme motifDB.txt -o layer${1}/tomtom_${3}.all.ppm.meme 11 | fi 12 | -------------------------------------------------------------------------------- /demos/demo2/visualize.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/vislock${3} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/vislock${3} 5 | python chen2html.py layer${1}/kernel-${3}.ppm.h5 ${3} layer${1}/${3} ${2} 6 | python tochen.py $1 $3 7 | chen2meme layer${1}/kernel-${3}.sel.ppm.chen > layer${1}/kernel-${3}.sel.ppm.meme 8 | tomtom layer${1}/kernel-${3}.sel.ppm.meme motifDB.txt -o layer${1}/tomtom_${3}.sel.ppm.meme 9 | chen2meme layer${1}/kernel-${3}.all.ppm.chen > layer${1}/kernel-${3}.all.ppm.meme 10 | # tomtom layer${1}/kernel-${3}.all.ppm.meme motifDB.txt -o layer${1}/tomtom_${3}.all.ppm.meme 11 | fi 12 | -------------------------------------------------------------------------------- /demos/Basset/BD-10/visualize.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/vislock${3} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/vislock${3} 5 | python chen2html.py layer${1}/kernel-${3}.ppm.h5 ${3} layer${1}/${3} ${2} 6 | python tochen.py $1 $3 7 | chen2meme layer${1}/kernel-${3}.sel.ppm.chen > layer${1}/kernel-${3}.sel.ppm.meme 8 | tomtom layer${1}/kernel-${3}.sel.ppm.meme motifDB.txt -o layer${1}/tomtom_${3}.sel.ppm.meme 9 | chen2meme layer${1}/kernel-${3}.all.ppm.chen > layer${1}/kernel-${3}.all.ppm.meme 10 | # tomtom layer${1}/kernel-${3}.all.ppm.meme motifDB.txt -o layer${1}/tomtom_${3}.all.ppm.meme 11 | fi 12 | -------------------------------------------------------------------------------- /demos/Basset/Basset/visualize.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/vislock${3} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/vislock${3} 5 | python chen2html.py layer${1}/kernel-${3}.ppm.h5 ${3} layer${1}/${3} ${2} 6 | python tochen.py $1 $3 7 | chen2meme layer${1}/kernel-${3}.sel.ppm.chen > layer${1}/kernel-${3}.sel.ppm.meme 8 | tomtom layer${1}/kernel-${3}.sel.ppm.meme motifDB.txt -o layer${1}/tomtom_${3}.sel.ppm.meme 9 | chen2meme layer${1}/kernel-${3}.all.ppm.chen > layer${1}/kernel-${3}.all.ppm.meme 10 | # tomtom layer${1}/kernel-${3}.all.ppm.meme motifDB.txt -o layer${1}/tomtom_${3}.all.ppm.meme 11 | fi 12 | -------------------------------------------------------------------------------- /demos/DeepSEA/DD-10/visualize.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/vislock${3} 3 | if [[ ! 
-e $lk ]]; then 4 | mkdir layer${1}/vislock${3} 5 | python chen2html.py layer${1}/kernel-${3}.ppm.h5 ${3} layer${1}/${3} ${2} 6 | python tochen.py $1 $3 7 | chen2meme layer${1}/kernel-${3}.sel.ppm.chen > layer${1}/kernel-${3}.sel.ppm.meme 8 | tomtom layer${1}/kernel-${3}.sel.ppm.meme motifDB.txt -o layer${1}/tomtom_${3}.sel.ppm.meme 9 | chen2meme layer${1}/kernel-${3}.all.ppm.chen > layer${1}/kernel-${3}.all.ppm.meme 10 | # tomtom layer${1}/kernel-${3}.all.ppm.meme motifDB.txt -o layer${1}/tomtom_${3}.all.ppm.meme 11 | fi 12 | -------------------------------------------------------------------------------- /demos/DeepSEA/DeepSEA/visualize.sh: -------------------------------------------------------------------------------- 1 | mkdir layer${1} 2 | lk=layer${1}/vislock${3} 3 | if [[ ! -e $lk ]]; then 4 | mkdir layer${1}/vislock${3} 5 | python chen2html.py layer${1}/kernel-${3}.ppm.h5 ${3} layer${1}/${3} ${2} 6 | python tochen.py $1 $3 7 | chen2meme layer${1}/kernel-${3}.sel.ppm.chen > layer${1}/kernel-${3}.sel.ppm.meme 8 | tomtom layer${1}/kernel-${3}.sel.ppm.meme motifDB.txt -o layer${1}/tomtom_${3}.sel.ppm.meme 9 | chen2meme layer${1}/kernel-${3}.all.ppm.chen > layer${1}/kernel-${3}.all.ppm.meme 10 | # tomtom layer${1}/kernel-${3}.all.ppm.meme motifDB.txt -o layer${1}/tomtom_${3}.all.ppm.meme 11 | fi 12 | -------------------------------------------------------------------------------- /dcnn/README.md: -------------------------------------------------------------------------------- 1 | # Train DCNN model 2 | 3 | ## Download data 4 | 5 | For the models trained on the Basset or DeepSEA dataset, please go to the data folder inside the Basset or DeepSEA folder. 6 | There are instructions for downloading the training data. 7 | 8 | ## Train model 9 | 10 | Take the BD-10 model as an example: go to the model folder and run: 11 | 12 | ``` 13 | CUDA_VISIBLE_DEVICES=0 python BD-10.py 14 | ``` 15 | 16 | CUDA_VISIBLE_DEVICES is the ID of the GPU to use on your server. 17 | 18 | ## Test model 19 | 20 | Take the BD-10 model as an example: go to the model folder and run: 21 | 22 | ``` 23 | CUDA_VISIBLE_DEVICES=0 python BD-10.pred.py 24 | ``` 25 | -------------------------------------------------------------------------------- /dcnn/DeepSEA/data/README.md: -------------------------------------------------------------------------------- 1 | # DeepSEA data 2 | 3 | Download the data for training the DCNN: 4 | 5 | ``` 6 | wget --no-check-certificate http://bioinfo-xwwang-thu.cn/zwei/NeuronMotif/DCNN_train/DeepSEA_data.tar.gz 7 | 8 | ``` 9 | 10 | Alternatively, if the link above is temporarily unavailable, you can download it from https://cloud.tsinghua.edu.cn/d/fee522536d524eae9531/files/?p=%2FDCNN_train%2FDeepSEA_data.tar.gz&dl=1 11 | 12 | 13 | ``` 14 | tar -xzvf DeepSEA_data.tar.gz 15 | mv data/* ./ 16 | ``` 17 | 18 | # Reference 19 | 20 | Jian Zhou, Olga G. Troyanskaya. Predicting the Effects of Noncoding Variants with Deep learning-based Sequence Model. Nature Methods (2015). 
21 | -------------------------------------------------------------------------------- /dcnn/DeepSEA/DD-10/data/README.md: -------------------------------------------------------------------------------- 1 | # DeepSEA data 2 | 3 | Download the data for training the DCNN: 4 | 5 | ``` 6 | wget --no-check-certificate http://bioinfo-xwwang-thu.cn/zwei/NeuronMotif/DCNN_train/DeepSEA_data.tar.gz 7 | 8 | ``` 9 | 10 | Alternatively, if the link above is temporarily unavailable, you can download it from https://cloud.tsinghua.edu.cn/d/fee522536d524eae9531/files/?p=%2FDCNN_train%2FDeepSEA_data.tar.gz&dl=1 11 | 12 | 13 | ``` 14 | tar -xzvf DeepSEA_data.tar.gz 15 | mv data/* ./ 16 | ``` 17 | 18 | # Reference 19 | 20 | Jian Zhou, Olga G. Troyanskaya. Predicting the Effects of Noncoding Variants with Deep learning-based Sequence Model. Nature Methods (2015). 21 | -------------------------------------------------------------------------------- /dcnn/DeepSEA/DeepSEA/data/README.md: -------------------------------------------------------------------------------- 1 | # DeepSEA data 2 | 3 | Download the data for training the DCNN: 4 | 5 | ``` 6 | wget --no-check-certificate http://bioinfo-xwwang-thu.cn/zwei/NeuronMotif/DCNN_train/DeepSEA_data.tar.gz 7 | 8 | ``` 9 | 10 | Alternatively, if the link above is temporarily unavailable, you can download it from https://cloud.tsinghua.edu.cn/d/fee522536d524eae9531/files/?p=%2FDCNN_train%2FDeepSEA_data.tar.gz&dl=1 11 | 12 | 13 | ``` 14 | tar -xzvf DeepSEA_data.tar.gz 15 | mv data/* ./ 16 | ``` 17 | 18 | # Reference 19 | 20 | Jian Zhou, Olga G. Troyanskaya. Predicting the Effects of Noncoding Variants with Deep learning-based Sequence Model. Nature Methods (2015). 21 | -------------------------------------------------------------------------------- /dcnn/Basset/data/README.md: -------------------------------------------------------------------------------- 1 | # Basset data 2 | 3 | Download the data for training the DCNN: 4 | 5 | ``` 6 | wget --no-check-certificate http://bioinfo-xwwang-thu.cn/zwei/NeuronMotif/DCNN_train/Basset_data.tar.gz 7 | ``` 8 | Alternatively, if the link above is temporarily unavailable, you can download it from https://cloud.tsinghua.edu.cn/d/fee522536d524eae9531/files/?p=%2FDCNN_train%2FBasset_data.tar.gz&dl=1 9 | 10 | ``` 11 | tar -xzvf Basset_data.tar.gz 12 | 13 | mv data/* ./ 14 | ``` 15 | 16 | # Reference 17 | 18 | Kelley, D. R., Snoek, J., & Rinn, J. L. (2016). Basset: learning the regulatory code of the accessible genome with deep convolutional neural networks. Genome research, 26(7), 990-999. 19 | -------------------------------------------------------------------------------- /dcnn/Basset/BD-10/data/README.md: -------------------------------------------------------------------------------- 1 | # Basset data 2 | 3 | Download the data for training the DCNN: 4 | 5 | ``` 6 | wget --no-check-certificate http://bioinfo-xwwang-thu.cn/zwei/NeuronMotif/DCNN_train/Basset_data.tar.gz 7 | ``` 8 | Alternatively, if the link above is temporarily unavailable, you can download it from https://cloud.tsinghua.edu.cn/d/fee522536d524eae9531/files/?p=%2FDCNN_train%2FBasset_data.tar.gz&dl=1 9 | 10 | ``` 11 | tar -xzvf Basset_data.tar.gz 12 | 13 | mv data/* ./ 14 | ``` 15 | 16 | # Reference 17 | 18 | Kelley, D. R., Snoek, J., & Rinn, J. L. (2016). Basset: learning the regulatory code of the accessible genome with deep convolutional neural networks. Genome research, 26(7), 990-999. 
19 | -------------------------------------------------------------------------------- /dcnn/Basset/BD-5/data/README.md: -------------------------------------------------------------------------------- 1 | # Basset data 2 | 3 | Download the data for training the DCNN: 4 | 5 | ``` 6 | wget --no-check-certificate http://bioinfo-xwwang-thu.cn/zwei/NeuronMotif/DCNN_train/Basset_data.tar.gz 7 | ``` 8 | Alternatively, if the link above is temporarily unavailable, you can download it from https://cloud.tsinghua.edu.cn/d/fee522536d524eae9531/files/?p=%2FDCNN_train%2FBasset_data.tar.gz&dl=1 9 | 10 | ``` 11 | tar -xzvf Basset_data.tar.gz 12 | 13 | mv data/* ./ 14 | ``` 15 | 16 | # Reference 17 | 18 | Kelley, D. R., Snoek, J., & Rinn, J. L. (2016). Basset: learning the regulatory code of the accessible genome with deep convolutional neural networks. Genome research, 26(7), 990-999. 19 | -------------------------------------------------------------------------------- /dcnn/Basset/Basset/data/README.md: -------------------------------------------------------------------------------- 1 | # Basset data 2 | 3 | Download the data for training the DCNN: 4 | 5 | ``` 6 | wget --no-check-certificate http://bioinfo-xwwang-thu.cn/zwei/NeuronMotif/DCNN_train/Basset_data.tar.gz 7 | ``` 8 | Alternatively, if the link above is temporarily unavailable, you can download it from https://cloud.tsinghua.edu.cn/d/fee522536d524eae9531/files/?p=%2FDCNN_train%2FBasset_data.tar.gz&dl=1 9 | 10 | ``` 11 | tar -xzvf Basset_data.tar.gz 12 | 13 | mv data/* ./ 14 | ``` 15 | 16 | # Reference 17 | 18 | Kelley, D. R., Snoek, J., & Rinn, J. L. (2016). Basset: learning the regulatory code of the accessible genome with deep convolutional neural networks. Genome research, 26(7), 990-999. 19 | -------------------------------------------------------------------------------- /dcnn/Basset/Basenji/data/README.md: -------------------------------------------------------------------------------- 1 | # Basset data 2 | 3 | Download the data for training the DCNN: 4 | 5 | ``` 6 | wget --no-check-certificate http://bioinfo-xwwang-thu.cn/zwei/NeuronMotif/DCNN_train/Basset_data.tar.gz 7 | ``` 8 | Alternatively, if the link above is temporarily unavailable, you can download it from https://cloud.tsinghua.edu.cn/d/fee522536d524eae9531/files/?p=%2FDCNN_train%2FBasset_data.tar.gz&dl=1 9 | 10 | ``` 11 | tar -xzvf Basset_data.tar.gz 12 | 13 | mv data/* ./ 14 | ``` 15 | 16 | # Reference 17 | 18 | Kelley, D. R., Snoek, J., & Rinn, J. L. (2016). Basset: learning the regulatory code of the accessible genome with deep convolutional neural networks. Genome research, 26(7), 990-999. 19 | -------------------------------------------------------------------------------- /demos/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | We provide two very simple demos [demo1](https://github.com/wzthu/NeuronMotif/tree/master/demos/demo1) and [demo2](https://github.com/wzthu/NeuronMotif/tree/master/demos/demo2) to test NeuronMotif; a minimal way to run them is sketched below. 
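For example, a hedged sketch of trying demo1 (assuming NeuronMotif's dependencies, including the MEME suite tools tomtom and chen2meme, are installed):

```
cd demos/demo1
bash script.sh
```

Here script.sh simply calls run.layer.sh once per convolutional layer and writes a .layerN.log file for each layer.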
4 | 5 | We also provide the code examples used in the paper: 6 | 7 | To run NeuronMotif and reproduce the results for the Basset and BD-10 models, please go to the directories ["Basset/Basset"](https://github.com/wzthu/NeuronMotif/tree/master/demos/Basset/Basset) and ["Basset/BD-10"](https://github.com/wzthu/NeuronMotif/tree/master/demos/Basset/BD-10). 8 | 9 | To run NeuronMotif and reproduce the results for the DeepSEA and DD-10 models, please go to the directories ["DeepSEA/DeepSEA"](https://github.com/wzthu/NeuronMotif/tree/master/demos/DeepSEA/DeepSEA) and ["DeepSEA/DD-10"](https://github.com/wzthu/NeuronMotif/tree/master/demos/DeepSEA/DD-10). 10 | 11 | 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Zheng Wei 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NeuronMotif/script.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # bash run.layer.sh <layer> <kernels> <threads> <motif_nb> &> .layer.log 5 | 6 | # <layer>: the layer number 7 | # <kernels>: the number of kernels in the layer 8 | # <threads>: the number of neurons that can be demixed at the same time 9 | # <motif_nb> (optional): 10 | # If you know the maximum number of hard-syntax motifs recognized by the neurons in the layer, setting this parameter is beneficial to improving motif quality. 
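# A hedged invocation sketch for a hypothetical model: demix layer 1 with
# 64 kernels, 16 neurons processed in parallel, and at most 2 hard-syntax
# motifs expected per neuron (see the examples of counting hard-syntax
# motifs just below):
#   bash run.layer.sh 1 64 16 2 &> .layer1.log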
11 | #Examples: 12 | #1 hard syntax motif CTCF 13 | #1 hard syntax motif CTCF-[5bp]-CTCF 14 | #2 hard-syntax motifs CTCF-[5bp]-CTCF-[3~5bp]-CTCF 15 | #3 hard-syntax motifs CTCF-[3~5bp]-CTCF-[3~5bp]-CTCF 16 | 17 | 18 | 19 | bash run.layer.sh 1 128 20 &> .layer1.log 20 | bash run.layer.sh 2 128 20 &> .layer2.log 21 | bash run.layer.sh 3 160 20 &> .layer3.log 22 | bash run.layer.sh 4 160 20 &> .layer4.log 23 | bash run.layer.sh 5 256 20 &> .layer5.log 24 | bash run.layer.sh 6 256 20 &> .layer6.log 25 | bash run.layer.sh 7 384 20 &> .layer7.log 26 | bash run.layer.sh 8 384 20 &> .layer8.log 27 | bash run.layer.sh 9 512 10 &> .layer9.log 28 | bash run.layer.sh 10 512 10 &> .layer10.log 29 | 30 | -------------------------------------------------------------------------------- /NeuronMotif/build_segment.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import pandas as pd 4 | from segment import * 5 | import numpy as np 6 | import h5py 7 | import sys 8 | 9 | layer = str(sys.argv[1]) 10 | kernel = str(sys.argv[2]) 11 | 12 | flanking = 1 13 | if len(sys.argv) > 3: 14 | flanking = int(sys.argv[3]) 15 | # default 1 16 | #layer = '10' 17 | #kernel = '1130' 18 | 19 | 20 | f = h5py.File('layer' + str(layer)+ '/kernel-'+str(kernel)+'.ppm.h5','r') 21 | 22 | 23 | ppmids = [] 24 | 25 | run_rounds = 0 26 | 27 | for a in list(f.keys()): 28 | if a.startswith('conact'): 29 | rl = len(a[len('conact'):].split('_')) 30 | if rl > run_rounds: 31 | run_rounds = rl 32 | 33 | for a in list(f.keys()): 34 | if a.startswith('conact'): 35 | rl = len(a[len('conact'):].split('_')) 36 | if rl == run_rounds: 37 | ppmids.append(a[len('conact'):]) 38 | 39 | 40 | 41 | I1 = [] 42 | I2 = [] 43 | sp_sizes = [] 44 | ppms = [] 45 | for ppmid in ppmids: 46 | I1.append(f['conact' + ppmid][0]) 47 | I2.append(f['act' + ppmid][:].max()) 48 | sp_sizes.append(f['index' + ppmid].shape[0]) 49 | ppms.append(f['ppm' + ppmid][:]) 50 | # ppms[-1] = ppms[-1][:,[0,2,1,3]] 51 | 52 | seg_ppms = [ppm_segment(ppms[i],smooth=True, sp_size=sp_sizes[i],flank = flanking, shortest = None) for i in range(len(ppms)) ] 53 | seg_max = np.array([len(seg_ppms[i][0]) for i in range(len(seg_ppms))]) 54 | 55 | 56 | 57 | seg_ppms_lst = [seg_ppms[i][0] for i in range(len(seg_ppms))] 58 | starts_lst = [seg_ppms[i][1] for i in range(len(seg_ppms))] 59 | ends_lst = [seg_ppms[i][2] for i in range(len(seg_ppms))] 60 | 61 | segsfile = 'layer' + str(layer)+ '/kernel-'+str(kernel)+'-segs.chen' 62 | segs_to_chen(ppmids=ppmids, ppms=seg_ppms_lst, starts=starts_lst, ends=ends_lst, filepath=segsfile) 63 | -------------------------------------------------------------------------------- /demos/demo1/build_segment.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import pandas as pd 4 | from segment import * 5 | import numpy as np 6 | import h5py 7 | import sys 8 | 9 | layer = str(sys.argv[1]) 10 | kernel = str(sys.argv[2]) 11 | 12 | flanking = 1 13 | if len(sys.argv) > 3: 14 | flanking = int(sys.argv[3]) 15 | # default 1 16 | #layer = '10' 17 | #kernel = '1130' 18 | 19 | 20 | f = h5py.File('layer' + str(layer)+ '/kernel-'+str(kernel)+'.ppm.h5','r') 21 | 22 | 23 | ppmids = [] 24 | 25 | run_rounds = 0 26 | 27 | for a in list(f.keys()): 28 | if a.startswith('conact'): 29 | rl = len(a[len('conact'):].split('_')) 30 | if rl > run_rounds: 31 | run_rounds = rl 32 | 33 | for a in list(f.keys()): 34 | if a.startswith('conact'): 35 | rl = 
len(a[len('conact'):].split('_')) 36 | if rl == run_rounds: 37 | ppmids.append(a[len('conact'):]) 38 | 39 | 40 | 41 | I1 = [] 42 | I2 = [] 43 | sp_sizes = [] 44 | ppms = [] 45 | for ppmid in ppmids: 46 | I1.append(f['conact' + ppmid][0]) 47 | I2.append(f['act' + ppmid][:].max()) 48 | sp_sizes.append(f['index' + ppmid].shape[0]) 49 | ppms.append(f['ppm' + ppmid][:]) 50 | # ppms[-1] = ppms[-1][:,[0,2,1,3]] 51 | 52 | seg_ppms = [ppm_segment(ppms[i],smooth=True, sp_size=sp_sizes[i],flank = flanking, shortest = None) for i in range(len(ppms)) ] 53 | seg_max = np.array([len(seg_ppms[i][0]) for i in range(len(seg_ppms))]) 54 | 55 | 56 | 57 | seg_ppms_lst = [seg_ppms[i][0] for i in range(len(seg_ppms))] 58 | starts_lst = [seg_ppms[i][1] for i in range(len(seg_ppms))] 59 | ends_lst = [seg_ppms[i][2] for i in range(len(seg_ppms))] 60 | 61 | segsfile = 'layer' + str(layer)+ '/kernel-'+str(kernel)+'-segs.chen' 62 | segs_to_chen(ppmids=ppmids, ppms=seg_ppms_lst, starts=starts_lst, ends=ends_lst, filepath=segsfile) 63 | -------------------------------------------------------------------------------- /demos/demo2/build_segment.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import pandas as pd 4 | from segment import * 5 | import numpy as np 6 | import h5py 7 | import sys 8 | 9 | layer = str(sys.argv[1]) 10 | kernel = str(sys.argv[2]) 11 | 12 | flanking = 1 13 | if len(sys.argv) > 3: 14 | flanking = int(sys.argv[3]) 15 | # default 1 16 | #layer = '10' 17 | #kernel = '1130' 18 | 19 | 20 | f = h5py.File('layer' + str(layer)+ '/kernel-'+str(kernel)+'.ppm.h5','r') 21 | 22 | 23 | ppmids = [] 24 | 25 | run_rounds = 0 26 | 27 | for a in list(f.keys()): 28 | if a.startswith('conact'): 29 | rl = len(a[len('conact'):].split('_')) 30 | if rl > run_rounds: 31 | run_rounds = rl 32 | 33 | for a in list(f.keys()): 34 | if a.startswith('conact'): 35 | rl = len(a[len('conact'):].split('_')) 36 | if rl == run_rounds: 37 | ppmids.append(a[len('conact'):]) 38 | 39 | 40 | 41 | I1 = [] 42 | I2 = [] 43 | sp_sizes = [] 44 | ppms = [] 45 | for ppmid in ppmids: 46 | I1.append(f['conact' + ppmid][0]) 47 | I2.append(f['act' + ppmid][:].max()) 48 | sp_sizes.append(f['index' + ppmid].shape[0]) 49 | ppms.append(f['ppm' + ppmid][:]) 50 | # ppms[-1] = ppms[-1][:,[0,2,1,3]] 51 | 52 | seg_ppms = [ppm_segment(ppms[i],smooth=True, sp_size=sp_sizes[i],flank = flanking, shortest = None) for i in range(len(ppms)) ] 53 | seg_max = np.array([len(seg_ppms[i][0]) for i in range(len(seg_ppms))]) 54 | 55 | 56 | 57 | seg_ppms_lst = [seg_ppms[i][0] for i in range(len(seg_ppms))] 58 | starts_lst = [seg_ppms[i][1] for i in range(len(seg_ppms))] 59 | ends_lst = [seg_ppms[i][2] for i in range(len(seg_ppms))] 60 | 61 | segsfile = 'layer' + str(layer)+ '/kernel-'+str(kernel)+'-segs.chen' 62 | segs_to_chen(ppmids=ppmids, ppms=seg_ppms_lst, starts=starts_lst, ends=ends_lst, filepath=segsfile) 63 | -------------------------------------------------------------------------------- /demos/Basset/BD-10/build_segment.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import pandas as pd 4 | from segment import * 5 | import numpy as np 6 | import h5py 7 | import sys 8 | 9 | layer = str(sys.argv[1]) 10 | kernel = str(sys.argv[2]) 11 | 12 | flanking = 1 13 | if len(sys.argv) > 3: 14 | flanking = int(sys.argv[3]) 15 | # default 1 16 | #layer = '10' 17 | #kernel = '1130' 18 | 19 | 20 | f = h5py.File('layer' + str(layer)+ 
'/kernel-'+str(kernel)+'.ppm.h5','r') 21 | 22 | 23 | ppmids = [] 24 | 25 | run_rounds = 0 26 | 27 | for a in list(f.keys()): 28 | if a.startswith('conact'): 29 | rl = len(a[len('conact'):].split('_')) 30 | if rl > run_rounds: 31 | run_rounds = rl 32 | 33 | for a in list(f.keys()): 34 | if a.startswith('conact'): 35 | rl = len(a[len('conact'):].split('_')) 36 | if rl == run_rounds: 37 | ppmids.append(a[len('conact'):]) 38 | 39 | 40 | 41 | I1 = [] 42 | I2 = [] 43 | sp_sizes = [] 44 | ppms = [] 45 | for ppmid in ppmids: 46 | I1.append(f['conact' + ppmid][0]) 47 | I2.append(f['act' + ppmid][:].max()) 48 | sp_sizes.append(f['index' + ppmid].shape[0]) 49 | ppms.append(f['ppm' + ppmid][:]) 50 | # ppms[-1] = ppms[-1][:,[0,2,1,3]] 51 | 52 | seg_ppms = [ppm_segment(ppms[i],smooth=True, sp_size=sp_sizes[i],flank = flanking, shortest = None) for i in range(len(ppms)) ] 53 | seg_max = np.array([len(seg_ppms[i][0]) for i in range(len(seg_ppms))]) 54 | 55 | 56 | 57 | seg_ppms_lst = [seg_ppms[i][0] for i in range(len(seg_ppms))] 58 | starts_lst = [seg_ppms[i][1] for i in range(len(seg_ppms))] 59 | ends_lst = [seg_ppms[i][2] for i in range(len(seg_ppms))] 60 | 61 | segsfile = 'layer' + str(layer)+ '/kernel-'+str(kernel)+'-segs.chen' 62 | segs_to_chen(ppmids=ppmids, ppms=seg_ppms_lst, starts=starts_lst, ends=ends_lst, filepath=segsfile) 63 | -------------------------------------------------------------------------------- /demos/Basset/Basset/build_segment.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import pandas as pd 4 | from segment import * 5 | import numpy as np 6 | import h5py 7 | import sys 8 | 9 | layer = str(sys.argv[1]) 10 | kernel = str(sys.argv[2]) 11 | 12 | flanking = 1 13 | if len(sys.argv) > 3: 14 | flanking = int(sys.argv[3]) 15 | # default 1 16 | #layer = '10' 17 | #kernel = '1130' 18 | 19 | 20 | f = h5py.File('layer' + str(layer)+ '/kernel-'+str(kernel)+'.ppm.h5','r') 21 | 22 | 23 | ppmids = [] 24 | 25 | run_rounds = 0 26 | 27 | for a in list(f.keys()): 28 | if a.startswith('conact'): 29 | rl = len(a[len('conact'):].split('_')) 30 | if rl > run_rounds: 31 | run_rounds = rl 32 | 33 | for a in list(f.keys()): 34 | if a.startswith('conact'): 35 | rl = len(a[len('conact'):].split('_')) 36 | if rl == run_rounds: 37 | ppmids.append(a[len('conact'):]) 38 | 39 | 40 | 41 | I1 = [] 42 | I2 = [] 43 | sp_sizes = [] 44 | ppms = [] 45 | for ppmid in ppmids: 46 | I1.append(f['conact' + ppmid][0]) 47 | I2.append(f['act' + ppmid][:].max()) 48 | sp_sizes.append(f['index' + ppmid].shape[0]) 49 | ppms.append(f['ppm' + ppmid][:]) 50 | # ppms[-1] = ppms[-1][:,[0,2,1,3]] 51 | 52 | seg_ppms = [ppm_segment(ppms[i],smooth=True, sp_size=sp_sizes[i],flank = flanking, shortest = None) for i in range(len(ppms)) ] 53 | seg_max = np.array([len(seg_ppms[i][0]) for i in range(len(seg_ppms))]) 54 | 55 | 56 | 57 | seg_ppms_lst = [seg_ppms[i][0] for i in range(len(seg_ppms))] 58 | starts_lst = [seg_ppms[i][1] for i in range(len(seg_ppms))] 59 | ends_lst = [seg_ppms[i][2] for i in range(len(seg_ppms))] 60 | 61 | segsfile = 'layer' + str(layer)+ '/kernel-'+str(kernel)+'-segs.chen' 62 | segs_to_chen(ppmids=ppmids, ppms=seg_ppms_lst, starts=starts_lst, ends=ends_lst, filepath=segsfile) 63 | -------------------------------------------------------------------------------- /demos/DeepSEA/DD-10/build_segment.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import pandas as pd 4 | from segment 
import * 5 | import numpy as np 6 | import h5py 7 | import sys 8 | 9 | layer = str(sys.argv[1]) 10 | kernel = str(sys.argv[2]) 11 | 12 | flanking = 1 13 | if len(sys.argv) > 3: 14 | flanking = int(sys.argv[3]) 15 | # default 1 16 | #layer = '10' 17 | #kernel = '1130' 18 | 19 | 20 | f = h5py.File('layer' + str(layer)+ '/kernel-'+str(kernel)+'.ppm.h5','r') 21 | 22 | 23 | ppmids = [] 24 | 25 | run_rounds = 0 26 | 27 | for a in list(f.keys()): 28 | if a.startswith('conact'): 29 | rl = len(a[len('conact'):].split('_')) 30 | if rl > run_rounds: 31 | run_rounds = rl 32 | 33 | for a in list(f.keys()): 34 | if a.startswith('conact'): 35 | rl = len(a[len('conact'):].split('_')) 36 | if rl == run_rounds: 37 | ppmids.append(a[len('conact'):]) 38 | 39 | 40 | 41 | I1 = [] 42 | I2 = [] 43 | sp_sizes = [] 44 | ppms = [] 45 | for ppmid in ppmids: 46 | I1.append(f['conact' + ppmid][0]) 47 | I2.append(f['act' + ppmid][:].max()) 48 | sp_sizes.append(f['index' + ppmid].shape[0]) 49 | ppms.append(f['ppm' + ppmid][:]) 50 | ppms[-1] = ppms[-1][:,[0,2,1,3]] 51 | 52 | seg_ppms = [ppm_segment(ppms[i],smooth=True, sp_size=sp_sizes[i],flank = flanking, shortest = None) for i in range(len(ppms)) ] 53 | seg_max = np.array([len(seg_ppms[i][0]) for i in range(len(seg_ppms))]) 54 | 55 | 56 | 57 | seg_ppms_lst = [seg_ppms[i][0] for i in range(len(seg_ppms))] 58 | starts_lst = [seg_ppms[i][1] for i in range(len(seg_ppms))] 59 | ends_lst = [seg_ppms[i][2] for i in range(len(seg_ppms))] 60 | 61 | segsfile = 'layer' + str(layer)+ '/kernel-'+str(kernel)+'-segs.chen' 62 | segs_to_chen(ppmids=ppmids, ppms=seg_ppms_lst, starts=starts_lst, ends=ends_lst, filepath=segsfile) 63 | -------------------------------------------------------------------------------- /demos/DeepSEA/DeepSEA/build_segment.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import pandas as pd 4 | from segment import * 5 | import numpy as np 6 | import h5py 7 | import sys 8 | 9 | layer = str(sys.argv[1]) 10 | kernel = str(sys.argv[2]) 11 | 12 | flanking = 1 13 | if len(sys.argv) > 3: 14 | flanking = int(sys.argv[3]) 15 | # default 1 16 | #layer = '10' 17 | #kernel = '1130' 18 | 19 | 20 | f = h5py.File('layer' + str(layer)+ '/kernel-'+str(kernel)+'.ppm.h5','r') 21 | 22 | 23 | ppmids = [] 24 | 25 | run_rounds = 0 26 | 27 | for a in list(f.keys()): 28 | if a.startswith('conact'): 29 | rl = len(a[len('conact'):].split('_')) 30 | if rl > run_rounds: 31 | run_rounds = rl 32 | 33 | for a in list(f.keys()): 34 | if a.startswith('conact'): 35 | rl = len(a[len('conact'):].split('_')) 36 | if rl == run_rounds: 37 | ppmids.append(a[len('conact'):]) 38 | 39 | 40 | 41 | I1 = [] 42 | I2 = [] 43 | sp_sizes = [] 44 | ppms = [] 45 | for ppmid in ppmids: 46 | I1.append(f['conact' + ppmid][0]) 47 | I2.append(f['act' + ppmid][:].max()) 48 | sp_sizes.append(f['index' + ppmid].shape[0]) 49 | ppms.append(f['ppm' + ppmid][:]) 50 | ppms[-1] = ppms[-1][:,[0,2,1,3]] 51 | 52 | seg_ppms = [ppm_segment(ppms[i],smooth=True, sp_size=sp_sizes[i],flank = flanking, shortest = None) for i in range(len(ppms)) ] 53 | seg_max = np.array([len(seg_ppms[i][0]) for i in range(len(seg_ppms))]) 54 | 55 | 56 | 57 | seg_ppms_lst = [seg_ppms[i][0] for i in range(len(seg_ppms))] 58 | starts_lst = [seg_ppms[i][1] for i in range(len(seg_ppms))] 59 | ends_lst = [seg_ppms[i][2] for i in range(len(seg_ppms))] 60 | 61 | segsfile = 'layer' + str(layer)+ '/kernel-'+str(kernel)+'-segs.chen' 62 | segs_to_chen(ppmids=ppmids, ppms=seg_ppms_lst, 
starts=starts_lst, ends=ends_lst, filepath=segsfile) 63 | -------------------------------------------------------------------------------- /NeuronMotif/decouple.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import h5py 3 | import sys 4 | import os 5 | os.environ["CUDA_VISIBLE_DEVICES"]="-1" 6 | threads = sys.argv[1] 7 | layer = sys.argv[2] 8 | kernel_size = int(sys.argv[3]) 9 | motif_nb = None 10 | if len(sys.argv) > 4: 11 | motif_nb = int(sys.argv[4]) 12 | 13 | from modeldef import * 14 | from scipy import stats 15 | 16 | kernel_nb,kernel_sz,pool_sz,input_bp, input_bps, model_list, act_model_list, gd = get_model_list(layer = int(layer), kernel = 0, weight_file='weight.hdf5') 17 | 18 | poolsz = 1 19 | 20 | for i in range(int(layer)-1): 21 | poolsz *= pool_sz[i] 22 | 23 | 24 | if motif_nb is not None: 25 | os.system('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 26 | os.system('python merge.py %s %d' % (layer,motif_nb)) 27 | else: 28 | motif_nb = 1 29 | while True: 30 | os.system('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 31 | print('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 32 | maxacts = [] 33 | for i in range(kernel_size): 34 | ppmids = [] 35 | if not os.path.exists('layer'+ layer +'/kernel-' +str(i) +'.ppm.h5'): 36 | continue 37 | f = h5py.File('layer'+ layer +'/kernel-' +str(i) +'.ppm.h5','r') 38 | print(i) 39 | for a in list(f.keys()): 40 | if a.startswith('conact'): 41 | ppmids.append(a[len('conact'):]) 42 | if len(ppmids) == 0: 43 | continue 44 | maxacts.append(np.max([f['conact'+ppmid][:]/np.max(f['act'+ppmid][:]) for ppmid in ppmids])) 45 | print(np.mean(maxacts)-1) 46 | if len(maxacts) < 10: 47 | statrs = [np.mean(maxacts) - 0.95 ,0.05] 48 | else: 49 | statrs = stats.ttest_1samp(maxacts,popmean=0.95) 50 | print(statrs) 51 | # os.system('rm -rf layer%s/lock*' %(layer)) 52 | if (statrs[0] > 0 and statrs[1] < 0.1) or poolsz <= 1: 53 | os.system('python merge.py %s %d' % (layer,motif_nb)) 54 | break 55 | else: 56 | os.system('rm -rf layer%s/kernel*.ppm.h5' %(layer)) 57 | os.system('rm -rf layer%s/lock*' %(layer)) 58 | motif_nb += 1 59 | 60 | 61 | -------------------------------------------------------------------------------- /demos/demo1/decouple.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import h5py 3 | import sys 4 | import os 5 | os.environ["CUDA_VISIBLE_DEVICES"]="-1" 6 | threads = sys.argv[1] 7 | layer = sys.argv[2] 8 | kernel_size = int(sys.argv[3]) 9 | motif_nb = None 10 | if len(sys.argv) > 4: 11 | motif_nb = int(sys.argv[4]) 12 | 13 | from modeldef import * 14 | from scipy import stats 15 | 16 | kernel_nb,kernel_sz,pool_sz,input_bp, input_bps, model_list, act_model_list, gd = get_model_list(layer = int(layer), kernel = 0, weight_file='weight.hdf5') 17 | 18 | poolsz = 1 19 | 20 | for i in range(int(layer)-1): 21 | poolsz *= pool_sz[i] 22 | 23 | 24 | if motif_nb is not None: 25 | os.system('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 26 | os.system('python merge.py %s %d' % (layer,motif_nb)) 27 | else: 28 | motif_nb = 1 29 | while True: 30 | os.system('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 31 | print('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' 
%(kernel_size, threads, layer, motif_nb)) 32 | maxacts = [] 33 | for i in range(kernel_size): 34 | ppmids = [] 35 | if not os.path.exists('layer'+ layer +'/kernel-' +str(i) +'.ppm.h5'): 36 | continue 37 | f = h5py.File('layer'+ layer +'/kernel-' +str(i) +'.ppm.h5','r') 38 | print(i) 39 | for a in list(f.keys()): 40 | if a.startswith('conact'): 41 | ppmids.append(a[len('conact'):]) 42 | if len(ppmids) == 0: 43 | continue 44 | maxacts.append(np.max([f['conact'+ppmid][:]/np.max(f['act'+ppmid][:]) for ppmid in ppmids])) 45 | print(np.mean(maxacts)-1) 46 | if len(maxacts) < 10: 47 | statrs = [np.mean(maxacts) - 0.95 ,0.05] 48 | else: 49 | statrs = stats.ttest_1samp(maxacts,popmean=0.95) 50 | print(statrs) 51 | # os.system('rm -rf layer%s/lock*' %(layer)) 52 | if (statrs[0] > 0 and statrs[1] < 0.1) or poolsz <= 1: 53 | os.system('python merge.py %s %d' % (layer,motif_nb)) 54 | break 55 | else: 56 | os.system('rm -rf layer%s/kernel*.ppm.h5' %(layer)) 57 | os.system('rm -rf layer%s/lock*' %(layer)) 58 | motif_nb += 1 59 | 60 | 61 | -------------------------------------------------------------------------------- /demos/demo2/decouple.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import h5py 3 | import sys 4 | import os 5 | os.environ["CUDA_VISIBLE_DEVICES"]="-1" 6 | threads = sys.argv[1] 7 | layer = sys.argv[2] 8 | kernel_size = int(sys.argv[3]) 9 | motif_nb = None 10 | if len(sys.argv) > 4: 11 | motif_nb = int(sys.argv[4]) 12 | 13 | from modeldef import * 14 | from scipy import stats 15 | 16 | kernel_nb,kernel_sz,pool_sz,input_bp, input_bps, model_list, act_model_list, gd = get_model_list(layer = int(layer), kernel = 0, weight_file='weight.hdf5') 17 | 18 | poolsz = 1 19 | 20 | for i in range(int(layer)-1): 21 | poolsz *= pool_sz[i] 22 | 23 | 24 | if motif_nb is not None: 25 | os.system('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 26 | os.system('python merge.py %s %d' % (layer,motif_nb)) 27 | else: 28 | motif_nb = 1 29 | while True: 30 | os.system('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 31 | print('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 32 | maxacts = [] 33 | for i in range(kernel_size): 34 | ppmids = [] 35 | if not os.path.exists('layer'+ layer +'/kernel-' +str(i) +'.ppm.h5'): 36 | continue 37 | f = h5py.File('layer'+ layer +'/kernel-' +str(i) +'.ppm.h5','r') 38 | print(i) 39 | for a in list(f.keys()): 40 | if a.startswith('conact'): 41 | ppmids.append(a[len('conact'):]) 42 | if len(ppmids) == 0: 43 | continue 44 | maxacts.append(np.max([f['conact'+ppmid][:]/np.max(f['act'+ppmid][:]) for ppmid in ppmids])) 45 | print(np.mean(maxacts)-1) 46 | if len(maxacts) < 10: 47 | statrs = [np.mean(maxacts) - 0.95 ,0.05] 48 | else: 49 | statrs = stats.ttest_1samp(maxacts,popmean=0.95) 50 | print(statrs) 51 | # os.system('rm -rf layer%s/lock*' %(layer)) 52 | if (statrs[0] > 0 and statrs[1] < 0.1) or poolsz <= 1: 53 | os.system('python merge.py %s %d' % (layer,motif_nb)) 54 | break 55 | else: 56 | os.system('rm -rf layer%s/kernel*.ppm.h5' %(layer)) 57 | os.system('rm -rf layer%s/lock*' %(layer)) 58 | motif_nb += 1 59 | 60 | 61 | -------------------------------------------------------------------------------- /demos/Basset/BD-10/decouple.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import h5py 3 | import 
sys 4 | import os 5 | os.environ["CUDA_VISIBLE_DEVICES"]="-1" 6 | threads = sys.argv[1] 7 | layer = sys.argv[2] 8 | kernel_size = int(sys.argv[3]) 9 | motif_nb = None 10 | if len(sys.argv) > 4: 11 | motif_nb = int(sys.argv[4]) 12 | 13 | from modeldef import * 14 | from scipy import stats 15 | 16 | kernel_nb,kernel_sz,pool_sz,input_bp, input_bps, model_list, act_model_list, gd = get_model_list(layer = int(layer), kernel = 0, weight_file='weight.hdf5') 17 | 18 | poolsz = 1 19 | 20 | for i in range(int(layer)-1): 21 | poolsz *= pool_sz[i] 22 | 23 | 24 | if motif_nb is not None: 25 | os.system('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 26 | os.system('python merge.py %s %d' % (layer,motif_nb)) 27 | else: 28 | motif_nb = 1 29 | while True: 30 | os.system('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 31 | print('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 32 | maxacts = [] 33 | for i in range(kernel_size): 34 | ppmids = [] 35 | if not os.path.exists('layer'+ layer +'/kernel-' +str(i) +'.ppm.h5'): 36 | continue 37 | f = h5py.File('layer'+ layer +'/kernel-' +str(i) +'.ppm.h5','r') 38 | print(i) 39 | for a in list(f.keys()): 40 | if a.startswith('conact'): 41 | ppmids.append(a[len('conact'):]) 42 | if len(ppmids) == 0: 43 | continue 44 | maxacts.append(np.max([f['conact'+ppmid][:]/np.max(f['act'+ppmid][:]) for ppmid in ppmids])) 45 | print(np.mean(maxacts)-1) 46 | if len(maxacts) < 10: 47 | statrs = [np.mean(maxacts) - 0.95 ,0.05] 48 | else: 49 | statrs = stats.ttest_1samp(maxacts,popmean=0.95) 50 | print(statrs) 51 | # os.system('rm -rf layer%s/lock*' %(layer)) 52 | if (statrs[0] > 0 and statrs[1] < 0.1) or poolsz <= 1: 53 | os.system('python merge.py %s %d' % (layer,motif_nb)) 54 | break 55 | else: 56 | os.system('rm -rf layer%s/kernel*.ppm.h5' %(layer)) 57 | os.system('rm -rf layer%s/lock*' %(layer)) 58 | motif_nb += 1 59 | 60 | 61 | -------------------------------------------------------------------------------- /demos/Basset/Basset/decouple.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import h5py 3 | import sys 4 | import os 5 | os.environ["CUDA_VISIBLE_DEVICES"]="-1" 6 | threads = sys.argv[1] 7 | layer = sys.argv[2] 8 | kernel_size = int(sys.argv[3]) 9 | motif_nb = None 10 | if len(sys.argv) > 4: 11 | motif_nb = int(sys.argv[4]) 12 | 13 | from modeldef import * 14 | from scipy import stats 15 | 16 | kernel_nb,kernel_sz,pool_sz,input_bp, input_bps, model_list, act_model_list, gd = get_model_list(layer = int(layer), kernel = 0, weight_file='weight.hdf5') 17 | 18 | poolsz = 1 19 | 20 | for i in range(int(layer)-1): 21 | poolsz *= pool_sz[i] 22 | 23 | 24 | if motif_nb is not None: 25 | os.system('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 26 | os.system('python merge.py %s %d' % (layer,motif_nb)) 27 | else: 28 | motif_nb = 1 29 | while True: 30 | os.system('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 31 | print('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 32 | maxacts = [] 33 | for i in range(kernel_size): 34 | ppmids = [] 35 | if not os.path.exists('layer'+ layer +'/kernel-' +str(i) +'.ppm.h5'): 36 | continue 37 | f = h5py.File('layer'+ layer +'/kernel-' +str(i) +'.ppm.h5','r') 38 | print(i) 39 | 
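# collect the ids of every decoupled branch ('conact*' dataset) saved for this kernel; their normalized activations drive the convergence test below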
for a in list(f.keys()): 40 | if a.startswith('conact'): 41 | ppmids.append(a[len('conact'):]) 42 | if len(ppmids) == 0: 43 | continue 44 | maxacts.append(np.max([f['conact'+ppmid][:]/np.max(f['act'+ppmid][:]) for ppmid in ppmids])) 45 | print(np.mean(maxacts)-1) 46 | if len(maxacts) < 10: 47 | statrs = [np.mean(maxacts) - 0.95 ,0.05] 48 | else: 49 | statrs = stats.ttest_1samp(maxacts,popmean=0.95) 50 | print(statrs) 51 | # os.system('rm -rf layer%s/lock*' %(layer)) 52 | if (statrs[0] > 0 and statrs[1] < 0.1) or poolsz <= 1: 53 | os.system('python merge.py %s %d' % (layer,motif_nb)) 54 | break 55 | else: 56 | os.system('rm -rf layer%s/kernel*.ppm.h5' %(layer)) 57 | os.system('rm -rf layer%s/lock*' %(layer)) 58 | motif_nb += 1 59 | 60 | 61 | -------------------------------------------------------------------------------- /demos/DeepSEA/DD-10/decouple.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import h5py 3 | import sys 4 | import os 5 | os.environ["CUDA_VISIBLE_DEVICES"]="-1" 6 | threads = sys.argv[1] 7 | layer = sys.argv[2] 8 | kernel_size = int(sys.argv[3]) 9 | motif_nb = None 10 | if len(sys.argv) > 4: 11 | motif_nb = int(sys.argv[4]) 12 | 13 | from modeldef import * 14 | from scipy import stats 15 | 16 | kernel_nb,kernel_sz,pool_sz,input_bp, input_bps, model_list, act_model_list, gd = get_model_list(layer = int(layer), kernel = 0, weight_file='weight.hdf5') 17 | 18 | poolsz = 1 19 | 20 | for i in range(int(layer)-1): 21 | poolsz *= pool_sz[i] 22 | 23 | 24 | if motif_nb is not None: 25 | os.system('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 26 | os.system('python merge.py %s %d' % (layer,motif_nb)) 27 | else: 28 | motif_nb = 1 29 | while True: 30 | os.system('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 31 | print('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 32 | maxacts = [] 33 | for i in range(kernel_size): 34 | ppmids = [] 35 | if not os.path.exists('layer'+ layer +'/kernel-' +str(i) +'.ppm.h5'): 36 | continue 37 | f = h5py.File('layer'+ layer +'/kernel-' +str(i) +'.ppm.h5','r') 38 | print(i) 39 | for a in list(f.keys()): 40 | if a.startswith('conact'): 41 | ppmids.append(a[len('conact'):]) 42 | if len(ppmids) == 0: 43 | continue 44 | maxacts.append(np.max([f['conact'+ppmid][:]/np.max(f['act'+ppmid][:]) for ppmid in ppmids])) 45 | print(np.mean(maxacts)-1) 46 | if len(maxacts) < 10: 47 | statrs = [np.mean(maxacts) - 0.95 ,0.05] 48 | else: 49 | statrs = stats.ttest_1samp(maxacts,popmean=0.95) 50 | print(statrs) 51 | # os.system('rm -rf layer%s/lock*' %(layer)) 52 | if (statrs[0] > 0 and statrs[1] < 0.1) or poolsz <= 1: 53 | os.system('python merge.py %s %d' % (layer,motif_nb)) 54 | break 55 | else: 56 | os.system('rm -rf layer%s/kernel*.ppm.h5' %(layer)) 57 | os.system('rm -rf layer%s/lock*' %(layer)) 58 | motif_nb += 1 59 | 60 | 61 | -------------------------------------------------------------------------------- /demos/DeepSEA/DeepSEA/decouple.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import h5py 3 | import sys 4 | import os 5 | os.environ["CUDA_VISIBLE_DEVICES"]="-1" 6 | threads = sys.argv[1] 7 | layer = sys.argv[2] 8 | kernel_size = int(sys.argv[3]) 9 | motif_nb = None 10 | if len(sys.argv) > 4: 11 | motif_nb = int(sys.argv[4]) 12 | 13 | from modeldef import * 14 | from scipy 
import stats 15 | 16 | kernel_nb,kernel_sz,pool_sz,input_bp, input_bps, model_list, act_model_list, gd = get_model_list(layer = int(layer), kernel = 0, weight_file='weight.hdf5') 17 | 18 | poolsz = 1 19 | 20 | for i in range(int(layer)-1): 21 | poolsz *= pool_sz[i] 22 | 23 | 24 | if motif_nb is not None: 25 | os.system('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 26 | os.system('python merge.py %s %d' % (layer,motif_nb)) 27 | else: 28 | motif_nb = 1 29 | while True: 30 | os.system('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 31 | print('bash idx.sh %d | xargs -n 1 -P %s bash decouple.sh %s %d' %(kernel_size, threads, layer, motif_nb)) 32 | maxacts = [] 33 | for i in range(kernel_size): 34 | ppmids = [] 35 | if not os.path.exists('layer'+ layer +'/kernel-' +str(i) +'.ppm.h5'): 36 | continue 37 | f = h5py.File('layer'+ layer +'/kernel-' +str(i) +'.ppm.h5','r') 38 | print(i) 39 | for a in list(f.keys()): 40 | if a.startswith('conact'): 41 | ppmids.append(a[len('conact'):]) 42 | if len(ppmids) == 0: 43 | continue 44 | maxacts.append(np.max([f['conact'+ppmid][:]/np.max(f['act'+ppmid][:]) for ppmid in ppmids])) 45 | print(np.mean(maxacts)-1) 46 | if len(maxacts) < 10: 47 | statrs = [np.mean(maxacts) - 0.95 ,0.05] 48 | else: 49 | statrs = stats.ttest_1samp(maxacts,popmean=0.95) 50 | print(statrs) 51 | # os.system('rm -rf layer%s/lock*' %(layer)) 52 | if (statrs[0] > 0 and statrs[1] < 0.1) or poolsz <= 1: 53 | os.system('python merge.py %s %d' % (layer,motif_nb)) 54 | break 55 | else: 56 | os.system('rm -rf layer%s/kernel*.ppm.h5' %(layer)) 57 | os.system('rm -rf layer%s/lock*' %(layer)) 58 | motif_nb += 1 59 | 60 | 61 | -------------------------------------------------------------------------------- /dcnn/DeepSEA/DeepSEA/deepsea.pred.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from matplotlib import pyplot as plt 5 | 6 | 7 | import keras 8 | 9 | 10 | import h5py 11 | import numpy as np 12 | from keras.layers import Input, Dense, Conv1D, MaxPooling2D, MaxPooling1D, BatchNormalization 13 | from keras.layers.core import Dropout, Activation, Flatten 14 | from keras.layers.merge import Concatenate 15 | from keras.models import Model 16 | from keras.callbacks import EarlyStopping, ModelCheckpoint 17 | from keras.optimizers import Adam 18 | from keras.utils import multi_gpu_model 19 | 20 | from keras.regularizers import l1,l2, l1_l2 21 | from keras.constraints import MaxNorm 22 | from keras.optimizers import SGD 23 | 24 | from keras.activations import relu 25 | 26 | 27 | input_bp = 1000 28 | conv_kernel_size = 8 29 | pool_kernel_size = 4 30 | 31 | maxnrom = MaxNorm(max_value=0.9, axis=0) 32 | l1l2 = l1_l2(l1=0, l2=1e-6) 33 | 34 | def crelu(x, alpha=0.0, max_value=None, threshold=1e-6): 35 | return relu(x, alpha, max_value, threshold) 36 | 37 | batch_size=1000 38 | 39 | seqInput = Input(shape=(input_bp, 4), name='seqInput') 40 | 41 | seq = Conv1D(320, conv_kernel_size,kernel_regularizer=l1l2, kernel_constraint=maxnrom)(seqInput) 42 | seq = Activation(crelu)(seq) 43 | seq = MaxPooling1D(pool_size=pool_kernel_size,strides=pool_kernel_size)(seq) 44 | seq = Dropout(0.2)(seq) 45 | seq = Conv1D(480, conv_kernel_size,kernel_regularizer=l1l2, kernel_constraint=maxnrom)(seq) 46 | seq = Activation(crelu)(seq) 47 | seq = MaxPooling1D(pool_size=pool_kernel_size,strides=pool_kernel_size)(seq) 48 | 
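# conv block 3 below is the widest layer (960 filters); the dropout rates (0.2, 0.2, 0.5) follow the original DeepSEA architecture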
seq = Dropout(0.2)(seq) 49 | seq = Conv1D(960, conv_kernel_size,kernel_regularizer=l1l2, kernel_constraint=maxnrom)(seq) 50 | seq = Activation(crelu)(seq) 51 | seq = Dropout(0.5)(seq) 52 | seq = Flatten()(seq) 53 | seq = Dense(925,kernel_regularizer=l1l2, kernel_constraint=maxnrom)(seq) 54 | seq = Activation(crelu)(seq) 55 | seq = Dense(919,kernel_regularizer=l1l2, kernel_constraint=maxnrom, activity_regularizer=l1_l2(l1=1e-8,l2=0))(seq) 56 | seq = Activation('sigmoid')(seq) 57 | 58 | 59 | model = Model(inputs = [seqInput], outputs = [seq]) 60 | model.load_weights('weight.hdf5') 61 | 62 | from scipy.io import loadmat 63 | 64 | 65 | 66 | #trainmat = loadmat('data/train.mat') 67 | testmat = loadmat('data/test.mat') 68 | #trainy = trainmat['traindata'] 69 | #trianx = trainmat['trainxdata'] 70 | testx = testmat['testxdata'] 71 | testy = testmat['testdata'] 72 | 73 | 74 | 75 | 76 | testidx = testx.transpose((0,2,1)) 77 | #validy = validy.transpose((1,0)) 78 | result = model.predict(testidx,batch_size=1000) 79 | 80 | 81 | from sklearn.metrics import average_precision_score 82 | from sklearn.metrics import roc_auc_score 83 | auprc = np.zeros((testy.shape[1])) 84 | auroc = np.zeros((testy.shape[1])) 85 | for i in range(testy.shape[1]): 86 | print(i) 87 | if testy[:,i].sum() !=0 and testy[:,i].sum() !=testy.shape[0]: 88 | auprc[i] = average_precision_score(testy[:,i],result[:,i]) 89 | auroc[i] = roc_auc_score(testy[:,i],result[:,i]) 90 | 91 | 92 | with h5py.File('test.h5','w') as f: 93 | f['test_result'] = result 94 | f['test_label'] = testy 95 | f['auprc'] = auprc 96 | f['auroc']=auroc 97 | 98 | -------------------------------------------------------------------------------- /dcnn/demo/demo1/simu.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from matplotlib import pyplot as plt 5 | 6 | 7 | import keras 8 | 9 | 10 | import h5py 11 | import numpy as np 12 | from keras.layers import Input, Dense, Conv1D, MaxPooling2D, MaxPooling1D, BatchNormalization 13 | from keras.layers.core import Dropout, Activation, Flatten 14 | from keras.layers.merge import Concatenate 15 | from keras.models import Model 16 | from keras.callbacks import EarlyStopping, ModelCheckpoint 17 | from keras.optimizers import Adam 18 | from keras.utils import multi_gpu_model 19 | 20 | from keras.regularizers import l1,l2, l1_l2 21 | from keras.constraints import MaxNorm 22 | from keras.optimizers import SGD 23 | 24 | from keras.activations import relu 25 | 26 | 27 | import os 28 | import tensorflow as tf 29 | import keras.backend.tensorflow_backend as KTF 30 | 31 | 32 | 33 | 34 | input_bp = 600 35 | 36 | batch_size=128 37 | 38 | 39 | 40 | 41 | seqInput = Input(shape=(8, 4), name='seqInput') 42 | 43 | 44 | seq = Conv1D(3, 5)(seqInput) 45 | seq = Activation('relu')(seq) 46 | seq = MaxPooling1D(2)(seq) 47 | seq = Conv1D(1, 2)(seq) 48 | seq = Activation('sigmoid')(seq) 49 | seq = Flatten()(seq) 50 | 51 | model = Model(inputs = [seqInput], outputs = [seq]) 52 | model_json = model.to_json() 53 | with open("model.json", "w") as json_file: 54 | json_file.write(model_json) 55 | 56 | #from keras.optimizers import RMSprop 57 | model.compile('adam', loss='binary_crossentropy', metrics=['accuracy']) 58 | 59 | 60 | PWM0 = np.loadtxt('PWM0') 61 | PWM1 = np.loadtxt('PWM1') 62 | PWM = np.ones(PWM1.shape)*0.25 63 | 64 | 65 | def pwm_to_sample(PWM, n = 1000): 66 | PWM /= PWM.sum(axis=0) 67 | PWM = PWM.T 68 | PWM = PWM[::-1,:] 69 | PWM = PWM[:,::-1] 
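# assuming ACGT column order, reversing both axes above puts the PWM in reverse-complement orientation before sampling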
70 | sample = np.zeros((n,PWM.shape[0],PWM.shape[1])) 71 | for i in range(n): 72 | for j in range(sample.shape[1]): 73 | sample[i,j,np.random.choice(4,1,p=PWM[j,:])] = 1  # draw one base per position 74 | return sample 75 | 76 | sp0 = pwm_to_sample(PWM0) 77 | sp1 = pwm_to_sample(PWM1) 78 | spn = pwm_to_sample(PWM,n=2000)  # background samples from the uniform PWM 79 | 80 | sp = np.concatenate([sp0,sp1,spn],axis=0) 81 | 82 | label = np.r_[np.ones(2000),np.zeros(2000)]  # 2000 positives (sp0+sp1), 2000 negatives 83 | 84 | callbacks=[] 85 | callbacks.append(ModelCheckpoint(filepath='weight.hdf5',save_best_only=True)) 86 | callbacks.append(EarlyStopping(patience=15)) 87 | 88 | 89 | history = model.fit(x=sp, y=label, epochs=100,validation_split=0.1,callbacks=callbacks) 90 | 91 | print(model.layers[1].get_weights()[0][:,:,0]) 92 | print(model.layers[1].get_weights()[0][:,:,1]) 93 | print(model.layers[1].get_weights()[0][:,:,2]) 94 | 95 | print(model.layers[4].get_weights()[0][:,:,0]) 96 | 97 | history_dict=history.history 98 | loss_values = history_dict['loss'] 99 | val_loss_values=history_dict['val_loss'] 100 | plt.figure() 101 | plt.plot(loss_values,'bo',label='training loss') 102 | plt.plot(val_loss_values,'r',label='validation loss') 103 | 104 | plt.savefig('history.pdf') 105 | #rs = model.predict(oh)[0,:] 106 | 107 | 108 | with h5py.File('history.h5','w') as f: 109 | f['loss_values'] = loss_values 110 | f['val_loss'] = val_loss_values 111 | f['sample'] = sp 112 | f['label'] = label 113 | -------------------------------------------------------------------------------- /demos/demo1/README.md: -------------------------------------------------------------------------------- 1 | # Demo 1 2 | 3 | * Input: 8 bp DNA sequence 4 | * Output: a scalar in the interval [0, 1] 5 | * Positive sequences: two ZEB1 motifs shifted by 1 bp 6 | * Negative sequences: random sequences 7 | 8 | Architecture of the DCNN model: 9 | 10 | * Convolutional layer 1 (3 filters, size 5) 11 | + Activation function (ReLU) 12 | + Max-pooling operation (size 2) 13 | * Convolutional layer 2 (1 filter, size 2) 14 | + Activation function (sigmoid) 15 | * Flatten 16 | 17 | 18 | 19 | # Run this demo 20 | 21 | Download a motif database. 22 | 23 | Take JASPAR as an example: 24 | 25 | ``` 26 | wget --no-check-certificate -O motifDB.txt https://jaspar.genereg.net/download/data/2022/CORE/JASPAR2022_CORE_vertebrates_non-redundant_pfms_meme.txt 27 | ``` 28 | 29 | Then, run: 30 | 31 | ``` 32 | bash script.sh 33 | ``` 34 | 35 | 36 | 37 | # Result 38 | 39 | The HTML visualization results are organized by layer inside the HTML folder; a CN CRM below denotes the cis-regulatory module syntax recovered for a convolutional neuron. You can view the results in a web browser: 40 | 41 | | Files | Contents | 42 | |--------------------------------------------------|----------------------------------------------------| 43 | | HTML/layer[Layer#]/[Neuron#].html | Visualization of CN CRMs | 44 | | HTML/layer[Layer#]/tree.[Neuron#].html | Visualization of the syntax tree for CN CRMs | 45 | | HTML/layer[Layer#]/tomtom_[Neuron#].sel.ppm.meme | Motif segments mapped to the motif database | 46 | | layer[Layer#]/tomtom_dict_[Neuron#] | Motif dictionary mapped to the motif database | 47 | 48 | 49 | 50 | For each layer, the sequence samples, position probability matrices, and diagnosis indicators are stored in the folder 'layerX'. 51 | 52 | 53 | The HTML, PPM, and tomtom results are stored in the corresponding layer folder. 
54 | 55 | | Files | Contents | 56 | |--------------------------------------------------|----------------------------------------------------| 57 | | layer[Layer#]/[Neuron#].html | Visualization of CN CRMs | 58 | | layer[Layer#]/tree.[Neuron#].html | Visualization of the syntax tree for CN CRMs | 59 | | layer[Layer#]/kernel-[Neuron#].all.ppm.chen | PPMs of CN CRMs in chen format | 60 | | layer[Layer#]/kernel-[Neuron#].all.ppm.meme | PPMs of CN CRMs in meme format | 61 | | layer[Layer#]/kernel-[Neuron#].h5 | Sequence samples | 62 | | layer[Layer#]/kernel-[Neuron#].ppm.h5 | PPMs/activations/indicators of CN CRMs in h5 format | 63 | | layer[Layer#]/kernel-[Neuron#]-segs.chen | PPMs of CRM segments | 64 | | layer[Layer#]/kernel-[Neuron#]-segs-dict | Motif segments mapped to the dictionary | 65 | | layer[Layer#]/tomtom_dict_[Neuron#] | Motif dictionary mapped to the database via tomtom | 66 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.chen | PPMs of motifs in the dictionary (chen format) | 67 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.h5 | PPMs of motifs in the dictionary (h5 format) | 68 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.meme | PPMs of motifs in the dictionary (meme format) | 69 | -------------------------------------------------------------------------------- /NeuronMotif/merge.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from matplotlib import pyplot as plt 5 | import sys 6 | import h5py 7 | layer = int(sys.argv[1]) 8 | #kernel = int(sys.argv[2]) 9 | motif_nb = 1 10 | if len(sys.argv) > 2: 11 | motif_nb = int(sys.argv[2]) 12 | print('layer' + str(layer)) 13 | #print('kernel' + str(kernel)) 14 | #layer = 2 15 | #kernel = 0 16 | 17 | import keras 18 | 19 | 20 | from modeldef import * 21 | from utils import * 22 | 23 | import h5py 24 | import numpy as np 25 | from keras.layers import Input, Dense, Conv1D, MaxPooling2D, MaxPooling1D, BatchNormalization 26 | from keras.layers.core import Dropout, Activation, Flatten 27 | from keras.layers.merge import Concatenate 28 | from keras.models import Model 29 | from keras.callbacks import EarlyStopping, ModelCheckpoint 30 | from keras.optimizers import Adam 31 | from keras.utils import multi_gpu_model 32 | 33 | from keras.regularizers import l1,l2, l1_l2 34 | from keras.constraints import MaxNorm 35 | from keras.optimizers import SGD 36 | 37 | from keras.activations import relu 38 | 39 | 40 | import os 41 | import tensorflow as tf 42 | import keras.backend.tensorflow_backend as KTF 43 | 44 | kernel=0 45 | 46 | kernel_nb,kernel_sz,pool_sz,input_bp, input_bps, model_list, act_model_list, gd = get_model_list(layer = layer, kernel = kernel, weight_file='weight.hdf5') 47 | 48 | submodel = model_list[-1] 49 | 50 | 51 | ppmlist = [] 52 | smppmlist = [] 53 | label = [] 54 | countlist = [] 55 | actlist = [] 56 | conactlist = [] 57 | total = 1 58 | for i in range(layer-1): 59 | total *= pool_sz[i]  # branches: product of the pooling sizes of the earlier layers 60 | 61 | for i in range(motif_nb-1): 62 | total *= total  # squared once per additional motif round 63 | 64 | for kernel in range(kernel_nb[layer-1]): 65 | print(kernel) 66 | if not os.path.exists('layer'+str(layer)+'/kernel-'+str(kernel)+'.ppm.h5'): 67 | for j in range(total): 68 | ppmlist.append(np.ones((1,input_bp,4))*0.25) 69 | smppmlist.append(np.ones((1,input_bp,4))*0.25) 70 | countlist.append(0) 71 | actlist.append(0) 72 | conactlist.append(np.array([0])) 73 | continue 74 | with h5py.File('layer'+str(layer)+'/kernel-'+str(kernel)+'.ppm.h5','r') as f: 75 | for j in range(total): 76 | if 'ppm' + str(j) in f.keys(): 77 
| spnumb = f['act' + str(j)][:].shape[0] 78 | countlist.append(spnumb) 79 | actlist.append(f['act' + str(j)][:].max()) 80 | ppmlist.append(f['ppm' + str(j)][:][np.newaxis,:,:]) 81 | smooth_ppm = (f['ppm' + str(j)][:][np.newaxis,:,:] * spnumb + np.ones((1,input_bp,4))*0.25)/(spnumb + 1) 82 | smppmlist.append(smooth_ppm) 83 | conactlist.append(f['conact'+ str(j)][:]) 84 | print(f['conact'+ str(j)][:]) 85 | else: 86 | ppmlist.append(np.ones((1,input_bp,4))*0.25) 87 | smppmlist.append(np.ones((1,input_bp,4))*0.25) 88 | countlist.append(0) 89 | actlist.append(0) 90 | conactlist.append(np.array([0])) 91 | 92 | 93 | 94 | with h5py.File('layer'+str(layer)+'/allppm.h5','w') as f: 95 | f['allppm']=np.concatenate(ppmlist,axis=0) 96 | f['smoothppm']=np.concatenate(smppmlist,axis=0) 97 | f['act'] = np.array(actlist) 98 | f['spnumb'] = np.array(countlist) 99 | print(conactlist) 100 | f['conact'] = np.concatenate(conactlist,axis=0) 101 | 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /demos/demo1/merge.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from matplotlib import pyplot as plt 5 | import sys 6 | import h5py 7 | layer = int(sys.argv[1]) 8 | #kernel = int(sys.argv[2]) 9 | motif_nb = 1 10 | if len(sys.argv) > 2: 11 | motif_nb = int(sys.argv[2]) 12 | print('layer' + str(layer)) 13 | #print('kernel' + str(kernel)) 14 | #layer = 2 15 | #kernel = 0 16 | 17 | import keras 18 | 19 | 20 | from modeldef import * 21 | from utils import * 22 | 23 | import h5py 24 | import numpy as np 25 | from keras.layers import Input, Dense, Conv1D, MaxPooling2D, MaxPooling1D, BatchNormalization 26 | from keras.layers.core import Dropout, Activation, Flatten 27 | from keras.layers.merge import Concatenate 28 | from keras.models import Model 29 | from keras.callbacks import EarlyStopping, ModelCheckpoint 30 | from keras.optimizers import Adam 31 | from keras.utils import multi_gpu_model 32 | 33 | from keras.regularizers import l1,l2, l1_l2 34 | from keras.constraints import MaxNorm 35 | from keras.optimizers import SGD 36 | 37 | from keras.activations import relu 38 | 39 | 40 | import os 41 | import tensorflow as tf 42 | import keras.backend.tensorflow_backend as KTF 43 | 44 | kernel=0 45 | 46 | kernel_nb,kernel_sz,pool_sz,input_bp, input_bps, model_list, act_model_list, gd = get_model_list(layer = layer, kernel = kernel, weight_file='weight.hdf5') 47 | 48 | submodel = model_list[-1] 49 | 50 | 51 | ppmlist = [] 52 | smppmlist = [] 53 | label = [] 54 | countlist = [] 55 | actlist = [] 56 | conactlist = [] 57 | total = 1 58 | for i in range(layer-1): 59 | total *= pool_sz[i] 60 | 61 | for i in range(motif_nb-1): 62 | total *= total 63 | 64 | for kernel in range(kernel_nb[layer-1]): 65 | print(kernel) 66 | if not os.path.exists('layer'+str(layer)+'/kernel-'+str(kernel)+'.ppm.h5'): 67 | for j in range(total): 68 | ppmlist.append(np.ones((1,input_bp,4))*0.25) 69 | smppmlist.append(np.ones((1,input_bp,4))*0.25) 70 | countlist.append(0) 71 | actlist.append(0) 72 | conactlist.append(np.array([0])) 73 | continue 74 | with h5py.File('layer'+str(layer)+'/kernel-'+str(kernel)+'.ppm.h5','r') as f: 75 | for j in range(total): 76 | if 'ppm' + str(j) in f.keys(): 77 | spnumb = f['act' + str(j)][:].shape[0] 78 | countlist.append(spnumb) 79 | actlist.append(f['act' + str(j)][:].max()) 80 | ppmlist.append(f['ppm' + str(j)][:][np.newaxis,:,:]) 81 | smooth_ppm = (f['ppm' + 
str(j)][:][np.newaxis,:,:] * spnumb + np.ones((1,input_bp,4))*0.25)/(spnumb + 1) 82 | smppmlist.append(smooth_ppm) 83 | conactlist.append(f['conact'+ str(j)][:]) 84 | print(f['conact'+ str(j)][:]) 85 | else: 86 | ppmlist.append(np.ones((1,input_bp,4))*0.25) 87 | smppmlist.append(np.ones((1,input_bp,4))*0.25) 88 | countlist.append(0) 89 | actlist.append(0) 90 | conactlist.append(np.array([0])) 91 | 92 | 93 | 94 | with h5py.File('layer'+str(layer)+'/allppm.h5','w') as f: 95 | f['allppm']=np.concatenate(ppmlist,axis=0) 96 | f['smoothppm']=np.concatenate(smppmlist,axis=0) 97 | f['act'] = np.array(actlist) 98 | f['spnumb'] = np.array(countlist) 99 | print(conactlist) 100 | f['conact'] = np.concatenate(conactlist,axis=0) 101 | 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /demos/demo2/merge.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from matplotlib import pyplot as plt 5 | import sys 6 | import h5py 7 | layer = int(sys.argv[1]) 8 | #kernel = int(sys.argv[2]) 9 | motif_nb = 1 10 | if len(sys.argv) > 2: 11 | motif_nb = int(sys.argv[2]) 12 | print('layer' + str(layer)) 13 | #print('kernel' + str(kernel)) 14 | #layer = 2 15 | #kernel = 0 16 | 17 | import keras 18 | 19 | 20 | from modeldef import * 21 | from utils import * 22 | 23 | import h5py 24 | import numpy as np 25 | from keras.layers import Input, Dense, Conv1D, MaxPooling2D, MaxPooling1D, BatchNormalization 26 | from keras.layers.core import Dropout, Activation, Flatten 27 | from keras.layers.merge import Concatenate 28 | from keras.models import Model 29 | from keras.callbacks import EarlyStopping, ModelCheckpoint 30 | from keras.optimizers import Adam 31 | from keras.utils import multi_gpu_model 32 | 33 | from keras.regularizers import l1,l2, l1_l2 34 | from keras.constraints import MaxNorm 35 | from keras.optimizers import SGD 36 | 37 | from keras.activations import relu 38 | 39 | 40 | import os 41 | import tensorflow as tf 42 | import keras.backend.tensorflow_backend as KTF 43 | 44 | kernel=0 45 | 46 | kernel_nb,kernel_sz,pool_sz,input_bp, input_bps, model_list, act_model_list, gd = get_model_list(layer = layer, kernel = kernel, weight_file='weight.hdf5') 47 | 48 | submodel = model_list[-1] 49 | 50 | 51 | ppmlist = [] 52 | smppmlist = [] 53 | label = [] 54 | countlist = [] 55 | actlist = [] 56 | conactlist = [] 57 | total = 1 58 | for i in range(layer-1): 59 | total *= pool_sz[i] 60 | 61 | for i in range(motif_nb-1): 62 | total *= total 63 | 64 | for kernel in range(kernel_nb[layer-1]): 65 | print(kernel) 66 | if not os.path.exists('layer'+str(layer)+'/kernel-'+str(kernel)+'.ppm.h5'): 67 | for j in range(total): 68 | ppmlist.append(np.ones((1,input_bp,4))*0.25) 69 | smppmlist.append(np.ones((1,input_bp,4))*0.25) 70 | countlist.append(0) 71 | actlist.append(0) 72 | conactlist.append(np.array([0])) 73 | continue 74 | with h5py.File('layer'+str(layer)+'/kernel-'+str(kernel)+'.ppm.h5','r') as f: 75 | for j in range(total): 76 | if 'ppm' + str(j) in f.keys(): 77 | spnumb = f['act' + str(j)][:].shape[0] 78 | countlist.append(spnumb) 79 | actlist.append(f['act' + str(j)][:].max()) 80 | ppmlist.append(f['ppm' + str(j)][:][np.newaxis,:,:]) 81 | smooth_ppm = (f['ppm' + str(j)][:][np.newaxis,:,:] * spnumb + np.ones((1,input_bp,4))*0.25)/(spnumb + 1) 82 | smppmlist.append(smooth_ppm) 83 | conactlist.append(f['conact'+ str(j)][:]) 84 | print(f['conact'+ str(j)][:]) 85 | else: 86 | 
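# branches with no saved PPM fall back to uniform (0.25) matrices and zero counts so all arrays stay index-aligned across kernels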
ppmlist.append(np.ones((1,input_bp,4))*0.25) 87 | smppmlist.append(np.ones((1,input_bp,4))*0.25) 88 | countlist.append(0) 89 | actlist.append(0) 90 | conactlist.append(np.array([0])) 91 | 92 | 93 | 94 | with h5py.File('layer'+str(layer)+'/allppm.h5','w') as f: 95 | f['allppm']=np.concatenate(ppmlist,axis=0) 96 | f['smoothppm']=np.concatenate(smppmlist,axis=0) 97 | f['act'] = np.array(actlist) 98 | f['spnumb'] = np.array(countlist) 99 | print(conactlist) 100 | f['conact'] = np.concatenate(conactlist,axis=0) 101 | 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /demos/Basset/BD-10/merge.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from matplotlib import pyplot as plt 5 | import sys 6 | import h5py 7 | layer = int(sys.argv[1]) 8 | #kernel = int(sys.argv[2]) 9 | motif_nb = 1 10 | if len(sys.argv) > 2: 11 | motif_nb = int(sys.argv[2]) 12 | print('layer' + str(layer)) 13 | #print('kernel' + str(kernel)) 14 | #layer = 2 15 | #kernel = 0 16 | 17 | import keras 18 | 19 | 20 | from modeldef import * 21 | from utils import * 22 | 23 | import h5py 24 | import numpy as np 25 | from keras.layers import Input, Dense, Conv1D, MaxPooling2D, MaxPooling1D, BatchNormalization 26 | from keras.layers.core import Dropout, Activation, Flatten 27 | from keras.layers.merge import Concatenate 28 | from keras.models import Model 29 | from keras.callbacks import EarlyStopping, ModelCheckpoint 30 | from keras.optimizers import Adam 31 | from keras.utils import multi_gpu_model 32 | 33 | from keras.regularizers import l1,l2, l1_l2 34 | from keras.constraints import MaxNorm 35 | from keras.optimizers import SGD 36 | 37 | from keras.activations import relu 38 | 39 | 40 | import os 41 | import tensorflow as tf 42 | import keras.backend.tensorflow_backend as KTF 43 | 44 | kernel=0 45 | 46 | kernel_nb,kernel_sz,pool_sz,input_bp, input_bps, model_list, act_model_list, gd = get_model_list(layer = layer, kernel = kernel, weight_file='weight.hdf5') 47 | 48 | submodel = model_list[-1] 49 | 50 | 51 | ppmlist = [] 52 | smppmlist = [] 53 | label = [] 54 | countlist = [] 55 | actlist = [] 56 | conactlist = [] 57 | total = 1 58 | for i in range(layer-1): 59 | total *= pool_sz[i] 60 | 61 | for i in range(motif_nb-1): 62 | total *= total 63 | 64 | for kernel in range(kernel_nb[layer-1]): 65 | print(kernel) 66 | if not os.path.exists('layer'+str(layer)+'/kernel-'+str(kernel)+'.ppm.h5'): 67 | for j in range(total): 68 | ppmlist.append(np.ones((1,input_bp,4))*0.25) 69 | smppmlist.append(np.ones((1,input_bp,4))*0.25) 70 | countlist.append(0) 71 | actlist.append(0) 72 | conactlist.append(np.array([0])) 73 | continue 74 | with h5py.File('layer'+str(layer)+'/kernel-'+str(kernel)+'.ppm.h5','r') as f: 75 | for j in range(total): 76 | if 'ppm' + str(j) in f.keys(): 77 | spnumb = f['act' + str(j)][:].shape[0] 78 | countlist.append(spnumb) 79 | actlist.append(f['act' + str(j)][:].max()) 80 | ppmlist.append(f['ppm' + str(j)][:][np.newaxis,:,:]) 81 | smooth_ppm = (f['ppm' + str(j)][:][np.newaxis,:,:] * spnumb + np.ones((1,input_bp,4))*0.25)/(spnumb + 1) 82 | smppmlist.append(smooth_ppm) 83 | conactlist.append(f['conact'+ str(j)][:]) 84 | print(f['conact'+ str(j)][:]) 85 | else: 86 | ppmlist.append(np.ones((1,input_bp,4))*0.25) 87 | smppmlist.append(np.ones((1,input_bp,4))*0.25) 88 | countlist.append(0) 89 | actlist.append(0) 90 | conactlist.append(np.array([0])) 91 | 92 | 93 | 94 | with 
h5py.File('layer'+str(layer)+'/allppm.h5','w') as f: 95 | f['allppm']=np.concatenate(ppmlist,axis=0) 96 | f['smoothppm']=np.concatenate(smppmlist,axis=0) 97 | f['act'] = np.array(actlist) 98 | f['spnumb'] = np.array(countlist) 99 | print(conactlist) 100 | f['conact'] = np.concatenate(conactlist,axis=0) 101 | 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /demos/Basset/Basset/merge.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from matplotlib import pyplot as plt 5 | import sys 6 | import h5py 7 | layer = int(sys.argv[1]) 8 | #kernel = int(sys.argv[2]) 9 | motif_nb = 1 10 | if len(sys.argv) > 2: 11 | motif_nb = int(sys.argv[2]) 12 | print('layer' + str(layer)) 13 | #print('kernel' + str(kernel)) 14 | #layer = 2 15 | #kernel = 0 16 | 17 | import keras 18 | 19 | 20 | from modeldef import * 21 | from utils import * 22 | 23 | import h5py 24 | import numpy as np 25 | from keras.layers import Input, Dense, Conv1D, MaxPooling2D, MaxPooling1D, BatchNormalization 26 | from keras.layers.core import Dropout, Activation, Flatten 27 | from keras.layers.merge import Concatenate 28 | from keras.models import Model 29 | from keras.callbacks import EarlyStopping, ModelCheckpoint 30 | from keras.optimizers import Adam 31 | from keras.utils import multi_gpu_model 32 | 33 | from keras.regularizers import l1,l2, l1_l2 34 | from keras.constraints import MaxNorm 35 | from keras.optimizers import SGD 36 | 37 | from keras.activations import relu 38 | 39 | 40 | import os 41 | import tensorflow as tf 42 | import keras.backend.tensorflow_backend as KTF 43 | 44 | kernel=0 45 | 46 | kernel_nb,kernel_sz,pool_sz,input_bp, input_bps, model_list, act_model_list, gd = get_model_list(layer = layer, kernel = kernel, weight_file='weight.hdf5') 47 | 48 | submodel = model_list[-1] 49 | 50 | 51 | ppmlist = [] 52 | smppmlist = [] 53 | label = [] 54 | countlist = [] 55 | actlist = [] 56 | conactlist = [] 57 | total = 1 58 | for i in range(layer-1): 59 | total *= pool_sz[i] 60 | 61 | for i in range(motif_nb-1): 62 | total *= total 63 | 64 | for kernel in range(kernel_nb[layer-1]): 65 | print(kernel) 66 | if not os.path.exists('layer'+str(layer)+'/kernel-'+str(kernel)+'.ppm.h5'): 67 | for j in range(total): 68 | ppmlist.append(np.ones((1,input_bp,4))*0.25) 69 | smppmlist.append(np.ones((1,input_bp,4))*0.25) 70 | countlist.append(0) 71 | actlist.append(0) 72 | conactlist.append(np.array([0])) 73 | continue 74 | with h5py.File('layer'+str(layer)+'/kernel-'+str(kernel)+'.ppm.h5','r') as f: 75 | for j in range(total): 76 | if 'ppm' + str(j) in f.keys(): 77 | spnumb = f['act' + str(j)][:].shape[0] 78 | countlist.append(spnumb) 79 | actlist.append(f['act' + str(j)][:].max()) 80 | ppmlist.append(f['ppm' + str(j)][:][np.newaxis,:,:]) 81 | smooth_ppm = (f['ppm' + str(j)][:][np.newaxis,:,:] * spnumb + np.ones((1,input_bp,4))*0.25)/(spnumb + 1) 82 | smppmlist.append(smooth_ppm) 83 | conactlist.append(f['conact'+ str(j)][:]) 84 | print(f['conact'+ str(j)][:]) 85 | else: 86 | ppmlist.append(np.ones((1,input_bp,4))*0.25) 87 | smppmlist.append(np.ones((1,input_bp,4))*0.25) 88 | countlist.append(0) 89 | actlist.append(0) 90 | conactlist.append(np.array([0])) 91 | 92 | 93 | 94 | with h5py.File('layer'+str(layer)+'/allppm.h5','w') as f: 95 | f['allppm']=np.concatenate(ppmlist,axis=0) 96 | f['smoothppm']=np.concatenate(smppmlist,axis=0) 97 | f['act'] = np.array(actlist) 98 | f['spnumb'] = 
np.array(countlist) 99 | print(conactlist) 100 | f['conact'] = np.concatenate(conactlist,axis=0) 101 | 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /demos/DeepSEA/DD-10/merge.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from matplotlib import pyplot as plt 5 | import sys 6 | import h5py 7 | layer = int(sys.argv[1]) 8 | #kernel = int(sys.argv[2]) 9 | motif_nb = 1 10 | if len(sys.argv) > 2: 11 | motif_nb = int(sys.argv[2]) 12 | print('layer' + str(layer)) 13 | #print('kernel' + str(kernel)) 14 | #layer = 2 15 | #kernel = 0 16 | 17 | import keras 18 | 19 | 20 | from modeldef import * 21 | from utils import * 22 | 23 | import h5py 24 | import numpy as np 25 | from keras.layers import Input, Dense, Conv1D, MaxPooling2D, MaxPooling1D, BatchNormalization 26 | from keras.layers.core import Dropout, Activation, Flatten 27 | from keras.layers.merge import Concatenate 28 | from keras.models import Model 29 | from keras.callbacks import EarlyStopping, ModelCheckpoint 30 | from keras.optimizers import Adam 31 | from keras.utils import multi_gpu_model 32 | 33 | from keras.regularizers import l1,l2, l1_l2 34 | from keras.constraints import MaxNorm 35 | from keras.optimizers import SGD 36 | 37 | from keras.activations import relu 38 | 39 | 40 | import os 41 | import tensorflow as tf 42 | import keras.backend.tensorflow_backend as KTF 43 | 44 | kernel=0 45 | 46 | kernel_nb,kernel_sz,pool_sz,input_bp, input_bps, model_list, act_model_list, gd = get_model_list(layer = layer, kernel = kernel, weight_file='weight.hdf5') 47 | 48 | submodel = model_list[-1] 49 | 50 | 51 | ppmlist = [] 52 | smppmlist = [] 53 | label = [] 54 | countlist = [] 55 | actlist = [] 56 | conactlist = [] 57 | total = 1 58 | for i in range(layer-1): 59 | total *= pool_sz[i] 60 | 61 | for i in range(motif_nb-1): 62 | total *= total 63 | 64 | for kernel in range(kernel_nb[layer-1]): 65 | print(kernel) 66 | if not os.path.exists('layer'+str(layer)+'/kernel-'+str(kernel)+'.ppm.h5'): 67 | for j in range(total): 68 | ppmlist.append(np.ones((1,input_bp,4))*0.25) 69 | smppmlist.append(np.ones((1,input_bp,4))*0.25) 70 | countlist.append(0) 71 | actlist.append(0) 72 | conactlist.append(np.array([0])) 73 | continue 74 | with h5py.File('layer'+str(layer)+'/kernel-'+str(kernel)+'.ppm.h5','r') as f: 75 | for j in range(total): 76 | if 'ppm' + str(j) in f.keys(): 77 | spnumb = f['act' + str(j)][:].shape[0] 78 | countlist.append(spnumb) 79 | actlist.append(f['act' + str(j)][:].max()) 80 | ppmlist.append(f['ppm' + str(j)][:][np.newaxis,:,:]) 81 | smooth_ppm = (f['ppm' + str(j)][:][np.newaxis,:,:] * spnumb + np.ones((1,input_bp,4))*0.25)/(spnumb + 1) 82 | smppmlist.append(smooth_ppm) 83 | conactlist.append(f['conact'+ str(j)][:]) 84 | print(f['conact'+ str(j)][:]) 85 | else: 86 | ppmlist.append(np.ones((1,input_bp,4))*0.25) 87 | smppmlist.append(np.ones((1,input_bp,4))*0.25) 88 | countlist.append(0) 89 | actlist.append(0) 90 | conactlist.append(np.array([0])) 91 | 92 | 93 | 94 | with h5py.File('layer'+str(layer)+'/allppm.h5','w') as f: 95 | f['allppm']=np.concatenate(ppmlist,axis=0) 96 | f['smoothppm']=np.concatenate(smppmlist,axis=0) 97 | f['act'] = np.array(actlist) 98 | f['spnumb'] = np.array(countlist) 99 | print(conactlist) 100 | f['conact'] = np.concatenate(conactlist,axis=0) 101 | 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- 
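Note on the merge step shared by all copies of merge.py above: before writing allppm.h5, each PPM is smoothed by blending it, weighted by its sample count, with a single pseudo-sequence of uniform base frequencies. A minimal standalone sketch of that step (the function name and array shapes are illustrative, not part of the repository):

```
import numpy as np

def smooth_ppm(ppm, n_samples):
    # Blend an (L, 4) position probability matrix estimated from
    # n_samples sequences with one uniform pseudo-sequence.
    uniform = np.full_like(ppm, 0.25)
    return (ppm * n_samples + uniform) / (n_samples + 1)
```

With n_samples = 0 this reduces to the uniform matrix, which matches the 0.25 placeholders merge.py emits for kernels without saved PPMs.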
/demos/DeepSEA/DeepSEA/merge.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from matplotlib import pyplot as plt 5 | import sys 6 | import h5py 7 | layer = int(sys.argv[1]) 8 | #kernel = int(sys.argv[2]) 9 | motif_nb = 1 10 | if len(sys.argv) > 2: 11 | motif_nb = int(sys.argv[2]) 12 | print('layer' + str(layer)) 13 | #print('kernel' + str(kernel)) 14 | #layer = 2 15 | #kernel = 0 16 | 17 | import keras 18 | 19 | 20 | from modeldef import * 21 | from utils import * 22 | 23 | import h5py 24 | import numpy as np 25 | from keras.layers import Input, Dense, Conv1D, MaxPooling2D, MaxPooling1D, BatchNormalization 26 | from keras.layers.core import Dropout, Activation, Flatten 27 | from keras.layers.merge import Concatenate 28 | from keras.models import Model 29 | from keras.callbacks import EarlyStopping, ModelCheckpoint 30 | from keras.optimizers import Adam 31 | from keras.utils import multi_gpu_model 32 | 33 | from keras.regularizers import l1,l2, l1_l2 34 | from keras.constraints import MaxNorm 35 | from keras.optimizers import SGD 36 | 37 | from keras.activations import relu 38 | 39 | 40 | import os 41 | import tensorflow as tf 42 | import keras.backend.tensorflow_backend as KTF 43 | 44 | kernel=0 45 | 46 | kernel_nb,kernel_sz,pool_sz,input_bp, input_bps, model_list, act_model_list, gd = get_model_list(layer = layer, kernel = kernel, weight_file='weight.hdf5') 47 | 48 | submodel = model_list[-1] 49 | 50 | 51 | ppmlist = [] 52 | smppmlist = [] 53 | label = [] 54 | countlist = [] 55 | actlist = [] 56 | conactlist = [] 57 | total = 1 58 | for i in range(layer-1): 59 | total *= pool_sz[i] 60 | 61 | for i in range(motif_nb-1): 62 | total *= total 63 | 64 | for kernel in range(kernel_nb[layer-1]): 65 | print(kernel) 66 | if not os.path.exists('layer'+str(layer)+'/kernel-'+str(kernel)+'.ppm.h5'): 67 | for j in range(total): 68 | ppmlist.append(np.ones((1,input_bp,4))*0.25) 69 | smppmlist.append(np.ones((1,input_bp,4))*0.25) 70 | countlist.append(0) 71 | actlist.append(0) 72 | conactlist.append(np.array([0])) 73 | continue 74 | with h5py.File('layer'+str(layer)+'/kernel-'+str(kernel)+'.ppm.h5','r') as f: 75 | for j in range(total): 76 | if 'ppm' + str(j) in f.keys(): 77 | spnumb = f['act' + str(j)][:].shape[0] 78 | countlist.append(spnumb) 79 | actlist.append(f['act' + str(j)][:].max()) 80 | ppmlist.append(f['ppm' + str(j)][:][np.newaxis,:,:]) 81 | smooth_ppm = (f['ppm' + str(j)][:][np.newaxis,:,:] * spnumb + np.ones((1,input_bp,4))*0.25)/(spnumb + 1) 82 | smppmlist.append(smooth_ppm) 83 | conactlist.append(f['conact'+ str(j)][:]) 84 | print(f['conact'+ str(j)][:]) 85 | else: 86 | ppmlist.append(np.ones((1,input_bp,4))*0.25) 87 | smppmlist.append(np.ones((1,input_bp,4))*0.25) 88 | countlist.append(0) 89 | actlist.append(0) 90 | conactlist.append(np.array([0])) 91 | 92 | 93 | 94 | with h5py.File('layer'+str(layer)+'/allppm.h5','w') as f: 95 | f['allppm']=np.concatenate(ppmlist,axis=0) 96 | f['smoothppm']=np.concatenate(smppmlist,axis=0) 97 | f['act'] = np.array(actlist) 98 | f['spnumb'] = np.array(countlist) 99 | print(conactlist) 100 | f['conact'] = np.concatenate(conactlist,axis=0) 101 | 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /demos/demo1/modeldef.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import sys 3 | import keras 4 | import os 5 | 6 | import h5py 7 | import numpy 
as np 8 | from keras.layers import Input, Dense, Conv1D, MaxPooling2D, MaxPooling1D, BatchNormalization 9 | from keras.layers.core import Dropout, Activation, Flatten 10 | from keras.layers.merge import Concatenate 11 | from keras.models import Model 12 | 13 | 14 | from keras.activations import relu 15 | 16 | 17 | import os 18 | import tensorflow as tf 19 | 20 | def getRF(layer,kernel_sz, pool_sz): 21 | if layer == 0: 22 | return kernel_sz[0] 23 | sz = 1 24 | for i in range(layer,0,-1): 25 | sz += (kernel_sz[i]-1) 26 | sz *= pool_sz[i-1] 27 | sz += (kernel_sz[0]-1) 28 | return sz 29 | 30 | 31 | 32 | from keras.activations import relu 33 | 34 | 35 | ################################################## 36 | #Fill get_whole_model function with your own model 37 | # We take BD-10 model as an example 38 | input_bp = 600 39 | def get_whole_model(): 40 | seqInput = Input(shape=(input_bp, 4), name='seqInput') 41 | seq = Conv1D(3, 5)(seqInput) 42 | seq = Activation('relu')(seq) 43 | seq = MaxPooling1D(2)(seq) 44 | seq = Conv1D(1, 2)(seq) 45 | seq = Activation('sigmoid')(seq) 46 | seq = Flatten()(seq) 47 | return Model(inputs = [seqInput], outputs = [seq]) 48 | # 49 | ################################################## 50 | 51 | def get_model_list(layer, kernel, weight_file='weight.hdf5'): 52 | model = get_whole_model() 53 | model.load_weights(weight_file) 54 | #################################################### 55 | # Fill kernel_nb, kernel_sz and pool_sz according your model 56 | kernel_nb = [3,1] 57 | kernel_sz = [5,2] 58 | pool_sz = [2] 59 | # If there is no max-pooling operation, the pool_sz is 1 60 | ##################################################### 61 | input_bps = [getRF(i,kernel_sz, pool_sz) for i in range(len(kernel_sz))] # [8, 39, 163] 62 | input_bp = input_bps[layer-1] 63 | pre_model_list = [] 64 | model_list = [] 65 | act_model_list = [] 66 | out_list = [] 67 | #################################################################### 68 | # Build substructures for the convolutional neurons in your own model 69 | seqInput = Input(shape=(input_bp, 4), name='subseqInput') 70 | # Fill your own deep convolutional neural network structure and, 71 | # Before activation function, add 'model_list.append(Model(inputs = [seqInput], outputs = [seq]))' 72 | # After convolution function, add 'act_model_list.append(Model(inputs = [seqInput], outputs = [seq]))' 73 | seq = Conv1D(3, 5)(seqInput) 74 | act_model_list.append(Model(inputs = [seqInput], outputs = [seq])) 75 | model_list.append(Model(inputs = [seqInput], outputs = [seq])) 76 | if layer > 1: 77 | seq = Activation('relu')(seq) 78 | seq = MaxPooling1D(2)(seq) 79 | seq = Conv1D(1, 2)(seq) 80 | act_model_list.append(Model(inputs = [seqInput], outputs = [seq])) 81 | model_list.append(Model(inputs = [seqInput], outputs = [seq])) 82 | # the convolutional neuron output is seq 83 | #################################################################### 84 | out = seq 85 | for submodel in model_list: 86 | for i in range(len(submodel.layers)): 87 | submodel.layers[i].set_weights(model.layers[i].get_weights()) 88 | for submodel in act_model_list: 89 | for i in range(len(submodel.layers)): 90 | submodel.layers[i].set_weights(model.layers[i].get_weights()) 91 | gd = tf.gradients(seq[:,:,kernel],seqInput) 92 | return kernel_nb,kernel_sz,pool_sz,input_bp,input_bps,model_list, act_model_list, gd 93 | 94 | 95 | -------------------------------------------------------------------------------- /dcnn/DeepSEA/DD-10/deepsea.pred.py: 
-------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from matplotlib import pyplot as plt 5 | 6 | 7 | import keras 8 | 9 | 10 | import h5py 11 | import numpy as np 12 | from keras.layers import Input, Dense, Conv1D, MaxPooling2D, MaxPooling1D, BatchNormalization 13 | from keras.layers.core import Dropout, Activation, Flatten 14 | from keras.layers.merge import Concatenate 15 | from keras.models import Model 16 | from keras.callbacks import EarlyStopping, ModelCheckpoint 17 | from keras.optimizers import Adam 18 | from keras.utils import multi_gpu_model 19 | 20 | from keras.regularizers import l1,l2, l1_l2 21 | from keras.constraints import MaxNorm 22 | from keras.optimizers import SGD 23 | 24 | from keras.activations import relu 25 | 26 | input_bp = 1000 27 | conv_kernel_size = 8 28 | pool_kernel_size = 4 29 | 30 | maxnrom = MaxNorm(max_value=0.9, axis=0) 31 | l1l2 = l1_l2(l1=1e-8, l2=1e-6) 32 | 33 | def crelu(x, alpha=0.0, max_value=None, threshold=1e-6): 34 | return relu(x, alpha, max_value, threshold) 35 | 36 | batch_size=16 37 | 38 | seqInput = Input(shape=(input_bp, 4), name='seqInput') 39 | 40 | 41 | seq = Conv1D(128, 7)(seqInput) 42 | seq = BatchNormalization()(seq) 43 | seq = Activation('relu')(seq) 44 | seq = Conv1D(128, 3)(seq) 45 | seq = BatchNormalization()(seq) 46 | seq = Activation('relu')(seq) 47 | seq = MaxPooling1D(2)(seq) 48 | seq = Conv1D(160, 3)(seq) 49 | seq = BatchNormalization()(seq) 50 | seq = Activation('relu')(seq) 51 | seq = Conv1D(160, 3)(seq) 52 | seq = BatchNormalization()(seq) 53 | seq = Activation('relu')(seq) 54 | seq = MaxPooling1D(2)(seq) 55 | seq = Conv1D(256, 3)(seq) 56 | seq = BatchNormalization()(seq) 57 | seq = Activation('relu')(seq) 58 | seq = Conv1D(320, 3)(seq) 59 | seq = BatchNormalization()(seq) 60 | seq = Activation('relu')(seq) 61 | seq = MaxPooling1D(2)(seq) 62 | seq = Conv1D(512, 3)(seq) 63 | seq = BatchNormalization()(seq) 64 | seq = Activation('relu')(seq) 65 | seq = Conv1D(640, 3)(seq) 66 | seq = BatchNormalization()(seq) 67 | seq = Activation('relu')(seq) 68 | seq = MaxPooling1D(2)(seq) 69 | seq = Conv1D(1024, 3)(seq) 70 | seq = BatchNormalization()(seq) 71 | seq = Activation('relu')(seq) 72 | seq = Conv1D(1280, 3)(seq) 73 | seq = BatchNormalization()(seq) 74 | seq = Activation('relu')(seq) 75 | seq = MaxPooling1D(2)(seq) 76 | seq = Flatten()(seq) 77 | seq = Dense(925)(seq) 78 | seq = BatchNormalization()(seq) 79 | seq = Activation('relu')(seq) 80 | seq = Dropout(0.2)(seq) 81 | seq = Dense(919)(seq) 82 | seq = Activation('sigmoid')(seq) 83 | 84 | model = Model(inputs = [seqInput], outputs = [seq]) 85 | model.load_weights('weight.hdf5') 86 | 87 | from scipy.io import loadmat 88 | 89 | 90 | 91 | #trainmat = loadmat('data/train.mat') 92 | testmat = loadmat('data/test.mat') 93 | #trainy = trainmat['traindata'] 94 | #trianx = trainmat['trainxdata'] 95 | testx = testmat['testxdata'] 96 | testy = testmat['testdata'] 97 | 98 | 99 | 100 | 101 | testidx = testx.transpose((0,2,1)) 102 | #validy = validy.transpose((1,0)) 103 | result = model.predict(testidx,batch_size=1000) 104 | 105 | 106 | from sklearn.metrics import average_precision_score 107 | from sklearn.metrics import roc_auc_score 108 | auprc = np.zeros((testy.shape[1])) 109 | auroc = np.zeros((testy.shape[1])) 110 | for i in range(testy.shape[1]): 111 | print(i) 112 | if testy[:,i].sum() !=0 and testy[:,i].sum() !=testy.shape[0]: 113 | auprc[i] = average_precision_score(testy[:,i],result[:,i]) 
114 | auroc[i] = roc_auc_score(testy[:,i],result[:,i]) 115 | 116 | 117 | with h5py.File('test.h5','w') as f: 118 | f['test_result'] = result 119 | f['test_label'] = testy 120 | f['auprc'] = auprc 121 | f['auroc']=auroc 122 | 123 | -------------------------------------------------------------------------------- /demos/Basset/Basset/README.md: -------------------------------------------------------------------------------- 1 | 2 | Please read the README in the folder [code](https://github.com/wzthu/NeuronMotif/tree/master/NeuronMotif) first. 3 | 4 | # Enter NeuronMotif environment 5 | 6 | ``` 7 | conda activate NeuronMotif 8 | ``` 9 | 10 | # Download the weight file 11 | 12 | Download the weight file and rename it to 'weight.hdf5': 13 | 14 | ``` 15 | wget --no-check-certificate -O weight.hdf5 http://bioinfo-xwwang-thu.cn/zwei/NeuronMotif/DCNN_weight/Basset/Basset/weight.hdf5 16 | ``` 17 | 18 | Alternatively, if the link above is temporarily unavailable, you can download it from https://cloud.tsinghua.edu.cn/d/fee522536d524eae9531/files/?p=%2FDCNN_weight%2FBasset%2FBasset%2Fweight.hdf5&dl=1 19 | 20 | 21 | # Download the JASPAR database: 22 | 23 | ``` 24 | wget --no-check-certificate -O motifDB.txt https://jaspar.genereg.net/download/data/2022/CORE/JASPAR2022_CORE_vertebrates_non-redundant_pfms_meme.txt 25 | ``` 26 | 27 | 28 | # Run 29 | 30 | Before running the scripts, you should adapt the number of threads in the scripts to your server or cluster. 31 | 32 | ## Through the script on a server: 33 | 34 | 35 | ``` 36 | bash script.sh 37 | ``` 38 | 39 | ## Through scripts on many nodes of a cluster 40 | 41 | ### Run NeuronMotif: 42 | 43 | ``` 44 | # Each line of the script can be submitted to several nodes at the same time to run in parallel. 45 | # The next line cannot be submitted until the tasks on all nodes are done. 46 | 47 | bash run.layer.sh 1 300 20 48 | bash run.layer.sh 2 200 20 49 | bash run.layer.sh 3 200 20 50 | ``` 51 | 52 | The HTML visualization results are organized in each layer folder of the HTML folder. You can view them in a web browser: 53 | 54 | | Files                                            | Contains                                           | 55 | |--------------------------------------------------|----------------------------------------------------| 56 | | HTML/layer[Layer#]/[Neuron#].html                | Visualization of CN CRMs                           | 57 | | HTML/layer[Layer#]/tree.[Neuron#].html           | Visualization of syntax tree for CN CRMs           | 58 | | HTML/layer[Layer#]/tomtom_[Neuron#].sel.ppm.meme | some motif segment mapped to database              | 59 | | layer[Layer#]/tomtom_dict_[Neuron#]              | motif dictionary mapped to database                | 60 | 61 | 62 | 63 | 64 | The HTML, PPM and tomtom results are stored in the corresponding layer folder.
65 | 66 | | Files                                            | Contains                                           | 67 | |--------------------------------------------------|----------------------------------------------------| 68 | | layer[Layer#]/[Neuron#].html                     | Visualization of CN CRMs                           | 69 | | layer[Layer#]/tree.[Neuron#].html                | Visualization of syntax tree for CN CRMs           | 70 | | layer[Layer#]/kernel-[Neuron#].all.ppm.chen      | PPMs of CN CRMs in chen format                     | 71 | | layer[Layer#]/kernel-[Neuron#].all.ppm.meme      | PPMs of CN CRMs in meme format                     | 72 | | layer[Layer#]/kernel-[Neuron#].h5                | sequence sample                                    | 73 | | layer[Layer#]/kernel-[Neuron#].ppm.h5            | PPMs/activation/indicators of CN CRMs in h5 format | 74 | | layer[Layer#]/kernel-[Neuron#]-segs.chen         | PPMs of CRMs segments                              | 75 | | layer[Layer#]/kernel-[Neuron#]-segs-dict         | motif segment mapped to dictionary                 | 76 | | layer[Layer#]/tomtom_dict_[Neuron#]              | motif dictionary mapped to tomtom                  | 77 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.chen | PPMs of motifs in dictionary (chen format)         | 78 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.h5   | PPMs of motifs in dictionary (h5 format)           | 79 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.meme | PPMs of motifs in dictionary (meme format)         | 80 | -------------------------------------------------------------------------------- /demos/DeepSEA/DeepSEA/README.md: -------------------------------------------------------------------------------- 1 | 2 | Please read the README in the folder [code](https://github.com/wzthu/NeuronMotif/tree/master/NeuronMotif) first. 3 | 4 | # Enter NeuronMotif environment 5 | 6 | ``` 7 | conda activate NeuronMotif 8 | ``` 9 | 10 | # Download the weight file 11 | 12 | Download the weight file and rename it to 'weight.hdf5': 13 | 14 | ``` 15 | wget --no-check-certificate -O weight.hdf5 http://bioinfo-xwwang-thu.cn/zwei/NeuronMotif/DCNN_weight/DeepSEA/DeepSEA/weight.hdf5 16 | ``` 17 | 18 | Alternatively, if the link above is temporarily unavailable, you can download it from https://cloud.tsinghua.edu.cn/d/fee522536d524eae9531/files/?p=%2FDCNN_weight%2FDeepSEA%2FDeepSEA%2Fweight.hdf5&dl=1 19 | 20 | 21 | 22 | # Download the JASPAR database: 23 | 24 | ``` 25 | wget --no-check-certificate -O motifDB.txt https://jaspar.genereg.net/download/data/2022/CORE/JASPAR2022_CORE_vertebrates_non-redundant_pfms_meme.txt 26 | ``` 27 | 28 | 29 | # Run 30 | 31 | Before running the scripts, you should adapt the number of threads in the scripts to your server or cluster. 32 | 33 | ## Through the script on a server: 34 | 35 | 36 | ``` 37 | bash script.sh 38 | ``` 39 | 40 | ## Through scripts on many nodes of a cluster 41 | 42 | ### Run NeuronMotif: 43 | 44 | ``` 45 | # Each line of the script can be submitted to several nodes at the same time to run in parallel. 46 | # The next line cannot be submitted until the tasks on all nodes are done. 47 | 48 | 49 | bash run.layer.sh 1 320 20 50 | bash run.layer.sh 2 480 20 51 | bash run.layer.sh 3 960 10 52 | ``` 53 | 54 | The HTML visualization results are organized in each layer folder of the HTML folder.
You can view them in a web browser: 55 | 56 | | Files                                            | Contains                                           | 57 | |--------------------------------------------------|----------------------------------------------------| 58 | | HTML/layer[Layer#]/[Neuron#].html                | Visualization of CN CRMs                           | 59 | | HTML/layer[Layer#]/tree.[Neuron#].html           | Visualization of syntax tree for CN CRMs           | 60 | | HTML/layer[Layer#]/tomtom_[Neuron#].sel.ppm.meme | some motif segment mapped to database              | 61 | | layer[Layer#]/tomtom_dict_[Neuron#]              | motif dictionary mapped to database                | 62 | 63 | 64 | 65 | 66 | The HTML, PPM and tomtom results are stored in the corresponding layer folder. 67 | 68 | | Files                                            | Contains                                           | 69 | |--------------------------------------------------|----------------------------------------------------| 70 | | layer[Layer#]/[Neuron#].html                     | Visualization of CN CRMs                           | 71 | | layer[Layer#]/tree.[Neuron#].html                | Visualization of syntax tree for CN CRMs           | 72 | | layer[Layer#]/kernel-[Neuron#].all.ppm.chen      | PPMs of CN CRMs in chen format                     | 73 | | layer[Layer#]/kernel-[Neuron#].all.ppm.meme      | PPMs of CN CRMs in meme format                     | 74 | | layer[Layer#]/kernel-[Neuron#].h5                | sequence sample                                    | 75 | | layer[Layer#]/kernel-[Neuron#].ppm.h5            | PPMs/activation/indicators of CN CRMs in h5 format | 76 | | layer[Layer#]/kernel-[Neuron#]-segs.chen         | PPMs of CRMs segments                              | 77 | | layer[Layer#]/kernel-[Neuron#]-segs-dict         | motif segment mapped to dictionary                 | 78 | | layer[Layer#]/tomtom_dict_[Neuron#]              | motif dictionary mapped to tomtom                  | 79 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.chen | PPMs of motifs in dictionary (chen format)         | 80 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.h5   | PPMs of motifs in dictionary (h5 format)           | 81 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.meme | PPMs of motifs in dictionary (meme format)         | 82 | -------------------------------------------------------------------------------- /dcnn/Basset/Basset/basset.pred.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from matplotlib import pyplot as plt 5 | 6 | import h5py 7 | 8 | 9 | import keras 10 | 11 | 12 | import h5py 13 | import numpy as np 14 | from keras.layers import Input, Dense, Conv1D, MaxPooling2D, MaxPooling1D, BatchNormalization 15 | from keras.layers.core import Dropout, Activation, Flatten 16 | from keras.layers.merge import Concatenate 17 | from keras.models import Model 18 | from keras.callbacks import EarlyStopping, ModelCheckpoint 19 | from keras.optimizers import Adam 20 | from keras.utils import multi_gpu_model 21 | 22 | from keras.regularizers import l1,l2, l1_l2 23 | from keras.constraints import MaxNorm 24 | from keras.optimizers import SGD 25 | 26 | from keras.activations import relu 27 | 28 | 29 | import os 30 | import tensorflow as tf 31 | import keras.backend.tensorflow_backend as KTF 32 | 33 | from data.util import Randseq, fill_oh, gen_samples 34 | 35 | input_bp = 600 36 | 37 | batch_size=128 38 | 39 | seqInput = Input(shape=(input_bp, 4), name='seqInput') 40 | 41 | 42 | 43 | seq = Conv1D(300, 19)(seqInput) 44 | seq = BatchNormalization()(seq) 45 | seq = Activation('relu')(seq) 46 | seq = MaxPooling1D(pool_size=3)(seq) 47 | seq = Conv1D(200, 11)(seq) 48 | seq = BatchNormalization()(seq) 49 | seq = Activation('relu')(seq) 50 | seq = MaxPooling1D(pool_size=4)(seq) 51 | seq = Conv1D(200, 7)(seq) 52 | seq = BatchNormalization()(seq) 53 | seq = Activation('relu')(seq) 54 | seq = MaxPooling1D(pool_size=4)(seq) 55 | seq = Flatten()(seq)
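# The tower above is the Basset-style feature extractor used in this repo: three
# Conv1D + BatchNormalization + ReLU blocks (300 filters of width 19, 200 of width 11,
# 200 of width 7) with max-pooling sizes 3, 4 and 4. Below, the flattened features feed
# two Dense(1000) + Dropout(0.3) blocks and a Dense(164) sigmoid head, one output per
# label column parsed from data/encode_roadmap.bed further down in this script.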
56 | seq = Dense(1000)(seq) 57 | seq = Activation('relu')(seq) 58 | seq = Dropout(0.3)(seq) 59 | seq = Dense(1000)(seq) 60 | seq = Activation('relu')(seq) 61 | seq = Dropout(0.3)(seq) 62 | seq = Dense(164)(seq) 63 | seq = Activation('sigmoid')(seq) 64 | 65 | 66 | model = Model(inputs = [seqInput], outputs = [seq]) 67 | model.load_weights('weight.hdf5') 68 | 69 | 70 | chroms = ['chr'+str(i) for i in range(1,23)] 71 | chroms.append('chrX') 72 | chroms.append('chrY') 73 | 74 | with h5py.File('data/onehot.h5', 'r') as f: 75 | onehot = dict() 76 | for chrom in chroms: 77 | onehot[chrom] = f[chrom][:] 78 | 79 | 80 | 81 | model.compile('adam', loss='binary_crossentropy', metrics=['accuracy']) 82 | 83 | 84 | import pandas as pd 85 | 86 | bed = pd.read_csv('data/encode_roadmap.bed', sep='\t', header=None) 87 | 88 | label = np.zeros((bed.shape[0],164)) 89 | 90 | for i in range(bed.shape[0]): 91 | label[i,np.array(bed.iloc[i,6].split(','),dtype=int)] = 1 92 | 93 | 94 | with h5py.File('data/sample_sel.h5','r') as f: 95 | seltest = f['seltest'][:] 96 | 97 | 98 | test_data = {'sample':bed.iloc[seltest,:], 'label':label[seltest,:],'seq_onehot': onehot } 99 | 100 | test_randseq = Randseq(test_data['sample'].shape[0], True) 101 | 102 | test_steps = int(test_randseq.seqsize / batch_size) 103 | if test_randseq.seqsize != batch_size * test_steps: 104 | test_steps += 1 105 | 106 | sample_generator = gen_samples 107 | 108 | 109 | 110 | test_gen = sample_generator(batchsize=batch_size, 111 | randseq=test_randseq, 112 | data=test_data) 113 | 114 | 115 | result = model.predict_generator(generator=test_gen, 116 | steps = test_steps, 117 | verbose=1) 118 | 119 | 120 | 121 | from sklearn.metrics import average_precision_score 122 | from sklearn.metrics import roc_auc_score 123 | auprc=np.array([average_precision_score(test_data['label'][:,i],result[:,i]) for i in range(test_data['label'].shape[1])]) 124 | auroc=np.array([roc_auc_score(test_data['label'][:,i],result[:,i]) for i in range(test_data['label'].shape[1])]) 125 | 126 | with h5py.File('test.h5','w') as f: 127 | f['test_result'] = result 128 | f['test_label'] = test_data['label'] 129 | f['seltest'] = seltest 130 | f['auprc'] = auprc 131 | f['auroc']=auroc 132 | -------------------------------------------------------------------------------- /dcnn/Basset/BD-5/BD-5.pred.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from matplotlib import pyplot as plt 5 | 6 | import h5py 7 | 8 | 9 | import keras 10 | 11 | 12 | import h5py 13 | import numpy as np 14 | from keras.layers import Input, Dense, Conv1D, MaxPooling2D, MaxPooling1D, BatchNormalization 15 | from keras.layers.core import Dropout, Activation, Flatten 16 | from keras.layers.merge import Concatenate 17 | from keras.models import Model 18 | from keras.callbacks import EarlyStopping, ModelCheckpoint 19 | from keras.optimizers import Adam 20 | from keras.utils import multi_gpu_model 21 | 22 | from keras.regularizers import l1,l2, l1_l2 23 | from keras.constraints import MaxNorm 24 | from keras.optimizers import SGD 25 | 26 | from keras.activations import relu 27 | 28 | 29 | import os 30 | import tensorflow as tf 31 | import keras.backend.tensorflow_backend as KTF 32 | 33 | from data.util import Randseq, fill_oh, gen_samples 34 | 35 | input_bp = 600 36 | 37 | batch_size=128 38 | 39 | seqInput = Input(shape=(input_bp, 4), name='seqInput') 40 | 41 | 42 | 43 | 44 | seq = Conv1D(64, 3)(seqInput) 45 | seq = 
BatchNormalization()(seq) 46 | seq = Activation('relu')(seq) 47 | seq = MaxPooling1D(2)(seq) 48 | seq = Conv1D(128, 3)(seq) 49 | seq = BatchNormalization()(seq) 50 | seq = Activation('relu')(seq) 51 | seq = MaxPooling1D(2)(seq) 52 | seq = Conv1D(256, 3)(seq) 53 | seq = BatchNormalization()(seq) 54 | seq = Activation('relu')(seq) 55 | seq = MaxPooling1D(2)(seq) 56 | seq = Conv1D(384, 3)(seq) 57 | seq = BatchNormalization()(seq) 58 | seq = Activation('relu')(seq) 59 | seq = MaxPooling1D(2)(seq) 60 | seq = Conv1D(512, 3)(seq) 61 | seq = BatchNormalization()(seq) 62 | seq = Activation('relu')(seq) 63 | seq = Flatten()(seq) 64 | seq = Dense(1024)(seq) 65 | seq = BatchNormalization()(seq) 66 | seq = Activation('relu')(seq) 67 | seq = Dropout(0.2)(seq) 68 | seq = Dense(164)(seq) 69 | seq = Activation('sigmoid')(seq) 70 | 71 | 72 | model = Model(inputs = [seqInput], outputs = [seq]) 73 | model.load_weights('weight.hdf5') 74 | 75 | 76 | chroms = ['chr'+str(i) for i in range(1,23)] 77 | chroms.append('chrX') 78 | chroms.append('chrY') 79 | 80 | with h5py.File('data/onehot.h5', 'r') as f: 81 | onehot = dict() 82 | for chrom in chroms: 83 | onehot[chrom] = f[chrom][:] 84 | 85 | 86 | 87 | model.compile('adam', loss='binary_crossentropy', metrics=['accuracy']) 88 | 89 | 90 | import pandas as pd 91 | 92 | bed = pd.read_csv('data/encode_roadmap.bed', sep='\t', header=None) 93 | 94 | label = np.zeros((bed.shape[0],164)) 95 | 96 | for i in range(bed.shape[0]): 97 | label[i,np.array(bed.iloc[i,6].split(','),dtype=int)] = 1 98 | 99 | 100 | with h5py.File('data/sample.h5','r') as f: 101 | seltest = f['seltest'][:] 102 | 103 | 104 | test_data = {'sample':bed.iloc[seltest,:], 'label':label[seltest,:],'seq_onehot': onehot } 105 | 106 | test_randseq = Randseq(test_data['sample'].shape[0], True) 107 | 108 | test_steps = int(test_randseq.seqsize / batch_size) 109 | if test_randseq.seqsize != batch_size * test_steps: 110 | test_steps += 1 111 | 112 | sample_generator = gen_samples 113 | 114 | 115 | 116 | test_gen = sample_generator(batchsize=batch_size, 117 | randseq=test_randseq, 118 | data=test_data) 119 | 120 | 121 | result = model.predict_generator(generator=test_gen, 122 | steps = test_steps, 123 | verbose=1) 124 | 125 | 126 | 127 | from sklearn.metrics import average_precision_score 128 | from sklearn.metrics import roc_auc_score 129 | auprc=np.array([average_precision_score(test_data['label'][:,i],result[:,i]) for i in range(test_data['label'].shape[1])]) 130 | auroc=np.array([roc_auc_score(test_data['label'][:,i],result[:,i]) for i in range(test_data['label'].shape[1])]) 131 | 132 | with h5py.File('test.h5','w') as f: 133 | f['test_result'] = result 134 | f['test_label'] = test_data['label'] 135 | f['auprc'] = auprc 136 | f['auroc']=auroc 137 | -------------------------------------------------------------------------------- /NeuronMotif/chen2html.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy as np 3 | import os 4 | import sys 5 | os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" 6 | f = h5py.File(sys.argv[1],'r') 7 | 8 | ppm = f['ppm0'][:] 9 | 10 | html_txt0 = ''' 11 | 12 | 13 |

Nucleotides

14 | 15 | 16 |

Amino acids

17 | 18 | 19 | 20 | 36 | 37 | 38 | ''' 39 | 40 | html_txt = ''' 41 | 42 | 57 | 58 | %s 59 |
60 | Visit NeuronMotif website for more information: https://wzthu.github.io/NeuronMotif/ 61 |
62 | Please be patient to load all motif logos or patterns in the column of CN CRMs ... 63 |
64 | %s 65 | 66 | 73 | 74 | 75 | ''' 76 | 77 | 78 | def ppm2js(ppm, ppm_id, width, height): 79 | ppm += 0.0001 80 | v = ppm.sum(axis=1) 81 | v = v.repeat(4).reshape(ppm.shape) 82 | ppm /= v 83 | ppm0 = ppm.copy() 84 | vlg = -ppm *np.log2(ppm) 85 | ppm = ppm *(2 -vlg.sum(axis=1)).repeat(4).reshape(ppm.shape) 86 | A ='"A": [' + ','.join(['%1.2f' % (p) for p in ppm[:,0]]) + '],' 87 | C ='"C": [' + ','.join(['%1.2f' % (p) for p in ppm[:,1]]) + '],' 88 | G ='"G": [' + ','.join(['%1.2f' % (p) for p in ppm[:,2]]) + '],' 89 | T ='"T": [' + ','.join(['%1.2f' % (p) for p in ppm[:,3]]) + ']' 90 | html = 'var data = {%s};' % (A+C+G+T) 91 | html += 'sequence_logo(document.getElementById("%s"), %d,%d, data, options);' % (ppm_id,width,height) 92 | return html 93 | 94 | ppm_ids = [] 95 | ppm_jss = [] 96 | width=ppm.shape[0]*8 97 | height = 50 98 | 99 | i_max = 0 100 | for k in list(f.keys()): 101 | if k.startswith('act'): 102 | if int(k[3:])>i_max: 103 | i_max= int(k[3:]) 104 | 105 | 106 | for i in range(i_max+1): 107 | if 'act%d' % (i) not in list(f.keys()): 108 | continue 109 | ppm_id = '%8d_%.3f_%.3f_%d' % (i,f['act%d' % (i)][:].max(), f['conact%d' % (i)][0],f['act%d' % (i)].shape[0]) 110 | ppm = f['ppm%d' % (i)][:] 111 | ppm_js = ppm2js(ppm, ppm_id, width, height) 112 | ppm_jss.append(ppm_js) 113 | ppm_ids.append('%sTomtomLink' % (ppm_id,sys.argv[2], ppm_id)) 114 | 115 | 116 | html_txt1 = html_txt % ('Neuron '+ ' '.join([''+str(i)+'' for i in range(int(sys.argv[4]))]) + ('
Click here to see syntax tree (if exist)' % (sys.argv[3].split('/')[1])),''+'\n'.join(ppm_ids)+'
Dcp1_Dcp2_ActMax_ConsensusAct_SampleSizeTomtomResultCN CRMs ('+str(ppm.shape[0])+' bp)
', '\n'.join(ppm_jss)) 117 | 118 | with open(sys.argv[3]+'.html','w') as ftest: 119 | ftest.write(html_txt1) 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /demos/demo1/chen2html.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy as np 3 | import os 4 | import sys 5 | os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" 6 | f = h5py.File(sys.argv[1],'r') 7 | 8 | ppm = f['ppm0'][:] 9 | 10 | html_txt0 = ''' 11 | 12 | 13 |

Nucleotides

14 | 15 | 16 |

Amino acids

17 | 18 | 19 | 20 | 36 | 37 | 38 | ''' 39 | 40 | html_txt = ''' 41 | 42 | 57 | 58 | %s 59 |
60 | Visit NeuronMotif website for more information: https://wzthu.github.io/NeuronMotif/ 61 |
62 | Please be patient to load all motif logos or patterns in the column of CN CRMs ... 63 |
64 | %s 65 | 66 | 73 | 74 | 75 | ''' 76 | 77 | 78 | def ppm2js(ppm, ppm_id, width, height): 79 | ppm += 0.0001 80 | v = ppm.sum(axis=1) 81 | v = v.repeat(4).reshape(ppm.shape) 82 | ppm /= v 83 | ppm0 = ppm.copy() 84 | vlg = -ppm *np.log2(ppm) 85 | ppm = ppm *(2 -vlg.sum(axis=1)).repeat(4).reshape(ppm.shape) 86 | A ='"A": [' + ','.join(['%1.2f' % (p) for p in ppm[:,0]]) + '],' 87 | C ='"C": [' + ','.join(['%1.2f' % (p) for p in ppm[:,1]]) + '],' 88 | G ='"G": [' + ','.join(['%1.2f' % (p) for p in ppm[:,2]]) + '],' 89 | T ='"T": [' + ','.join(['%1.2f' % (p) for p in ppm[:,3]]) + ']' 90 | html = 'var data = {%s};' % (A+C+G+T) 91 | html += 'sequence_logo(document.getElementById("%s"), %d,%d, data, options);' % (ppm_id,width,height) 92 | return html 93 | 94 | ppm_ids = [] 95 | ppm_jss = [] 96 | width=ppm.shape[0]*8 97 | height = 50 98 | 99 | i_max = 0 100 | for k in list(f.keys()): 101 | if k.startswith('act'): 102 | if int(k[3:])>i_max: 103 | i_max= int(k[3:]) 104 | 105 | 106 | for i in range(i_max+1): 107 | if 'act%d' % (i) not in list(f.keys()): 108 | continue 109 | ppm_id = '%8d_%.3f_%.3f_%d' % (i,f['act%d' % (i)][:].max(), f['conact%d' % (i)][0],f['act%d' % (i)].shape[0]) 110 | ppm = f['ppm%d' % (i)][:] 111 | ppm_js = ppm2js(ppm, ppm_id, width, height) 112 | ppm_jss.append(ppm_js) 113 | ppm_ids.append('%sTomtomLink' % (ppm_id,sys.argv[2], ppm_id)) 114 | 115 | 116 | html_txt1 = html_txt % ('Neuron '+ ' '.join([''+str(i)+'' for i in range(int(sys.argv[4]))]) + ('
Click here to see syntax tree (if exist)' % (sys.argv[3].split('/')[1])),''+'\n'.join(ppm_ids)+'
Dcp1_Dcp2_ActMax_ConsensusAct_SampleSizeTomtomResultCN CRMs ('+str(ppm.shape[0])+' bp)
', '\n'.join(ppm_jss)) 117 | 118 | with open(sys.argv[3]+'.html','w') as ftest: 119 | ftest.write(html_txt1) 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /demos/demo2/chen2html.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy as np 3 | import os 4 | import sys 5 | os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" 6 | f = h5py.File(sys.argv[1],'r') 7 | 8 | ppm = f['ppm0'][:] 9 | 10 | html_txt0 = ''' 11 | 12 | 13 |

Nucleotides

14 | 15 | 16 |

Amino acids

17 | 18 | 19 | 20 | 36 | 37 | 38 | ''' 39 | 40 | html_txt = ''' 41 | 42 | 57 | 58 | %s 59 |
60 | Visit NeuronMotif website for more information: https://wzthu.github.io/NeuronMotif/ 61 |
62 | Please be patient to load all motif logos or patterns in the column of CN CRMs ... 63 |
64 | %s 65 | 66 | 73 | 74 | 75 | ''' 76 | 77 | 78 | def ppm2js(ppm, ppm_id, width, height): 79 | ppm += 0.0001 80 | v = ppm.sum(axis=1) 81 | v = v.repeat(4).reshape(ppm.shape) 82 | ppm /= v 83 | ppm0 = ppm.copy() 84 | vlg = -ppm *np.log2(ppm) 85 | ppm = ppm *(2 -vlg.sum(axis=1)).repeat(4).reshape(ppm.shape) 86 | A ='"A": [' + ','.join(['%1.2f' % (p) for p in ppm[:,0]]) + '],' 87 | C ='"C": [' + ','.join(['%1.2f' % (p) for p in ppm[:,1]]) + '],' 88 | G ='"G": [' + ','.join(['%1.2f' % (p) for p in ppm[:,2]]) + '],' 89 | T ='"T": [' + ','.join(['%1.2f' % (p) for p in ppm[:,3]]) + ']' 90 | html = 'var data = {%s};' % (A+C+G+T) 91 | html += 'sequence_logo(document.getElementById("%s"), %d,%d, data, options);' % (ppm_id,width,height) 92 | return html 93 | 94 | ppm_ids = [] 95 | ppm_jss = [] 96 | width=ppm.shape[0]*8 97 | height = 50 98 | 99 | i_max = 0 100 | for k in list(f.keys()): 101 | if k.startswith('act'): 102 | if int(k[3:])>i_max: 103 | i_max= int(k[3:]) 104 | 105 | 106 | for i in range(i_max+1): 107 | if 'act%d' % (i) not in list(f.keys()): 108 | continue 109 | ppm_id = '%8d_%.3f_%.3f_%d' % (i,f['act%d' % (i)][:].max(), f['conact%d' % (i)][0],f['act%d' % (i)].shape[0]) 110 | ppm = f['ppm%d' % (i)][:] 111 | ppm_js = ppm2js(ppm, ppm_id, width, height) 112 | ppm_jss.append(ppm_js) 113 | ppm_ids.append('%sTomtomLink' % (ppm_id,sys.argv[2], ppm_id)) 114 | 115 | 116 | html_txt1 = html_txt % ('Neuron '+ ' '.join([''+str(i)+'' for i in range(int(sys.argv[4]))]) + ('
Click here to see syntax tree (if exist)' % (sys.argv[3].split('/')[1])),''+'\n'.join(ppm_ids)+'
Dcp1_Dcp2_ActMax_ConsensusAct_SampleSizeTomtomResultCN CRMs ('+str(ppm.shape[0])+' bp)
', '\n'.join(ppm_jss)) 117 | 118 | with open(sys.argv[3]+'.html','w') as ftest: 119 | ftest.write(html_txt1) 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /demos/Basset/BD-10/chen2html.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy as np 3 | import os 4 | import sys 5 | os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" 6 | f = h5py.File(sys.argv[1],'r') 7 | 8 | ppm = f['ppm0'][:] 9 | 10 | html_txt0 = ''' 11 | 12 | 13 |

Nucleotides

14 | 15 | 16 |

Amino acids

17 | 18 | 19 | 20 | 36 | 37 | 38 | ''' 39 | 40 | html_txt = ''' 41 | 42 | 57 | 58 | %s 59 |
60 | Visit NeuronMotif website for more information: https://wzthu.github.io/NeuronMotif/ 61 |
62 | Please be patient to load all motif logos or patterns in the column of CN CRMs ... 63 |
64 | %s 65 | 66 | 73 | 74 | 75 | ''' 76 | 77 | 78 | def ppm2js(ppm, ppm_id, width, height): 79 | ppm += 0.0001 80 | v = ppm.sum(axis=1) 81 | v = v.repeat(4).reshape(ppm.shape) 82 | ppm /= v 83 | ppm0 = ppm.copy() 84 | vlg = -ppm *np.log2(ppm) 85 | ppm = ppm *(2 -vlg.sum(axis=1)).repeat(4).reshape(ppm.shape) 86 | A ='"A": [' + ','.join(['%1.2f' % (p) for p in ppm[:,0]]) + '],' 87 | C ='"C": [' + ','.join(['%1.2f' % (p) for p in ppm[:,1]]) + '],' 88 | G ='"G": [' + ','.join(['%1.2f' % (p) for p in ppm[:,2]]) + '],' 89 | T ='"T": [' + ','.join(['%1.2f' % (p) for p in ppm[:,3]]) + ']' 90 | html = 'var data = {%s};' % (A+C+G+T) 91 | html += 'sequence_logo(document.getElementById("%s"), %d,%d, data, options);' % (ppm_id,width,height) 92 | return html 93 | 94 | ppm_ids = [] 95 | ppm_jss = [] 96 | width=ppm.shape[0]*8 97 | height = 50 98 | 99 | i_max = 0 100 | for k in list(f.keys()): 101 | if k.startswith('act'): 102 | if int(k[3:])>i_max: 103 | i_max= int(k[3:]) 104 | 105 | 106 | for i in range(i_max+1): 107 | if 'act%d' % (i) not in list(f.keys()): 108 | continue 109 | ppm_id = '%8d_%.3f_%.3f_%d' % (i,f['act%d' % (i)][:].max(), f['conact%d' % (i)][0],f['act%d' % (i)].shape[0]) 110 | ppm = f['ppm%d' % (i)][:] 111 | ppm_js = ppm2js(ppm, ppm_id, width, height) 112 | ppm_jss.append(ppm_js) 113 | ppm_ids.append('%sTomtomLink' % (ppm_id,sys.argv[2], ppm_id)) 114 | 115 | 116 | html_txt1 = html_txt % ('Neuron '+ ' '.join([''+str(i)+'' for i in range(int(sys.argv[4]))]) + ('
Click here to see syntax tree (if exist)' % (sys.argv[3].split('/')[1])),''+'\n'.join(ppm_ids)+'
Dcp1_Dcp2_ActMax_ConsensusAct_SampleSizeTomtomResultCN CRMs ('+str(ppm.shape[0])+' bp)
', '\n'.join(ppm_jss)) 117 | 118 | with open(sys.argv[3]+'.html','w') as ftest: 119 | ftest.write(html_txt1) 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /demos/Basset/Basset/chen2html.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy as np 3 | import os 4 | import sys 5 | os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" 6 | f = h5py.File(sys.argv[1],'r') 7 | 8 | ppm = f['ppm0'][:] 9 | 10 | html_txt0 = ''' 11 | 12 | 13 |

Nucleotides

14 | 15 | 16 |

Amino acids

17 | 18 | 19 | 20 | 36 | 37 | 38 | ''' 39 | 40 | html_txt = ''' 41 | 42 | 57 | 58 | %s 59 |
60 | Visit NeuronMotif website for more information: https://wzthu.github.io/NeuronMotif/ 61 |
62 | Please be patient to load all motif logos or patterns in the column of CN CRMs ... 63 |
64 | %s 65 | 66 | 73 | 74 | 75 | ''' 76 | 77 | 78 | def ppm2js(ppm, ppm_id, width, height): 79 | ppm += 0.0001 80 | v = ppm.sum(axis=1) 81 | v = v.repeat(4).reshape(ppm.shape) 82 | ppm /= v 83 | ppm0 = ppm.copy() 84 | vlg = -ppm *np.log2(ppm) 85 | ppm = ppm *(2 -vlg.sum(axis=1)).repeat(4).reshape(ppm.shape) 86 | A ='"A": [' + ','.join(['%1.2f' % (p) for p in ppm[:,0]]) + '],' 87 | C ='"C": [' + ','.join(['%1.2f' % (p) for p in ppm[:,1]]) + '],' 88 | G ='"G": [' + ','.join(['%1.2f' % (p) for p in ppm[:,2]]) + '],' 89 | T ='"T": [' + ','.join(['%1.2f' % (p) for p in ppm[:,3]]) + ']' 90 | html = 'var data = {%s};' % (A+C+G+T) 91 | html += 'sequence_logo(document.getElementById("%s"), %d,%d, data, options);' % (ppm_id,width,height) 92 | return html 93 | 94 | ppm_ids = [] 95 | ppm_jss = [] 96 | width=ppm.shape[0]*8 97 | height = 50 98 | 99 | i_max = 0 100 | for k in list(f.keys()): 101 | if k.startswith('act'): 102 | if int(k[3:])>i_max: 103 | i_max= int(k[3:]) 104 | 105 | 106 | for i in range(i_max+1): 107 | if 'act%d' % (i) not in list(f.keys()): 108 | continue 109 | ppm_id = '%8d_%.3f_%.3f_%d' % (i,f['act%d' % (i)][:].max(), f['conact%d' % (i)][0],f['act%d' % (i)].shape[0]) 110 | ppm = f['ppm%d' % (i)][:] 111 | ppm_js = ppm2js(ppm, ppm_id, width, height) 112 | ppm_jss.append(ppm_js) 113 | ppm_ids.append('%sTomtomLink' % (ppm_id,sys.argv[2], ppm_id)) 114 | 115 | 116 | html_txt1 = html_txt % ('Neuron '+ ' '.join([''+str(i)+'' for i in range(int(sys.argv[4]))]) + ('
Click here to see syntax tree (if exist)' % (sys.argv[3].split('/')[1])),''+'\n'.join(ppm_ids)+'
Dcp1_Dcp2_ActMax_ConsensusAct_SampleSizeTomtomResultCN CRMs ('+str(ppm.shape[0])+' bp)
', '\n'.join(ppm_jss)) 117 | 118 | with open(sys.argv[3]+'.html','w') as ftest: 119 | ftest.write(html_txt1) 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /demos/Basset/BD-10/README.md: -------------------------------------------------------------------------------- 1 | 2 | Please read the README in the folder [code](https://github.com/wzthu/NeuronMotif/tree/master/NeuronMotif) first. 3 | 4 | # Enter NeuronMotif environment 5 | 6 | ``` 7 | conda activate NeuronMotif 8 | ``` 9 | 10 | # Download the weight file 11 | 12 | Download the weight file and rename it to 'weight.hdf5': 13 | 14 | ``` 15 | wget --no-check-certificate -O weight.hdf5 http://bioinfo-xwwang-thu.cn/zwei/NeuronMotif/DCNN_weight/Basset/BD-10/weight.hdf5 16 | ``` 17 | 18 | Alternatively, if the link above is temporarily unavailable, you can download it from https://cloud.tsinghua.edu.cn/d/fee522536d524eae9531/files/?p=%2FDCNN_weight%2FBasset%2FBD-10%2Fweight.hdf5&dl=1 19 | 20 | # Download the JASPAR database: 21 | 22 | ``` 23 | wget --no-check-certificate -O motifDB.txt https://jaspar.genereg.net/download/data/2022/CORE/JASPAR2022_CORE_vertebrates_non-redundant_pfms_meme.txt 24 | ``` 25 | 26 | # Run 27 | 28 | Before running the scripts, you should adapt the number of threads in the scripts to your server or cluster. 29 | 30 | ## Through the script on a server: 31 | 32 | 33 | ``` 34 | bash script.sh 35 | ``` 36 | 37 | ## Through scripts on many nodes of a cluster 38 | 39 | ### Run NeuronMotif: 40 | 41 | ``` 42 | # Each line of the script can be submitted to several nodes at the same time to run in parallel. 43 | # The next line cannot be submitted until the tasks on all nodes are done. 44 | 45 | bash run.layer.sh 1 128 20 46 | bash run.layer.sh 2 128 20 47 | bash run.layer.sh 3 160 20 48 | bash run.layer.sh 4 160 20 49 | bash run.layer.sh 5 256 20 50 | bash run.layer.sh 6 256 20 51 | bash run.layer.sh 7 384 20 52 | bash run.layer.sh 8 384 20 53 | bash run.layer.sh 9 512 10 54 | bash run.layer.sh 10 512 10 55 | ``` 56 | 57 | 58 | The HTML visualization results are organized in each layer folder of the HTML folder. You can view them in a web browser: 59 | 60 | | Files                                            | Contains                                           | 61 | |--------------------------------------------------|----------------------------------------------------| 62 | | HTML/layer[Layer#]/[Neuron#].html                | Visualization of CN CRMs                           | 63 | | HTML/layer[Layer#]/tree.[Neuron#].html           | Visualization of syntax tree for CN CRMs           | 64 | | HTML/layer[Layer#]/tomtom_[Neuron#].sel.ppm.meme | some motif segment mapped to database              | 65 | | layer[Layer#]/tomtom_dict_[Neuron#]              | motif dictionary mapped to database                | 66 | 67 | 68 | 69 | 70 | The HTML, PPM and tomtom results are stored in the corresponding layer folder.
71 | 72 | | Files                                            | Contains                                           | 73 | |--------------------------------------------------|----------------------------------------------------| 74 | | layer[Layer#]/[Neuron#].html                     | Visualization of CN CRMs                           | 75 | | layer[Layer#]/tree.[Neuron#].html                | Visualization of syntax tree for CN CRMs           | 76 | | layer[Layer#]/kernel-[Neuron#].all.ppm.chen      | PPMs of CN CRMs in chen format                     | 77 | | layer[Layer#]/kernel-[Neuron#].all.ppm.meme      | PPMs of CN CRMs in meme format                     | 78 | | layer[Layer#]/kernel-[Neuron#].h5                | sequence sample                                    | 79 | | layer[Layer#]/kernel-[Neuron#].ppm.h5            | PPMs/activation/indicators of CN CRMs in h5 format | 80 | | layer[Layer#]/kernel-[Neuron#]-segs.chen         | PPMs of CRMs segments                              | 81 | | layer[Layer#]/kernel-[Neuron#]-segs-dict         | motif segment mapped to dictionary                 | 82 | | layer[Layer#]/tomtom_dict_[Neuron#]              | motif dictionary mapped to tomtom                  | 83 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.chen | PPMs of motifs in dictionary (chen format)         | 84 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.h5   | PPMs of motifs in dictionary (h5 format)           | 85 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.meme | PPMs of motifs in dictionary (meme format)         | 86 | -------------------------------------------------------------------------------- /demos/demo2/README.md: -------------------------------------------------------------------------------- 1 | # Demo 2 2 | * Input: 82 bp DNA sequence 3 | * Output: a scalar in the interval [0,1] 4 | * Positive sequences: random sequences with 2 CTCF motifs. One CTCF motif starts at the 1st-16th bp. The other CTCF motif starts at the 47th-62nd bp 5 | * Negative sequences: random sequences with 2 CTCF motifs. One CTCF motif starts at the 1st-16th bp or the 47th-62nd bp. The other CTCF motif starts at the 17th-46th bp 6 | 7 | Architecture of the DCNN model: 8 | 9 | 10 | * Convolutional layer 1 (5 filters, size 7) 11 | + BatchNormalization 12 | + Activation function (ReLU) 13 | + Maxpooling operation (size 2) 14 | * Convolutional layer 2 (5 filters, size 3) 15 | + BatchNormalization 16 | + Activation function (ReLU) 17 | + Maxpooling operation (size 2) 18 | * Convolutional layer 3 (6 filters, size 3) 19 | + BatchNormalization 20 | + Activation function (ReLU) 21 | + Maxpooling operation (size 2) 22 | * Convolutional layer 4 (6 filters, size 3) 23 | + BatchNormalization 24 | + Activation function (ReLU) 25 | + Maxpooling operation (size 2) 26 | * Convolutional layer 5 (1 filter, size 3) 27 | + BatchNormalization 28 | + Activation function (sigmoid) 29 | * Flatten 30 | 31 | 32 | # Run this demo 33 | 34 | Download the motif database 35 | 36 | Take JASPAR as an example: 37 | 38 | ``` 39 | wget --no-check-certificate -O motifDB.txt https://jaspar.genereg.net/download/data/2022/CORE/JASPAR2022_CORE_vertebrates_non-redundant_pfms_meme.txt 40 | ``` 41 | 42 | Then, run 43 | 44 | ``` 45 | bash script.sh 46 | ``` 47 | 48 | 49 | # Result 50 | 51 | The HTML visualization results are organized in each layer folder of the HTML folder.
You can view them in a web browser: 52 | 53 | | Files                                            | Contains                                           | 54 | |--------------------------------------------------|----------------------------------------------------| 55 | | HTML/layer[Layer#]/[Neuron#].html                | Visualization of CN CRMs                           | 56 | | HTML/layer[Layer#]/tree.[Neuron#].html           | Visualization of syntax tree for CN CRMs           | 57 | | HTML/layer[Layer#]/tomtom_[Neuron#].sel.ppm.meme | some motif segment mapped to database              | 58 | | layer[Layer#]/tomtom_dict_[Neuron#]              | motif dictionary mapped to database                | 59 | 60 | 61 | 62 | 63 | For the result in each folder, the sequence samples, position probability matrices and diagnosis indicators are stored in the folder 'layerX'. 64 | 65 | 66 | 67 | The HTML, PPM and tomtom results are stored in the corresponding layer folder. 68 | 69 | | Files                                            | Contains                                           | 70 | |--------------------------------------------------|----------------------------------------------------| 71 | | layer[Layer#]/[Neuron#].html                     | Visualization of CN CRMs                           | 72 | | layer[Layer#]/tree.[Neuron#].html                | Visualization of syntax tree for CN CRMs           | 73 | | layer[Layer#]/kernel-[Neuron#].all.ppm.chen      | PPMs of CN CRMs in chen format                     | 74 | | layer[Layer#]/kernel-[Neuron#].all.ppm.meme      | PPMs of CN CRMs in meme format                     | 75 | | layer[Layer#]/kernel-[Neuron#].h5                | sequence sample                                    | 76 | | layer[Layer#]/kernel-[Neuron#].ppm.h5            | PPMs/activation/indicators of CN CRMs in h5 format | 77 | | layer[Layer#]/kernel-[Neuron#]-segs.chen         | PPMs of CRMs segments                              | 78 | | layer[Layer#]/kernel-[Neuron#]-segs-dict         | motif segment mapped to dictionary                 | 79 | | layer[Layer#]/tomtom_dict_[Neuron#]              | motif dictionary mapped to tomtom                  | 80 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.chen | PPMs of motifs in dictionary (chen format)         | 81 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.h5   | PPMs of motifs in dictionary (h5 format)           | 82 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.meme | PPMs of motifs in dictionary (meme format)         | 83 | 84 | -------------------------------------------------------------------------------- /demos/DeepSEA/DD-10/README.md: -------------------------------------------------------------------------------- 1 | 2 | Please read the README in the folder [code](https://github.com/wzthu/NeuronMotif/tree/master/NeuronMotif) first. 3 | 4 | # Enter NeuronMotif environment 5 | 6 | ``` 7 | conda activate NeuronMotif 8 | ``` 9 | 10 | # Download the weight file 11 | 12 | Download the weight file and rename it to 'weight.hdf5': 13 | 14 | ``` 15 | wget --no-check-certificate -O weight.hdf5 http://bioinfo-xwwang-thu.cn/zwei/NeuronMotif/DCNN_weight/DeepSEA/DD-10/weight.hdf5 16 | ``` 17 | 18 | Alternatively, if the link above is temporarily unavailable, you can download it from https://cloud.tsinghua.edu.cn/d/fee522536d524eae9531/files/?p=%2FDCNN_weight%2FDeepSEA%2FDD-10%2Fweight.hdf5&dl=1 19 | 20 | 21 | # Download the JASPAR database: 22 | 23 | ``` 24 | wget --no-check-certificate -O motifDB.txt https://jaspar.genereg.net/download/data/2022/CORE/JASPAR2022_CORE_vertebrates_non-redundant_pfms_meme.txt 25 | ``` 26 | 27 | 28 | # Run 29 | 30 | Before running the scripts, you should adapt the number of threads in the scripts to your server or cluster. 31 | 32 | ## Through the script on a server: 33 | 34 | 35 | ``` 36 | bash script.sh 37 | ``` 38 | 39 | ## Through scripts on many nodes of a cluster 40 | 41 | ### Run NeuronMotif: 42 | 43 | ``` 44 | # Each line of the script can be submitted to several nodes at the same time to run in parallel. 
45 | # The next line cannot be submitted until the tasks on all nodes are done. 46 | 47 | bash run.layer.sh 1 128 20 48 | bash run.layer.sh 2 128 20 49 | bash run.layer.sh 3 160 20 50 | bash run.layer.sh 4 160 20 51 | bash run.layer.sh 5 256 20 52 | bash run.layer.sh 6 320 20 53 | bash run.layer.sh 7 512 20 54 | bash run.layer.sh 8 640 20 55 | bash run.layer.sh 9 1024 10 56 | bash run.layer.sh 10 1280 10 57 | ``` 58 | 59 | The HTML visualization results are organized in each layer folder of the HTML folder. You can view them in a web browser: 60 | 61 | | Files                                            | Contains                                           | 62 | |--------------------------------------------------|----------------------------------------------------| 63 | | HTML/layer[Layer#]/[Neuron#].html                | Visualization of CN CRMs                           | 64 | | HTML/layer[Layer#]/tree.[Neuron#].html           | Visualization of syntax tree for CN CRMs           | 65 | | HTML/layer[Layer#]/tomtom_[Neuron#].sel.ppm.meme | some motif segment mapped to database              | 66 | | layer[Layer#]/tomtom_dict_[Neuron#]              | motif dictionary mapped to database                | 67 | 68 | 69 | 70 | The HTML, PPM and tomtom results are stored in the corresponding layer folder. 71 | 72 | | Files                                            | Contains                                           | 73 | |--------------------------------------------------|----------------------------------------------------| 74 | | layer[Layer#]/[Neuron#].html                     | Visualization of CN CRMs                           | 75 | | layer[Layer#]/tree.[Neuron#].html                | Visualization of syntax tree for CN CRMs           | 76 | | layer[Layer#]/kernel-[Neuron#].all.ppm.chen      | PPMs of CN CRMs in chen format                     | 77 | | layer[Layer#]/kernel-[Neuron#].all.ppm.meme      | PPMs of CN CRMs in meme format                     | 78 | | layer[Layer#]/kernel-[Neuron#].h5                | sequence sample                                    | 79 | | layer[Layer#]/kernel-[Neuron#].ppm.h5            | PPMs/activation/indicators of CN CRMs in h5 format | 80 | | layer[Layer#]/kernel-[Neuron#]-segs.chen         | PPMs of CRMs segments                              | 81 | | layer[Layer#]/kernel-[Neuron#]-segs-dict         | motif segment mapped to dictionary                 | 82 | | layer[Layer#]/tomtom_dict_[Neuron#]              | motif dictionary mapped to database                | 83 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.chen | PPMs of motifs in dictionary (chen format)         | 84 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.h5   | PPMs of motifs in dictionary (h5 format)           | 85 | | layer[Layer#]/kernel-[Neuron#]-unified-dict.meme | PPMs of motifs in dictionary (meme format)         | 86 | -------------------------------------------------------------------------------- /demos/DeepSEA/DD-10/chen2html.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy as np 3 | import os 4 | import sys 5 | os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" 6 | f = h5py.File(sys.argv[1],'r') 7 | 8 | ppm = f['ppm0'][:] 9 | 10 | html_txt0 = ''' 11 | 12 | 13 |

Nucleotides

14 | 15 | 16 |

Amino acids

17 | 18 | 19 | 20 | 36 | 37 | 38 | ''' 39 | 40 | html_txt = ''' 41 | 42 | 57 | 58 | %s 59 |
60 | Visit NeuronMotif website for more information: https://wzthu.github.io/NeuronMotif/ 61 |
62 | Please be patient to load all motif logos or patterns in the column of CN CRMs ... 63 |
64 | %s 65 | 66 | 73 | 74 | 75 | ''' 76 | 77 | 78 | def ppm2js(ppm, ppm_id, width, height): 79 | ppm += 0.0001 80 | v = ppm.sum(axis=1) 81 | v = v.repeat(4).reshape(ppm.shape) 82 | ppm /= v 83 | ppm0 = ppm.copy() 84 | vlg = -ppm *np.log2(ppm) 85 | ppm = ppm *(2 -vlg.sum(axis=1)).repeat(4).reshape(ppm.shape) 86 | A ='"A": [' + ','.join(['%1.2f' % (p) for p in ppm[:,0]]) + '],' 87 | C ='"C": [' + ','.join(['%1.2f' % (p) for p in ppm[:,1]]) + '],' 88 | G ='"G": [' + ','.join(['%1.2f' % (p) for p in ppm[:,2]]) + '],' 89 | T ='"T": [' + ','.join(['%1.2f' % (p) for p in ppm[:,3]]) + ']' 90 | html = 'var data = {%s};' % (A+C+G+T) 91 | html += 'sequence_logo(document.getElementById("%s"), %d,%d, data, options);' % (ppm_id,width,height) 92 | return html 93 | 94 | ppm_ids = [] 95 | ppm_jss = [] 96 | width=ppm.shape[0]*8 97 | height = 50 98 | 99 | i_max = 0 100 | for k in list(f.keys()): 101 | if k.startswith('act'): 102 | if int(k[3:])>i_max: 103 | i_max= int(k[3:]) 104 | 105 | 106 | for i in range(i_max+1): 107 | if 'act%d' % (i) not in list(f.keys()): 108 | continue 109 | ppm_id = '%8d_%.3f_%.3f_%d' % (i,f['act%d' % (i)][:].max(), f['conact%d' % (i)][0],f['act%d' % (i)].shape[0]) 110 | ppm = f['ppm%d' % (i)][:] 111 | ppm = ppm[:,[0,2,1,3]] 112 | ppm_js = ppm2js(ppm, ppm_id, width, height) 113 | ppm_jss.append(ppm_js) 114 | ppm_ids.append('%sTomtomLink' % (ppm_id,sys.argv[2], ppm_id)) 115 | 116 | 117 | html_txt1 = html_txt % ('Neuron '+ ' '.join([''+str(i)+'' for i in range(int(sys.argv[4]))]) + ('
Click here to see syntax tree (if exist)' % (sys.argv[3].split('/')[1])),''+'\n'.join(ppm_ids)+'
Dcp1_Dcp2_ActMax_ConsensusAct_SampleSizeTomtomResultCN CRMs ('+str(ppm.shape[0])+' bp)
', '\n'.join(ppm_jss)) 118 | 119 | with open(sys.argv[3]+'.html','w') as ftest: 120 | ftest.write(html_txt1) 121 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /demos/DeepSEA/DeepSEA/chen2html.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy as np 3 | import os 4 | import sys 5 | os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" 6 | f = h5py.File(sys.argv[1],'r') 7 | 8 | ppm = f['ppm0'][:] 9 | 10 | html_txt0 = ''' 11 | 12 | 13 |

Nucleotides

14 | 15 | 16 |

Amino acids

17 | 18 | 19 | 20 | 36 | 37 | 38 | ''' 39 | 40 | html_txt = ''' 41 | 42 | 57 | 58 | %s 59 |
60 | Visit NeuronMotif website for more information: https://wzthu.github.io/NeuronMotif/ 61 |
62 | Please be patient to load all motif logos or patterns in the column of CN CRMs ... 63 |
64 | %s 65 | 66 | 73 | 74 | 75 | ''' 76 | 77 | 78 | def ppm2js(ppm, ppm_id, width, height): 79 | ppm += 0.0001 80 | v = ppm.sum(axis=1) 81 | v = v.repeat(4).reshape(ppm.shape) 82 | ppm /= v 83 | ppm0 = ppm.copy() 84 | vlg = -ppm *np.log2(ppm) 85 | ppm = ppm *(2 -vlg.sum(axis=1)).repeat(4).reshape(ppm.shape) 86 | A ='"A": [' + ','.join(['%1.2f' % (p) for p in ppm[:,0]]) + '],' 87 | C ='"C": [' + ','.join(['%1.2f' % (p) for p in ppm[:,1]]) + '],' 88 | G ='"G": [' + ','.join(['%1.2f' % (p) for p in ppm[:,2]]) + '],' 89 | T ='"T": [' + ','.join(['%1.2f' % (p) for p in ppm[:,3]]) + ']' 90 | html = 'var data = {%s};' % (A+C+G+T) 91 | html += 'sequence_logo(document.getElementById("%s"), %d,%d, data, options);' % (ppm_id,width,height) 92 | return html 93 | 94 | ppm_ids = [] 95 | ppm_jss = [] 96 | width=ppm.shape[0]*8 97 | height = 50 98 | 99 | i_max = 0 100 | for k in list(f.keys()): 101 | if k.startswith('act'): 102 | if int(k[3:])>i_max: 103 | i_max= int(k[3:]) 104 | 105 | 106 | for i in range(i_max+1): 107 | if 'act%d' % (i) not in list(f.keys()): 108 | continue 109 | ppm_id = '%8d_%.3f_%.3f_%d' % (i,f['act%d' % (i)][:].max(), f['conact%d' % (i)][0],f['act%d' % (i)].shape[0]) 110 | ppm = f['ppm%d' % (i)][:] 111 | ppm = ppm[:,[0,2,1,3]] 112 | ppm_js = ppm2js(ppm, ppm_id, width, height) 113 | ppm_jss.append(ppm_js) 114 | ppm_ids.append('%sTomtomLink' % (ppm_id,sys.argv[2], ppm_id)) 115 | 116 | 117 | html_txt1 = html_txt % ('Neuron '+ ' '.join([''+str(i)+'' for i in range(int(sys.argv[4]))]) + ('
Click here to see syntax tree (if exist)' % (sys.argv[3].split('/')[1])),''+'\n'.join(ppm_ids)+'
Dcp1_Dcp2_ActMax_ConsensusAct_SampleSizeTomtomResultCN CRMs ('+str(ppm.shape[0])+' bp)
', '\n'.join(ppm_jss)) 118 | 119 | with open(sys.argv[3]+'.html','w') as ftest: 120 | ftest.write(html_txt1) 121 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /dcnn/DeepSEA/DeepSEA/deepsea.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from matplotlib import pyplot as plt 5 | 6 | 7 | import keras 8 | 9 | 10 | import h5py 11 | import numpy as np 12 | from keras.layers import Input, Dense, Conv1D, MaxPooling2D, MaxPooling1D, BatchNormalization 13 | from keras.layers.core import Dropout, Activation, Flatten 14 | from keras.layers.merge import Concatenate 15 | from keras.models import Model 16 | from keras.callbacks import EarlyStopping, ModelCheckpoint 17 | from keras.optimizers import Adam 18 | from keras.utils import multi_gpu_model 19 | 20 | from keras.regularizers import l1,l2, l1_l2 21 | from keras.constraints import MaxNorm 22 | from keras.optimizers import SGD 23 | 24 | from keras.activations import relu 25 | 26 | input_bp = 1000 27 | conv_kernel_size = 8 28 | pool_kernel_size = 4 29 | 30 | maxnrom = MaxNorm(max_value=0.9, axis=0) 31 | l1l2 = l1_l2(l1=0, l2=1e-6) 32 | 33 | def crelu(x, alpha=0.0, max_value=None, threshold=1e-6): 34 | return relu(x, alpha, max_value, threshold) 35 | 36 | batch_size=16 37 | 38 | seqInput = Input(shape=(input_bp, 4), name='seqInput') 39 | 40 | seq = Conv1D(320, conv_kernel_size,kernel_regularizer=l1l2, kernel_constraint=maxnrom)(seqInput) 41 | seq = Activation(crelu)(seq) 42 | seq = MaxPooling1D(pool_size=pool_kernel_size,strides=pool_kernel_size)(seq) 43 | seq = Dropout(0.2)(seq) 44 | seq = Conv1D(480, conv_kernel_size,kernel_regularizer=l1l2, kernel_constraint=maxnrom)(seq) 45 | seq = Activation(crelu)(seq) 46 | seq = MaxPooling1D(pool_size=pool_kernel_size,strides=pool_kernel_size)(seq) 47 | seq = Dropout(0.2)(seq) 48 | seq = Conv1D(960, conv_kernel_size,kernel_regularizer=l1l2, kernel_constraint=maxnrom)(seq) 49 | seq = Activation(crelu)(seq) 50 | seq = Dropout(0.5)(seq) 51 | seq = Flatten()(seq) 52 | seq = Dense(925,kernel_regularizer=l1l2, kernel_constraint=maxnrom)(seq) 53 | seq = Activation(crelu)(seq) 54 | seq = Dense(919,kernel_regularizer=l1l2, kernel_constraint=maxnrom, activity_regularizer=l1_l2(l1=1e-8,l2=0))(seq) 55 | seq = Activation('sigmoid')(seq) 56 | 57 | 58 | model = Model(inputs = [seqInput], outputs = [seq]) 59 | 60 | 61 | from scipy.io import loadmat 62 | 63 | model.compile(SGD(learning_rate=0.08, momentum=0.9, decay=8e-7), loss='binary_crossentropy', metrics=['accuracy']) 64 | 65 | import h5py 66 | with h5py.File('data/train.mat', 'r') as trainmat: 67 | trainy = trainmat['traindata'][:] 68 | trainx = trainmat['trainxdata'][:] 69 | 70 | #trainmat = loadmat('data/train.mat') 71 | valmat = loadmat('data/valid.mat') 72 | #trainy = trainmat['traindata'] 73 | #trianx = trainmat['trainxdata'] 74 | validx = valmat['validxdata'] 75 | validy = valmat['validdata'] 76 | 77 | callbacks=[] 78 | callbacks.append(ModelCheckpoint(filepath='weight.hdf5',save_best_only=True)) 79 | callbacks.append(EarlyStopping(patience=100)) 80 | 81 | trainx = trainx.transpose((2,0,1)) 82 | trainy = trainy.transpose((1,0)) 83 | validx = validx.transpose((0,2,1)) 84 | #validy = validy.transpose((1,0)) 85 | 86 | 87 | history = model.fit(x = trainx, 88 | y = trainy, 89 | epochs = 1000, 90 | verbose = 1, 91 | batch_size=batch_size, 92 | validation_data = (validx, validy), 93 | # validation_batch_size = 
batch_size*2, 94 | callbacks = callbacks 95 | ) 96 | """ 97 | history = model.fit(x = validx, 98 | y = validy, 99 | epochs = 1, 100 | batch_size=batch_size, 101 | validation_data = (validx, validy), 102 | # validation_batch_size = batch_size*2, 103 | callbacks = callbacks 104 | ) 105 | """ 106 | 107 | history_dict=history.history 108 | 109 | #Plots model's training cost/loss and model's validation split cost/loss 110 | loss_values = history_dict['loss'] 111 | val_loss_values=history_dict['val_loss'] 112 | plt.figure() 113 | plt.plot(loss_values,'bo',label='training loss') 114 | plt.plot(val_loss_values,'r',label='val training loss') 115 | 116 | plt.savefig('history.pdf') 117 | #rs = model.predict(oh)[0,:] 118 | 119 | 120 | with h5py.File('history.h5','w') as f: 121 | f['loss_values'] =loss_values 122 | f['val_loss'] = val_loss_values 123 | -------------------------------------------------------------------------------- /dcnn/demo/demo2/simu.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from matplotlib import pyplot as plt 5 | 6 | 7 | import keras 8 | 9 | 10 | import h5py 11 | import numpy as np 12 | from keras.layers import Input, Dense, Conv1D, MaxPooling2D, MaxPooling1D, BatchNormalization 13 | from keras.layers.core import Dropout, Activation, Flatten 14 | from keras.layers.merge import Concatenate 15 | from keras.models import Model 16 | from keras.callbacks import EarlyStopping, ModelCheckpoint 17 | from keras.optimizers import Adam 18 | from keras.utils import multi_gpu_model 19 | 20 | from keras.regularizers import l1,l2, l1_l2 21 | from keras.constraints import MaxNorm 22 | from keras.optimizers import SGD 23 | 24 | from keras.activations import relu 25 | 26 | 27 | import os 28 | import tensorflow as tf 29 | import keras.backend.tensorflow_backend as KTF 30 | 31 | 32 | 33 | 34 | input_bp = 82 35 | 36 | batch_size=128 37 | 38 | 39 | 40 | 41 | seqInput = Input(shape=(input_bp, 4), name='seqInput') 42 | 43 | seq = Conv1D(5, 7)(seqInput) 44 | seq = BatchNormalization()(seq) 45 | seq = Activation('relu')(seq) 46 | seq = MaxPooling1D(2)(seq) 47 | seq = Conv1D(5, 3)(seq) 48 | seq = BatchNormalization()(seq) 49 | seq = Activation('relu')(seq) 50 | seq = MaxPooling1D(2)(seq) 51 | seq = Conv1D(6, 3)(seq) 52 | seq = BatchNormalization()(seq) 53 | seq = Activation('relu')(seq) 54 | seq = MaxPooling1D(2)(seq) 55 | seq = Conv1D(6, 3)(seq) 56 | seq = BatchNormalization()(seq) 57 | seq = Activation('relu')(seq) 58 | seq = MaxPooling1D(2)(seq) 59 | seq = Conv1D(1, 3)(seq) 60 | seq = BatchNormalization()(seq) 61 | seq = Activation('sigmoid')(seq) 62 | seq = Flatten()(seq) 63 | 64 | model = Model(inputs = [seqInput], outputs = [seq]) 65 | model_json = model.to_json() 66 | with open("model.json", "w") as json_file: 67 | json_file.write(model_json) 68 | 69 | #from keras.optimizers import RMSprop 70 | model.compile('adam', loss='binary_crossentropy', metrics=['accuracy']) 71 | 72 | 73 | PWM0 = np.loadtxt('PWM') 74 | 75 | PWM = np.ones((4,input_bp))*0.25 76 | PWM1 = np.zeros((4,5))*0.25 77 | PWM1[1:2,:] = 0.5 78 | 79 | print(PWM0.shape) 80 | print(PWM.shape) 81 | 82 | def pwm_to_sample(PWM, n = 1000): 83 | PWM /= PWM.sum(axis=0) 84 | PWM = PWM.T 85 | PWM = PWM[::-1,:] 86 | PWM = PWM[:,::-1] 87 | sample = np.zeros((n,PWM.shape[0],PWM.shape[1])) 88 | for i in range(n): 89 | for j in range(sample.shape[1]): 90 | sample[i,j,np.random.choice(4,1,p=PWM[j,:])] = 1 91 | return sample 92 | 93 | size = 10000 
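# The block below builds the simulated training set described in the demo2 README.
# pwm_to_sample draws one-hot sequences position by position from the normalized PWM.
# Positives (spp, label 1) embed two PWM0 (CTCF) instances starting at pos0 in [0,16)
# and pos1 in [44,60); negatives (spn, label 0) embed one instance at pos2 in [0,16)
# or [46,62) and the other at pos4 in [17,45), i.e. in the middle region, so only the
# positive spacing pattern is labeled 1.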
94 | 95 | sp0 = pwm_to_sample(PWM0,n=size) 96 | sp1 = pwm_to_sample(PWM0,n=size) 97 | sp2 = pwm_to_sample(PWM0,n=size) 98 | sp3 = pwm_to_sample(PWM1,n=size) 99 | sp4 = pwm_to_sample(PWM0,n=size) 100 | spp = pwm_to_sample(PWM,n=size) 101 | spn = pwm_to_sample(PWM,n=size) 102 | pos0 = np.random.randint(0,16,size) 103 | pos1 = np.random.randint(44,60,size) 104 | pos2 = np.r_[np.random.randint(0,16,int(size/2)),np.random.randint(46,62,int(size/2))] 105 | pos4 = np.random.randint(17,45,size) 106 | pos3 = np.random.randint(0,76,size) 107 | 108 | print(sp0.shape) 109 | print(sp1.shape) 110 | print(spp.shape) 111 | 112 | 113 | for i in range(size): 114 | spp[i,pos0[i]:(pos0[i]+PWM0.shape[1]),:] = sp0[i,:,:] 115 | spp[i,pos1[i]:(pos1[i]+PWM0.shape[1]),:] = sp1[i,:,:] 116 | 117 | for i in range(size): 118 | spn[i,pos2[i]:(pos2[i]+PWM0.shape[1]),:] = sp2[i,:,:] 119 | spn[i,pos4[i]:(pos4[i]+PWM0.shape[1]),:] = sp4[i,:,:] 120 | # spn[i,pos3[i]:(pos3[i]+PWM1.shape[1]),:] = sp3[i,:,:] 121 | 122 | sp = np.concatenate([spp,spn],axis=0) 123 | 124 | label = np.r_[np.ones(size),np.zeros(size)] 125 | 126 | callbacks=[] 127 | callbacks.append(ModelCheckpoint(filepath='weight.hdf5',save_best_only=True)) 128 | callbacks.append(EarlyStopping(patience=15)) 129 | 130 | 131 | history = model.fit(x= sp, y=label, epochs=100,validation_split=0.1,callbacks=callbacks) 132 | 133 | 134 | history_dict=history.history 135 | loss_values = history_dict['loss'] 136 | val_loss_values=history_dict['val_loss'] 137 | plt.figure() 138 | plt.plot(loss_values,'bo',label='training loss') 139 | plt.plot(val_loss_values,'r',label='val training loss') 140 | 141 | plt.savefig('history.pdf') 142 | #rs = model.predict(oh)[0,:] 143 | 144 | 145 | with h5py.File('history.h5','w') as f: 146 | f['loss_values'] =loss_values 147 | f['val_loss'] = val_loss_values 148 | f['sample'] = sp 149 | f['label'] = label 150 | -------------------------------------------------------------------------------- /dcnn/Basset/Basenji/basenji.pred.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from matplotlib import pyplot as plt 5 | 6 | import h5py 7 | 8 | 9 | import keras 10 | 11 | 12 | import h5py 13 | import numpy as np 14 | from keras.layers import Input, Dense, Conv1D, MaxPooling2D, MaxPooling1D, BatchNormalization 15 | from keras.layers.core import Dropout, Activation, Flatten 16 | from keras.layers.merge import Concatenate 17 | from keras.models import Model 18 | from keras.callbacks import EarlyStopping, ModelCheckpoint 19 | from keras.optimizers import Adam 20 | from keras.utils import multi_gpu_model 21 | 22 | from keras.regularizers import l1,l2, l1_l2 23 | from keras.constraints import MaxNorm 24 | from keras.optimizers import SGD 25 | 26 | from keras.activations import relu 27 | 28 | 29 | import os 30 | import tensorflow as tf 31 | import keras.backend.tensorflow_backend as KTF 32 | 33 | from data.util import Randseq, fill_oh, gen_samples 34 | 35 | input_bp = 600 36 | 37 | batch_size=128 38 | 39 | 40 | seqInput = Input(shape=(input_bp, 4), name='seqInput') 41 | 42 | seq = Conv1D(288, 17)(seqInput) 43 | seq = BatchNormalization()(seq) 44 | seq = Activation('relu')(seq) 45 | seq = MaxPooling1D(3)(seq) 46 | seq = Conv1D(288, 5)(seq) 47 | seq = BatchNormalization()(seq) 48 | seq = Activation('relu')(seq) 49 | seq = MaxPooling1D(2)(seq) 50 | seq = Conv1D(int(np.round(288*pow(1.1222,1))), 5)(seq) 51 | seq = BatchNormalization()(seq) 52 | seq = 
Activation('relu')(seq) 53 | seq = MaxPooling1D(2)(seq) 54 | seq = Conv1D(int(np.round(288*pow(1.1222,2))), 5)(seq) 55 | seq = BatchNormalization()(seq) 56 | seq = Activation('relu')(seq) 57 | seq = MaxPooling1D(2)(seq) 58 | seq = Conv1D(int(np.round(288*pow(1.1222,3))), 5)(seq) 59 | seq = BatchNormalization()(seq) 60 | seq = Activation('relu')(seq) 61 | seq = MaxPooling1D(2)(seq) 62 | seq = Conv1D(int(np.round(288*pow(1.1222,4))), 5)(seq) 63 | seq = BatchNormalization()(seq) 64 | seq = Activation('relu')(seq) 65 | seq = MaxPooling1D(2)(seq) 66 | seq = Conv1D(256, 1)(seq) 67 | seq = BatchNormalization()(seq) 68 | seq = Activation('relu')(seq) 69 | seq = Flatten()(seq) 70 | seq = Dropout(0.2)(seq) 71 | seq = Dense(768)(seq) 72 | seq = BatchNormalization()(seq) 73 | seq = Activation('relu')(seq) 74 | seq = Dense(164)(seq) 75 | seq = BatchNormalization()(seq) 76 | seq = Activation('sigmoid')(seq) 77 | 78 | model = Model(inputs = [seqInput], outputs = [seq]) 79 | 80 | model.load_weights('weight.hdf5') 81 | 82 | 83 | chroms = ['chr'+str(i) for i in range(1,23)] 84 | chroms.append('chrX') 85 | chroms.append('chrY') 86 | 87 | with h5py.File('data/onehot.h5', 'r') as f: 88 | onehot = dict() 89 | for chrom in chroms: 90 | onehot[chrom] = f[chrom][:] 91 | 92 | 93 | 94 | model.compile('adam', loss='binary_crossentropy', metrics=['accuracy']) 95 | 96 | 97 | import pandas as pd 98 | 99 | bed = pd.read_csv('data/encode_roadmap.bed', sep='\t', header=None) 100 | 101 | label = np.zeros((bed.shape[0],164)) 102 | 103 | for i in range(bed.shape[0]): 104 | label[i,np.array(bed.iloc[i,6].split(','),dtype=int)] = 1 105 | 106 | 107 | with h5py.File('data/sample.h5','r') as f: 108 | seltest = f['seltest'][:] 109 | 110 | 111 | test_data = {'sample':bed.iloc[seltest,:], 'label':label[seltest,:],'seq_onehot': onehot } 112 | 113 | test_randseq = Randseq(test_data['sample'].shape[0], True) 114 | 115 | test_steps = int(test_randseq.seqsize / batch_size) 116 | if test_randseq.seqsize != batch_size * test_steps: 117 | test_steps += 1 118 | 119 | sample_generator = gen_samples 120 | 121 | 122 | 123 | test_gen = sample_generator(batchsize=batch_size, 124 | randseq=test_randseq, 125 | data=test_data) 126 | 127 | 128 | result = model.predict_generator(generator=test_gen, 129 | steps = test_steps, 130 | verbose=1) 131 | 132 | 133 | 134 | from sklearn.metrics import average_precision_score 135 | from sklearn.metrics import roc_auc_score 136 | auprc=np.array([average_precision_score(test_data['label'][:,i],result[:,i]) for i in range(test_data['label'].shape[1])]) 137 | auroc=np.array([roc_auc_score(test_data['label'][:,i],result[:,i]) for i in range(test_data['label'].shape[1])]) 138 | 139 | with h5py.File('test.h5','w') as f: 140 | f['test_result'] = result 141 | f['test_label'] = test_data['label'] 142 | f['auprc'] = auprc 143 | f['auroc']=auroc 144 | -------------------------------------------------------------------------------- /NeuronMotif/segment.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | 4 | def ppm_smooth(ppm, sp_size, M = 80): 5 | return (ppm*sp_size + M*np.ones((ppm.shape[0],4))*0.25)/(sp_size + M) 6 | 7 | def ppm_segment0(ppm, ic_min = 1, smooth=False, sp_size = None, M = 30, flank = 1, extend = 0, shortest = None): 8 | if shortest is None: 9 | shortest = int(flank*1.5) + 3 10 | ppm0 = ppm.copy() 11 | ppm1 = ppm_smooth(ppm = ppm, sp_size = sp_size, M = 1) 12 | ic0 = - ppm1 * np.log(ppm1) 13 | ic0 = 2-ic0.sum(axis=1) 14 | if smooth: 
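# Note on the information-content convention used throughout this file:
# ic is computed as 2 - H with H = -(p * log p).sum(axis=1). Since np.log is
# the natural logarithm, H peaks at ln(4) ~= 1.386 rather than the 2 bits the
# "2 -" offset suggests; np.log2 would yield IC in bits, so the ic_min
# threshold used by ppm_segment below is interpreted on this natural-log
# scale. Also note that ppm_segment0 is an early draft that always returns
# None; ppm_segment below is the version actually used.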
15 | ppm = ppm_smooth(ppm = ppm, sp_size = sp_size, M = M) 16 | ic = - ppm * np.log(ppm) 17 | ic = 2-ic.sum(axis=1) 18 | sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 2)] 19 | return None 20 | 21 | 22 | def ppm_segment(ppm, ic_min = 1, smooth=False, sp_size = None, M = 30, flank = 0, extend = 0, shortest = None): 23 | if shortest is None: 24 | shortest = int(flank*1.5) + 3 25 | ppm0 = ppm.copy() 26 | ppm1 = ppm_smooth(ppm = ppm, sp_size = sp_size, M = 1) 27 | ic0 = - ppm1 * np.log(ppm1) 28 | ic0 = 2-ic0.sum(axis=1) 29 | if smooth: 30 | ppm = ppm_smooth(ppm = ppm, sp_size = sp_size, M = M) 31 | ic = - ppm * np.log(ppm) 32 | ic = 2-ic.sum(axis=1) 33 | sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 1)] 34 | s = [0] 35 | s.extend(sm3) 36 | s.append(0) 37 | ic = np.array(s) 38 | # ic1 = (ic-1)*0.99+1 39 | # ic = ic1/(2-ic1) 40 | # sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 1)] 41 | # s = [0] 42 | # s.extend(sm3) 43 | # s.append(0) 44 | # ic = np.array(s) 45 | ismtf = ic > ic_min 46 | for p in range(ismtf.shape[0]-1): 47 | if (not ismtf[p]) and ismtf[p+1]: 48 | print(p) 49 | st = p-(flank-1) 50 | ed = p + 1 51 | if st < 0: 52 | st = 0 53 | ismtf[st:ed] = True 54 | for p in range(ismtf.shape[0]-2,-1,-1): 55 | if ismtf[p] and (not ismtf[p+1]): 56 | print(p) 57 | st = p + 1 58 | ed = p + flank + 1 59 | if ed > ismtf.shape[0] - 1: 60 | ed = ismtf.shape[0] - 1 61 | ismtf[st:ed] = True 62 | lst = [] 63 | if ismtf[0]: 64 | lst.append(0) 65 | for p in range(ismtf.shape[0]-1): 66 | if (not ismtf[p]) and ismtf[p+1]: 67 | lst.append(p+1) 68 | if ismtf[p] and (not ismtf[p+1]): 69 | lst.append(p+1) 70 | if ismtf[-1]: 71 | lst.append(ismtf.shape[0]-1) 72 | print(lst) 73 | ppm_segs = [] 74 | starts = [] 75 | ends = [] 76 | ppm_segs_ics = [] 77 | for p in range(0,len(lst),2): 78 | if lst[p+1]-lst[p] < shortest: 79 | continue 80 | st = lst[p] 81 | if extend > 0: 82 | if p == 0: 83 | if st - extend < 0: 84 | st = 0 85 | else: 86 | st -= extend 87 | else: 88 | if st - extend <= lst[p-1] + extend: 89 | st = int((lst[p-1] + st)/2)+1 90 | else: 91 | st -= extend 92 | ed = lst[p+1] 93 | if extend >0: 94 | if p+1 == len(lst)-1: 95 | if ed + extend > ismtf.shape[0] - 1: 96 | ed = ismtf.shape[0] - 1 97 | else: 98 | ed += extend 99 | else: 100 | if ed + extend >= lst[p+2] - extend: 101 | ed = int((lst[p+2] + ed)/2) 102 | else: 103 | ed += extend 104 | ppm_segs.append(ppm0[lst[p]:(lst[p+1]),:]) 105 | starts.append(st) 106 | ends.append(ed) 107 | ppm_segs_ics.append(ic0[lst[p]:(lst[p+1])].mean()) 108 | return ppm_segs, starts, ends, ppm_segs_ics, ic0[ismtf == False].mean() 109 | 110 | def segs_to_chen(ppmids, ppms, starts, ends, filepath): 111 | lines = [] 112 | for i in range(len(ppms)): 113 | print(i) 114 | for j in range(len(ppms[i])): 115 | print(j) 116 | lines.append('>%s_%d_%d\n' %(ppmids[i],starts[i][j], ends[i][j])) 117 | ppm = ppms[i][j] 118 | for k in range(ppm.shape[0]): 119 | print(k) 120 | lines.append('\t'.join(list(np.array(np.array(ppm[k,:]*1000,dtype=int),dtype=str))) + '\n') 121 | with open(filepath, 'w') as f: 122 | f.writelines(lines) 123 | 124 | 125 | -------------------------------------------------------------------------------- /demos/demo1/segment.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | 4 | def ppm_smooth(ppm, sp_size, M = 80): 5 | return (ppm*sp_size + M*np.ones((ppm.shape[0],4))*0.25)/(sp_size + M) 6 | 7 | def ppm_segment0(ppm, ic_min = 1, smooth=False, sp_size = 
None, M = 30, flank = 1, extend = 0, shortest = None): 8 | if shortest is None: 9 | shortest = int(flank*1.5) + 3 10 | ppm0 = ppm.copy() 11 | ppm1 = ppm_smooth(ppm = ppm, sp_size = sp_size, M = 1) 12 | ic0 = - ppm1 * np.log(ppm1) 13 | ic0 = 2-ic0.sum(axis=1) 14 | if smooth: 15 | ppm = ppm_smooth(ppm = ppm, sp_size = sp_size, M = M) 16 | ic = - ppm * np.log(ppm) 17 | ic = 2-ic.sum(axis=1) 18 | sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 2)] 19 | return None 20 | 21 | 22 | def ppm_segment(ppm, ic_min = 1, smooth=False, sp_size = None, M = 30, flank = 0, extend = 0, shortest = None): 23 | if shortest is None: 24 | shortest = int(flank*1.5) + 3 25 | ppm0 = ppm.copy() 26 | ppm1 = ppm_smooth(ppm = ppm, sp_size = sp_size, M = 1) 27 | ic0 = - ppm1 * np.log(ppm1) 28 | ic0 = 2-ic0.sum(axis=1) 29 | if smooth: 30 | ppm = ppm_smooth(ppm = ppm, sp_size = sp_size, M = M) 31 | ic = - ppm * np.log(ppm) 32 | ic = 2-ic.sum(axis=1) 33 | sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 1)] 34 | s = [0] 35 | s.extend(sm3) 36 | s.append(0) 37 | ic = np.array(s) 38 | # ic1 = (ic-1)*0.99+1 39 | # ic = ic1/(2-ic1) 40 | # sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 1)] 41 | # s = [0] 42 | # s.extend(sm3) 43 | # s.append(0) 44 | # ic = np.array(s) 45 | ismtf = ic > ic_min 46 | for p in range(ismtf.shape[0]-1): 47 | if (not ismtf[p]) and ismtf[p+1]: 48 | print(p) 49 | st = p-(flank-1) 50 | ed = p + 1 51 | if st < 0: 52 | st = 0 53 | ismtf[st:ed] = True 54 | for p in range(ismtf.shape[0]-2,-1,-1): 55 | if ismtf[p] and (not ismtf[p+1]): 56 | print(p) 57 | st = p + 1 58 | ed = p + flank + 1 59 | if ed > ismtf.shape[0] - 1: 60 | ed = ismtf.shape[0] - 1 61 | ismtf[st:ed] = True 62 | lst = [] 63 | if ismtf[0]: 64 | lst.append(0) 65 | for p in range(ismtf.shape[0]-1): 66 | if (not ismtf[p]) and ismtf[p+1]: 67 | lst.append(p+1) 68 | if ismtf[p] and (not ismtf[p+1]): 69 | lst.append(p+1) 70 | if ismtf[-1]: 71 | lst.append(ismtf.shape[0]-1) 72 | print(lst) 73 | ppm_segs = [] 74 | starts = [] 75 | ends = [] 76 | ppm_segs_ics = [] 77 | for p in range(0,len(lst),2): 78 | if lst[p+1]-lst[p] < shortest: 79 | continue 80 | st = lst[p] 81 | if extend > 0: 82 | if p == 0: 83 | if st - extend < 0: 84 | st = 0 85 | else: 86 | st -= extend 87 | else: 88 | if st - extend <= lst[p-1] + extend: 89 | st = int((lst[p-1] + st)/2)+1 90 | else: 91 | st -= extend 92 | ed = lst[p+1] 93 | if extend >0: 94 | if p+1 == len(lst)-1: 95 | if ed + extend > ismtf.shape[0] - 1: 96 | ed = ismtf.shape[0] - 1 97 | else: 98 | ed += extend 99 | else: 100 | if ed + extend >= lst[p+2] - extend: 101 | ed = int((lst[p+2] + ed)/2) 102 | else: 103 | ed += extend 104 | ppm_segs.append(ppm0[lst[p]:(lst[p+1]),:]) 105 | starts.append(st) 106 | ends.append(ed) 107 | ppm_segs_ics.append(ic0[lst[p]:(lst[p+1])].mean()) 108 | return ppm_segs, starts, ends, ppm_segs_ics, ic0[ismtf == False].mean() 109 | 110 | def segs_to_chen(ppmids, ppms, starts, ends, filepath): 111 | lines = [] 112 | for i in range(len(ppms)): 113 | print(i) 114 | for j in range(len(ppms[i])): 115 | print(j) 116 | lines.append('>%s_%d_%d\n' %(ppmids[i],starts[i][j], ends[i][j])) 117 | ppm = ppms[i][j] 118 | for k in range(ppm.shape[0]): 119 | print(k) 120 | lines.append('\t'.join(list(np.array(np.array(ppm[k,:]*1000,dtype=int),dtype=str))) + '\n') 121 | with open(filepath, 'w') as f: 122 | f.writelines(lines) 123 | 124 | 125 | -------------------------------------------------------------------------------- /demos/demo2/segment.py: 
-------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | 4 | def ppm_smooth(ppm, sp_size, M = 80): 5 | return (ppm*sp_size + M*np.ones((ppm.shape[0],4))*0.25)/(sp_size + M) 6 | 7 | def ppm_segment0(ppm, ic_min = 1, smooth=False, sp_size = None, M = 30, flank = 1, extend = 0, shortest = None): 8 | if shortest is None: 9 | shortest = int(flank*1.5) + 3 10 | ppm0 = ppm.copy() 11 | ppm1 = ppm_smooth(ppm = ppm, sp_size = sp_size, M = 1) 12 | ic0 = - ppm1 * np.log(ppm1) 13 | ic0 = 2-ic0.sum(axis=1) 14 | if smooth: 15 | ppm = ppm_smooth(ppm = ppm, sp_size = sp_size, M = M) 16 | ic = - ppm * np.log(ppm) 17 | ic = 2-ic.sum(axis=1) 18 | sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 2)] 19 | return None 20 | 21 | 22 | def ppm_segment(ppm, ic_min = 1, smooth=False, sp_size = None, M = 30, flank = 0, extend = 0, shortest = None): 23 | if shortest is None: 24 | shortest = int(flank*1.5) + 3 25 | ppm0 = ppm.copy() 26 | ppm1 = ppm_smooth(ppm = ppm, sp_size = sp_size, M = 1) 27 | ic0 = - ppm1 * np.log(ppm1) 28 | ic0 = 2-ic0.sum(axis=1) 29 | if smooth: 30 | ppm = ppm_smooth(ppm = ppm, sp_size = sp_size, M = M) 31 | ic = - ppm * np.log(ppm) 32 | ic = 2-ic.sum(axis=1) 33 | sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 1)] 34 | s = [0] 35 | s.extend(sm3) 36 | s.append(0) 37 | ic = np.array(s) 38 | # ic1 = (ic-1)*0.99+1 39 | # ic = ic1/(2-ic1) 40 | # sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 1)] 41 | # s = [0] 42 | # s.extend(sm3) 43 | # s.append(0) 44 | # ic = np.array(s) 45 | ismtf = ic > ic_min 46 | for p in range(ismtf.shape[0]-1): 47 | if (not ismtf[p]) and ismtf[p+1]: 48 | print(p) 49 | st = p-(flank-1) 50 | ed = p + 1 51 | if st < 0: 52 | st = 0 53 | ismtf[st:ed] = True 54 | for p in range(ismtf.shape[0]-2,-1,-1): 55 | if ismtf[p] and (not ismtf[p+1]): 56 | print(p) 57 | st = p + 1 58 | ed = p + flank + 1 59 | if ed > ismtf.shape[0] - 1: 60 | ed = ismtf.shape[0] - 1 61 | ismtf[st:ed] = True 62 | lst = [] 63 | if ismtf[0]: 64 | lst.append(0) 65 | for p in range(ismtf.shape[0]-1): 66 | if (not ismtf[p]) and ismtf[p+1]: 67 | lst.append(p+1) 68 | if ismtf[p] and (not ismtf[p+1]): 69 | lst.append(p+1) 70 | if ismtf[-1]: 71 | lst.append(ismtf.shape[0]-1) 72 | print(lst) 73 | ppm_segs = [] 74 | starts = [] 75 | ends = [] 76 | ppm_segs_ics = [] 77 | for p in range(0,len(lst),2): 78 | if lst[p+1]-lst[p] < shortest: 79 | continue 80 | st = lst[p] 81 | if extend > 0: 82 | if p == 0: 83 | if st - extend < 0: 84 | st = 0 85 | else: 86 | st -= extend 87 | else: 88 | if st - extend <= lst[p-1] + extend: 89 | st = int((lst[p-1] + st)/2)+1 90 | else: 91 | st -= extend 92 | ed = lst[p+1] 93 | if extend >0: 94 | if p+1 == len(lst)-1: 95 | if ed + extend > ismtf.shape[0] - 1: 96 | ed = ismtf.shape[0] - 1 97 | else: 98 | ed += extend 99 | else: 100 | if ed + extend >= lst[p+2] - extend: 101 | ed = int((lst[p+2] + ed)/2) 102 | else: 103 | ed += extend 104 | ppm_segs.append(ppm0[lst[p]:(lst[p+1]),:]) 105 | starts.append(st) 106 | ends.append(ed) 107 | ppm_segs_ics.append(ic0[lst[p]:(lst[p+1])].mean()) 108 | return ppm_segs, starts, ends, ppm_segs_ics, ic0[ismtf == False].mean() 109 | 110 | def segs_to_chen(ppmids, ppms, starts, ends, filepath): 111 | lines = [] 112 | for i in range(len(ppms)): 113 | print(i) 114 | for j in range(len(ppms[i])): 115 | print(j) 116 | lines.append('>%s_%d_%d\n' %(ppmids[i],starts[i][j], ends[i][j])) 117 | ppm = ppms[i][j] 118 | for k in range(ppm.shape[0]): 119 | print(k) 120 | 
lines.append('\t'.join(list(np.array(np.array(ppm[k,:]*1000,dtype=int),dtype=str))) + '\n') 121 | with open(filepath, 'w') as f: 122 | f.writelines(lines) 123 | 124 | 125 | -------------------------------------------------------------------------------- /demos/Basset/BD-10/segment.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | 4 | def ppm_smooth(ppm, sp_size, M = 80): 5 | return (ppm*sp_size + M*np.ones((ppm.shape[0],4))*0.25)/(sp_size + M) 6 | 7 | def ppm_segment0(ppm, ic_min = 1, smooth=False, sp_size = None, M = 30, flank = 1, extend = 0, shortest = None): 8 | if shortest is None: 9 | shortest = int(flank*1.5) + 3 10 | ppm0 = ppm.copy() 11 | ppm1 = ppm_smooth(ppm = ppm, sp_size = sp_size, M = 1) 12 | ic0 = - ppm1 * np.log(ppm1) 13 | ic0 = 2-ic0.sum(axis=1) 14 | if smooth: 15 | ppm = ppm_smooth(ppm = ppm, sp_size = sp_size, M = M) 16 | ic = - ppm * np.log(ppm) 17 | ic = 2-ic.sum(axis=1) 18 | sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 2)] 19 | return None 20 | 21 | 22 | def ppm_segment(ppm, ic_min = 1, smooth=False, sp_size = None, M = 30, flank = 0, extend = 0, shortest = None): 23 | if shortest is None: 24 | shortest = int(flank*1.5) + 3 25 | ppm0 = ppm.copy() 26 | ppm1 = ppm_smooth(ppm = ppm, sp_size = sp_size, M = 1) 27 | ic0 = - ppm1 * np.log(ppm1) 28 | ic0 = 2-ic0.sum(axis=1) 29 | if smooth: 30 | ppm = ppm_smooth(ppm = ppm, sp_size = sp_size, M = M) 31 | ic = - ppm * np.log(ppm) 32 | ic = 2-ic.sum(axis=1) 33 | sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 1)] 34 | s = [0] 35 | s.extend(sm3) 36 | s.append(0) 37 | ic = np.array(s) 38 | # ic1 = (ic-1)*0.99+1 39 | # ic = ic1/(2-ic1) 40 | # sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 1)] 41 | # s = [0] 42 | # s.extend(sm3) 43 | # s.append(0) 44 | # ic = np.array(s) 45 | ismtf = ic > ic_min 46 | for p in range(ismtf.shape[0]-1): 47 | if (not ismtf[p]) and ismtf[p+1]: 48 | print(p) 49 | st = p-(flank-1) 50 | ed = p + 1 51 | if st < 0: 52 | st = 0 53 | ismtf[st:ed] = True 54 | for p in range(ismtf.shape[0]-2,-1,-1): 55 | if ismtf[p] and (not ismtf[p+1]): 56 | print(p) 57 | st = p + 1 58 | ed = p + flank + 1 59 | if ed > ismtf.shape[0] - 1: 60 | ed = ismtf.shape[0] - 1 61 | ismtf[st:ed] = True 62 | lst = [] 63 | if ismtf[0]: 64 | lst.append(0) 65 | for p in range(ismtf.shape[0]-1): 66 | if (not ismtf[p]) and ismtf[p+1]: 67 | lst.append(p+1) 68 | if ismtf[p] and (not ismtf[p+1]): 69 | lst.append(p+1) 70 | if ismtf[-1]: 71 | lst.append(ismtf.shape[0]-1) 72 | print(lst) 73 | ppm_segs = [] 74 | starts = [] 75 | ends = [] 76 | ppm_segs_ics = [] 77 | for p in range(0,len(lst),2): 78 | if lst[p+1]-lst[p] < shortest: 79 | continue 80 | st = lst[p] 81 | if extend > 0: 82 | if p == 0: 83 | if st - extend < 0: 84 | st = 0 85 | else: 86 | st -= extend 87 | else: 88 | if st - extend <= lst[p-1] + extend: 89 | st = int((lst[p-1] + st)/2)+1 90 | else: 91 | st -= extend 92 | ed = lst[p+1] 93 | if extend >0: 94 | if p+1 == len(lst)-1: 95 | if ed + extend > ismtf.shape[0] - 1: 96 | ed = ismtf.shape[0] - 1 97 | else: 98 | ed += extend 99 | else: 100 | if ed + extend >= lst[p+2] - extend: 101 | ed = int((lst[p+2] + ed)/2) 102 | else: 103 | ed += extend 104 | ppm_segs.append(ppm0[lst[p]:(lst[p+1]),:]) 105 | starts.append(st) 106 | ends.append(ed) 107 | ppm_segs_ics.append(ic0[lst[p]:(lst[p+1])].mean()) 108 | return ppm_segs, starts, ends, ppm_segs_ics, ic0[ismtf == False].mean() 109 | 110 | def segs_to_chen(ppmids, ppms, starts, ends, 
filepath): 111 | lines = [] 112 | for i in range(len(ppms)): 113 | print(i) 114 | for j in range(len(ppms[i])): 115 | print(j) 116 | lines.append('>%s_%d_%d\n' %(ppmids[i],starts[i][j], ends[i][j])) 117 | ppm = ppms[i][j] 118 | for k in range(ppm.shape[0]): 119 | print(k) 120 | lines.append('\t'.join(list(np.array(np.array(ppm[k,:]*1000,dtype=int),dtype=str))) + '\n') 121 | with open(filepath, 'w') as f: 122 | f.writelines(lines) 123 | 124 | 125 | -------------------------------------------------------------------------------- /demos/Basset/Basset/segment.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | 4 | def ppm_smooth(ppm, sp_size, M = 80): 5 | return (ppm*sp_size + M*np.ones((ppm.shape[0],4))*0.25)/(sp_size + M) 6 | 7 | def ppm_segment0(ppm, ic_min = 1, smooth=False, sp_size = None, M = 30, flank = 1, extend = 0, shortest = None): 8 | if shortest is None: 9 | shortest = int(flank*1.5) + 3 10 | ppm0 = ppm.copy() 11 | ppm1 = ppm_smooth(ppm = ppm, sp_size = sp_size, M = 1) 12 | ic0 = - ppm1 * np.log(ppm1) 13 | ic0 = 2-ic0.sum(axis=1) 14 | if smooth: 15 | ppm = ppm_smooth(ppm = ppm, sp_size = sp_size, M = M) 16 | ic = - ppm * np.log(ppm) 17 | ic = 2-ic.sum(axis=1) 18 | sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 2)] 19 | return None 20 | 21 | 22 | def ppm_segment(ppm, ic_min = 1, smooth=False, sp_size = None, M = 30, flank = 0, extend = 0, shortest = None): 23 | if shortest is None: 24 | shortest = int(flank*1.5) + 3 25 | ppm0 = ppm.copy() 26 | ppm1 = ppm_smooth(ppm = ppm, sp_size = sp_size, M = 1) 27 | ic0 = - ppm1 * np.log(ppm1) 28 | ic0 = 2-ic0.sum(axis=1) 29 | if smooth: 30 | ppm = ppm_smooth(ppm = ppm, sp_size = sp_size, M = M) 31 | ic = - ppm * np.log(ppm) 32 | ic = 2-ic.sum(axis=1) 33 | sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 1)] 34 | s = [0] 35 | s.extend(sm3) 36 | s.append(0) 37 | ic = np.array(s) 38 | # ic1 = (ic-1)*0.99+1 39 | # ic = ic1/(2-ic1) 40 | # sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 1)] 41 | # s = [0] 42 | # s.extend(sm3) 43 | # s.append(0) 44 | # ic = np.array(s) 45 | ismtf = ic > ic_min 46 | for p in range(ismtf.shape[0]-1): 47 | if (not ismtf[p]) and ismtf[p+1]: 48 | print(p) 49 | st = p-(flank-1) 50 | ed = p + 1 51 | if st < 0: 52 | st = 0 53 | ismtf[st:ed] = True 54 | for p in range(ismtf.shape[0]-2,-1,-1): 55 | if ismtf[p] and (not ismtf[p+1]): 56 | print(p) 57 | st = p + 1 58 | ed = p + flank + 1 59 | if ed > ismtf.shape[0] - 1: 60 | ed = ismtf.shape[0] - 1 61 | ismtf[st:ed] = True 62 | lst = [] 63 | if ismtf[0]: 64 | lst.append(0) 65 | for p in range(ismtf.shape[0]-1): 66 | if (not ismtf[p]) and ismtf[p+1]: 67 | lst.append(p+1) 68 | if ismtf[p] and (not ismtf[p+1]): 69 | lst.append(p+1) 70 | if ismtf[-1]: 71 | lst.append(ismtf.shape[0]-1) 72 | print(lst) 73 | ppm_segs = [] 74 | starts = [] 75 | ends = [] 76 | ppm_segs_ics = [] 77 | for p in range(0,len(lst),2): 78 | if lst[p+1]-lst[p] < shortest: 79 | continue 80 | st = lst[p] 81 | if extend > 0: 82 | if p == 0: 83 | if st - extend < 0: 84 | st = 0 85 | else: 86 | st -= extend 87 | else: 88 | if st - extend <= lst[p-1] + extend: 89 | st = int((lst[p-1] + st)/2)+1 90 | else: 91 | st -= extend 92 | ed = lst[p+1] 93 | if extend >0: 94 | if p+1 == len(lst)-1: 95 | if ed + extend > ismtf.shape[0] - 1: 96 | ed = ismtf.shape[0] - 1 97 | else: 98 | ed += extend 99 | else: 100 | if ed + extend >= lst[p+2] - extend: 101 | ed = int((lst[p+2] + ed)/2) 102 | else: 103 | ed += extend 104 | 
ppm_segs.append(ppm0[lst[p]:(lst[p+1]),:]) 105 | starts.append(st) 106 | ends.append(ed) 107 | ppm_segs_ics.append(ic0[lst[p]:(lst[p+1])].mean()) 108 | return ppm_segs, starts, ends, ppm_segs_ics, ic0[ismtf == False].mean() 109 | 110 | def segs_to_chen(ppmids, ppms, starts, ends, filepath): 111 | lines = [] 112 | for i in range(len(ppms)): 113 | print(i) 114 | for j in range(len(ppms[i])): 115 | print(j) 116 | lines.append('>%s_%d_%d\n' %(ppmids[i],starts[i][j], ends[i][j])) 117 | ppm = ppms[i][j] 118 | for k in range(ppm.shape[0]): 119 | print(k) 120 | lines.append('\t'.join(list(np.array(np.array(ppm[k,:]*1000,dtype=int),dtype=str))) + '\n') 121 | with open(filepath, 'w') as f: 122 | f.writelines(lines) 123 | 124 | 125 | -------------------------------------------------------------------------------- /demos/DeepSEA/DD-10/segment.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | 4 | def ppm_smooth(ppm, sp_size, M = 80): 5 | return (ppm*sp_size + M*np.ones((ppm.shape[0],4))*0.25)/(sp_size + M) 6 | 7 | def ppm_segment0(ppm, ic_min = 1, smooth=False, sp_size = None, M = 30, flank = 1, extend = 0, shortest = None): 8 | if shortest is None: 9 | shortest = int(flank*1.5) + 3 10 | ppm0 = ppm.copy() 11 | ppm1 = ppm_smooth(ppm = ppm, sp_size = sp_size, M = 1) 12 | ic0 = - ppm1 * np.log(ppm1) 13 | ic0 = 2-ic0.sum(axis=1) 14 | if smooth: 15 | ppm = ppm_smooth(ppm = ppm, sp_size = sp_size, M = M) 16 | ic = - ppm * np.log(ppm) 17 | ic = 2-ic.sum(axis=1) 18 | sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 2)] 19 | return None 20 | 21 | 22 | def ppm_segment(ppm, ic_min = 1, smooth=False, sp_size = None, M = 30, flank = 0, extend = 0, shortest = None): 23 | if shortest is None: 24 | shortest = int(flank*1.5) + 3 25 | ppm0 = ppm.copy() 26 | ppm1 = ppm_smooth(ppm = ppm, sp_size = sp_size, M = 1) 27 | ic0 = - ppm1 * np.log(ppm1) 28 | ic0 = 2-ic0.sum(axis=1) 29 | if smooth: 30 | ppm = ppm_smooth(ppm = ppm, sp_size = sp_size, M = M) 31 | ic = - ppm * np.log(ppm) 32 | ic = 2-ic.sum(axis=1) 33 | sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 1)] 34 | s = [0] 35 | s.extend(sm3) 36 | s.append(0) 37 | ic = np.array(s) 38 | # ic1 = (ic-1)*0.99+1 39 | # ic = ic1/(2-ic1) 40 | # sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 1)] 41 | # s = [0] 42 | # s.extend(sm3) 43 | # s.append(0) 44 | # ic = np.array(s) 45 | ismtf = ic > ic_min 46 | for p in range(ismtf.shape[0]-1): 47 | if (not ismtf[p]) and ismtf[p+1]: 48 | print(p) 49 | st = p-(flank-1) 50 | ed = p + 1 51 | if st < 0: 52 | st = 0 53 | ismtf[st:ed] = True 54 | for p in range(ismtf.shape[0]-2,-1,-1): 55 | if ismtf[p] and (not ismtf[p+1]): 56 | print(p) 57 | st = p + 1 58 | ed = p + flank + 1 59 | if ed > ismtf.shape[0] - 1: 60 | ed = ismtf.shape[0] - 1 61 | ismtf[st:ed] = True 62 | lst = [] 63 | if ismtf[0]: 64 | lst.append(0) 65 | for p in range(ismtf.shape[0]-1): 66 | if (not ismtf[p]) and ismtf[p+1]: 67 | lst.append(p+1) 68 | if ismtf[p] and (not ismtf[p+1]): 69 | lst.append(p+1) 70 | if ismtf[-1]: 71 | lst.append(ismtf.shape[0]-1) 72 | print(lst) 73 | ppm_segs = [] 74 | starts = [] 75 | ends = [] 76 | ppm_segs_ics = [] 77 | for p in range(0,len(lst),2): 78 | if lst[p+1]-lst[p] < shortest: 79 | continue 80 | st = lst[p] 81 | if extend > 0: 82 | if p == 0: 83 | if st - extend < 0: 84 | st = 0 85 | else: 86 | st -= extend 87 | else: 88 | if st - extend <= lst[p-1] + extend: 89 | st = int((lst[p-1] + st)/2)+1 90 | else: 91 | st -= extend 92 | ed = 
lst[p+1] 93 | if extend >0: 94 | if p+1 == len(lst)-1: 95 | if ed + extend > ismtf.shape[0] - 1: 96 | ed = ismtf.shape[0] - 1 97 | else: 98 | ed += extend 99 | else: 100 | if ed + extend >= lst[p+2] - extend: 101 | ed = int((lst[p+2] + ed)/2) 102 | else: 103 | ed += extend 104 | ppm_segs.append(ppm0[lst[p]:(lst[p+1]),:]) 105 | starts.append(st) 106 | ends.append(ed) 107 | ppm_segs_ics.append(ic0[lst[p]:(lst[p+1])].mean()) 108 | return ppm_segs, starts, ends, ppm_segs_ics, ic0[ismtf == False].mean() 109 | 110 | def segs_to_chen(ppmids, ppms, starts, ends, filepath): 111 | lines = [] 112 | for i in range(len(ppms)): 113 | print(i) 114 | for j in range(len(ppms[i])): 115 | print(j) 116 | lines.append('>%s_%d_%d\n' %(ppmids[i],starts[i][j], ends[i][j])) 117 | ppm = ppms[i][j] 118 | for k in range(ppm.shape[0]): 119 | print(k) 120 | lines.append('\t'.join(list(np.array(np.array(ppm[k,:]*1000,dtype=int),dtype=str))) + '\n') 121 | with open(filepath, 'w') as f: 122 | f.writelines(lines) 123 | 124 | 125 | -------------------------------------------------------------------------------- /demos/DeepSEA/DeepSEA/segment.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | 4 | def ppm_smooth(ppm, sp_size, M = 80): 5 | return (ppm*sp_size + M*np.ones((ppm.shape[0],4))*0.25)/(sp_size + M) 6 | 7 | def ppm_segment0(ppm, ic_min = 1, smooth=False, sp_size = None, M = 30, flank = 1, extend = 0, shortest = None): 8 | if shortest is None: 9 | shortest = int(flank*1.5) + 3 10 | ppm0 = ppm.copy() 11 | ppm1 = ppm_smooth(ppm = ppm, sp_size = sp_size, M = 1) 12 | ic0 = - ppm1 * np.log(ppm1) 13 | ic0 = 2-ic0.sum(axis=1) 14 | if smooth: 15 | ppm = ppm_smooth(ppm = ppm, sp_size = sp_size, M = M) 16 | ic = - ppm * np.log(ppm) 17 | ic = 2-ic.sum(axis=1) 18 | sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 2)] 19 | return None 20 | 21 | 22 | def ppm_segment(ppm, ic_min = 1, smooth=False, sp_size = None, M = 30, flank = 0, extend = 0, shortest = None): 23 | if shortest is None: 24 | shortest = int(flank*1.5) + 3 25 | ppm0 = ppm.copy() 26 | ppm1 = ppm_smooth(ppm = ppm, sp_size = sp_size, M = 1) 27 | ic0 = - ppm1 * np.log(ppm1) 28 | ic0 = 2-ic0.sum(axis=1) 29 | if smooth: 30 | ppm = ppm_smooth(ppm = ppm, sp_size = sp_size, M = M) 31 | ic = - ppm * np.log(ppm) 32 | ic = 2-ic.sum(axis=1) 33 | sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 1)] 34 | s = [0] 35 | s.extend(sm3) 36 | s.append(0) 37 | ic = np.array(s) 38 | # ic1 = (ic-1)*0.99+1 39 | # ic = ic1/(2-ic1) 40 | # sm3 = [ic[(i-1):(i+2)].mean() for i in range(1,ic.shape[0] - 1)] 41 | # s = [0] 42 | # s.extend(sm3) 43 | # s.append(0) 44 | # ic = np.array(s) 45 | ismtf = ic > ic_min 46 | for p in range(ismtf.shape[0]-1): 47 | if (not ismtf[p]) and ismtf[p+1]: 48 | print(p) 49 | st = p-(flank-1) 50 | ed = p + 1 51 | if st < 0: 52 | st = 0 53 | ismtf[st:ed] = True 54 | for p in range(ismtf.shape[0]-2,-1,-1): 55 | if ismtf[p] and (not ismtf[p+1]): 56 | print(p) 57 | st = p + 1 58 | ed = p + flank + 1 59 | if ed > ismtf.shape[0] - 1: 60 | ed = ismtf.shape[0] - 1 61 | ismtf[st:ed] = True 62 | lst = [] 63 | if ismtf[0]: 64 | lst.append(0) 65 | for p in range(ismtf.shape[0]-1): 66 | if (not ismtf[p]) and ismtf[p+1]: 67 | lst.append(p+1) 68 | if ismtf[p] and (not ismtf[p+1]): 69 | lst.append(p+1) 70 | if ismtf[-1]: 71 | lst.append(ismtf.shape[0]-1) 72 | print(lst) 73 | ppm_segs = [] 74 | starts = [] 75 | ends = [] 76 | ppm_segs_ics = [] 77 | for p in range(0,len(lst),2): 78 | if 
lst[p+1]-lst[p] < shortest: 79 | continue 80 | st = lst[p] 81 | if extend > 0: 82 | if p == 0: 83 | if st - extend < 0: 84 | st = 0 85 | else: 86 | st -= extend 87 | else: 88 | if st - extend <= lst[p-1] + extend: 89 | st = int((lst[p-1] + st)/2)+1 90 | else: 91 | st -= extend 92 | ed = lst[p+1] 93 | if extend >0: 94 | if p+1 == len(lst)-1: 95 | if ed + extend > ismtf.shape[0] - 1: 96 | ed = ismtf.shape[0] - 1 97 | else: 98 | ed += extend 99 | else: 100 | if ed + extend >= lst[p+2] - extend: 101 | ed = int((lst[p+2] + ed)/2) 102 | else: 103 | ed += extend 104 | ppm_segs.append(ppm0[lst[p]:(lst[p+1]),:]) 105 | starts.append(st) 106 | ends.append(ed) 107 | ppm_segs_ics.append(ic0[lst[p]:(lst[p+1])].mean()) 108 | return ppm_segs, starts, ends, ppm_segs_ics, ic0[ismtf == False].mean() 109 | 110 | def segs_to_chen(ppmids, ppms, starts, ends, filepath): 111 | lines = [] 112 | for i in range(len(ppms)): 113 | print(i) 114 | for j in range(len(ppms[i])): 115 | print(j) 116 | lines.append('>%s_%d_%d\n' %(ppmids[i],starts[i][j], ends[i][j])) 117 | ppm = ppms[i][j] 118 | for k in range(ppm.shape[0]): 119 | print(k) 120 | lines.append('\t'.join(list(np.array(np.array(ppm[k,:]*1000,dtype=int),dtype=str))) + '\n') 121 | with open(filepath, 'w') as f: 122 | f.writelines(lines) 123 | 124 | 125 | -------------------------------------------------------------------------------- /dcnn/Basset/BD-10/BD-10.pred.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from matplotlib import pyplot as plt 5 | 6 | import h5py 7 | 8 | 9 | import keras 10 | 11 | 12 | import h5py 13 | import numpy as np 14 | from keras.layers import Input, Dense, Conv1D, MaxPooling2D, MaxPooling1D, BatchNormalization 15 | from keras.layers.core import Dropout, Activation, Flatten 16 | from keras.layers.merge import Concatenate 17 | from keras.models import Model 18 | from keras.callbacks import EarlyStopping, ModelCheckpoint 19 | from keras.optimizers import Adam 20 | from keras.utils import multi_gpu_model 21 | 22 | from keras.regularizers import l1,l2, l1_l2 23 | from keras.constraints import MaxNorm 24 | from keras.optimizers import SGD 25 | 26 | from keras.activations import relu 27 | 28 | 29 | import os 30 | import tensorflow as tf 31 | import keras.backend.tensorflow_backend as KTF 32 | 33 | from data.util import Randseq, fill_oh, gen_samples 34 | 35 | input_bp = 600 36 | 37 | batch_size=128 38 | 39 | 40 | seqInput = Input(shape=(input_bp, 4), name='seqInput') 41 | 42 | 43 | 44 | seq = Conv1D(128, 7)(seqInput) 45 | seq = BatchNormalization()(seq) 46 | seq = Activation('relu')(seq) 47 | seq = Conv1D(128, 3)(seq) 48 | seq = BatchNormalization()(seq) 49 | seq = Activation('relu')(seq) 50 | seq = MaxPooling1D(2)(seq) 51 | seq = Conv1D(160, 3)(seq) 52 | seq = BatchNormalization()(seq) 53 | seq = Activation('relu')(seq) 54 | seq = Conv1D(160, 3)(seq) 55 | seq = BatchNormalization()(seq) 56 | seq = Activation('relu')(seq) 57 | seq = MaxPooling1D(2)(seq) 58 | seq = Conv1D(256, 3)(seq) 59 | seq = BatchNormalization()(seq) 60 | seq = Activation('relu')(seq) 61 | seq = Conv1D(256, 3)(seq) 62 | seq = BatchNormalization()(seq) 63 | seq = Activation('relu')(seq) 64 | seq = MaxPooling1D(2)(seq) 65 | seq = Conv1D(384, 3)(seq) 66 | seq = BatchNormalization()(seq) 67 | seq = Activation('relu')(seq) 68 | seq = Conv1D(384, 3)(seq) 69 | seq = BatchNormalization()(seq) 70 | seq = Activation('relu')(seq) 71 | seq = MaxPooling1D(2)(seq) 72 | seq = 
Conv1D(512, 3)(seq) 73 | seq = BatchNormalization()(seq) 74 | seq = Activation('relu')(seq) 75 | seq = Conv1D(512, 3)(seq) 76 | seq = BatchNormalization()(seq) 77 | seq = Activation('relu')(seq) 78 | seq = Flatten()(seq) 79 | seq = Dropout(0.2)(seq) 80 | seq = Dense(768)(seq) 81 | seq = BatchNormalization()(seq) 82 | seq = Activation('relu')(seq) 83 | seq = Dense(164)(seq) 84 | seq = BatchNormalization()(seq) 85 | seq = Activation('sigmoid')(seq) 86 | 87 | model = Model(inputs = [seqInput], outputs = [seq]) 88 | 89 | model.load_weights('weight.hdf5') 90 | 91 | 92 | chroms = ['chr'+str(i) for i in range(1,23)] 93 | chroms.append('chrX') 94 | chroms.append('chrY') 95 | 96 | with h5py.File('data/onehot.h5', 'r') as f: 97 | onehot = dict() 98 | for chrom in chroms: 99 | onehot[chrom] = f[chrom][:] 100 | 101 | 102 | 103 | model.compile('adam', loss='binary_crossentropy', metrics=['accuracy']) 104 | 105 | 106 | import pandas as pd 107 | 108 | bed = pd.read_csv('data/encode_roadmap.bed', sep='\t', header=None) 109 | 110 | label = np.zeros((bed.shape[0],164)) 111 | 112 | for i in range(bed.shape[0]): 113 | label[i,np.array(bed.iloc[i,6].split(','),dtype=int)] = 1 114 | 115 | 116 | with h5py.File('data/sample.h5','r') as f: 117 | seltest = f['seltest'][:] 118 | 119 | 120 | test_data = {'sample':bed.iloc[seltest,:], 'label':label[seltest,:],'seq_onehot': onehot } 121 | 122 | test_randseq = Randseq(test_data['sample'].shape[0], True) 123 | 124 | test_steps = int(test_randseq.seqsize / batch_size) 125 | if test_randseq.seqsize != batch_size * test_steps: 126 | test_steps += 1 127 | 128 | sample_generator = gen_samples 129 | 130 | 131 | 132 | test_gen = sample_generator(batchsize=batch_size, 133 | randseq=test_randseq, 134 | data=test_data) 135 | 136 | 137 | result = model.predict_generator(generator=test_gen, 138 | steps = test_steps, 139 | verbose=1) 140 | 141 | 142 | 143 | from sklearn.metrics import average_precision_score 144 | from sklearn.metrics import roc_auc_score 145 | auprc=np.array([average_precision_score(test_data['label'][:,i],result[:,i]) for i in range(test_data['label'].shape[1])]) 146 | auroc=np.array([roc_auc_score(test_data['label'][:,i],result[:,i]) for i in range(test_data['label'].shape[1])]) 147 | 148 | with h5py.File('test.h5','w') as f: 149 | f['test_result'] = result 150 | f['test_label'] = test_data['label'] 151 | f['auprc'] = auprc 152 | f['auroc']=auroc 153 | --------------------------------------------------------------------------------
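A minimal sketch for inspecting the per-task metrics that BD-10.pred.py and the other *.pred.py scripts above write to test.h5. The dataset names follow the layout written by those scripts; the summary statistics chosen here are only illustrative:

import h5py
import numpy as np

# Load the per-task evaluation metrics saved by the prediction script.
with h5py.File('test.h5', 'r') as f:
    auprc = f['auprc'][:]
    auroc = f['auroc'][:]

# Summarize performance across the output tasks (164 for the Basset-style models).
print('AUPRC: median %.3f (min %.3f, max %.3f)' % (np.median(auprc), auprc.min(), auprc.max()))
print('AUROC: median %.3f (min %.3f, max %.3f)' % (np.median(auroc), auroc.min(), auroc.max()))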