├── .gitignore ├── .gitmodules ├── PRIAM_search.jar ├── README.md ├── data ├── config_template.ini ├── maps │ ├── efclasses.mapping │ ├── pf-EC-superseded.mapping │ ├── pf-metacyc-RXN-EC.mapping │ ├── pf-official-EC-metacyc-RXN.mapping │ └── pf-to-remove-non-small-molecule-metabolism.mapping └── weights │ ├── blast │ ├── deepec │ └── priam ├── e2p2.py ├── pipeline └── weight.py ├── src ├── __init__.py ├── bash │ ├── __init__.py │ └── pipeline.py ├── definitions.py ├── e2p2 │ ├── __init__.py │ ├── classifiers │ │ ├── __init__.py │ │ ├── blast.py │ │ ├── deepec.py │ │ └── priam.py │ └── ensembles │ │ ├── __init__.py │ │ └── max_weight_absolute_threshold.py └── lib │ ├── __init__.py │ ├── classifier.py │ ├── config.py │ ├── ensemble.py │ ├── function_class.py │ ├── process.py │ ├── read.py │ └── write.py └── version /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *__pycache__* 3 | *.idea* 4 | test* 5 | tools* 6 | *_test* 7 | utils* 8 | .vscode* 9 | .virtualenvs* 10 | config.ini 11 | *.log 12 | 13 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "deepec"] 2 | path = deepec 3 | url = https://github.com/bxuecarnegie/deepec.git 4 | branch = main 5 | -------------------------------------------------------------------------------- /PRIAM_search.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carnegie/E2P2/4d9150bf68f34e8bcf3a363c379a349ba5f2ecb0/PRIAM_search.jar -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ensemble Enzyme Prediction Pipeline (E2P2) 2 | 3 | The Ensemble Enzyme Prediction Pipeline (E2P2) annotates protein sequences with Enzyme Function classes comprised of 
full, four-part Enzyme Commission numbers and MetaCyc reaction identifiers. It is the enzyme annotation pipeline used to generate the species-specific metabolic databases at the [Plant Metabolic Network](https://www.plantcyc.org) since 2013. E2P2 systematically integrates results from two molecular function annotation algorithms using an ensemble classification scheme. For a given genome, all protein sequences are submitted as individual queries against the base-level annotation methods. 4 | 5 | Because PRIAM is no longer developed or available, we have replaced it with [DeepEC](https://bitbucket.org/kaistsystemsbiology/deepec/src/master/) and moved the current E2P2 version to "v5". The previous version is still available from the "v4" branch. 6 | 7 | ## Getting Started 8 | The following instructions are for users to set up the E2P2 pipeline on a Unix machine and start running, developing, and/or testing the pipeline. 9 | 10 | ### Prerequisites 11 | This pipeline is tested on Ubuntu, CentOS, and macOS, and should run on all Linux distributions. 12 | * [Python 3](https://www.python.org/downloads/) 13 | * [NCBI BLAST+](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download) 14 | * ~~[Java 1.5 or above](https://www.oracle.com/technetwork/java/javase/downloads/index.html)~~ 15 | ~~**PRIAM is known to have problems with Java 11; we recommend version 8 instead.~~ 16 | * ~~[PRIAM_Search utility V2](http://priam.prabi.fr/REL_JAN18/index_jan18.html)~~ 17 | ~~**A copy is temporarily included because the source website is down.~~ 18 | * [DeepEC](https://github.com/bxuecarnegie/deepec) 19 | A fork of DeepEC is provided as a submodule; check "environment.yml" for its prerequisites.
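A quick way to confirm the external tools the pipeline shells out to are reachable on `PATH` (a sketch; `blastp` and `java` only resolve if the corresponding tools are installed, and `java` is needed only for the legacy PRIAM path):

```
# Report which of E2P2's external tools are on PATH.
for tool in python3 blastp java; do
  if command -v "$tool" >/dev/null 2>&1; then
    echo "$tool: $(command -v "$tool")"
  else
    echo "$tool: NOT FOUND"
  fi
done
```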
20 | 21 | ### Installing 22 | 23 | Download E2P2 from [E2P2 at GitHub](https://github.com/carnegie/E2P2) 24 | 25 | **The E2P2 pipeline currently does not support white space or other illegal symbols in file paths.** 26 | 27 | ``` 28 | git clone --recurse-submodules https://github.com/carnegie/E2P2.git 29 | ``` 30 | * If the "deepec" folder is empty, run the following command in the 'E2P2' folder 31 | ``` 32 | git submodule update --init 33 | ``` 34 | 35 | 36 | Download the Reference Protein Sequence Dataset (RPSD) from https://ftp.dpb.carnegiescience.edu/rpsd/ 37 | * Version release_2024_07_31 and up. 38 | 39 | 40 | Unzip and extract the RPSD data (the tar flags differ depending on the file extension) 41 | ``` 42 | tar -xzf blastdb.tar.gz 43 | tar -xzf deepec.tar.gz 44 | tar -xzf weights.tar.gz 45 | tar -xzf maps.tar.gz 46 | 47 | tar -xjf blastdb.tar.bz2 48 | tar -xjf deepec.tar.bz2 49 | tar -xjf weights.tar.bz2 50 | tar -xjf maps.tar.bz2 51 | 52 | # To use just the required arguments with a different version of the RPSD data, 53 | # replace the "maps" and "weights" folders under "data" in "E2P2" with the extracted folders 54 | # of the same name, and replace the files under "deepec/deepec/data" with the files in deepec.tar.gz. 55 | ``` 56 | 57 | ## Usage Example 58 | 59 | ### config.ini 60 | In the project's data folder is a "config_template.ini".
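Concretely, setting up the configuration is a copy-and-edit step from the cloned E2P2 root (the guard simply keeps the command a no-op when run elsewhere):

```
# From the E2P2 root: create a working config.ini from the shipped template.
if [ -f data/config_template.ini ]; then
  cp data/config_template.ini config.ini
fi
# Then open config.ini and fill in blast_db, deepec_path, and the other /PATH/TO/ entries.
```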
61 | 62 | > **Users need to copy the template to the root folder as "config.ini" and edit the environment variables.** 63 | 64 | python3 e2p2.py [-h] --input /PATH/TO/Araport11_genes.201606.pep.repr.fasta -o PATH/TO/output.pf e2p2 --threshold 0.5 65 | 66 | ### Required Arguments 67 | --input INPUT_FILE, -i INPUT_FILE: Path to the input protein sequence file 68 | 69 | e2p2: subparser argument, used to separate pipeline arguments from classifier/ensemble arguments 70 | 71 | ### Optional Arguments Before "e2p2" 72 | -h, --help show this help message and exit 73 | --input INPUT_FILE, -i INPUT_FILE 74 | Path to the input protein sequence file 75 | --protein_gene PROTEIN_GENE_PATH, -pg PROTEIN_GENE_PATH 76 | Provide a protein-to-gene map. This can be used to generate a FASTA file with splice variants removed and to output the final version of the E2P2 results. 77 | --remove_splice_variants, -rm 78 | Argument flag to remove splice variants. 79 | --output OUTPUT_PATH, -o OUTPUT_PATH 80 | Path to the output file. By default it is placed in the same folder as the input. 81 | --temp_folder TEMP_FOLDER, -tf TEMP_FOLDER 82 | Specify the location of the temp folder. By default it is in the same directory as the output. 83 | --log LOG_PATH, -l LOG_PATH 84 | Specify the location of the log file. By default it is "runE2P2.log" in the temp folder. 85 | --verbose {0,1}, -v {0,1} 86 | Verbose level of log output. Default is 0. 87 | 0: only step information is logged 88 | 1: all information is logged 89 | 90 | ### Optional Arguments After "e2p2" 91 | -h, --help: Show help message and exit 92 | 93 | --blastp BLASTP, -b BLASTP 94 | Command of or path to BLAST+ "blastp". 95 | --num_threads NUM_THREADS, -n NUM_THREADS 96 | Number of threads for running "blastp". 97 | --blast_db BLAST_DB, -bd BLAST_DB 98 | Path to the RPSD BLAST database.
For example, "/PATH/TO/FOLDER/rpsd.fa", where the following files can be found in 99 | /PATH/TO/FOLDER: rpsd.fa.phr; rpsd.fa.pin; rpsd.fa.psq 100 | --blast_e_value BLAST_E_VALUE, -be BLAST_E_VALUE 101 | Blastp e-value cutoff 102 | --blast_weight BLAST_WEIGHT, -bw BLAST_WEIGHT 103 | Path to the weight file for the BLAST classifier 104 | --python_path PYTHON_PATH, -py PYTHON_PATH 105 | Command of or path to "python". 106 | --deepec_path DEEPEC_PATH, -dp DEEPEC_PATH 107 | Path to "deepec.py". 108 | --ec_to_ef_mapping_path EC_TO_EF_MAPPING_PATH, -ee EC_TO_EF_MAPPING_PATH 109 | Path to the mapping file from ECs to EFs 110 | --threshold THRESHOLD, -t THRESHOLD 111 | Threshold for voting results. Default is 0.5. 112 | 113 | ### Additional information 114 | - Input protein sequences should be in FASTA format. 115 | - Headers of the FASTA file should begin with the sequence ID followed by a space or '|'. 116 | For example: >AT1G01010.1 | NAC domain containing protein 1 | Chr1:3760-5630 FORWARD LENGTH=429 | 201606 117 | 118 | ## Authors 119 | 120 | * **Bo Xue** - [bxuecarnegie](https://github.com/bxuecarnegie) 121 | 122 | ### Previous versions 123 | * **Chuan Wang** - [Chuan Wang](https://github.com/grittyy) 124 | * **Lee Chae** 125 | 126 | ## Institution 127 | Plant Metabolic Network 128 | Department of Plant Biology 129 | Carnegie Institution for Science 130 | Stanford, CA 94305 131 | 132 | 133 | ## Acknowledgments 134 | 135 | * Special Thanks to 136 | * Thomas Bernard - *PRIAM* 137 | * Ludo Cottret - [lipme](https://github.com/lipme) - *Singularity Container For Previous Versions* 138 | -------------------------------------------------------------------------------- /data/config_template.ini: -------------------------------------------------------------------------------- 1 | [Mapping] 2 | efclasses = data/maps/efclasses.mapping 3 | ec_superseded = data/maps/pf-EC-superseded.mapping 4 | metacyc_rxn_ec = data/maps/pf-metacyc-RXN-EC.mapping 5 | official_ec_metacyc_rxn = 
data/maps/pf-official-EC-metacyc-RXN.mapping 6 | to_remove_non_small_molecule_metabolism = data/maps/pf-to-remove-non-small-molecule-metabolism.mapping 7 | 8 | [Ensembles] 9 | ; Name matches the following sections 10 | ensemble1 = MaxWeightAbsoluteThreshold 11 | 12 | [MaxWeightAbsoluteThreshold] 13 | class = src/e2p2/ensembles/max_weight_absolute_threshold.py 14 | threshold = 0.5 15 | 16 | [Classifiers] 17 | ; Name matches the following sections 18 | classifier1 = BLAST 19 | ; classifier2 = PRIAM 20 | classifier3 = DEEPEC 21 | 22 | [BLAST] 23 | blastp = blastp 24 | blast_db = /PATH/TO/rpsd.v5.2.ef.fasta 25 | num_threads = 4 26 | blast_e_value = 1e-2 27 | ; Below sets up the classifier 28 | class = src/e2p2/classifiers/blast.py 29 | weight = data/weights/blast 30 | command = ${BLAST:blastp} -db ${BLAST:blast_db} -num_threads ${BLAST:num_threads} -query ${IO:query} -out ${IO:blast} -outfmt 6 31 | 32 | ; resume: fr (resume) or fn (new) 33 | ; -n ${PRIAM:timestamp} requires a workaround 34 | [PRIAM] 35 | java_path = /PATH/TO/1.8.0.382/bin/java 36 | priam_search = /PATH/TO/PRIAM_search.jar 37 | blast_bin = /PATH/TO/blast/2.15.0/bin 38 | priam_profiles = /PATH/TO/release_2019-03-07/profiles 39 | resume = fn 40 | xms = 3072m 41 | xmx = 3072m 42 | ; Below sets up the classifier 43 | class = src/e2p2/classifiers/priam.py 44 | weight = data/weights/priam 45 | command = ${PRIAM:java_path} -Xms${PRIAM:xms} -Xmx${PRIAM:xmx} -jar ${PRIAM:priam_search} --bd ${PRIAM:blast_bin} --bp -n ${IO:timestamp} -i ${IO:query} -p ${PRIAM:priam_profiles} --bh -o ${IO:priam} --${PRIAM:resume} 46 | 47 | [DEEPEC] 48 | python_path = python 49 | deepec_path = /PATH/TO/deepec/deepec.py 50 | ec_to_ef_mapping_path = /PATH/TO/deepec/data/ec_to_ef.mapping 51 | 52 | ; Below sets up the classifier 53 | class = src/e2p2/classifiers/deepec.py 54 | weight = data/weights/deepec 55 | command = ${DEEPEC:python_path} ${DEEPEC:deepec_path} -i ${IO:query} -o ${IO:deepec} 
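The `${SECTION:option}` references in the `command` lines above match the syntax of Python's `configparser` with `ExtendedInterpolation` (an assumption about how the pipeline expands them; E2P2's actual loader lives in `src/lib/config.py`, and this sketch only demonstrates the interpolation syntax on a trimmed-down stand-in for `config.ini`):

```python
from configparser import ConfigParser, ExtendedInterpolation

# A trimmed-down stand-in for config.ini using the same ${SECTION:option} syntax.
cfg = ConfigParser(interpolation=ExtendedInterpolation())
cfg.read_string("""
[IO]
query = input.fasta
blast = blast.out

[BLAST]
blastp = blastp
blast_db = /PATH/TO/rpsd.v5.2.ef.fasta
num_threads = 4
command = ${BLAST:blastp} -db ${BLAST:blast_db} -num_threads ${BLAST:num_threads} -query ${IO:query} -out ${IO:blast} -outfmt 6
""")

# Reading the option triggers interpolation, yielding the fully expanded command.
print(cfg["BLAST"]["command"])
# -> blastp -db /PATH/TO/rpsd.v5.2.ef.fasta -num_threads 4 -query input.fasta -out blast.out -outfmt 6
```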
-------------------------------------------------------------------------------- /data/maps/pf-to-remove-non-small-molecule-metabolism.mapping: -------------------------------------------------------------------------------- 1 | 1.14.11.33 2 | 1.4.3.13 3 | 1.6.4.4-RXN 4 | 1.8.1.8 5 | 1.8.4.11 6 | 1.8.4.12 7 | 1.8.4.12-RXN 8 | 1.8.4.2 9 | 2.1.1.100 10 | 2.1.1.100-RXN 11 | 2.1.1.113 12 | 2.1.1.113-RXN 13 | 2.1.1.125 14 | 2.1.1.125-RXN 15 | 2.1.1.126 16 | 2.1.1.126-RXN 17 | 2.1.1.127-RXN 18 | 2.1.1.166 19 | 2.1.1.167 20 | 2.1.1.168 21 | 2.1.1.170 22 | 2.1.1.171 23 | 2.1.1.172 24 | 2.1.1.173 25 | 2.1.1.174 26 | 2.1.1.176 27 | 2.1.1.177 28 | 2.1.1.178 29 | 2.1.1.179 30 | 2.1.1.180 31 | 2.1.1.181 32 | 2.1.1.182 33 | 2.1.1.183 34 | 2.1.1.184 35 | 2.1.1.185 36 | 2.1.1.186 37 | 2.1.1.187 38 | 2.1.1.188 39 | 2.1.1.189 40 | 2.1.1.190 41 | 2.1.1.191 42 | 2.1.1.192 43 | 2.1.1.193 44 | 2.1.1.198 45 | 2.1.1.199 46 | 2.1.1.200 47 | 2.1.1.202 48 | 2.1.1.203 49 | 2.1.1.204 50 | 2.1.1.205 51 | 2.1.1.206 52 | 2.1.1.207 53 | 2.1.1.208 54 | 2.1.1.209 55 | 2.1.1.211 56 | 2.1.1.213 57 | 2.1.1.214 58 | 2.1.1.215 59 | 2.1.1.216 60 | 2.1.1.217 61 | 2.1.1.218 62 | 2.1.1.219 63 | 2.1.1.220 64 | 2.1.1.221 65 | 2.1.1.223 66 | 2.1.1.224 67 | 2.1.1.225 68 | 2.1.1.226 69 | 2.1.1.227 70 | 2.1.1.228 71 | 2.1.1.229 72 | 2.1.1.230 73 | 2.1.1.233 74 | 2.1.1.242 75 | 2.1.1.244 76 | 2.1.1.245 77 | 2.1.1.256 78 | 2.1.1.257 79 | 2.1.1.258 80 | 2.1.1.260 81 | 2.1.1.264 82 | 2.1.1.266 83 | 2.1.1.268 84 | 2.1.1.282 85 | 2.1.1.286 86 | 2.1.1.287 87 | 2.1.1.290 88 | 2.1.1.296 89 | 2.1.1.298 90 | 2.1.1.299 91 | 2.1.1.33 92 | 2.1.1.34 93 | 2.1.1.34-RXN 94 | 2.1.1.35 95 | 2.1.1.37 96 | 2.1.1.43 97 | 2.1.1.55 98 | 2.1.1.56 99 | 2.1.1.57 100 | 2.1.1.57-RXN 101 | 2.1.1.59 102 | 2.1.1.59-RXN 103 | 2.1.1.61 104 | 2.1.1.62 105 | 2.1.1.62-RXN 106 | 2.1.1.63 107 | 2.1.1.63-RXN 108 | 2.1.1.72 109 | 2.1.1.72-RXN 110 | 2.1.1.74 111 | 2.1.1.74-RXN 112 | 2.1.1.77 113 | 2.1.1.77-RXN 114 | 2.1.1.80 115 | 2.1.1.85 116 | 
2.1.1.85-RXN 117 | 2.1.1.fb 118 | 2.1.1.fc 119 | 2.1.1.fd 120 | 2.1.1.fe 121 | 2.1.1.ff 122 | 2.3.1.128-RXN 123 | 2.3.1.225 124 | 2.3.1.231 125 | 2.3.1.48 126 | 2.3.1.97-RXN 127 | 2.3.2.13 128 | 2.3.2.13-RXN 129 | 2.4.1.101 130 | 2.4.1.101-RXN 131 | 2.4.1.102 132 | 2.4.1.102-RXN 133 | 2.4.1.109-RXN 134 | 2.4.1.113 135 | 2.4.1.113-RXN 136 | 2.4.1.119-RXN 137 | 2.4.1.143 138 | 2.4.1.143-RXN 139 | 2.4.1.144 140 | 2.4.1.144-RXN 141 | 2.4.1.145 142 | 2.4.1.145-RXN 143 | 2.4.1.146 144 | 2.4.1.146-RXN 145 | 2.4.1.147 146 | 2.4.1.147-RXN 147 | 2.4.1.148 148 | 2.4.1.148-RXN 149 | 2.4.1.155 150 | 2.4.1.155-RXN 151 | 2.4.1.186 152 | 2.4.1.201 153 | 2.4.1.201-RXN 154 | 2.4.1.229 155 | 2.4.1.229-RXN 156 | 2.4.1.255 157 | 2.4.1.312 158 | 2.4.1.41 159 | 2.4.1.41-RXN 160 | 2.4.1.94 161 | 2.4.1.94-RXN 162 | 2.4.2.26 163 | 2.4.2.26-RXN 164 | 2.5.1.58 165 | 2.5.1.58-RXN 166 | 2.5.1.59 167 | 2.5.1.60 168 | 2.5.1.60-RXN 169 | 2.7.10.1 170 | 2.7.10.1-RXN 171 | 2.7.10.2 172 | 2.7.10.2-RXN 173 | 2.7.1.109-RXN 174 | 2.7.11.1 175 | 2.7.11.10 176 | 2.7.11.10-RXN 177 | 2.7.11.11 178 | 2.7.11.11-RXN 179 | 2.7.11.12 180 | 2.7.11.12-RXN 181 | 2.7.11.13 182 | 2.7.11.13-RXN 183 | 2.7.11.14 184 | 2.7.11.14-RXN 185 | 2.7.11.15 186 | 2.7.11.15-RXN 187 | 2.7.11.16 188 | 2.7.11.16-RXN 189 | 2.7.11.17 190 | 2.7.11.17-RXN 191 | 2.7.11.18 192 | 2.7.11.18-RXN 193 | 2.7.11.19 194 | 2.7.11.19-RXN 195 | 2.7.11.2 196 | 2.7.11.20 197 | 2.7.11.20-RXN 198 | 2.7.11.21 199 | 2.7.11.21-RXN 200 | 2.7.11.22 201 | 2.7.11.22-RXN 202 | 2.7.11.23 203 | 2.7.11.24 204 | 2.7.11.24-RXN 205 | 2.7.11.25 206 | 2.7.11.25-RXN 207 | 2.7.11.26 208 | 2.7.11.27 209 | 2.7.11.27-RXN 210 | 2.7.11.28 211 | 2.7.11.29 212 | 2.7.11.29-RXN 213 | 2.7.11.2-RXN 214 | 2.7.11.3 215 | 2.7.11.30 216 | 2.7.11.30-RXN 217 | 2.7.11.31 218 | 2.7.11.32 219 | 2.7.11.33 220 | 2.7.11.4 221 | 2.7.11.4-RXN 222 | 2.7.11.5 223 | 2.7.11.6 224 | 2.7.1.160-RXN 225 | 2.7.11.6-RXN 226 | 2.7.11.7 227 | 2.7.11.7-RXN 228 | 2.7.11.8 229 | 2.7.11.8-RXN 230 | 2.7.11.9 231 
| 2.7.11.9-RXN 232 | 2.7.12.1 233 | 2.7.12.1-RXN 234 | 2.7.12.2 235 | 2.7.12.2-RXN 236 | 2.7.13.1 237 | 2.7.13.1-RXN 238 | 2.7.13.2 239 | 2.7.13.2-RXN 240 | 2.7.13.3 241 | 2.7.13.3-RXN 242 | 2.7.1.78 243 | 2.7.7.19 244 | 2.7.7.48 245 | 2.7.7.49 246 | 2.7.7.50 247 | 2.7.7.56 248 | 2.7.7.6 249 | 2.7.7.7 250 | 2.7.7.8 251 | 2.7.7.8-RXN 252 | 3.1.11.1 253 | 3.1.11.1-RXN 254 | 3.1.11.2 255 | 3.1.11.2-RXN 256 | 3.1.11.3 257 | 3.1.11.3-RXN 258 | 3.1.11.4 259 | 3.1.11.4-RXN 260 | 3.1.11.5 261 | 3.1.11.6 262 | 3.1.11.6-RXN 263 | 3.1.12.1 264 | 3.1.13.1 265 | 3.1.13.1-RXN 266 | 3.1.13.2 267 | 3.1.13.2-RXN 268 | 3.1.13.3 269 | 3.1.13.3-RXN 270 | 3.1.13.4 271 | 3.1.13.4-RXN 272 | 3.1.13.5 273 | 3.1.13.5-RXN 274 | 3.1.14.1 275 | 3.1.14.1-RXN 276 | 3.1.15.1 277 | 3.1.15.1-RXN 278 | 3.1.16.1 279 | 3.1.16.1-RXN 280 | 3.1.21.1 281 | 3.1.21.1-RXN 282 | 3.1.21.2 283 | 3.1.21.2-RXN 284 | 3.1.21.3 285 | 3.1.21.3-RXN 286 | 3.1.21.4 287 | 3.1.21.4-RXN 288 | 3.1.21.5 289 | 3.1.21.5-RXN 290 | 3.1.21.6 291 | 3.1.21.6-RXN 292 | 3.1.21.7 293 | 3.1.21.7-RXN 294 | 3.1.21.8 295 | 3.1.21.9 296 | 3.1.22.1 297 | 3.1.22.1-RXN 298 | 3.1.22.2 299 | 3.1.2.22-RXN 300 | 3.1.22.2-RXN 301 | 3.1.22.4 302 | 3.1.22.4-RXN 303 | 3.1.22.5 304 | 3.1.22.5-RXN 305 | 3.1.25.1 306 | 3.1.25.1-RXN 307 | 3.1.26 308 | 3.1.26.1 309 | 3.1.26.10 310 | 3.1.26.10-RXN 311 | 3.1.26.11 312 | 3.1.26.11-RXN 313 | 3.1.26.12 314 | 3.1.26.12-RXN 315 | 3.1.26.13 316 | 3.1.26.1-RXN 317 | 3.1.26.2 318 | 3.1.26.2-RXN 319 | 3.1.26.3 320 | 3.1.26.3-RXN 321 | 3.1.26.4 322 | 3.1.26.4-RXN 323 | 3.1.26.5 324 | 3.1.26.5-RXN 325 | 3.1.26.6 326 | 3.1.26.6-RXN 327 | 3.1.26.7 328 | 3.1.26.7-RXN 329 | 3.1.26.8 330 | 3.1.26.8-RXN 331 | 3.1.26.9 332 | 3.1.26.9-RXN 333 | 3.1.27 334 | 3.1.27.1 335 | 3.1.27.10 336 | 3.1.27.10-RXN 337 | 3.1.27.1-RXN 338 | 3.1.27.2 339 | 3.1.27.2-RXN 340 | 3.1.27.3 341 | 3.1.27.3-RXN 342 | 3.1.27.4 343 | 3.1.27.4-RXN 344 | 3.1.27.5 345 | 3.1.27.5-RXN 346 | 3.1.27.6 347 | 3.1.27.6-RXN 348 | 3.1.27.7 349 | 3.1.27.7-RXN 350 | 
3.1.27.8 351 | 3.1.27.8-RXN 352 | 3.1.27.9-RXN 353 | 3.1.30.1 354 | 3.1.30.1-RXN 355 | 3.1.30.2 356 | 3.1.30.2-RXN 357 | 3.1.31.1 358 | 3.1.31.1-RXN 359 | 3.1.3.16 360 | 3.1.3.16-RXN 361 | 3.1.3.17 362 | 3.1.3.33 363 | 3.1.3.43-RXN 364 | 3.1.3.48 365 | 3.1.3.53 366 | 3.1.4.1-RXN 367 | 3.2.1.130-RXN 368 | 3.2.1.143-RXN 369 | 3.2.2.17 370 | 3.2.2.17-RXN 371 | 3.2.2.20 372 | 3.2.2.21-RXN 373 | 3.2.2.22 374 | 3.2.2.22-RXN 375 | 3.2.2.23-RXN 376 | 3.2.2.24-RXN 377 | 3.2.2.27 378 | 3.2.2.29 379 | 3.4.11.1 380 | 3.4.11.10 381 | 3.4.11.10-RXN 382 | 3.4.11.13 383 | 3.4.11.13-RXN 384 | 3.4.11.14 385 | 3.4.11.14-RXN 386 | 3.4.11.15 387 | 3.4.11.15-RXN 388 | 3.4.11.16 389 | 3.4.11.16-RXN 390 | 3.4.11.17 391 | 3.4.11.17-RXN 392 | 3.4.11.18 393 | 3.4.11.18-RXN 394 | 3.4.11.19 395 | 3.4.11.19-RXN 396 | 3.4.11.1-RXN 397 | 3.4.11.2 398 | 3.4.11.20 399 | 3.4.11.20-RXN 400 | 3.4.11.21 401 | 3.4.11.21-RXN 402 | 3.4.11.22 403 | 3.4.11.22-RXN 404 | 3.4.11.23 405 | 3.4.11.24 406 | 3.4.11.24-RXN 407 | 3.4.11.25 408 | 3.4.11.26 409 | 3.4.11.2-RXN 410 | 3.4.11.3 411 | 3.4.11.3-RXN 412 | 3.4.11.4 413 | 3.4.11.4-RXN 414 | 3.4.11.5 415 | 3.4.11.5-RXN 416 | 3.4.11.6 417 | 3.4.11.6-RXN 418 | 3.4.11.7 419 | 3.4.11.7-RXN 420 | 3.4.11.9 421 | 3.4.11.9-RXN 422 | 3.4.14.10-RXN 423 | 3.4.14.1-RXN 424 | 3.4.14.2-RXN 425 | 3.4.14.4-RXN 426 | 3.4.14.5 427 | 3.4.14.5-RXN 428 | 3.4.15.1 429 | 3.4.15.1-RXN 430 | 3.4.15.5 431 | 3.4.15.5-RXN 432 | 3.4.16.2 433 | 3.4.16.2-RXN 434 | 3.4.16.4 435 | 3.4.16.4-RXN 436 | 3.4.16.5 437 | 3.4.16.5-RXN 438 | 3.4.16.6 439 | 3.4.16.6-RXN 440 | 3.4.17 441 | 3.4.17.1 442 | 3.4.17.10 443 | 3.4.17.12 444 | 3.4.17.12-RXN 445 | 3.4.17.13 446 | 3.4.17.13-RXN 447 | 3.4.17.14 448 | 3.4.17.14-RXN 449 | 3.4.17.15 450 | 3.4.17.15-RXN 451 | 3.4.17.16 452 | 3.4.17.16-RXN 453 | 3.4.17.17 454 | 3.4.17.17-RXN 455 | 3.4.17.18 456 | 3.4.17.18-RXN 457 | 3.4.17.19 458 | 3.4.17.19-RXN 459 | 3.4.17.2 460 | 3.4.17.20 461 | 3.4.17.20-RXN 462 | 3.4.17.21 463 | 3.4.17.21-RXN 464 | 3.4.17.22 465 | 
3.4.17.22-RXN 466 | 3.4.17.3 467 | 3.4.17.4 468 | 3.4.17.6 469 | 3.4.17.8 470 | 3.4.17.8-RXN 471 | 3.4.18.1 472 | 3.4.18.1-RXN 473 | 3.4.19.12-RXN 474 | 3.4.19.5-RXN 475 | 3.4.19.9-RXN 476 | 3.4.21 477 | 3.4.21.1 478 | 3.4.21.10 479 | 3.4.21.100 480 | 3.4.21.100-RXN 481 | 3.4.21.101 482 | 3.4.21.101-RXN 483 | 3.4.21.102 484 | 3.4.21.102-RXN 485 | 3.4.21.103 486 | 3.4.21.104 487 | 3.4.21.104-RXN 488 | 3.4.21.105 489 | 3.4.21.105-RXN 490 | 3.4.21.106 491 | 3.4.21.106-RXN 492 | 3.4.21.107 493 | 3.4.21.107-RXN 494 | 3.4.21.108 495 | 3.4.21.108-RXN 496 | 3.4.21.109 497 | 3.4.21.109-RXN 498 | 3.4.21.10-RXN 499 | 3.4.21.110 500 | 3.4.21.110-RXN 501 | 3.4.21.111 502 | 3.4.21.111-RXN 503 | 3.4.21.112 504 | 3.4.21.112-RXN 505 | 3.4.21.113 506 | 3.4.21.113-RXN 507 | 3.4.21.114 508 | 3.4.21.114-RXN 509 | 3.4.21.115 510 | 3.4.21.115-RXN 511 | 3.4.21.116 512 | 3.4.21.116-RXN 513 | 3.4.21.117 514 | 3.4.21.117-RXN 515 | 3.4.21.118 516 | 3.4.21.118-RXN 517 | 3.4.21.119 518 | 3.4.21.119-RXN 519 | 3.4.21.12 520 | 3.4.21.120 521 | 3.4.21.120-RXN 522 | 3.4.21.121 523 | 3.4.21.12-RXN 524 | 3.4.21.19 525 | 3.4.21.19-RXN 526 | 3.4.21.1-RXN 527 | 3.4.21.2 528 | 3.4.21.20 529 | 3.4.21.20-RXN 530 | 3.4.21.21 531 | 3.4.21.21-RXN 532 | 3.4.21.22 533 | 3.4.21.22-RXN 534 | 3.4.21.25 535 | 3.4.21.25-RXN 536 | 3.4.21.26 537 | 3.4.21.26-RXN 538 | 3.4.21.27 539 | 3.4.21.27-RXN 540 | 3.4.21.2-RXN 541 | 3.4.21.3 542 | 3.4.21.32 543 | 3.4.21.32-RXN 544 | 3.4.21.34 545 | 3.4.21.34-RXN 546 | 3.4.21.35 547 | 3.4.21.35-RXN 548 | 3.4.21.36 549 | 3.4.21.36-RXN 550 | 3.4.21.37 551 | 3.4.21.37-RXN 552 | 3.4.21.38 553 | 3.4.21.38-RXN 554 | 3.4.21.39 555 | 3.4.21.39-RXN 556 | 3.4.21.3-RXN 557 | 3.4.21.4 558 | 3.4.21.41 559 | 3.4.21.41-RXN 560 | 3.4.21.42 561 | 3.4.21.42-RXN 562 | 3.4.21.43 563 | 3.4.21.45 564 | 3.4.21.45-RXN 565 | 3.4.21.46 566 | 3.4.21.46-RXN 567 | 3.4.21.47 568 | 3.4.21.47-RXN 569 | 3.4.21.48 570 | 3.4.21.48-RXN 571 | 3.4.21.49 572 | 3.4.21.49-RXN 573 | 3.4.21.4-RXN 574 | 3.4.21.5 575 | 
3.4.21.50 576 | 3.4.21.50-RXN 577 | 3.4.21.53 578 | 3.4.21.53-RXN 579 | 3.4.21.54 580 | 3.4.21.54-RXN 581 | 3.4.21.55 582 | 3.4.21.55-RXN 583 | 3.4.21.57 584 | 3.4.21.57-RXN 585 | 3.4.21.59 586 | 3.4.21.59-RXN 587 | 3.4.21.5-RXN 588 | 3.4.21.6 589 | 3.4.21.60 590 | 3.4.21.60-RXN 591 | 3.4.21.61 592 | 3.4.21.61-RXN 593 | 3.4.21.62 594 | 3.4.21.62-RXN 595 | 3.4.21.63 596 | 3.4.21.63-RXN 597 | 3.4.21.64 598 | 3.4.21.64-RXN 599 | 3.4.21.65 600 | 3.4.21.65-RXN 601 | 3.4.21.66 602 | 3.4.21.66-RXN 603 | 3.4.21.67 604 | 3.4.21.67-RXN 605 | 3.4.21.68 606 | 3.4.21.68-RXN 607 | 3.4.21.69 608 | 3.4.21.69-RXN 609 | 3.4.21.6-RXN 610 | 3.4.21.7 611 | 3.4.21.70 612 | 3.4.21.70-RXN 613 | 3.4.21.71 614 | 3.4.21.71-RXN 615 | 3.4.21.72 616 | 3.4.21.72-RXN 617 | 3.4.21.73 618 | 3.4.21.73-RXN 619 | 3.4.21.74 620 | 3.4.21.74-RXN 621 | 3.4.21.75 622 | 3.4.21.75-RXN 623 | 3.4.21.76 624 | 3.4.21.76-RXN 625 | 3.4.21.77 626 | 3.4.21.77-RXN 627 | 3.4.21.78 628 | 3.4.21.78-RXN 629 | 3.4.21.79 630 | 3.4.21.79-RXN 631 | 3.4.21.7-RXN 632 | 3.4.21.80 633 | 3.4.21.80-RXN 634 | 3.4.21.81 635 | 3.4.21.81-RXN 636 | 3.4.21.82 637 | 3.4.21.82-RXN 638 | 3.4.21.83 639 | 3.4.21.83-RXN 640 | 3.4.21.84 641 | 3.4.21.84-RXN 642 | 3.4.21.85 643 | 3.4.21.85-RXN 644 | 3.4.21.86 645 | 3.4.21.86-RXN 646 | 3.4.21.87-RXN 647 | 3.4.21.88 648 | 3.4.21.88-RXN 649 | 3.4.21.89 650 | 3.4.21.89-RXN 651 | 3.4.21.9 652 | 3.4.21.90 653 | 3.4.21.90-RXN 654 | 3.4.21.91 655 | 3.4.21.91-RXN 656 | 3.4.21.92 657 | 3.4.21.92-RXN 658 | 3.4.21.93 659 | 3.4.21.93-RXN 660 | 3.4.21.94 661 | 3.4.21.94-RXN 662 | 3.4.21.95 663 | 3.4.21.95-RXN 664 | 3.4.21.96 665 | 3.4.21.96-RXN 666 | 3.4.21.97 667 | 3.4.21.97-RXN 668 | 3.4.21.98 669 | 3.4.21.98-RXN 670 | 3.4.21.99 671 | 3.4.21.99-RXN 672 | 3.4.21.9-RXN 673 | 3.4.22.1 674 | 3.4.22.10 675 | 3.4.22.10-RXN 676 | 3.4.22.14 677 | 3.4.22.14-RXN 678 | 3.4.22.15 679 | 3.4.22.15-RXN 680 | 3.4.22.16 681 | 3.4.22.16-RXN 682 | 3.4.22.17-RXN 683 | 3.4.22.1-RXN 684 | 3.4.22.2 685 | 3.4.22.24 686 | 
3.4.22.24-RXN 687 | 3.4.22.25 688 | 3.4.22.25-RXN 689 | 3.4.22.26 690 | 3.4.22.26-RXN 691 | 3.4.22.27 692 | 3.4.22.27-RXN 693 | 3.4.22.28 694 | 3.4.22.28-RXN 695 | 3.4.22.29 696 | 3.4.22.29-RXN 697 | 3.4.22.2-RXN 698 | 3.4.22.3 699 | 3.4.22.30 700 | 3.4.22.30-RXN 701 | 3.4.22.31 702 | 3.4.22.31-RXN 703 | 3.4.22.32 704 | 3.4.22.32-RXN 705 | 3.4.22.33 706 | 3.4.22.33-RXN 707 | 3.4.22.34 708 | 3.4.22.34-RXN 709 | 3.4.22.35 710 | 3.4.22.35-RXN 711 | 3.4.22.36 712 | 3.4.22.36-RXN 713 | 3.4.22.37 714 | 3.4.22.37-RXN 715 | 3.4.22.38 716 | 3.4.22.38-RXN 717 | 3.4.22.39 718 | 3.4.22.39-RXN 719 | 3.4.22.3-RXN 720 | 3.4.22.40 721 | 3.4.22.40-RXN 722 | 3.4.22.41 723 | 3.4.22.41-RXN 724 | 3.4.22.42 725 | 3.4.22.42-RXN 726 | 3.4.22.43 727 | 3.4.22.43-RXN 728 | 3.4.22.44 729 | 3.4.22.44-RXN 730 | 3.4.22.45 731 | 3.4.22.45-RXN 732 | 3.4.22.46 733 | 3.4.22.46-RXN 734 | 3.4.22.47 735 | 3.4.22.47-RXN 736 | 3.4.22.48 737 | 3.4.22.49 738 | 3.4.22.50 739 | 3.4.22.50-RXN 740 | 3.4.22.51 741 | 3.4.22.51-RXN 742 | 3.4.22.52 743 | 3.4.22.52-RXN 744 | 3.4.22.53 745 | 3.4.22.53-RXN 746 | 3.4.22.54 747 | 3.4.22.54-RXN 748 | 3.4.22.55 749 | 3.4.22.55-RXN 750 | 3.4.22.56 751 | 3.4.22.56-RXN 752 | 3.4.22.57 753 | 3.4.22.57-RXN 754 | 3.4.22.58 755 | 3.4.22.58-RXN 756 | 3.4.22.59 757 | 3.4.22.59-RXN 758 | 3.4.22.6 759 | 3.4.22.60 760 | 3.4.22.60-RXN 761 | 3.4.22.61 762 | 3.4.22.61-RXN 763 | 3.4.22.62 764 | 3.4.22.62-RXN 765 | 3.4.22.63 766 | 3.4.22.63-RXN 767 | 3.4.22.64 768 | 3.4.22.64-RXN 769 | 3.4.22.65 770 | 3.4.22.65-RXN 771 | 3.4.22.66 772 | 3.4.22.66-RXN 773 | 3.4.22.67 774 | 3.4.22.67-RXN 775 | 3.4.22.68 776 | 3.4.22.68-RXN 777 | 3.4.22.69 778 | 3.4.22.6-RXN 779 | 3.4.22.7 780 | 3.4.22.70 781 | 3.4.22.71 782 | 3.4.22.7-RXN 783 | 3.4.22.8 784 | 3.4.22.8-RXN 785 | 3.4.23.1 786 | 3.4.23.12 787 | 3.4.23.12-RXN 788 | 3.4.23.15 789 | 3.4.23.15-RXN 790 | 3.4.23.16 791 | 3.4.23.16-RXN 792 | 3.4.23.17 793 | 3.4.23.17-RXN 794 | 3.4.23.18 795 | 3.4.23.18-RXN 796 | 3.4.23.19 797 | 3.4.23.19-RXN 798 | 
3.4.23.1-RXN 799 | 3.4.23.2 800 | 3.4.23.20 801 | 3.4.23.20-RXN 802 | 3.4.23.21 803 | 3.4.23.21-RXN 804 | 3.4.23.22 805 | 3.4.23.22-RXN 806 | 3.4.23.23 807 | 3.4.23.23-RXN 808 | 3.4.23.24 809 | 3.4.23.24-RXN 810 | 3.4.23.25 811 | 3.4.23.25-RXN 812 | 3.4.23.26 813 | 3.4.23.26-RXN 814 | 3.4.23.27-RXN 815 | 3.4.23.28 816 | 3.4.23.28-RXN 817 | 3.4.23.29 818 | 3.4.23.29-RXN 819 | 3.4.23.2-RXN 820 | 3.4.23.3 821 | 3.4.23.30 822 | 3.4.23.30-RXN 823 | 3.4.23.31 824 | 3.4.23.31-RXN 825 | 3.4.23.32 826 | 3.4.23.32-RXN 827 | 3.4.23.34 828 | 3.4.23.34-RXN 829 | 3.4.23.35 830 | 3.4.23.35-RXN 831 | 3.4.23.36 832 | 3.4.23.36-RXN 833 | 3.4.23.38 834 | 3.4.23.38-RXN 835 | 3.4.23.39 836 | 3.4.23.39-RXN 837 | 3.4.23.3-RXN 838 | 3.4.23.4 839 | 3.4.23.40 840 | 3.4.23.40-RXN 841 | 3.4.23.41 842 | 3.4.23.41-RXN 843 | 3.4.23.42 844 | 3.4.23.42-RXN 845 | 3.4.23.43 846 | 3.4.23.43-RXN 847 | 3.4.23.44 848 | 3.4.23.44-RXN 849 | 3.4.23.45 850 | 3.4.23.46 851 | 3.4.23.47 852 | 3.4.23.48 853 | 3.4.23.48-RXN 854 | 3.4.23.49 855 | 3.4.23.4-RXN 856 | 3.4.23.5 857 | 3.4.23.50 858 | 3.4.23.51 859 | 3.4.23.52 860 | 3.4.23.5-RXN 861 | 3.4.24 862 | 3.4.24.1 863 | 3.4.24.11 864 | 3.4.24.11-RXN 865 | 3.4.24.12 866 | 3.4.24.12-RXN 867 | 3.4.24.13 868 | 3.4.24.13-RXN 869 | 3.4.24.14 870 | 3.4.24.14-RXN 871 | 3.4.24.15 872 | 3.4.24.15-RXN 873 | 3.4.24.16 874 | 3.4.24.16-RXN 875 | 3.4.24.17 876 | 3.4.24.17-RXN 877 | 3.4.24.18 878 | 3.4.24.18-RXN 879 | 3.4.24.19 880 | 3.4.24.19-RXN 881 | 3.4.24.1-RXN 882 | 3.4.24.20 883 | 3.4.24.20-RXN 884 | 3.4.24.21 885 | 3.4.24.21-RXN 886 | 3.4.24.22 887 | 3.4.24.22-RXN 888 | 3.4.24.23 889 | 3.4.24.23-RXN 890 | 3.4.24.24 891 | 3.4.24.24-RXN 892 | 3.4.24.25 893 | 3.4.24.25-RXN 894 | 3.4.24.26 895 | 3.4.24.26-RXN 896 | 3.4.24.27 897 | 3.4.24.27-RXN 898 | 3.4.24.28 899 | 3.4.24.28-RXN 900 | 3.4.24.29 901 | 3.4.24.29-RXN 902 | 3.4.24.3 903 | 3.4.24.30 904 | 3.4.24.30-RXN 905 | 3.4.24.31 906 | 3.4.24.31-RXN 907 | 3.4.24.32 908 | 3.4.24.32-RXN 909 | 3.4.24.33 910 | 3.4.24.33-RXN 
911 | 3.4.24.34 912 | 3.4.24.34-RXN 913 | 3.4.24.35 914 | 3.4.24.35-RXN 915 | 3.4.24.36 916 | 3.4.24.36-RXN 917 | 3.4.24.37 918 | 3.4.24.37-RXN 919 | 3.4.24.38 920 | 3.4.24.38-RXN 921 | 3.4.24.39 922 | 3.4.24.39-RXN 923 | 3.4.24.3-RXN 924 | 3.4.24.40 925 | 3.4.24.40-RXN 926 | 3.4.24.41 927 | 3.4.24.41-RXN 928 | 3.4.24.42 929 | 3.4.24.42-RXN 930 | 3.4.24.43 931 | 3.4.24.43-RXN 932 | 3.4.24.44 933 | 3.4.24.44-RXN 934 | 3.4.24.45 935 | 3.4.24.45-RXN 936 | 3.4.24.46 937 | 3.4.24.46-RXN 938 | 3.4.24.47 939 | 3.4.24.47-RXN 940 | 3.4.24.48 941 | 3.4.24.48-RXN 942 | 3.4.24.49 943 | 3.4.24.49-RXN 944 | 3.4.24.50 945 | 3.4.24.50-RXN 946 | 3.4.24.51 947 | 3.4.24.51-RXN 948 | 3.4.24.52 949 | 3.4.24.52-RXN 950 | 3.4.24.53 951 | 3.4.24.53-RXN 952 | 3.4.24.54 953 | 3.4.24.54-RXN 954 | 3.4.24.55 955 | 3.4.24.55-RXN 956 | 3.4.24.56 957 | 3.4.24.56-RXN 958 | 3.4.24.57 959 | 3.4.24.57-RXN 960 | 3.4.24.58 961 | 3.4.24.58-RXN 962 | 3.4.24.59 963 | 3.4.24.59-RXN 964 | 3.4.24.6 965 | 3.4.24.60 966 | 3.4.24.60-RXN 967 | 3.4.24.61 968 | 3.4.24.61-RXN 969 | 3.4.24.62 970 | 3.4.24.62-RXN 971 | 3.4.24.63 972 | 3.4.24.63-RXN 973 | 3.4.24.64 974 | 3.4.24.64-RXN 975 | 3.4.24.65 976 | 3.4.24.65-RXN 977 | 3.4.24.66 978 | 3.4.24.66-RXN 979 | 3.4.24.67 980 | 3.4.24.67-RXN 981 | 3.4.24.68 982 | 3.4.24.68-RXN 983 | 3.4.24.69 984 | 3.4.24.69-RXN 985 | 3.4.24.6-RXN 986 | 3.4.24.7 987 | 3.4.24.70 988 | 3.4.24.70-RXN 989 | 3.4.24.71 990 | 3.4.24.71-RXN 991 | 3.4.24.72 992 | 3.4.24.72-RXN 993 | 3.4.24.73 994 | 3.4.24.73-RXN 995 | 3.4.24.74 996 | 3.4.24.74-RXN 997 | 3.4.24.75 998 | 3.4.24.75-RXN 999 | 3.4.24.76 1000 | 3.4.24.76-RXN 1001 | 3.4.24.77 1002 | 3.4.24.77-RXN 1003 | 3.4.24.78 1004 | 3.4.24.79 1005 | 3.4.24.7-RXN 1006 | 3.4.24.80 1007 | 3.4.24.81 1008 | 3.4.24.82 1009 | 3.4.24.83 1010 | 3.4.24.83-RXN 1011 | 3.4.24.84 1012 | 3.4.24.85 1013 | 3.4.24.86 1014 | 3.4.24.86-RXN 1015 | 3.4.24.87 1016 | 3.4.25.1 1017 | 3.4.25.1-RXN 1018 | 3.4.25.2 1019 | 3.5.1.44 1020 | 3.5.1.88 1021 | 3.5.1.88-RXN 1022 | 
3.5.1.98 1023 | 3.5.1.98-RXN 1024 | 3.6.3.23 1025 | 3.6.3.23-RXN 1026 | 3.6.3.43 1027 | 3.6.3.43-RXN 1028 | 3.6.3.48-RXN 1029 | 3.6.3.51 1030 | 3.6.3.51-RXN 1031 | 3.6.3.52 1032 | 3.6.3.52-RXN 1033 | 3.6.4.12 1034 | 3.6.4.13 1035 | 3.6.4.3 1036 | 3.6.4.3-RXN 1037 | 3.6.4.4-RXN 1038 | 3.6.4.5-RXN 1039 | 3.6.5.5-RXN 1040 | 4.1.99.13 1041 | 4.2.99.18 1042 | 4.2.99.18-RXN 1043 | 4.6.1.16 1044 | 5.2.1.8 1045 | 5.3.4.1 1046 | 5.3.4.1-RXN 1047 | 5.4.99.19 1048 | 5.99.1.2 1049 | 5.99.1.2-RXN 1050 | 5.99.1.3 1051 | 5.99.1.3-RXN 1052 | 6.1.1.23-RXN 1053 | 6.2.1.i 1054 | 6.3.2.19 1055 | 6.3.2.25-RXN 1056 | 6.5.1.1 1057 | 6.5.1.2 1058 | 6.5.1.3 1059 | 6.5.1.4 1060 | 6.5.1.5 1061 | ACYLAMINOACYL-PEPTIDASE-RXN 1062 | ALANINE-CARBOXYPEPTIDASE-RXN 1063 | ALANINE--TRNA-LIGASE-RXN 1064 | AMINOCYL-TRNA-HYDROLASE-RXN 1065 | ARGININE--TRNA-LIGASE-RXN 1066 | ARGINYLTRANSFERASE-RXN 1067 | ASPARAGINE--TRNA-LIGASE-RXN 1068 | ASPARTATE--TRNA-LIGASE-RXN 1069 | CARBOXYPEPTIDASE-A-RXN 1070 | CARBOXYPEPTIDASE-B-RXN 1071 | CARBOXYPEPTIDASE-H-RXN 1072 | CHEBDEAMID-RXN 1073 | CHER-RXN 1074 | CYSTEINE--TRNA-LIGASE-RXN 1075 | DEOXYRIBODIPYRIMIDINE-PHOTOLYASE-RXN 1076 | DEPHOSPHO-REDUCTASE-KINASE-KINASE-RXN 1077 | DISULISOM-RXN 1078 | DNA-CYTOSINE-5--METHYLTRANSFERASE-RXN 1079 | DNA-DIRECTED-DNA-POLYMERASE-RXN 1080 | DNA-DIRECTED-RNA-POLYMERASE-RXN 1081 | DNA-LIGASE-ATP-RXN 1082 | DNA-LIGASE-NAD+-RXN 1083 | DNA-NUCLEOTIDYLEXOTRANSFERASE-RXN 1084 | DOLICHYLDIPHOSPHATASE-RXN 1085 | |EC-3.4.21.b| 1086 | ENTDB-RXN 1087 | GLURS-RXN 1088 | GLUTAMINE--TRNA-LIGASE-RXN 1089 | GLYCINE--TRNA-LIGASE-RXN 1090 | GLYCOGENIN-GLUCOSYLTRANSFERASE-RXN 1091 | GLY-X-CARBOXYPEPTIDASE-RXN 1092 | HISTIDINE--TRNA-LIGASE-RXN 1093 | HISTONE-ACETYLTRANSFERASE-RXN 1094 | HISTONE-LYSINE-N-METHYLTRANSFERASE-RXN 1095 | ISOLEUCINE--TRNA-LIGASE-RXN 1096 | LEUCINE--TRNA-LIGASE-RXN 1097 | LYSINEARGININE-CARBOXYPEPTIDASE-RXN 1098 | LYSINE--TRNA-LIGASE-RXN 1099 | M7G5PPPN-PYROPHOSPHATASE-RXN 1100 | METHCOCLTH-RXN 1101 | 
METHIONINE--TRNA-LIGASE-RXN 1102 | METHIONYL-TRNA-FORMYLTRANSFERASE-RXN 1103 | MRNA-GUANINE-N7--METHYLTRANSFERASE-RXN 1104 | MRNA-GUANYLYLTRANSFERASE-RXN 1105 | MYOSIN-LIGHT-CHAIN-PHOSPHATASE-RXN 1106 | NAD+-ADP-RIBOSYLTRANSFERASE-RXN 1107 | PEPTIDE-ALPHA-N-ACETYLTRANSFERASE-RXN 1108 | PEPTIDYLAMIDOGLYCOLATE-LYASE-RXN 1109 | PEPTIDYLPROLYL-ISOMERASE-RXN 1110 | PHENYLALANINE--TRNA-LIGASE-RXN 1111 | PHOSICITDEHASE-RXN 1112 | PHOSPHORYLASE-PHOSPHATASE-RXN 1113 | POLYNUCLEOTIDE-3-PHOSPHATASE-RXN 1114 | POLYNUCLEOTIDE-5-HYDROXYL-KINASE-RXN 1115 | POLYNUCLEOTIDE-5-PHOSPHATASE-RXN 1116 | POLYNUCLEOTIDE-ADENYLYLTRANSFERASE-RXN 1117 | PRODISULFREDUCT-RXN 1118 | PROLINE--TRNA-LIGASE-RXN 1119 | PROTEIN-KINASE-RXN 1120 | PROTEIN-LYSINE-6-OXIDASE-RXN 1121 | PROTEIN-TYROSINE-PHOSPHATASE-RXN 1122 | PROTEIN-TYROSINE-SULFOTRANSFERASE-RXN 1123 | PSEUDOURIDYLATE-SYNTHASE-RXN 1124 | PYROGLUTAMYL-PEPTIDASE-I-RXN 1125 | #Reaction ID 1126 | RNA-3-PHOSPHATE-CYCLASE-RXN 1127 | RNA-DIRECTED-DNA-POLYMERASE-RXN 1128 | RNA-DIRECTED-RNA-POLYMERASE-RXN 1129 | RNA-LIGASE-ATP-RXN 1130 | RNA-POLYMERASE-SUBUNIT-KINASE-RXN 1131 | RNA-URIDYLYLTRANSFERASE-RXN 1132 | RUBREDOXIN--NADP+REDUCTASE-RXN 1133 | RXN0-1 1134 | RXN0-1061 1135 | RXN0-1241 1136 | RXN0-2023 1137 | RXN0-2584 1138 | RXN0-2601 1139 | RXN0-2602 1140 | RXN0-2605 1141 | RXN0-2621 1142 | RXN0-2625 1143 | RXN0-3182 1144 | RXN0-3201 1145 | RXN0-3221 1146 | RXN0-3364 1147 | RXN0-3461 1148 | RXN0-4222 1149 | RXN0-4223 1150 | RXN0-4261 1151 | RXN0-4961 1152 | RXN0-5021 1153 | RXN0-5052 1154 | RXN0-5062 1155 | RXN0-5063 1156 | RXN0-5100 1157 | RXN0-5103 1158 | RXN0-5131 1159 | RXN0-5144 1160 | RXN0-5189 1161 | RXN0-5194 1162 | RXN0-5195 1163 | RXN0-5204 1164 | RXN0-5226 1165 | RXN0-5227 1166 | RXN0-5361 1167 | RXN0-5418 1168 | RXN0-5419 1169 | RXN0-5462 1170 | RXN0-5468 1171 | RXN0-5514 1172 | RXN0-6366 1173 | RXN0-6381 1174 | RXN0-6478 1175 | RXN0-6479 1176 | RXN0-6480 1177 | RXN0-6481 1178 | RXN0-6482 1179 | RXN0-6483 1180 | RXN0-6484 1181 | 
RXN0-6485 1182 | RXN0-6515 1183 | RXN0-6521 1184 | RXN0-6522 1185 | RXN0-6523 1186 | RXN0-6524 1187 | RXN0-6525 1188 | RXN0-6526 1189 | RXN0-6527 1190 | RXN0-6528 1191 | RXN0-6529 1192 | RXN0-6566 1193 | RXN0-6731 1194 | RXN0-6950 1195 | RXN0-6998 1196 | RXN0-7007 1197 | RXN-10771 1198 | RXN-10789 1199 | RXN-10790 1200 | RXN-10791 1201 | RXN-10792 1202 | RXN-11049 1203 | RXN-11065 1204 | RXN-11109 1205 | RXN-11116 1206 | RXN-11135 1207 | RXN-11136 1208 | RXN-11137 1209 | RXN-11138 1210 | RXN-11143 1211 | RXN-11144 1212 | RXN-11145 1213 | RXN-11146 1214 | RXN-11148 1215 | RXN-11176 1216 | RXN-11197 1217 | RXN-11199 1218 | RXN-11302 1219 | RXN-11321 1220 | RXN-11351 1221 | RXN-11573 1222 | RXN-11574 1223 | RXN-11576 1224 | RXN-11578 1225 | RXN-11580 1226 | RXN-11581 1227 | RXN-11586 1228 | RXN-11588 1229 | RXN-11589 1230 | RXN-11591 1231 | RXN-11592 1232 | RXN-11593 1233 | RXN-11594 1234 | RXN-11595 1235 | RXN-11596 1236 | RXN-11597 1237 | RXN-11598 1238 | RXN-11599 1239 | RXN-11600 1240 | RXN-11601 1241 | RXN-11602 1242 | RXN-11633 1243 | RXN-11634 1244 | RXN-11635 1245 | RXN-11637 1246 | RXN-11638 1247 | RXN-11688 1248 | RXN-11692 1249 | RXN-11693 1250 | RXN-11694 1251 | RXN-11697 1252 | RXN-11698 1253 | RXN-11699 1254 | RXN-11700 1255 | RXN-11701 1256 | RXN-11702 1257 | RXN-11833 1258 | RXN-11839 1259 | RXN-11841 1260 | RXN-11842 1261 | RXN-11845 1262 | RXN-11854 1263 | RXN-11855 1264 | RXN-11856 1265 | RXN-11857 1266 | RXN-11858 1267 | RXN-11859 1268 | RXN-11860 1269 | RXN-11865 1270 | RXN-11866 1271 | RXN-11867 1272 | RXN-11868 1273 | RXN-11869 1274 | RXN-11889 1275 | RXN-11890 1276 | RXN-12003 1277 | RXN-12005 1278 | RXN-12014 1279 | RXN-12015 1280 | RXN-12018 1281 | RXN-12227 1282 | RXN-12228 1283 | RXN-12317 1284 | RXN-12322 1285 | RXN-12323 1286 | RXN-12353 1287 | RXN-12368 1288 | RXN-12369 1289 | RXN-12372 1290 | RXN-12373 1291 | RXN-12374 1292 | RXN-12375 1293 | RXN-12376 1294 | RXN-12377 1295 | RXN-12378 1296 | RXN-12379 1297 | RXN-12380 1298 | RXN-12381 
1299 | RXN-12382 1300 | RXN-12454 1301 | RXN-12455 1302 | RXN-12456 1303 | RXN-12457 1304 | RXN-12458 1305 | RXN-12459 1306 | RXN-12461 1307 | RXN-12462 1308 | RXN-12463 1309 | RXN-12464 1310 | RXN-12465 1311 | RXN-12466 1312 | RXN-12469 1313 | RXN-12471 1314 | RXN-12472 1315 | RXN-12475 1316 | RXN-12476 1317 | RXN-12477 1318 | RXN-12478 1319 | RXN-12479 1320 | RXN-12480 1321 | RXN-12488 1322 | RXN-12497 1323 | RXN-12499 1324 | RXN-12501 1325 | RXN-12502 1326 | RXN-12503 1327 | RXN-12504 1328 | RXN-12555 1329 | RXN-12619 1330 | RXN-12727 1331 | RXN-12817 1332 | RXN-12826 1333 | RXN-12866 1334 | RXN-12908 1335 | RXN-12909 1336 | RXN-13186 1337 | RXN-13195 1338 | RXN-13225 1339 | RXN-13226 1340 | RXN-13326 1341 | RXN-13327 1342 | RXN-13350 1343 | RXN-13351 1344 | RXN-13352 1345 | RXN-13353 1346 | RXN-13588 1347 | RXN-13757 1348 | RXN-13758 1349 | RXN-13833 1350 | RXN-13992 1351 | RXN-13993 1352 | RXN-13996 1353 | RXN-13997 1354 | RXN-13998 1355 | RXN-13999 1356 | RXN-14341 1357 | RXN-14342 1358 | RXN-14517 1359 | RXN-14519 1360 | RXN-14520 1361 | RXN-14528 1362 | RXN-14539 1363 | RXN-14540 1364 | RXN-14548 1365 | RXN-14549 1366 | RXN-14550 1367 | RXN-14554 1368 | RXN-14567 1369 | RXN-14666 1370 | RXN-14708 1371 | RXN-14840 1372 | RXN-14906 1373 | RXN-14928 1374 | RXN-14998 1375 | RXN-15041 1376 | RXN-15047 1377 | RXN-15120 1378 | RXN-15205 1379 | RXN-15275 1380 | RXN-15316 1381 | RXN-15317 1382 | RXN-15509 1383 | RXN-15510 1384 | RXN-15511 1385 | RXN-15512 1386 | RXN-15563 1387 | RXN-15564 1388 | RXN-15565 1389 | RXN-1961 1390 | RXN1G-479 1391 | RXN-3701 1392 | RXN-4543 1393 | RXN490-3617 1394 | RXN-7667 1395 | RXN-7869 1396 | RXN-7871 1397 | RXN-7873 1398 | RXN-7940 1399 | RXN-7943 1400 | RXN-8409 1401 | RXN-8660 1402 | RXN-8661 1403 | RXN-8668 1404 | RXN-8706 1405 | RXN-8708 1406 | RXN-9594 1407 | RXN-9920 1408 | RXN-9921 1409 | SERINE--TRNA-LIGASE-RXN 1410 | TAU-PROTEIN-KINASE-RXN 1411 | TDCEACT-RXN 1412 | THREONINE--TRNA-LIGASE-RXN 1413 | TRANS-RXN0-181 1414 | 
TRNA-ADENINE-N6--METHYLTRANSFERASE-RXN 1415 | TRNA-CYTIDYLYLTRANSFERASE-RXN 1416 | TRNA-GUANINE-N7--METHYLTRANSFERASE-RXN 1417 | TRNA-NUCLEOTIDYLTRANSFERASE-RXN 1418 | TRNA-PSEUDOURIDINE-SYNTHASE-I-RXN 1419 | TRNA-S-TRANSFERASE-RXN 1420 | TRNA-URACIL-5--METHYLTRANSFERASE-RXN 1421 | TROPOMYOSIN-KINASE-RXN 1422 | TRYPTOPHAN--TRNA-LIGASE-RXN 1423 | TUBULIN-N-ACETYLTRANSFERASE-RXN 1424 | TYROSINE--TRNA-LIGASE-RXN 1425 | UBIQUITIN--PROTEIN-LIGASE-RXN 1426 | URITRANS-RXN 1427 | VALINE--TRNA-LIGASE-RXN 1428 | 1.14.11.27 1429 | RXN-17859 1430 | RXN-17860 1431 | RXN-17861 1432 | RXN-17862 1433 | RXN-17863 1434 | RXN-17864 1435 | RXN-17865 1436 | RXN-17866 1437 | RXN-18388 1438 | RXN-19025 1439 | RXN-19026 1440 | RXN-19148 1441 | RXN-19149 1442 | -------------------------------------------------------------------------------- /e2p2.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging.config 3 | import os 4 | import re 5 | import sys 6 | 7 | from src.definitions import DEFAULT_CONFIG_PATH, ROOT_DIR 8 | from src.bash.pipeline import * 9 | from src.lib.classifier import run_available_classifiers 10 | from src.lib.config import read_config 11 | from src.lib.ensemble import run_all_ensembles 12 | from src.lib.process import LoggerConfig, logging_helper, load_module_function_from_path 13 | from src.lib.read import get_all_seq_ids_from_fasta 14 | from src.lib.write import PfFiles, write_ensemble_outputs 15 | 16 | 17 | project_path = os.path.dirname(__file__) 18 | sys.path.insert(0, project_path) 19 | 20 | 21 | def main(): 22 | name = 'e2p2.py' 23 | description = ''' 24 | Runs the Ensemble Enzyme Prediction Pipeline (E2P2) on a set of input protein sequences, 25 | outputting enzyme functional annotations in the form of EC numbers or MetaCyc reaction 26 | IDs for any predicted enzyme sequences. 27 | ''' 28 | notes = ''' 29 | - Input protein sequences should be in FASTA format. 
30 | - Headers in the FASTA file should begin with the sequence ID followed by a space or "|". 31 | - Intermediate result files can be found in a time-stamped temporary subdirectory of the output directory. 32 | ''' 33 | time_stamp = str(int(time.time())) 34 | cur_logger_config = LoggerConfig() 35 | parser = argparse.ArgumentParser(prog=name, description=description, formatter_class=argparse.RawTextHelpFormatter, 36 | epilog=textwrap.dedent(notes)) 37 | add_io_arguments(parser) 38 | subparsers = parser.add_subparsers() 39 | parser_e2p2 = subparsers.add_parser('e2p2', help=textwrap.dedent("Subcommand to run E2P2.")) 40 | 41 | # Config read in 42 | args, others = parser.parse_known_args() 43 | config_log_flag = False 44 | if args.config_ini is not None and os.path.isfile(args.config_ini): 45 | config_path = args.config_ini 46 | else: 47 | config_log_flag = True 48 | config_path = DEFAULT_CONFIG_PATH 49 | mapping_files, classifier_dict, ensemble_dict = read_config(config_path) 50 | if None in (mapping_files, classifier_dict, ensemble_dict): 51 | parser.print_help() 52 | raise SystemExit 53 | 54 | for cls in classifier_dict: 55 | cls_path = os.path.join(ROOT_DIR, classifier_dict[cls]["class"]) 56 | cls_fn = load_module_function_from_path(cls_path, cls) 57 | cls_fn.add_arguments(parser_e2p2) 58 | 59 | for ens in ensemble_dict: 60 | ens_path = os.path.join(ROOT_DIR, ensemble_dict[ens]["class"]) 61 | ens_fn = load_module_function_from_path(ens_path, ens) 62 | ens_fn.add_arguments(parser_e2p2) 63 | 64 | # Parse arguments 65 | args = parser.parse_args() 66 | output_path, io_dict, create_temp_folder_flag, log_path, logging_level = \ 67 | start_pipeline(args.input_file, output_path=args.output_path, temp_folder=args.temp_folder, 68 | log_path=args.log_path, verbose=args.verbose, timestamp=time_stamp) 69 | 70 | 71 | if os.path.isfile(os.path.realpath(log_path)): 72 | cur_logger_config.add_new_logger(DEFAULT_LOGGER_NAME, log_path, logger_handler_mode='a') 73 | 
else: 74 | cur_logger_config.add_new_logger(DEFAULT_LOGGER_NAME, log_path) 75 | logging.config.dictConfig(cur_logger_config.dictConfig) 76 | logger = logging.getLogger(DEFAULT_LOGGER_NAME) 77 | 78 | if create_temp_folder_flag: 79 | logging_helper("Temp folder created at path %s." % io_dict["IO"]["out"], logging_level=logging_level, 80 | logger_name=DEFAULT_LOGGER_NAME) 81 | else: 82 | logging_helper("Using path %s as temp folder." % io_dict["IO"]["out"], logging_level=logging_level, 83 | logger_name=DEFAULT_LOGGER_NAME) 84 | if os.path.isfile(os.path.realpath(log_path)): 85 | logger.log(logging.WARNING, "Log file %s exists, will append to it..." % log_path) 86 | if config_log_flag is True: 87 | logging_helper("No user-provided config.ini found, attempting to use file at %s." % DEFAULT_CONFIG_PATH, 88 | logging_level="INFO", logger_name=DEFAULT_LOGGER_NAME) 89 | fasta_path = \ 90 | protein_to_gene_helper(args.input_file, output_path, args.protein_gene_path, args.remove_splice_variants, 91 | logger_name=DEFAULT_LOGGER_NAME) 92 | io_dict["IO"]["query"] = fasta_path 93 | 94 | all_query_ids = get_all_seq_ids_from_fasta(fasta_path) 95 | 96 | # Overwrite config with arguments 97 | overwrites = {} 98 | for cls in classifier_dict: 99 | cls_path = os.path.join(ROOT_DIR, classifier_dict[cls]["class"]) 100 | cls_fn = load_module_function_from_path(cls_path, cls) 101 | io_dict["IO"][cls] = cls_fn.generate_output_paths(io_dict["IO"]["query"], io_dict["IO"]["out"], cls, time_stamp) 102 | cls_fn.config_overwrites(args, overwrites) 103 | for ens in ensemble_dict: 104 | ens_path = os.path.join(ROOT_DIR, ensemble_dict[ens]["class"]) 105 | ens_fn = load_module_function_from_path(ens_path, ens) 106 | ens_fn.config_overwrites(args, overwrites) 107 | _, classifier_dict, ensemble_dict = read_config(config_path, io_dict, overwrites) 108 | 109 | # Set up classifiers 110 | classifier_names = sorted(classifier_dict.keys()) 111 | list_of_classifiers = [] 112 | for cls in classifier_names: 
113 | cls_path = os.path.join(ROOT_DIR, classifier_dict[cls]["class"]) 114 | path_to_weight = classifier_dict[cls]["weight"] 115 | cls_fn = load_module_function_from_path(cls_path, cls) 116 | cls_classifier = cls_fn(time_stamp=time_stamp, path_to_weight=path_to_weight, args=args) 117 | cls_classifier.setup_classifier(io_dict["IO"]["query"], io_dict["IO"]["out"], classifier_dict[cls]) 118 | list_of_classifiers.append(cls_classifier) 119 | 120 | # Run Classifiers 121 | res_cls_list, skipped_classifiers = \ 122 | run_available_classifiers(classifier_names, list_of_classifiers, logging_level, DEFAULT_LOGGER_NAME) 123 | 124 | # Set up ensembles 125 | ensemble_names = sorted(ensemble_dict.keys()) 126 | list_of_ensembles = [] 127 | for ens in ensemble_names: 128 | ens_path = os.path.join(ROOT_DIR, ensemble_dict[ens]["class"]) 129 | threshold = ensemble_dict[ens]["threshold"] 130 | ens_fn = load_module_function_from_path(ens_path, ens) 131 | ens_ensemble = ens_fn(res_cls_list, time_stamp, ens, threshold) 132 | list_of_ensembles.append(ens_ensemble) 133 | 134 | # Run Ensembles 135 | ensembles_ran, skipped_ensembles = \ 136 | run_all_ensembles(ensemble_names, list_of_ensembles, all_query_ids, DEFAULT_LOGGER_NAME) 137 | 138 | for ensemble_cls in ensembles_ran: 139 | # ensemble_name = ensemble_cls.name 140 | # ensemble_classifiers = ensemble_cls.list_of_classifiers 141 | write_ensemble_outputs(ensemble_cls, all_query_ids, output_path, 142 | os.path.join(ROOT_DIR, mapping_files['efclasses']), 143 | os.path.join(ROOT_DIR, mapping_files['ec_superseded']), 144 | os.path.join(ROOT_DIR, mapping_files['metacyc_rxn_ec']), 145 | os.path.join(ROOT_DIR, mapping_files['official_ec_metacyc_rxn']), 146 | os.path.join(ROOT_DIR, mapping_files['to_remove_non_small_molecule_metabolism']), 147 | prot_gene_map_path=args.protein_gene_path, logging_level=logging_level, 148 | logger_name=DEFAULT_LOGGER_NAME) 149 | 150 | 151 | if __name__ == '__main__': 152 | main() 153 | 154 | 155 | 156 | 
-------------------------------------------------------------------------------- /pipeline/weight.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from src.e2p2.classifiers.blast import BLAST 4 | 5 | 6 | def blast_pred(): 7 | partitions = ['f0', 'f1', 'f2', 'f3', 'f4', 'hold'] 8 | for fold in partitions: 9 | output = ('/Users/bxue/Documents/Carnegie/PMNProject/RPSDv5.0/E2P2/blastp/' 10 | 'rpsd.v5.2.ef.fasta.rpsd.v5.2.ef.ids.txt-' + fold + '.lst.subset.fa.blastp.out') 11 | b = BLAST('112223', '/Users/bxue/Documents/Carnegie/PMNProject/RPSDv4.2/weights/blast') 12 | b.read_classifier_result(output) 13 | 14 | file_name, file_ext = os.path.splitext(os.path.basename(output)) 15 | 16 | predict = os.path.join('/Users/bxue/Documents/Carnegie/PMNProject/RPSDv5.0/E2P2/predict', 17 | 'rpsd.v5.2.ef.ids.txt-' + fold + '.blastp.e2p2') 18 | with open(predict, 'w') as op: 19 | for query in b.res: 20 | cls_of_query = set() 21 | for cls in b.res[query]: 22 | cls_of_query.add(cls.name) 23 | op.write(query + '\t' + '|'.join(sorted(cls_of_query)) + '\n') 24 | 25 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carnegie/E2P2/4d9150bf68f34e8bcf3a363c379a349ba5f2ecb0/src/__init__.py -------------------------------------------------------------------------------- /src/bash/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carnegie/E2P2/4d9150bf68f34e8bcf3a363c379a349ba5f2ecb0/src/bash/__init__.py -------------------------------------------------------------------------------- /src/bash/pipeline.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import os 3 | import textwrap 4 | import time 5 | 6 | from src.definitions import 
DEFAULT_LOGGER_NAME, DEFAULT_OUTPUT_SUFFIX 7 | from src.lib.process import PathType, logging_helper 8 | from src.lib.read import check_fasta_header, remove_splice_variants_from_fasta 9 | 10 | 11 | def add_io_arguments(argument_parser): 12 | """Function to add IO related arguments 13 | Args: 14 | argument_parser: argparse 15 | Raises: 16 | Returns: 17 | """ 18 | argument_parser.add_argument("--input", "-i", dest="input_file", type=PathType('file'), 19 | help="Path to input protein sequences file", required=True) 20 | argument_parser.add_argument("--config", "-c", dest="config_ini", type=PathType('file'), 21 | help="Path to config.ini file") 22 | argument_parser.add_argument("--protein_gene", "-pg", dest="protein_gene_path", type=PathType('file'), 23 | help="Provide a protein-to-gene map. This can be used to generate a " 24 | "FASTA file with splice variants removed and to write the final E2P2 output.") 25 | argument_parser.add_argument("--remove_splice_variants", "-rm", dest="remove_splice_variants", action="store_true", 26 | help="Argument flag to remove splice variants.") 27 | argument_parser.add_argument("--output", "-o", dest="output_path", type=PathType('have_parent'), 28 | help="Path to output file. By default this is placed in the same folder as the input.") 29 | argument_parser.add_argument("--temp_folder", "-tf", dest="temp_folder", type=PathType('dir'), 30 | help="Specify the location of the temp folder. " 31 | "By default it is created in the same directory as the output.") 32 | argument_parser.add_argument("--log", "-l", dest="log_path", type=PathType('have_parent'), 33 | help="Specify the location of the log file. " 34 | "By default this is a time-stamped log file in the temp folder.") 35 | verbose_message = '''Verbose level of log output. Default is 0. 
36 | 0: only step information is logged 37 | 1: all information is logged 38 | ''' 39 | argument_parser.add_argument("--verbose", "-v", dest="verbose", default="0", choices=["0", "1"], 40 | help=textwrap.dedent(verbose_message)) 41 | 42 | 43 | def add_mapping_arguments(argument_parser): 44 | """Function to add Enzyme Function mapping related arguments 45 | Args: 46 | argument_parser: argparse 47 | Raises: 48 | Returns: 49 | """ 50 | # Arguments for maps 51 | argument_parser.add_argument("--ef_map", "-ef", dest="ef_map", type=PathType('file'), 52 | help="Path to efclasses.mapping file.") 53 | argument_parser.add_argument("--ec_superseded", "-es", dest="ec_superseded", type=PathType('file'), 54 | help="Path to EC-superseded.mapping file.") 55 | argument_parser.add_argument("--rxn_ec", "-me", dest="metacyc_rxn_ec", type=PathType('file'), 56 | help="Path to metacyc-RXN-EC.mapping file.") 57 | argument_parser.add_argument("--official_ec_rxn", "-oer", dest="official_ec_metacyc_rxn", type=PathType('file'), 58 | help="Path to official-EC-metacyc-RXN.mapping file.") 59 | argument_parser.add_argument("--to_remove", "-tr", dest="to_remove_metabolism", type=PathType('file'), 60 | help="Path to to-remove-non-small-molecule-metabolism.mapping file.") 61 | 62 | 63 | def start_pipeline(input_file, logger_name=DEFAULT_LOGGER_NAME, output_path=None, timestamp=str(time.time()), 64 | temp_folder=None, log_path=None, verbose="0"): 65 | """Function for setting up IO related variables 66 | Args: 67 | input_file: input file path 68 | logger_name: logger name 69 | output_path: output file path 70 | timestamp: time stamp 71 | temp_folder: path to the temp file folder 72 | log_path: path to the log file 73 | verbose: verbose level of logging 74 | Raises: 75 | Returns: 76 | """ 77 | check_fasta_header(input_file, logger_name) 78 | # Setup output paths 79 | if output_path is None: 80 | output_path = '.'.join([input_file, DEFAULT_OUTPUT_SUFFIX]) 81 | # Setup temp folder path 82 | 
output_folder = os.path.dirname(output_path) 83 | if temp_folder is None: 84 | input_file_name, _ = os.path.splitext(os.path.basename(input_file)) 85 | temp_folder = os.path.join(output_folder, input_file_name + '.' + timestamp) 86 | 87 | create_temp_folder_flag = False 88 | try: 89 | os.mkdir(temp_folder) 90 | create_temp_folder_flag = True 91 | except OSError as exc: 92 | if exc.errno != errno.EEXIST: 93 | raise 94 | pass 95 | 96 | # Setup logging file path 97 | if log_path is None: 98 | log_path = os.path.join(temp_folder, '.'.join([DEFAULT_LOGGER_NAME, timestamp, 'log'])) 99 | if verbose == "0": 100 | logging_level = "DEBUG" 101 | else: 102 | logging_level = "INFO" 103 | 104 | io_dict = {"IO": {"query": input_file, "out": temp_folder, "timestamp": timestamp}} 105 | return output_path, io_dict, create_temp_folder_flag, log_path, logging_level 106 | 107 | 108 | def protein_to_gene_helper(input_file, output_path, protein_gene_path, remove_splice_variants, 109 | logger_name=DEFAULT_LOGGER_NAME): 110 | """Function for mapping protein IDs to gene IDs 111 | Args: 112 | input_file: input file path 113 | output_path: output file path 114 | protein_gene_path: protein to gene mapping file path 115 | remove_splice_variants: Boolean value to remove splice variants from input 116 | logger_name: logger name 117 | Raises: 118 | Returns: 119 | """ 120 | output_folder = os.path.dirname(output_path) 121 | if protein_gene_path is not None and remove_splice_variants is True: 122 | return remove_splice_variants_from_fasta(input_file, output_folder, protein_gene_path, logger_name=logger_name) 123 | elif protein_gene_path is not None and remove_splice_variants is False: 124 | logging_helper("Protein to gene map not used to remove splice variants.", logging_level="DEBUG", 125 | logger_name=logger_name) 126 | return input_file 127 | elif protein_gene_path is None and remove_splice_variants is True: 128 | logging_helper("Cannot remove splice variants without protein to gene map.", 
logging_level="WARNING", 129 | logger_name=logger_name) 130 | return input_file 131 | else: 132 | return input_file 133 | 134 | -------------------------------------------------------------------------------- /src/definitions.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | ROOT_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 5 | DEFAULT_CONFIG_PATH = os.path.join(ROOT_DIR, 'config.ini') 6 | SRC_DIR = os.path.join(ROOT_DIR, 'src') 7 | E2P2_CLS_DIR = os.path.join(SRC_DIR, 'e2p2') 8 | CLASSIFIERS_CLS_DIR = os.path.join(E2P2_CLS_DIR, 'classifiers') 9 | ENSEMBLES_CLS_DIR = os.path.join(E2P2_CLS_DIR, 'ensembles') 10 | DATA_DIR = os.path.join(ROOT_DIR, 'data') 11 | MAPS_DIR = os.path.join(DATA_DIR, 'maps') 12 | WEIGHTS_DIR = os.path.join(DATA_DIR, 'weights') 13 | 14 | DEFAULT_BLAST_E_VALUE = float("1e-2") 15 | DEFAULT_BLAST_BIT_SCORE = float("0") 16 | DEFAULT_PRIAM_E_VALUE = float("1e-2") 17 | DEEPEC_DIR = os.path.join(ROOT_DIR, 'deepec') 18 | EC_TO_EF_MAPPING_PATH = os.path.join(DEEPEC_DIR, 'deepec/data/ec_to_ef.mapping') 19 | 20 | 21 | DEFAULT_LOGGER_LEVEL = "DEBUG" 22 | DEFAULT_LOGGER_NAME = "e2p2" 23 | DEFAULT_OUTPUT_SUFFIX = "e2p2" 24 | DEFAULT_LONG_OUTPUT_SUFFIX = "long" 25 | DEFAULT_PF_OUTPUT_SUFFIX = "default.pf" 26 | DEFAULT_ORXN_PF_OUTPUT_SUFFIX = "orxn.pf" 27 | DEFAULT_FINAL_PF_OUTPUT_SUFFIX = "final.pf" 28 | DEFAULT_PTOOLS_CHAR_LIMIT = 40 29 | 30 | # Website Default 31 | BLAST_PLUS_DOWNLOAD_LINK = "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/" 32 | JAVA_8_DOWNLOAD_LINK = "https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html" 33 | PRIAM_SEARCH_LINK = "http://priam.prabi.fr/utilities/PRIAM_search.jar" 34 | 35 | -------------------------------------------------------------------------------- /src/e2p2/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/carnegie/E2P2/4d9150bf68f34e8bcf3a363c379a349ba5f2ecb0/src/e2p2/__init__.py -------------------------------------------------------------------------------- /src/e2p2/classifiers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carnegie/E2P2/4d9150bf68f34e8bcf3a363c379a349ba5f2ecb0/src/e2p2/classifiers/__init__.py -------------------------------------------------------------------------------- /src/e2p2/classifiers/blast.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import re 4 | import textwrap 5 | 6 | from src.definitions import DEFAULT_LOGGER_LEVEL, DEFAULT_LOGGER_NAME, DEFAULT_BLAST_E_VALUE, DEFAULT_BLAST_BIT_SCORE 7 | from src.lib.classifier import Classifier 8 | from src.lib.function_class import FunctionClass 9 | from src.lib.process import logging_helper, PathType 10 | from src.lib.read import read_delim_itr 11 | 12 | CONFIG_CLASSIFIER_NAME = "BLAST" 13 | 14 | 15 | class BLAST(Classifier): 16 | def __init__(self, time_stamp, path_to_weight, name=CONFIG_CLASSIFIER_NAME, args=None, 17 | logging_level=DEFAULT_LOGGER_LEVEL, logger_name=DEFAULT_LOGGER_NAME): 18 | Classifier.__init__(self, time_stamp, path_to_weight, name, logging_level, logger_name) 19 | try: 20 | if args.blast_e_value is not None: 21 | self.e_value_threshold = args.blast_e_value 22 | else: 23 | self.e_value_threshold = DEFAULT_BLAST_E_VALUE 24 | except AttributeError: 25 | self.e_value_threshold = DEFAULT_BLAST_E_VALUE 26 | try: 27 | if args.blast_bit_score is not None: 28 | self.bit_score_threshold = args.blast_bit_score 29 | else: 30 | self.bit_score_threshold = DEFAULT_BLAST_BIT_SCORE 31 | except AttributeError: 32 | self.bit_score_threshold = DEFAULT_BLAST_BIT_SCORE 33 | 34 | def setup_classifier(self, input_path, output_path, classifier_config_dict, classifier_name=CONFIG_CLASSIFIER_NAME, 35 | 
logging_level=DEFAULT_LOGGER_LEVEL, logger_name=DEFAULT_LOGGER_NAME): 36 | logging_helper("Setting up BLAST", logging_level=logging_level, logger_name=logger_name) 37 | self.input = input_path 38 | self.output = self.generate_output_paths(input_path, output_path, classifier_name, self._time_stamp) 39 | 40 | [e_value_threshold, bit_score_threshold, command_string] = \ 41 | self.classifier_config_dict_helper(self._time_stamp, self.input, self.output, classifier_config_dict, 42 | classifier_name, ["blast_e_value", "blast_bit_score", "command"], 43 | logging_level="INFO", logger_name=logger_name) 44 | try: 45 | self.e_value_threshold = float(e_value_threshold) 46 | except (TypeError, ValueError) as e: 47 | logging_helper("BLAST E-value in config missing or type error, using default " + 48 | str(DEFAULT_BLAST_E_VALUE) + ".", 49 | logging_level="WARNING", logger_name=logger_name) 50 | self.e_value_threshold = DEFAULT_BLAST_E_VALUE 51 | try: 52 | self.bit_score_threshold = float(bit_score_threshold) 53 | except (TypeError, ValueError) as e: 54 | logging_helper("BLAST Bit score in config missing or type error, using default " + 55 | str(DEFAULT_BLAST_BIT_SCORE) + ".", 56 | logging_level="WARNING", logger_name=logger_name) 57 | self.bit_score_threshold = DEFAULT_BLAST_BIT_SCORE 58 | try: 59 | self.command = command_string.split() 60 | except AttributeError: 61 | logging_helper("BLAST Command error, none set.", logging_level="WARNING", 62 | logger_name=logger_name) 63 | self.command = None 64 | 65 | @staticmethod 66 | def generate_output_paths(input_path, output_path, classifier_name, time_stamp): 67 | input_file_name, input_file_ext = os.path.splitext(os.path.basename(input_path)) 68 | if os.path.isfile(output_path): 69 | output = output_path 70 | elif os.path.isdir(output_path): 71 | output = os.path.join(output_path, '.'.join(["blast", input_file_name, str(time_stamp), "out"])) 72 | else: 73 | input_folder = os.path.dirname(input_path) 74 | output = 
os.path.join(input_folder, '.'.join(["blast", input_file_name, str(time_stamp), "out"])) 75 | return output 76 | 77 | def read_classifier_result(self, output_path=None, logging_level=DEFAULT_LOGGER_LEVEL, 78 | logger_name=DEFAULT_LOGGER_NAME): 79 | if output_path is None: 80 | output_path = self.output 81 | bit_score_dict = {} 82 | for query_id, _, _, hit_cls, e_value, bit_score in \ 83 | self.blast_tab_itr(output_path, self.e_value_threshold, self.bit_score_threshold, logger_name): 84 | try: 85 | e_value_function_classes = list(self.res[query_id]) 86 | bit_score_function_classes = list(bit_score_dict[query_id]) 87 | except KeyError: 88 | e_value_function_classes = [] 89 | bit_score_function_classes = [] 90 | if len(hit_cls) == 0: 91 | # Workaround for non-enzyme hits 92 | hit_cls = ['#NA#'] 93 | for ef_cls in hit_cls: 94 | try: 95 | ef_weight = self.weight_map[ef_cls] 96 | except KeyError: 97 | ef_weight = 0 98 | e_value_function_classes.append(FunctionClass(ef_cls, e_value, ef_weight)) 99 | bit_score_function_classes.append(FunctionClass(ef_cls, bit_score, ef_weight)) 100 | 101 | min_e_value = min(e_value_function_classes).score 102 | min_e_value_function_classes = ( 103 | FunctionClass.get_function_classes_by_vals(e_value_function_classes, min_e_value, 'score')) 104 | min_e_value_function_classes_names = list(set([fc.name for fc in min_e_value_function_classes])) 105 | 106 | bit_score_function_classes_with_min_e_value = \ 107 | FunctionClass.get_function_classes_by_vals(bit_score_function_classes, 108 | min_e_value_function_classes_names, "name") 109 | max_bit_score = max(bit_score_function_classes_with_min_e_value).score 110 | best_bit_score_function_classes = ( 111 | FunctionClass.get_function_classes_by_vals(bit_score_function_classes_with_min_e_value, 112 | max_bit_score, 'score')) 113 | 114 | best_function_classes_names = list(set([fc.name for fc in best_bit_score_function_classes])) 115 | best_function_classes = ( 116 | 
FunctionClass.get_function_classes_by_vals(min_e_value_function_classes, 117 | best_function_classes_names, "name")) 118 | try: 119 | self.res[query_id] = best_function_classes 120 | bit_score_dict[query_id] = best_bit_score_function_classes 121 | except KeyError: 122 | self.res.setdefault(query_id, best_function_classes) 123 | bit_score_dict.setdefault(query_id, best_bit_score_function_classes) 124 | for query in self.res: 125 | dup_removed = [] 126 | res_of_query = self.res[query] 127 | function_cls_names = list(set([fc.name for fc in res_of_query])) 128 | # Workaround for non-enzyme hits 129 | if '#NA#' in function_cls_names: 130 | self.res[query] = [] 131 | continue 132 | for cls_name in function_cls_names: 133 | func_classes_w_name = FunctionClass.get_function_classes_by_vals(res_of_query, cls_name, "name") 134 | dup_removed.append(random.choice(func_classes_w_name)) 135 | self.res[query] = dup_removed 136 | 137 | @staticmethod 138 | def blast_tab_itr(path_to_blast_out, e_value_threshold=DEFAULT_BLAST_E_VALUE, 139 | bit_score_threshold=DEFAULT_BLAST_BIT_SCORE, logger_name=DEFAULT_LOGGER_NAME): 140 | logging_helper("Reading blast output: \"" + path_to_blast_out + "\"", logging_level="INFO", 141 | logger_name=logger_name) 142 | try: 143 | with open(path_to_blast_out, 'r') as ptbo: 144 | for blast_res in read_delim_itr(ptbo, val_indices=[0, 1, 10, 11]): 145 | if blast_res: 146 | info = blast_res[1] 147 | try: 148 | e_value = float(info[2]) 149 | except ValueError: 150 | e_value = float("1" + info[2]) 151 | try: 152 | bit_score = float(info[3]) 153 | except ValueError: 154 | bit_score = float("-1") 155 | blast_query = [bq.strip() for bq in re.split(r'[\s|]+', info[0]) if len(bq.strip()) > 0] 156 | blast_hits = [bh.strip() for bh in re.split(r'[\s|]+', info[1]) if len(bh.strip()) > 0] 157 | try: 158 | query_id, query_cls = blast_query[0], blast_query[1:] 159 | hit_id, hit_cls = blast_hits[0], blast_hits[1:] 160 | if e_value <= float(e_value_threshold) and bit_score 
>= bit_score_threshold: 161 | yield query_id, query_cls, hit_id, hit_cls, e_value, bit_score 162 | except IndexError: 163 | continue 164 | except (FileNotFoundError, TypeError) as e: 165 | raise e 166 | 167 | @staticmethod 168 | def add_arguments(argument_parser): 169 | argument_parser.add_argument("--blastp", "-b", dest="blastp", 170 | help=textwrap.dedent("Command of or path to BLAST+ \"blastp\".")) 171 | argument_parser.add_argument("--num_threads", "-n", dest="num_threads", type=int, 172 | help="Number of threads to run \"blastp\".") 173 | argument_parser.add_argument("--blast_db", "-bd", dest="blast_db", type=PathType('blast_db'), 174 | help=textwrap.dedent( 175 | "Path to rpsd blast database name.\nFor example, \"/PATH/TO/FOLDER/rpsd.fa\", " 176 | "where you can find the following files in /PATH/TO/FOLDER:rpsd.fa.phr; " 177 | "rpsd.fa.pin; rpsd.fa.psq")) 178 | argument_parser.add_argument("--blast_e_value", "-be", dest="blast_e_value", type=float, 179 | default=str(DEFAULT_BLAST_E_VALUE), help=textwrap.dedent("Blastp e-value cutoff")) 180 | argument_parser.add_argument("--blast_weight", "-bw", dest="blast_weight", type=PathType('file'), 181 | help=textwrap.dedent("Path to weight file for the blast classifier")) 182 | 183 | @staticmethod 184 | def config_overwrites(args, overwrites=None): 185 | blast_dest = ["blastp", "num_threads", "blast_db", "blast_e_value", "blast_weight"] 186 | if overwrites is None: 187 | overwrites = {} 188 | blast_overwrites = {} 189 | args_dict = vars(args) 190 | for dest in blast_dest: 191 | try: 192 | val = args_dict[dest] 193 | if val is not None: 194 | key = dest 195 | if dest in ["blast_weight"]: 196 | key = key.replace("blast_", "") 197 | blast_overwrites.setdefault(key, val) 198 | except KeyError: 199 | continue 200 | if len(blast_overwrites) > 0: 201 | overwrites.setdefault(CONFIG_CLASSIFIER_NAME, {}) 202 | overwrites[CONFIG_CLASSIFIER_NAME] = blast_overwrites 203 | return overwrites 204 | 205 | 
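The `ValueError` fallback in `blast_tab_itr` (`float("1" + info[2])`) exists because some BLAST builds write mantissa-less e-values such as `e-180` in tabular output. A minimal standalone sketch of that parsing rule (the function name here is illustrative, not part of E2P2):

```python
def parse_blast_e_value(field):
    """Parse an e-value field from BLAST tabular output.

    Some BLAST versions print very small e-values without a
    mantissa (e.g. 'e-180'); prepending '1' recovers a valid
    float, mirroring the fallback in BLAST.blast_tab_itr.
    """
    try:
        return float(field)
    except ValueError:
        # 'e-180' -> float('1e-180')
        return float("1" + field)


# Hits at or below the e-value cutoff are kept, as in blast_tab_itr.
print(parse_blast_e_value("2e-05") <= 1e-2)  # True
print(parse_blast_e_value("e-180"))          # 1e-180
```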
-------------------------------------------------------------------------------- /src/e2p2/classifiers/deepec.py: -------------------------------------------------------------------------------- 1 | import os 2 | import textwrap 3 | 4 | from src.definitions import DEFAULT_LOGGER_LEVEL, DEFAULT_LOGGER_NAME, EC_TO_EF_MAPPING_PATH 5 | from src.lib.classifier import Classifier 6 | from src.lib.function_class import FunctionClass 7 | from src.lib.process import PathType, logging_helper 8 | from src.lib.read import read_delim_itr 9 | 10 | CONFIG_CLASSIFIER_NAME = "DEEPEC" 11 | 12 | 13 | class DEEPEC(Classifier): 14 | def __init__(self, time_stamp, path_to_weight, name=CONFIG_CLASSIFIER_NAME, args=None, 15 | logging_level=DEFAULT_LOGGER_LEVEL, 16 | logger_name=DEFAULT_LOGGER_NAME): 17 | Classifier.__init__(self, time_stamp, path_to_weight, name, logging_level, logger_name) 18 | try: 19 | if args.ec_to_ef_mapping_path is not None and os.path.isfile(args.ec_to_ef_mapping_path): 20 | self.ec_to_ef_map = args.ec_to_ef_mapping_path 21 | else: 22 | self.ec_to_ef_map = EC_TO_EF_MAPPING_PATH 23 | except AttributeError: 24 | self.ec_to_ef_map = EC_TO_EF_MAPPING_PATH 25 | 26 | def setup_classifier(self, input_path, output_path, classifier_config_dict, classifier_name=CONFIG_CLASSIFIER_NAME, 27 | logging_level=DEFAULT_LOGGER_LEVEL, logger_name=DEFAULT_LOGGER_NAME): 28 | logging_helper("Setting up DeepEC", logging_level=logging_level, logger_name=logger_name) 29 | self.input = input_path 30 | output_folder = self.generate_output_paths(self.input, output_path, classifier_name, self._time_stamp) 31 | self.output = os.path.join(output_folder, "DeepEC_Result.txt") 32 | [command_string] = \ 33 | self.classifier_config_dict_helper(self._time_stamp, self.input, output_folder, classifier_config_dict, 34 | classifier_name, ["command"], 35 | logging_level="INFO", logger_name=logger_name) 36 | try: 37 | self.command = command_string.split() 38 | except AttributeError: 39 | logging_helper("DeepEC 
Command error, none set.", logging_level="WARNING", 40 | logger_name=logger_name) 41 | self.command = None 42 | 43 | @staticmethod 44 | def generate_output_paths(input_path, output_path, classifier_name, time_stamp): 45 | input_file_name, input_file_ext = os.path.splitext(os.path.basename(input_path)) 46 | if os.path.isfile(output_path): 47 | output_folder = os.path.join(os.path.dirname(output_path)) 48 | elif os.path.isdir(output_path): 49 | output_folder = output_path 50 | else: 51 | input_folder = os.path.dirname(input_path) 52 | output_folder = os.path.join(input_folder, "DeepEC_%s_%s" % (input_file_name, time_stamp)) 53 | return output_folder 54 | 55 | def read_classifier_result(self, output_path=None, logging_level=DEFAULT_LOGGER_LEVEL, 56 | logger_name=DEFAULT_LOGGER_NAME): 57 | if output_path is None: 58 | output_path = self.output 59 | for query_id, ef_cls in self.read_deepec_result_itr(output_path, self.ec_to_ef_map, logger_name): 60 | try: 61 | ef_weight = self.weight_map[ef_cls] 62 | except KeyError: 63 | ef_weight = 0 64 | try: 65 | self.res[query_id].append(FunctionClass(ef_cls, 1, ef_weight)) 66 | except KeyError: 67 | self.res.setdefault(query_id, [FunctionClass(ef_cls, 1, ef_weight)]) 68 | 69 | @staticmethod 70 | def read_deepec_result_itr(path_to_deepec_result_txt, ec_to_ef_map=None, logger_name=DEFAULT_LOGGER_NAME): 71 | logging_helper("Reading DeepEC DeepEC_Result.txt: \"" + path_to_deepec_result_txt + "\"", logging_level="INFO", 72 | logger_name=logger_name) 73 | ec_to_ef_dict = {} 74 | if ec_to_ef_map is not None and os.path.isfile(ec_to_ef_map): 75 | with open(ec_to_ef_map, 'r') as fp: 76 | for ec_num, ef_cls in read_delim_itr(fp): 77 | mapped_efs = set() 78 | for ef in ef_cls: 79 | mapped_efs.update([cls.strip() for cls in ef.split('|') if cls.strip() != ""]) 80 | try: 81 | ec_to_ef_dict[ec_num].update(mapped_efs) 82 | except KeyError: 83 | ec_to_ef_dict.setdefault(ec_num, mapped_efs) 84 | try: 85 | with open(path_to_deepec_result_txt, 'r') 
as op: 86 | for query_id, pred_ecs in read_delim_itr(op, skip=["Query ID"]): 87 | if query_id != "": 88 | for ec in sorted(pred_ecs): 89 | if ec_to_ef_map is None or len(ec_to_ef_dict) == 0: 90 | yield query_id, ec 91 | else: 92 | try: 93 | ef_cls = sorted(ec_to_ef_dict[ec]) 94 | for ef in ef_cls: 95 | yield query_id, ef 96 | except KeyError: 97 | continue 98 | except (FileNotFoundError, TypeError) as e: 99 | raise e 100 | 101 | @staticmethod 102 | def add_arguments(argument_parser): 103 | argument_parser.add_argument("--python_path", "-py", dest="python_path", 104 | help=textwrap.dedent("Command of or path to \"python\".")) 105 | argument_parser.add_argument("--deepec_path", "-dp", dest="deepec_path", type=PathType('file'), 106 | help=textwrap.dedent("Path to \"deepec.py\".")) 107 | argument_parser.add_argument("--ec_to_ef_mapping_path", "-ee", dest="ec_to_ef_mapping_path", 108 | type=PathType('file'), help="Path to mapping file from ECs to EFs") 109 | 110 | @staticmethod 111 | def config_overwrites(args, overwrites=None): 112 | deepec_dest = ["python_path", "deepec_path", "ec_to_ef_mapping_path"] 113 | if overwrites is None: 114 | overwrites = {} 115 | deepec_overwrites = {} 116 | args_dict = vars(args) 117 | for dest in deepec_dest: 118 | try: 119 | val = args_dict[dest] 120 | if val is not None: 121 | key = dest 122 | deepec_overwrites.setdefault(key, val) 123 | except KeyError: 124 | continue 125 | if len(deepec_overwrites) > 0: 126 | overwrites.setdefault(CONFIG_CLASSIFIER_NAME, {}) 127 | overwrites[CONFIG_CLASSIFIER_NAME] = deepec_overwrites 128 | return overwrites 129 | -------------------------------------------------------------------------------- /src/e2p2/classifiers/priam.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import textwrap 4 | 5 | from src.definitions import DEFAULT_LOGGER_LEVEL, DEFAULT_LOGGER_NAME, DEFAULT_PRIAM_E_VALUE 6 | from src.lib.classifier import FunctionClass, 
Classifier 7 | from src.lib.process import logging_helper, PathType 8 | from src.lib.read import read_groups_by_start_itr 9 | 10 | CONFIG_CLASSIFIER_NAME = "PRIAM" 11 | 12 | 13 | class PRIAM(Classifier): 14 | def __init__(self, time_stamp, path_to_weight, name=CONFIG_CLASSIFIER_NAME, args=None, 15 | logging_level=DEFAULT_LOGGER_LEVEL, logger_name=DEFAULT_LOGGER_NAME): 16 | Classifier.__init__(self, time_stamp, path_to_weight, name, logging_level=logging_level, logger_name=logger_name) 17 | # e value currently not used 18 | try: 19 | if args.priam_e_value is not None: 20 | self.e_value_threshold = args.priam_e_value 21 | else: 22 | self.e_value_threshold = DEFAULT_PRIAM_E_VALUE 23 | except AttributeError: 24 | self.e_value_threshold = DEFAULT_PRIAM_E_VALUE 25 | 26 | def setup_classifier(self, input_path, output_path, classifier_config_dict, classifier_name=CONFIG_CLASSIFIER_NAME, 27 | logging_level=DEFAULT_LOGGER_LEVEL, logger_name=DEFAULT_LOGGER_NAME): 28 | logging_helper("Setting up PRIAM", logging_level=logging_level, logger_name=logger_name) 29 | self.input = input_path 30 | output_folder = self.generate_output_paths(self.input, output_path, classifier_name, self._time_stamp) 31 | self.output = os.path.join(output_folder, "PRIAM_%s" % self._time_stamp, "ANNOTATION", "sequenceECs.txt") 32 | [evalue_threshold, command_string] = \ 33 | self.classifier_config_dict_helper(self._time_stamp, self.input, output_folder, classifier_config_dict, 34 | classifier_name, ["priam_e_value", "command"], 35 | logging_level="INFO", logger_name=logger_name) 36 | try: 37 | self.e_value_threshold = float(evalue_threshold) 38 | except (TypeError, ValueError) as e: 39 | logging_helper("PRIAM E-value in config missing or type error, using default " + 40 | str(DEFAULT_PRIAM_E_VALUE) + ".", 41 | logging_level="WARNING", logger_name=logger_name) 42 | self.e_value_threshold = DEFAULT_PRIAM_E_VALUE 43 | try: 44 | self.command = command_string.split() 45 | except AttributeError: 46 | logging_helper("PRIAM Command 
error, none set.", logging_level="WARNING", 47 | logger_name=logger_name) 48 | self.command = None 49 | 50 | @staticmethod 51 | def generate_output_paths(input_path, output_path, classifier_name, time_stamp): 52 | input_file_name, input_file_ext = os.path.splitext(os.path.basename(input_path)) 53 | if os.path.isfile(output_path): 54 | output_folder = os.path.dirname(output_path) 55 | elif os.path.isdir(output_path): 56 | output_folder = os.path.join(output_path, "PRIAM_%s_%s" % (input_file_name, time_stamp)) 57 | else: 58 | input_folder = os.path.dirname(input_path) 59 | output_folder = os.path.join(input_folder, "PRIAM_%s_%s" % (input_file_name, time_stamp)) 60 | # output = os.path.join(output_folder, "PRIAM_%s" % time_stamp, "ANNOTATION", "sequenceECs.txt") 61 | return output_folder 62 | 63 | def read_classifier_result(self, output_path=None, logging_level=DEFAULT_LOGGER_LEVEL, 64 | logger_name=DEFAULT_LOGGER_NAME): 65 | if output_path is None: 66 | output_path = self.output 67 | for query_id, _, ef_cls, _, ef_e_value in self.read_priam_sequence_ec_itr(output_path, logger_name): 68 | try: 69 | ef_weight = self.weight_map[ef_cls] 70 | except KeyError: 71 | ef_weight = 0 72 | try: 73 | self.res[query_id].append(FunctionClass(ef_cls, ef_e_value, ef_weight)) 74 | except KeyError: 75 | self.res.setdefault(query_id, [FunctionClass(ef_cls, ef_e_value, ef_weight)]) 76 | 77 | @staticmethod 78 | def read_priam_sequence_ec_itr(path_to_sequence_ec_txt, logger_name=DEFAULT_LOGGER_NAME): 79 | logging_helper("Reading PRIAM sequenceEC.txt: \"" + path_to_sequence_ec_txt + "\"", logging_level="INFO", 80 | logger_name=logger_name) 81 | try: 82 | with open(path_to_sequence_ec_txt, 'r') as op: 83 | for info in read_groups_by_start_itr(op, start=['>'], skip=['#']): 84 | priam_query = [pq.strip() for pq in re.split(r'[\s|]+', info[0]) if len(pq.strip()) > 0] 85 | priam_results = [pr.split('\t') for pr in info[1]] 86 | for ef_class_res in priam_results: 87 | try: 88 | query_id, 
query_cls = priam_query[0], priam_query[1:] 89 | ef_class = ef_class_res[0].strip() 90 | ef_prob = ef_class_res[1].strip() 91 | ef_e_value = ef_class_res[2].strip() 92 | yield query_id.lstrip('>'), query_cls, ef_class, float(ef_prob), float(ef_e_value) 93 | except (IndexError, ValueError) as e: 94 | continue 95 | except (FileNotFoundError, TypeError) as e: 96 | raise e 97 | 98 | @staticmethod 99 | def add_arguments(argument_parser): 100 | argument_parser.add_argument("--java_path", "-j", dest="java_path", 101 | help=textwrap.dedent("Command of or path to \"java\".")) 102 | argument_parser.add_argument("--priam_search", "-ps", dest="priam_search", type=PathType('file'), 103 | help=textwrap.dedent("Path to \"PRIAM_search.jar\".")) 104 | argument_parser.add_argument("--priam_resume", "-pr", dest="priam_resume", action='store_true', 105 | help="Whether or not to resume a found PRIAM_search.jar process.") 106 | argument_parser.add_argument("--blast_bin", "-bb", dest="blast_bin", type=PathType('blast_bin'), 107 | help=textwrap.dedent("Command of or path to BLAST+ bin folder.")) 108 | argument_parser.add_argument("--priam_profiles", "-pp", dest="priam_profiles", type=PathType('priam_profiles'), 109 | help=textwrap.dedent( 110 | "Path to PRIAM profiles.\nFor example, \"/PATH/TO/FOLDER/profiles\", " 111 | "where you can find the following in /PATH/TO/FOLDER/profiles:\n " 112 | "files: annotation_rules.xml; genome_rules.xml\n " 113 | "folders: PROFILES: Folder contains \"LIBRARY\" folder and " 114 | "multiple \".chk\" files.")) 115 | argument_parser.add_argument("--priam_weight", "-pw", dest="priam_weight", type=PathType('file'), 116 | help=textwrap.dedent("Path to weight file for the priam classifier")) 117 | 118 | @staticmethod 119 | def config_overwrites(args, overwrites=None): 120 | priam_dest = ["java_path", "priam_search", "priam_resume", "blast_bin", "priam_profiles", "priam_weight"] 121 | if overwrites is None: 122 | overwrites = {} 123 | priam_overwrites = {} 124 | 
args_dict = vars(args) 125 | for dest in priam_dest: 126 | try: 127 | val = args_dict[dest] 128 | if val is not None: 129 | key = dest 130 | if dest in ["priam_resume", "priam_weight"]: 131 | key = key.replace("priam_", "") 132 | if dest == 'priam_resume': 133 | if val is True: 134 | val = "fr" 135 | else: 136 | val = "fn" 137 | priam_overwrites.setdefault(key, val) 138 | except KeyError: 139 | continue 140 | if len(priam_overwrites) > 0: 141 | overwrites.setdefault(CONFIG_CLASSIFIER_NAME, {}) 142 | overwrites[CONFIG_CLASSIFIER_NAME] = priam_overwrites 143 | return overwrites 144 | 145 | -------------------------------------------------------------------------------- /src/e2p2/ensembles/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carnegie/E2P2/4d9150bf68f34e8bcf3a363c379a349ba5f2ecb0/src/e2p2/ensembles/__init__.py -------------------------------------------------------------------------------- /src/e2p2/ensembles/max_weight_absolute_threshold.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from src.lib.ensemble import Ensemble 4 | from src.lib.classifier import FunctionClass 5 | 6 | 7 | class MaxWeightAbsoluteThreshold(Ensemble): 8 | @staticmethod 9 | def weighting(list_of_classifiers, weighted_res=None): 10 | if weighted_res is None: 11 | weighted_res = {} 12 | for classifier in list_of_classifiers: 13 | for query in classifier.res: 14 | query_res = list(classifier.res[query]) 15 | try: 16 | weighted_res[query] = list(weighted_res[query]) + query_res 17 | except KeyError: 18 | weighted_res.setdefault(query, query_res) 19 | for query in weighted_res: 20 | query_res = list(weighted_res[query]) 21 | res_function_names = list(set([fc.name for fc in query_res])) 22 | 23 | weighted_query_res = [] 24 | for function_name in sorted(res_function_names): 25 | function_classes_w_name = ( 26 | 
FunctionClass.get_function_classes_by_vals(query_res, function_name)) 27 | max_weight_w_name = FunctionClass.max_weight(function_classes_w_name).weight 28 | weighted_query_res.append( 29 | random.choice(FunctionClass.get_function_classes_by_vals(function_classes_w_name, max_weight_w_name, 30 | "weight"))) 31 | weighted_res[query] = weighted_query_res 32 | return weighted_res 33 | 34 | @staticmethod 35 | def voting(voted_res, threshold=float(0.5)): 36 | if voted_res is None: 37 | voted_res = {} 38 | for query in voted_res: 39 | query_res = list(voted_res[query]) 40 | if len(query_res) == 0: 41 | continue 42 | max_weight = FunctionClass.max_weight(query_res).weight 43 | t = float(max_weight) - float(threshold) 44 | # Check to make sure the threshold is not negative. 45 | if t < 0.0: 46 | t = 0.0 47 | voted_query_res = [] 48 | for res_function_class in query_res: 49 | if res_function_class.ge_threshold(t, attr='weight'): 50 | voted_query_res.append(res_function_class) 51 | voted_res[query] = voted_query_res 52 | return voted_res 53 | 54 | @staticmethod 55 | def add_arguments(argument_parser): 56 | """Function to add E2P2 ensemble related arguments 57 | Args: 58 | argument_parser: argparse 59 | Raises: 60 | Returns: 61 | """ 62 | # Arguments for E2P2 ensembles 63 | argument_parser.add_argument("--threshold", "-t", dest="threshold", type=float, 64 | help="Threshold for voting results. 
Default is 0.5.") 65 | 66 | @staticmethod 67 | def config_overwrites(args, overwrites=None): 68 | max_weight_abs_threshold_dest = ["threshold"] 69 | if overwrites is None: 70 | overwrites = {} 71 | max_weight_abs_threshold_overwrites = {} 72 | args_dict = vars(args) 73 | for dest in max_weight_abs_threshold_dest: 74 | try: 75 | val = args_dict[dest] 76 | if val is not None: 77 | key = dest 78 | max_weight_abs_threshold_overwrites.setdefault(key, val) 79 | except KeyError: 80 | continue 81 | if len(max_weight_abs_threshold_overwrites) > 0: 82 | overwrites.setdefault("MaxWeightAbsoluteThreshold", {}) 83 | overwrites["MaxWeightAbsoluteThreshold"] = max_weight_abs_threshold_overwrites 84 | return overwrites 85 | -------------------------------------------------------------------------------- /src/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carnegie/E2P2/4d9150bf68f34e8bcf3a363c379a349ba5f2ecb0/src/lib/__init__.py -------------------------------------------------------------------------------- /src/lib/classifier.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import multiprocessing 3 | import os.path 4 | 5 | from src.definitions import DEFAULT_LOGGER_NAME, DEFAULT_LOGGER_LEVEL 6 | from src.lib.config import get_values_from_config_option 7 | from src.lib.function_class import FunctionClass 8 | from src.lib.process import RunProcess, logging_helper 9 | from src.lib.read import read_delim_itr 10 | 11 | _available_class_score_attr = ['weight', 'score'] 12 | 13 | 14 | class Error(Exception): 15 | pass 16 | 17 | 18 | class NonCommandError(Error): 19 | """Error for Classifier missing "_command" 20 | """ 21 | pass 22 | 23 | 24 | class Classifier(object): 25 | def __init__(self, time_stamp, path_to_weight=None, name="", input_path="", output_path="", args=None, 26 | logging_level=DEFAULT_LOGGER_LEVEL, 
logger_name=DEFAULT_LOGGER_NAME): 27 | logging_helper("Initializing Classifier: \"" + str(name) + "\"", logging_level=logging_level, 28 | logger_name=logger_name) 29 | self.name = name 30 | self._time_stamp = time_stamp 31 | if path_to_weight is not None and os.path.isfile(path_to_weight): 32 | self.weight_map = self.read_weights(path_to_weight) 33 | else: 34 | self.weight_map = {} 35 | # _command: list strings of the bash call 36 | self.command = None 37 | # key: Seq ID, val: [FunctionClass, ..] 38 | self.res = {} 39 | # IO tracking 40 | self.input = input_path 41 | self.output = output_path 42 | if args is not None: 43 | pass 44 | 45 | def __repr__(self): 46 | return f'Classifier(\'{self.name}\', {self.command}, {self._time_stamp})' 47 | 48 | def setup_classifier(self, input_path, output_path, classifier_config_dict, classifier_name=None, 49 | logging_level=DEFAULT_LOGGER_LEVEL, logger_name=DEFAULT_LOGGER_NAME): 50 | """Placeholder function to set up a classifier using a processed config dict read from a config file 51 | Args: 52 | input_path: Input file path for the classifier 53 | output_path: Output file path for the classifier 54 | classifier_config_dict: Dictionary of the classifier read from the config 55 | classifier_name: Name of the classifier in the config file 56 | logging_level: The logging level set for this command 57 | logger_name: The name of the logger for this command 58 | Raises: AttributeError 59 | Returns: 60 | """ 61 | if classifier_name is None: 62 | classifier_name = self.name 63 | logging_helper("Setting up " + classifier_name, logging_level=logging_level, logger_name=logger_name) 64 | if os.path.isfile(input_path): 65 | self.input = input_path 66 | self.output = self.generate_output_paths(input_path, output_path, classifier_name, self._time_stamp) 67 | [command_string] = self.classifier_config_dict_helper(self._time_stamp, self.input, self.output, 68 | classifier_config_dict, classifier_name, ["command"], 69 | 
logging_level=logging_level, logger_name=logger_name) 70 | try: 71 | self.command = command_string.split() 72 | except AttributeError: 73 | self.command = None 74 | 75 | @staticmethod 76 | def generate_output_paths(input_path, output_path, classifier_name, time_stamp): 77 | input_file_name, input_file_ext = os.path.splitext(os.path.basename(input_path)) 78 | if os.path.isfile(output_path): 79 | output = output_path 80 | elif os.path.isdir(output_path): 81 | output = os.path.join(output_path, input_file_name, '.'.join([classifier_name, str(time_stamp)])) 82 | else: 83 | input_folder = os.path.dirname(input_path) 84 | output = os.path.join(input_folder, input_file_name, '.'.join([classifier_name, str(time_stamp)])) 85 | return output 86 | 87 | @staticmethod 88 | def classifier_config_dict_helper(timestamp, input_path, output_path, classifier_config_dict, classifier_name, 89 | option_list, logging_level=DEFAULT_LOGGER_LEVEL, logger_name=DEFAULT_LOGGER_NAME): 90 | """Retrieve option values from a processed config dict read from a config file, include interpolation. 
91 | Args: 92 | timestamp: The time stamp of the function 93 | input_path: Input file path for the classifier 94 | output_path: Output file path for the classifier 95 | classifier_config_dict: Dictionary of the classifier read from the config 96 | classifier_name: Name of the classifier in the config file 97 | option_list: list of config options to retrieve 98 | logging_level: The logging level set for this command 99 | logger_name: The name of the logger for this command 100 | Raises: AttributeError 101 | Returns: 102 | """ 103 | config_dict = { 104 | "IO": { 105 | "query": input_path, 106 | classifier_name: output_path, 107 | "timestamp": timestamp 108 | }, 109 | classifier_name: classifier_config_dict 110 | } 111 | classifier_config = configparser.ConfigParser(allow_no_value=True, 112 | interpolation=configparser.ExtendedInterpolation()) 113 | try: 114 | classifier_config.read_dict(config_dict) 115 | return [get_values_from_config_option(classifier_config, classifier_name, opt, 116 | logging_level=logging_level, logger_name=logger_name) 117 | for opt in option_list] 118 | except AttributeError: 119 | return None 120 | 121 | def read_classifier_result(self, output_path=None, logging_level=DEFAULT_LOGGER_LEVEL, 122 | logger_name=DEFAULT_LOGGER_NAME): 123 | """Placeholder function to read classifier results from output file path. 
124 | Args: 125 | output_path: Output file path for the classifier 126 | logging_level: The logging level set for this command 127 | logger_name: The name of the logger for this command 128 | Raises: FileNotFoundError, TypeError 129 | Returns: 130 | """ 131 | if output_path is None: 132 | output_path = self.output 133 | try: 134 | with open(output_path, 'r') as op: 135 | for info in read_delim_itr(op, key_idx=0, val_indices=[1, 2]): 136 | if info: 137 | seq_id = info[0] 138 | ef_class = info[1] 139 | ef_score = float(info[2]) 140 | try: 141 | ef_weight = self.weight_map[ef_class] 142 | except KeyError: 143 | ef_weight = float(0) 144 | try: 145 | self.res[seq_id].append(FunctionClass(ef_class, ef_score, ef_weight)) 146 | except KeyError: 147 | self.res.setdefault(seq_id, [FunctionClass(ef_class, ef_score, ef_weight)]) 148 | except (FileNotFoundError, TypeError) as e: 149 | raise e 150 | 151 | @staticmethod 152 | def read_weights(path_to_weight, logging_level=DEFAULT_LOGGER_LEVEL, logger_name=DEFAULT_LOGGER_NAME): 153 | """Read in weights from file 154 | Args: 155 | path_to_weight: File path to classifier's weight 156 | logging_level: The logging level set for this command 157 | logger_name: The name of the logger for this command 158 | Raises: ValueError 159 | Returns: 160 | """ 161 | logging_helper("Reading weight file: \"" + str(path_to_weight) + "\"", logging_level=logging_level, 162 | logger_name=logger_name) 163 | weight = {} 164 | with open(path_to_weight, 'r') as fp: 165 | for function_cls, cls_weight in read_delim_itr(fp): 166 | try: 167 | weight.setdefault(function_cls, float(cls_weight[0])) 168 | except ValueError: 169 | weight.setdefault(function_cls, float("0.0")) 170 | return weight 171 | 172 | def get_res(self): 173 | return self.res 174 | 175 | 176 | class RunClassifiers(object): 177 | """Object for running all the classifiers 178 | """ 179 | def __init__(self, classifiers=None): 180 | self.queue = multiprocessing.Queue() 181 | self.run_process = 
RunProcess() 182 | self.workers = [] 183 | if classifiers is None or type(classifiers) not in [list, set]: 184 | self.classifiers = [] 185 | else: 186 | valid_classifiers = [classifier for classifier in classifiers if isinstance(classifier, Classifier)] 187 | self.classifiers = valid_classifiers 188 | 189 | def add_classifier(self, classifier): 190 | """Add a classifier to this object 191 | Args: 192 | classifier: A Classifier 193 | Raises: 194 | Returns: 195 | """ 196 | if isinstance(classifier, Classifier): 197 | self.classifiers.append(classifier) 198 | 199 | def add_classifier_to_queue(self, classifier, logging_level=DEFAULT_LOGGER_LEVEL, logger_name=DEFAULT_LOGGER_NAME): 200 | """Add a classifier to this object, and it's multiprocessing queue 201 | Args: 202 | classifier: A Classifier 203 | logging_level: The logging level set for this command 204 | logger_name: The name of the logger for this command 205 | Raises: 206 | Returns: 207 | """ 208 | if isinstance(classifier, Classifier) and classifier.command is not None: 209 | self.classifiers.append(classifier) 210 | logging_helper("New process: " + classifier.name + ": \"" + " ".join(classifier.command) + "\"", 211 | logging_level="INFO", logger_name=logger_name) 212 | self.run_process.add_process_to_workers(self.workers, self.queue, logging_level, logger_name, 213 | classifier.command, classifier.name) 214 | 215 | def add_available_classifiers_to_queue(self, logging_level=DEFAULT_LOGGER_LEVEL, logger_name=DEFAULT_LOGGER_NAME): 216 | """Add all classifiers of this object to its multiprocessing queue 217 | Args: 218 | logging_level: The logging level set for this command 219 | logger_name: The name of the logger for this command 220 | Raises: 221 | Returns: 222 | """ 223 | for classifier in self.classifiers: 224 | logging_helper("New process: " + classifier.name + ": \"" + " ".join(classifier.command) + "\"", 225 | logging_level="INFO", logger_name=logger_name) 226 | 
self.run_process.add_process_to_workers(self.workers, self.queue, logging_level, logger_name, 227 | classifier.command, classifier.name) 228 | 229 | def run(self, logging_level=DEFAULT_LOGGER_LEVEL, logger_name=DEFAULT_LOGGER_NAME): 230 | """Run all classifiers of this object 231 | Args: 232 | Raises: 233 | Returns: 234 | """ 235 | logging_helper("Running all available processes.", logging_level=logging_level, logger_name=logger_name) 236 | self.run_process.run_all_worker_processes(self.workers, self.queue) 237 | 238 | def res(self): 239 | """Get list of classifier results 240 | Args: 241 | Raises: 242 | Returns: 243 | res_list: List of classifier results 244 | """ 245 | res_list = [] 246 | for classifier in self.classifiers: 247 | res_list.append(classifier.get_res()) 248 | return res_list 249 | 250 | @staticmethod 251 | def add_arguments(argument_parser): 252 | """Placeholder function that add classifier arguments to the pipeline. 253 | Args: 254 | argument_parser: ArgumentParser 255 | Raises: 256 | Returns: 257 | """ 258 | argument_parser.add_argument('Classifier') 259 | 260 | @staticmethod 261 | def config_overwrites(args, overwrites=None): 262 | """Placeholder function that returns a dictionary that will overwrite the config dictionary using arguments. 263 | Args: 264 | args: parsed arguments 265 | overwrites: a dictionary that overwrites the parsed arguments 266 | Raises: 267 | Returns: 268 | """ 269 | if overwrites is None: 270 | overwrites = {} 271 | return overwrites 272 | 273 | 274 | def run_available_classifiers(classifiers_to_run, list_of_classifiers, logging_level=DEFAULT_LOGGER_LEVEL, 275 | logger_name=DEFAULT_LOGGER_NAME): 276 | """Placeholder function to read classifier results from output file path. 
277 | Args: 278 | classifiers_to_run: List of the classifier names that will be run 279 | list_of_classifiers: List of the classifier classes 280 | logging_level: The logging level set for this command 281 | logger_name: The name of the logger for this command 282 | Raises: 283 | Returns: 284 | list of classifiers that were run, list of classifiers that were skipped 285 | """ 286 | run_cls = RunClassifiers() 287 | skipped_classifiers = [] 288 | for idx, cls in enumerate(list_of_classifiers): 289 | if isinstance(cls, Classifier) and cls.command is not None and cls.name in classifiers_to_run: 290 | run_cls.add_classifier(cls) 291 | else: 292 | skipped_classifiers.append(classifiers_to_run[idx]) 293 | run_cls.add_available_classifiers_to_queue(logging_level, logger_name) 294 | run_cls.run(logging_level, logger_name) 295 | for cls in run_cls.classifiers: 296 | cls_output = cls.output 297 | cls.read_classifier_result(cls_output, logging_level, logger_name) 298 | 299 | return run_cls.classifiers, skipped_classifiers 300 | 301 | -------------------------------------------------------------------------------- /src/lib/config.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import os 3 | from argparse import ArgumentParser 4 | 5 | from src.definitions import ROOT_DIR, DEFAULT_LOGGER_NAME, DEFAULT_CONFIG_PATH, CLASSIFIERS_CLS_DIR, WEIGHTS_DIR, ENSEMBLES_CLS_DIR, MAPS_DIR 6 | from src.lib.process import logging_helper, load_module_function_from_path 7 | 8 | 9 | _DEFAULT_SECTIONS = ["Mapping", "Ensembles", "Classifiers"] 10 | 11 | 12 | def get_options_from_config_section(config, section_name, name_only=True, selected_options=None, 13 | logging_level="WARNING", logger_name=DEFAULT_LOGGER_NAME): 14 | """Helper function to retrieve options from a configparser section 15 | Args: 16 | config: configparser 17 | section_name: Name of the sections to retrieve 18 | name_only: Boolean value for 19 | selected_options: 
specific options to retrieve 20 | logging_level: The logging level set for read map 21 | logger_name: The name of the logger for read map 22 | Raises: 23 | Returns: 24 | option names or options with their items 25 | """ 26 | if selected_options is None: 27 | selected_options = set() 28 | elif type(selected_options) in [list, tuple, range, set, dict]: 29 | selected_options = set(selected_options) 30 | else: 31 | return None 32 | try: 33 | try: 34 | section_options = config.items(section_name) 35 | except configparser.InterpolationMissingOptionError: 36 | section_options = config.items(section_name, raw=True) 37 | section_options_names = [tup[0] for tup in section_options] 38 | if len(selected_options) == 0: 39 | selected_options = set(section_options_names) 40 | if len(selected_options & set(section_options_names)) == 0: 41 | return None 42 | elif name_only is True: 43 | return sorted(selected_options & set(section_options_names)) 44 | else: 45 | return {tup[0]: tup[1] for tup in section_options if tup[0] in selected_options} 46 | except configparser.NoSectionError: 47 | logging_helper("Missing '" + str(section_name) + "' section in config.ini", 48 | logging_level=logging_level, logger_name=logger_name) 49 | return None 50 | 51 | 52 | def get_values_from_config_option(config, section_name, option_name, logging_level="WARNING", 53 | logger_name=DEFAULT_LOGGER_NAME): 54 | """Helper function to retrieve values from configparser options 55 | Args: 56 | config: configparser 57 | section_name: Name of the section to retrieve 58 | option_name: Name of the option to retrieve 59 | logging_level: The logging level set for read map 60 | logger_name: The name of the logger for read map 61 | Raises: 62 | Returns: 63 | option_val: values of the option 64 | """ 65 | try: 66 | try: 67 | option_val = config.get(section_name, option_name) 68 | except configparser.InterpolationMissingOptionError: 69 | option_val = config.get(section_name, option_name, raw=True) 70 | if 
option_val == '': 71 | return None 72 | else: 73 | return option_val 74 | except configparser.NoSectionError: 75 | logging_helper("Missing '" + str(section_name) + "' section in config", 76 | logging_level=logging_level, logger_name=logger_name) 77 | return None 78 | except configparser.NoOptionError: 79 | logging_helper("Missing '" + str(option_name) + "' in '" + str(section_name) + "' section in config", 80 | logging_level=logging_level, logger_name=logger_name) 81 | return None 82 | 83 | 84 | def config_section_to_multi_sections_helper(config, section_name, selected_options=None, 85 | logging_level="WARNING", logger_name=DEFAULT_LOGGER_NAME): 86 | """Helper function for a section that points to multiple sections, i.e. Ensembles & Classifiers 87 | Args: 88 | config: configparser 89 | section_name: Name of the section to retrieve 90 | selected_options: Name of the options to retrieve 91 | logging_level: The logging level set for read map 92 | logger_name: The name of the logger for read map 93 | Raises: 94 | Returns: 95 | sections_names: names of the referenced multiple sections 96 | sections_dict: a dictionary that contains options and their values 97 | """ 98 | potential_sections = get_options_from_config_section(config, section_name, logging_level=logging_level, 99 | logger_name=logger_name) 100 | if potential_sections is not None: 101 | available_sections_names = [get_values_from_config_option(config, section_name, potential_section_name, 102 | logging_level=logging_level, logger_name=logger_name) 103 | for potential_section_name in potential_sections] 104 | else: 105 | available_sections_names = None 106 | if available_sections_names is not None: 107 | available_sections_dicts = [get_options_from_config_section(config, section, selected_options=selected_options, 108 | name_only=False) 109 | for section in available_sections_names] 110 | else: 111 | available_sections_dicts = None 112 | return available_sections_names, available_sections_dicts 113 | 114 | 115 
| def default_path_helper(input_path, default_folder, root_dir=ROOT_DIR): 116 | """Get full paths using defaults 117 | Args: 118 | input_path: input file path 119 | default_folder: Path to default files 120 | root_dir: Project root 121 | Raises: 122 | Returns: 123 | The full path for a default file 124 | """ 125 | if os.path.join(root_dir, os.path.dirname(input_path)) == default_folder: 126 | return os.path.join(root_dir, input_path) 127 | else: 128 | return input_path 129 | 130 | 131 | def read_config_ini(timestamp, config_ini, io_dict, overwrites=None, logging_level="WARNING", 132 | logger_name=DEFAULT_LOGGER_NAME): 133 | """Read in config.ini 134 | Args: 135 | timestamp: 136 | config_ini: configparser 137 | io_dict: Dictionary that represents an "IO" section for the configparser 138 | overwrites: A dictionary to overwrite values of the config.ini 139 | logging_level: The logging level set for read map 140 | logger_name: The name of the logger for read map 141 | Raises: 142 | Returns: 143 | sections_names: names of the referenced multiple sections 144 | sections_dict: a dictionary that contains options and their values 145 | """ 146 | pipeline_config = configparser.ConfigParser(allow_no_value=True, interpolation=configparser.ExtendedInterpolation()) 147 | pipeline_config.read_dict(io_dict) 148 | pipeline_config.read(config_ini) 149 | 150 | if overwrites is not None: 151 | pipeline_config.read_dict(overwrites) 152 | 153 | # IO 154 | query_path = \ 155 | get_values_from_config_option(pipeline_config, 'IO', 'query', logging_level=logging_level, logger_name=logger_name) 156 | temp_path = \ 157 | get_values_from_config_option(pipeline_config, 'IO', 'out', logging_level=logging_level, logger_name=logger_name) 158 | 159 | # Mapping Files 160 | efclasses = get_values_from_config_option(pipeline_config, 'Mapping', 'efclasses', logging_level=logging_level, 161 | logger_name=logger_name) 162 | efclasses = default_path_helper(efclasses, MAPS_DIR) 163 | ec_superseded = 
get_values_from_config_option(pipeline_config, 'Mapping', 'ec_superseded', logging_level=logging_level, 164 | logger_name=logger_name) 165 | ec_superseded = default_path_helper(ec_superseded, MAPS_DIR) 166 | metacyc_rxn_ec = get_values_from_config_option(pipeline_config, 'Mapping', 'metacyc_rxn_ec', logging_level=logging_level, 167 | logger_name=logger_name) 168 | metacyc_rxn_ec = default_path_helper(metacyc_rxn_ec, MAPS_DIR) 169 | official_ec_metacyc_rxn = get_values_from_config_option(pipeline_config, 'Mapping', 'official_ec_metacyc_rxn', 170 | logging_level=logging_level, logger_name=logger_name) 171 | official_ec_metacyc_rxn = default_path_helper(official_ec_metacyc_rxn, MAPS_DIR) 172 | to_remove_non_small_molecule_metabolism = \ 173 | get_values_from_config_option(pipeline_config, 'Mapping', 'to_remove_non_small_molecule_metabolism', 174 | logging_level=logging_level, logger_name=logger_name) 175 | to_remove_non_small_molecule_metabolism = default_path_helper(to_remove_non_small_molecule_metabolism, MAPS_DIR) 176 | mapping_files = { 177 | 'efclasses': efclasses, 'ec_superseded': ec_superseded, 'metacyc_rxn_ec': metacyc_rxn_ec, 178 | "official_ec_metacyc_rxn": official_ec_metacyc_rxn, 179 | "to_remove_non_small_molecule_metabolism": to_remove_non_small_molecule_metabolism} 180 | # Classifiers 181 | classifier_sections, classifier_tuples = \ 182 | config_section_to_multi_sections_helper(pipeline_config, "Classifiers", 183 | logging_level=logging_level, logger_name=logger_name) 184 | list_of_classifiers = [] 185 | if classifier_sections is not None and classifier_tuples is not None: 186 | for idx, cls in enumerate(classifier_sections): 187 | if cls is not None: 188 | try: 189 | cls_module_path = overwrites[cls]["class"] 190 | except (KeyError, TypeError) as e: 191 | cls_module_path = get_values_from_config_option(pipeline_config, cls, "class") 192 | if cls_module_path is not None: 193 | cls_module_path = default_path_helper(cls_module_path, CLASSIFIERS_CLS_DIR) 
194 | cls_fn = load_module_function_from_path(cls_module_path, cls) 195 | if cls_fn is not None: 196 | cls_config_dict = classifier_tuples[idx] 197 | if cls_config_dict is None: 198 | list_of_classifiers.append(None) 199 | continue 200 | try: 201 | weight_path = overwrites[cls]["weight"] 202 | except (KeyError, TypeError) as e: 203 | weight_path = cls_config_dict["weight"] 204 | if weight_path is not None: 205 | weight_path = default_path_helper(weight_path, WEIGHTS_DIR) 206 | cls_class = cls_fn(timestamp, weight_path, cls) 207 | cls_class.setup_classifier(query_path, temp_path, cls_config_dict, cls) 208 | list_of_classifiers.append(cls_class) 209 | else: 210 | list_of_classifiers.append(None) 211 | else: 212 | list_of_classifiers.append(None) 213 | 214 | # Ensembles 215 | ensemble_sections, ensemble_tuples = \ 216 | config_section_to_multi_sections_helper(pipeline_config, "Ensembles", 217 | logging_level=logging_level, logger_name=logger_name) 218 | list_of_ensembles = [] 219 | if ensemble_sections is not None and ensemble_tuples is not None: 220 | for idx, ens in enumerate(ensemble_sections): 221 | if ens is not None: 222 | try: 223 | ens_module_path = overwrites[ens]["class"] 224 | except (KeyError, TypeError) as e: 225 | ens_module_path = get_values_from_config_option(pipeline_config, ens, "class") 226 | if ens_module_path is not None: 227 | ens_module_path = default_path_helper(ens_module_path, ENSEMBLES_CLS_DIR) 228 | ens_fn = load_module_function_from_path(ens_module_path, ens) 229 | list_of_ensembles.append(ens_fn) 230 | else: 231 | list_of_ensembles.append(None) 232 | return classifier_sections, list_of_classifiers, ensemble_sections, list_of_ensembles, mapping_files 233 | 234 | 235 | def read_config(config_ini, io_dict=None, overwrites=None, logging_level="DEBUG", logger_name=DEFAULT_LOGGER_NAME): 236 | logging_helper("Processing config.ini", logging_level, logger_name) 237 | if not os.path.isfile(config_ini): 238 | logging_helper("Cannot find
'config.ini' at path %s." % config_ini, logging_level="ERROR", 239 | logger_name=DEFAULT_LOGGER_NAME) 240 | return None, None, None 241 | pipeline_config = configparser.ConfigParser(allow_no_value=True, interpolation=configparser.ExtendedInterpolation()) 242 | if io_dict is not None and isinstance(io_dict, dict): 243 | pipeline_config.read_dict(io_dict) 244 | pipeline_config.read(config_ini) 245 | if overwrites is not None and isinstance(overwrites, dict): 246 | pipeline_config.read_dict(overwrites) 247 | 248 | mapping_dict = {} 249 | mappings_options = get_options_from_config_section(pipeline_config, "Mapping") 250 | if mappings_options is None: 251 | logging_helper("No [Mapping] section in config.ini", "ERROR", logger_name) 252 | raise SystemError 253 | for map_option in mappings_options: 254 | map_value = get_values_from_config_option(pipeline_config, "Mapping", map_option) 255 | mapping_dict.setdefault(map_option, map_value) 256 | 257 | classifier_dict = {} 258 | classifier_sections = config_section_to_multi_sections_helper(pipeline_config, "Classifiers") 259 | if classifier_sections[0] is None: 260 | logging_helper("No [Classifiers] section in config.ini", "ERROR", logger_name) 261 | raise SystemError 262 | num_of_classifiers = len(classifier_sections[0]) 263 | for idx in range(num_of_classifiers): 264 | classifier_dict.setdefault(classifier_sections[0][idx], classifier_sections[1][idx]) 265 | 266 | ensemble_dict = {} 267 | ensemble_sections = config_section_to_multi_sections_helper(pipeline_config, "Ensembles") 268 | if ensemble_sections[0] is None: 269 | logging_helper("No [Ensembles] section in config.ini", "ERROR", logger_name) 270 | raise SystemError 271 | num_of_ensembles = len(ensemble_sections[0]) 272 | for idx in range(num_of_ensembles): 273 | ensemble_dict.setdefault(ensemble_sections[0][idx], ensemble_sections[1][idx]) 274 | 275 | return mapping_dict, classifier_dict, ensemble_dict 276 | 277 |
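The layering both readers above rely on (`read_dict` defaults first, the config file second, `read_dict` overwrites last, so later sources win) can be sketched in isolation. File contents are simulated with `read_string`, and all option names here are illustrative, not E2P2's actual config keys.

```python
import configparser

io_defaults = {'IO': {'query': 'default.fasta', 'out': '/tmp/out'}}
overwrites = {'IO': {'query': 'user.fasta'}}

cfg = configparser.ConfigParser(allow_no_value=True,
                                interpolation=configparser.ExtendedInterpolation())
cfg.read_dict(io_defaults)                   # built-in defaults load first
cfg.read_string('[IO]\nout = /data/run1\n')  # stands in for cfg.read(config_ini)
cfg.read_dict(overwrites)                    # command-line overwrites apply last

print(cfg.get('IO', 'query'))  # user.fasta (overwrite beat the default)
print(cfg.get('IO', 'out'))    # /data/run1 (file beat the default)
```

Because `ConfigParser` simply updates existing options on each read, the precedence order falls out of the call order with no extra merge logic.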
-------------------------------------------------------------------------------- /src/lib/ensemble.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from src.definitions import DEFAULT_LOGGER_LEVEL, DEFAULT_LOGGER_NAME 4 | from src.lib.classifier import Classifier, FunctionClass 5 | from src.lib.process import logging_helper 6 | 7 | 8 | class Ensemble(object): 9 | """Object to ensemble classifier results 10 | """ 11 | def __init__(self, list_of_classifiers, time_stamp, name="", threshold=float(0), logging_level=DEFAULT_LOGGER_LEVEL, 12 | logger_name=DEFAULT_LOGGER_NAME): 13 | """Initialize the ensemble and validate the classifiers and their results. 14 | Args: 15 | list_of_classifiers: A list of Classifier objects 16 | time_stamp: time stamp 17 | name: name of the ensemble; threshold: weight cutoff used by voting 18 | logging_level: The logging level set for this ensemble 19 | logger_name: The name of the logger for this ensemble 20 | Raises: SystemError 21 | Returns: 22 | """ 23 | self.list_of_classifiers = list_of_classifiers 24 | for idx, classifier in enumerate(list_of_classifiers): 25 | if not isinstance(classifier, Classifier): 26 | logging_helper("Encountered a non-Classifier object", logging_level="ERROR", 27 | logger_name=logger_name) 28 | raise SystemError 29 | if len(classifier.res) > 0: 30 | for query in classifier.res: 31 | for res_function_class in classifier.res[query]: 32 | if not isinstance(res_function_class, FunctionClass): 33 | logging_helper("Query \"" + str(query) + "\" results include a non-FunctionClass object", 34 | logging_level="ERROR", logger_name=logger_name) 35 | raise SystemError 36 | self.name = name 37 | self.threshold = threshold 38 | self.prediction = Classifier(time_stamp, None, name, logging_level, logger_name) 39 | 40 | def __repr__(self): 41 | return f'Ensemble(\'{self.name}\', {self.threshold})' 42 | 43 | @staticmethod 44 | def weighting(list_of_classifiers, weighted_res=None): 45 | """Placeholder function that
retrieves and filters classifier results based on weights. 46 | Args: 47 | list_of_classifiers: A list of Classifier objects to be weighted 48 | weighted_res: A previously available result dictionary 49 | Raises: KeyError 50 | Returns: 51 | """ 52 | if weighted_res is None: 53 | weighted_res = {} 54 | for classifier in list_of_classifiers: 55 | for query in classifier.res: 56 | try: 57 | weighted_res[query] += list(classifier.res[query]) 58 | except KeyError: 59 | weighted_res.setdefault(query, list(classifier.res[query])) 60 | return weighted_res 61 | 62 | @staticmethod 63 | def voting(voted_res, threshold=float(0)): 64 | """Placeholder function that performs voting on classifier results based on weights. 65 | Args: 66 | voted_res: A dictionary of results to be voted 67 | threshold: The threshold for the voting process 68 | Raises: 69 | Returns: 70 | """ 71 | if voted_res is None: 72 | voted_res = {} 73 | for query in voted_res: 74 | voted_function_classes = [] 75 | for function_class in voted_res[query]: 76 | if function_class.ge_threshold(threshold=threshold, attr="weight"): 77 | voted_function_classes.append(function_class) 78 | voted_res[query] = voted_function_classes 79 | return voted_res 80 | 81 | @staticmethod 82 | def ensemble(ensemble_res, queries=None): 83 | """Function that collapses the voted results to one FunctionClass per predicted function for each query. 84 | Args: 85 | ensemble_res: A dictionary of results to be ensembled.
86 | queries: List of input queries 87 | Raises: 88 | Returns: 89 | """ 90 | if ensemble_res is None: 91 | ensemble_res = {} 92 | for query in ensemble_res: 93 | res_of_query = ensemble_res[query] 94 | ensemble_of_query = [] 95 | for function_name in sorted({fc.name for fc in res_of_query}): 96 | # final result: here we randomly pick one candidate per function name 97 | function_class_of_ensemble = \ 98 | random.choice(FunctionClass.get_function_classes_by_vals(res_of_query, function_name)) 99 | ensemble_of_query.append(function_class_of_ensemble) 100 | # assign the picked results; appending to the existing list would 101 | # duplicate every candidate in the final prediction 102 | ensemble_res[query] = ensemble_of_query 103 | if queries is not None: 104 | for prot in queries: 105 | if prot not in ensemble_res: 106 | ensemble_res.setdefault(prot, []) 107 | return ensemble_res 108 | 109 | def run(self, threshold=None, previous_res=None, queries=None): 110 | """Function to retrieve the prediction from this ensemble class. 111 | Args: 112 | threshold: threshold for voting 113 | previous_res: a dictionary that contains previously run results 114 | queries: List of input proteins 115 | Raises: KeyError 116 | Returns: 117 | """ 118 | if threshold is None: 119 | threshold = self.threshold 120 | weighted_res = self.weighting(self.list_of_classifiers, previous_res) 121 | voted_res = self.voting(weighted_res, threshold) 122 | ensemble_res = self.ensemble(voted_res, queries) 123 | 124 | self.prediction.res = ensemble_res 125 | 126 | @staticmethod 127 | def add_arguments(argument_parser): 128 | argument_parser.add_argument('Ensemble') 129 | 130 | @staticmethod 131 | def config_overwrites(args, overwrites=None): 132 | pass 133 | 134 | 135 | def run_all_ensembles(list_of_ensemble_names, list_of_ensemble_cls, queries=None, logger_name=DEFAULT_LOGGER_NAME): 136 | ensembles_ran = [] 137 | skipped_ensembles = [] 138 | for idx, ensemble_cls in enumerate(list_of_ensemble_cls): 139 | try: 140 | if isinstance(ensemble_cls, Ensemble): 141 |
logging_helper("Performing Ensemble: %s." % list_of_ensemble_names[idx], logging_level="INFO", 142 | logger_name=logger_name) 143 | ensemble_cls.run(queries=queries) 144 | ensembles_ran.append(ensemble_cls) 145 | else: 146 | skipped_ensembles.append(list_of_ensemble_names[idx]) 147 | except TypeError: 148 | skipped_ensembles.append(list_of_ensemble_names[idx]) 149 | return ensembles_ran, skipped_ensembles 150 | 151 | 152 | -------------------------------------------------------------------------------- /src/lib/function_class.py: -------------------------------------------------------------------------------- 1 | from functools import total_ordering 2 | 3 | _available_class_score_attr = ['weight', 'score'] 4 | 5 | 6 | @total_ordering 7 | class FunctionClass(object): 8 | """Object for function classes 9 | """ 10 | def __init__(self, name, score=float(0), weight=float(0)): 11 | self.name = name 12 | try: 13 | self.score = float(score) 14 | except (TypeError, ValueError): 15 | self.score = float(0) 16 | try: 17 | self.weight = float(weight) 18 | except (TypeError, ValueError): 19 | self.weight = float(0) 20 | 21 | def __repr__(self): 22 | return f'FunctionClass(\'{self.name}\', {self.score}, {self.weight})' 23 | 24 | def retrieve_weight(self, weight_dict): 25 | """Set the weight for this function class 26 | Args: 27 | weight_dict: Dictionary containing weights for function classes 28 | Raises: KeyError 29 | Returns: 30 | """ 31 | try: 32 | self.weight = weight_dict[self.name] 33 | except KeyError: 34 | self.weight = float(0) 35 | 36 | def __eq__(self, function_class): 37 | """Test if the score of this class is equal to that of another function class 38 | Args: 39 | function_class: FunctionClass 40 | Raises: NotImplementedError 41 | Returns: 42 | boolean value 43 | """ 44 | if isinstance(function_class, FunctionClass): 45 | return self.score == function_class.score 46 | else: 47 | raise NotImplementedError 48 | 49 | def __lt__(self, function_class): 50 | """Test if the score of this class is less than that of another function
class 51 | Args: 52 | function_class: FunctionClass 53 | Raises: NotImplementedError 54 | Returns: 55 | boolean value 56 | """ 57 | if isinstance(function_class, FunctionClass): 58 | return self.score < function_class.score 59 | else: 60 | raise NotImplementedError 61 | 62 | def gt_threshold(self, threshold, attr='score'): 63 | """Test if attribute of function class is greater than a threshold value 64 | Args: 65 | threshold: Threshold for comparison 66 | attr: Attribute to compare 67 | Raises: NotImplementedError 68 | Returns: 69 | boolean value 70 | """ 71 | if attr not in _available_class_score_attr: 72 | attr = 'score' 73 | return getattr(self, attr) > threshold 74 | 75 | def ge_threshold(self, threshold, attr='score'): 76 | """Test if attribute of function class is greater than or equal to a threshold value 77 | Args: 78 | threshold: Threshold for comparison 79 | attr: Attribute to compare 80 | Raises: NotImplementedError 81 | Returns: 82 | boolean value 83 | """ 84 | if attr not in _available_class_score_attr: 85 | attr = 'score' 86 | return getattr(self, attr) >= threshold 87 | 88 | def eq_threshold(self, threshold, attr='score'): 89 | """Test if attribute of function class is equal to a threshold value 90 | Args: 91 | threshold: Threshold for comparison 92 | attr: Attribute to compare 93 | Raises: NotImplementedError 94 | Returns: 95 | boolean value 96 | """ 97 | if attr not in _available_class_score_attr: 98 | attr = 'score' 99 | return getattr(self, attr) == threshold 100 | 101 | def ne_threshold(self, threshold, attr='score'): 102 | """Test if attribute of function class is not equal to a threshold value 103 | Args: 104 | threshold: Threshold for comparison 105 | attr: Attribute to compare 106 | Raises: NotImplementedError 107 | Returns: 108 | boolean value 109 | """ 110 | if attr not in _available_class_score_attr: 111 | attr = 'score' 112 | return getattr(self, attr) != threshold 113 | 114 | def lt_threshold(self, threshold, attr='score'): 115 | 
"""Test if attribute of function class is lesser than a threshold value 116 | Args: 117 | threshold: Threshold for comparison 118 | attr: Attribute to compare 119 | Raises: NotImplementedError 120 | Returns: 121 | boolean value 122 | """ 123 | if attr not in _available_class_score_attr: 124 | attr = 'score' 125 | return getattr(self, attr) < threshold 126 | 127 | def le_threshold(self, threshold, attr='score'): 128 | """Test if attribute of function class is lesser than or equal to a threshold value 129 | Args: 130 | threshold: Threshold for comparison 131 | attr: Attribute to compare 132 | Raises: NotImplementedError 133 | Returns: 134 | boolean value 135 | """ 136 | if attr not in _available_class_score_attr: 137 | attr = 'score' 138 | return getattr(self, attr) <= threshold 139 | 140 | @staticmethod 141 | def max_weight(list_of_function_classes): 142 | """Retrieve class with the maximum weight of a list of FunctionClasses 143 | Args: 144 | list_of_function_classes: List of FunctionClasses 145 | Raises: NotImplementedError 146 | Returns: 147 | First function classes that has the maximum weight 148 | """ 149 | if False not in [isinstance(function_class, FunctionClass) for function_class in list_of_function_classes]: 150 | top_weight = max(function_class.weight for function_class in list_of_function_classes) 151 | return [function_class for function_class in list_of_function_classes 152 | if function_class.weight == top_weight][0] 153 | else: 154 | raise NotImplementedError 155 | 156 | @staticmethod 157 | def min_weight(list_of_function_classes): 158 | """Retrieve class with the minimum weight of a list of FunctionClasses 159 | Args: 160 | list_of_function_classes: List of FunctionClasses 161 | Raises: NotImplementedError 162 | Returns: 163 | First function classes that has the minimum weight 164 | """ 165 | if False not in [isinstance(function_class, FunctionClass) for function_class in list_of_function_classes]: 166 | bottom_weight = min(function_class.weight for 
function_class in list_of_function_classes) 167 | return [function_class for function_class in list_of_function_classes 168 | if function_class.weight == bottom_weight][0] 169 | else: 170 | raise NotImplementedError 171 | 172 | @staticmethod 173 | def get_function_classes_by_vals(list_of_function_classes, vals, attr='name'): 174 | """Retrieve classes that have attributes of certain values from a list of FunctionClasses 175 | Args: 176 | list_of_function_classes: List of FunctionClasses 177 | vals: List of values of the attribute 178 | attr: Attribute to retrieve 179 | Raises: NotImplementedError 180 | Returns: 181 | List of function classes that have a matching attribute value 182 | """ 183 | if attr not in _available_class_score_attr: 184 | attr = 'name' 185 | if not isinstance(vals, (list, tuple, range, set, dict)): 186 | vals = [vals] 187 | else: 188 | vals = list(vals) 189 | if False not in [isinstance(function_class, FunctionClass) for function_class in list_of_function_classes]: 190 | return [function_class for function_class in list_of_function_classes 191 | if getattr(function_class, attr) in vals] 192 | else: 193 | raise NotImplementedError 194 | 195 | -------------------------------------------------------------------------------- /src/lib/process.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import logging.config 3 | import logging.handlers 4 | import multiprocessing 5 | import os 6 | import subprocess 7 | import sys 8 | import threading 9 | from argparse import ArgumentTypeError 10 | from importlib import util 11 | 12 | from src.definitions import DEFAULT_LOGGER_NAME, DEFAULT_LOGGER_LEVEL 13 | 14 | logging_levels = { 15 | "DEBUG": logging.DEBUG, 16 | "INFO": logging.INFO, 17 | "WARNING": logging.WARNING, 18 | "ERROR": logging.ERROR, 19 | "CRITICAL": logging.CRITICAL 20 | } 21 | 22 | 23 | def logging_helper(log_message, logging_level=DEFAULT_LOGGER_LEVEL, logger_name=DEFAULT_LOGGER_NAME): 24 |
"""Helper function to add log messages to logger 25 | Args: 26 | log_message: Message to be logged 27 | logging_level: The logging level set for this command 28 | logger_name: The name of the logger for this command 29 | Raises: 30 | Returns: 31 | """ 32 | found_logger = logging.getLogger(logger_name) 33 | try: 34 | found_logger.log(logging_levels[logging_level], log_message) 35 | except KeyError: 36 | found_logger.log(logging.DEBUG, log_message) 37 | 38 | 39 | class LoggerConfig(object): 40 | """Object for generating logging config 41 | """ 42 | def __init__(self, version=1): 43 | self.dictConfig = { 44 | 'version': version, 45 | 'formatters': { 46 | 'detailed': { 47 | 'class': 'logging.Formatter', 48 | 'format': '%(asctime)s %(name)-15s %(levelname)-8s %(processName)-10s %(message)s' 49 | } 50 | }, 51 | 'handlers': { 52 | }, 53 | 'loggers': { 54 | }, 55 | 'root': { 56 | 'level': 'DEBUG', 57 | 'handlers': ['console'] 58 | }, 59 | } 60 | self.dictConfig['handlers'].setdefault('console', { 61 | 'class': 'logging.StreamHandler', 62 | 'level': 'INFO' 63 | }) 64 | 65 | def add_new_logger(self, logger_name, logger_handler_filename, logger_handler_level="INFO", 66 | logger_handler_mode='w'): 67 | """Adding a new Logger to dictConfig 68 | Args: 69 | logger_name: Name of the new Logger 70 | logger_handler_filename: Path to the log 71 | logger_handler_level: Logger lever, from [DEBUG, INFO, WARNING, ERROR, CRITICAL] 72 | logger_handler_mode: IO mode for logger 73 | Raises: 74 | Returns: 75 | """ 76 | logger_levels = list(logging_levels.keys()) 77 | new_logger = { 78 | 'handlers': [logger_name] 79 | } 80 | if logger_handler_mode not in ['w', 'a', 'w+', 'a+']: 81 | logger_handler_mode = 'w' 82 | new_logger_handler = { 83 | 'class': 'logging.FileHandler', 84 | 'filename': logger_handler_filename, 85 | 'mode': logger_handler_mode, 86 | 'formatter': 'detailed', 87 | } 88 | if logger_handler_level in logger_levels: 89 | new_logger_handler.setdefault('level', logger_handler_level) 
90 | 91 | self.dictConfig['handlers'].setdefault(logger_name, new_logger_handler) 92 | self.dictConfig['loggers'].setdefault(logger_name, new_logger) 93 | 94 | 95 | class RunProcess(object): 96 | """Object for running processes 97 | """ 98 | def __init__(self): 99 | self.mp_queue = multiprocessing.Queue() 100 | self.run_results = [] 101 | 102 | @staticmethod 103 | def _logger_thread(mpq): 104 | """Separate thread for logging 105 | Args: 106 | mpq: A multiprocessing Queue 107 | Raises: 108 | Returns: 109 | """ 110 | while True: 111 | record = mpq.get() 112 | if record is None: 113 | break 114 | logger = logging.getLogger(record.name) 115 | logger.handle(record) 116 | 117 | def _worker_process(self, mpq, logging_level, logger_name, cmd, process_name="Process"): 118 | """Worker process to run a command, puts process result in a queue 119 | Args: 120 | mpq: A multiprocessing Queue 121 | logging_level: The logging level set for this command 122 | logger_name: The name of the logger for this command 123 | cmd: A list for the command 124 | process_name: Name of the process 125 | Raises: 126 | Returns: 127 | """ 128 | qh = logging.handlers.QueueHandler(mpq) 129 | root = logging.getLogger() 130 | root.setLevel(logging.DEBUG) 131 | root.addHandler(qh) 132 | try: 133 | command_list = [] 134 | # Clean up command list 135 | for i in cmd: 136 | command_list += i.split() 137 | call_output = subprocess.check_output(command_list, stderr=subprocess.STDOUT) 138 | except subprocess.CalledProcessError as exc: 139 | self.mp_queue.put( 140 | (' '.join(cmd), logging_level, logger_name, exc.returncode, str(exc.output.strip(), "utf-8"), 141 | process_name)) 142 | except FileNotFoundError as e: 143 | self.mp_queue.put( 144 | (' '.join(cmd), logging_level, logger_name, e.errno, e.strerror, process_name)) 145 | else: 146 | self.mp_queue.put( 147 | (' '.join(cmd), logging_level, logger_name, 0, str(call_output.strip(), "utf-8"), process_name)) 148 | 149 | def add_process_to_workers(self, 
workers, mpq, logging_level, logger_name, cmd, process_name="Process"): 150 | """Add a worker process to the workers 151 | Args: 152 | workers: List of workers 153 | mpq: A multiprocessing Queue 154 | logging_level: The logging level set for this command 155 | logger_name: The name of the logger for this command 156 | cmd: A string list of the command 157 | process_name: Name of the process 158 | Raises: 159 | Returns: 160 | """ 161 | wp = multiprocessing.Process(target=self._worker_process, 162 | args=(mpq, logging_level, logger_name, cmd, process_name,)) 163 | workers.append((wp, ' '.join(cmd), logging_level, logger_name, process_name)) 164 | 165 | def run_all_worker_processes(self, workers, mpq): 166 | """Run all queued worker processes and log their output 167 | Args: 168 | workers: List of workers 169 | mpq: A multiprocessing Queue 170 | Raises: 171 | Returns: 172 | """ 173 | for worker in workers: 174 | worker[0].daemon = True 175 | worker[0].start() 176 | logging_helper("Starting Process \"" + worker[1] + "\"", logging_level=worker[2], 177 | logger_name=worker[3]) 178 | lp = threading.Thread(target=self._logger_thread, args=(mpq,)) 179 | lp.start() 180 | # Main process waits for workers to terminate 181 | try: 182 | for worker in workers: 183 | worker[0].join() 184 | # Finish logging 185 | mpq.put(None) 186 | lp.join() 187 | except KeyboardInterrupt: 188 | for worker in workers: 189 | worker[0].terminate() 190 | mpq.put(None) 191 | lp.join() 192 | logger = logging.getLogger() 193 | logger.log(logging.ERROR, "Program interrupted, exiting...") 194 | sys.exit(1) 195 | # Retrieve stdout of all queued workers 196 | for i in range(len(workers)): 197 | cmd, logging_level, logger_name, return_code, output, process_name = (self.mp_queue.get()) 198 | self.run_results.append((cmd, return_code, output)) 199 | if return_code != 0: 200 | for index, line in enumerate(output.split('\n')): 201 | logging_helper("Process Error \"" + cmd + "\", stdout[" + str(index) + "]: " + line, 202 |
logging_level="ERROR", logger_name=logger_name) 203 | else: 204 | for index, line in enumerate(output.split('\n')): 205 | logging_helper(process_name + " Ended, stdout[" + str(index) + "]: " + line, 206 | logging_level=logging_level, logger_name=logger_name) 207 | 208 | 209 | def load_module_function_from_path(module_path, function_name, module_name=None): 210 | """Load in function from a module file path 211 | Args: 212 | module_path: File path to module's '.py' file 213 | function_name: The name of the function to load in 214 | module_name: A name for the module 215 | Raises: 216 | Returns: 217 | fn: function to call 218 | """ 219 | if module_name is None: 220 | module_name = os.path.basename(os.path.dirname(module_path)) 221 | spec = util.spec_from_file_location(module_name, module_path) 222 | mod = util.module_from_spec(spec) 223 | sys.modules[module_name] = mod 224 | spec.loader.exec_module(mod) 225 | fn = mod and getattr(mod, function_name, None) 226 | return fn 227 | 228 | 229 | class PathType(object): 230 | def __init__(self, file_type='file'): 231 | assert file_type in ('file', 'dir', 'have_parent', 'blast_db', 'blast_bin', 'priam_profiles', None) \ 232 | or hasattr(file_type, '__call__') 233 | self._type = file_type 234 | 235 | def __call__(self, string): 236 | path = os.path.realpath(string) 237 | if self._type is None: 238 | pass 239 | elif self._type == 'file': 240 | if not os.path.isfile(path): 241 | raise ArgumentTypeError("Path is not a file: '%s'" % string) 242 | elif self._type == 'dir': 243 | if not os.path.isdir(path): 244 | raise ArgumentTypeError("Path is not a directory: '%s'" % string) 245 | elif self._type == 'have_parent': 246 | if not os.path.isdir(os.path.dirname(path)): 247 | raise ArgumentTypeError("Parent of path is not a directory: '%s'" % string) 248 | elif self._type == 'blast_db': 249 | phr_path = path + '.phr' 250 | pin_path = path + '.pin' 251 | psq_path = path + '.psq' 252 | if not os.path.isfile(phr_path) or not 
os.path.isfile(pin_path) or not os.path.isfile(psq_path): 253 | raise ArgumentTypeError("Cannot find blast database at path: '%s'" % string) 254 | elif self._type == 'blast_bin': 255 | bin_files = os.listdir(path) 256 | if not os.path.isdir(path) or 'makeprofiledb' not in bin_files or 'rpsblast' not in bin_files \ 257 | or 'rpstblastn' not in bin_files: 258 | raise ArgumentTypeError("Path not a valid Blast+ bin folder: '%s'" % string) 259 | elif self._type == 'priam_profiles': 260 | bin_files = os.listdir(path) 261 | if not os.path.isdir(path) or 'PROFILES' not in bin_files or 'annotation_rules.xml' not in bin_files \ 262 | or 'genome_rules.xml' not in bin_files: 263 | raise ArgumentTypeError("Path not a valid Priam profiles folder: '%s'" % string) 264 | else: 265 | profiles_path = os.path.join(path, 'PROFILES') 266 | annotation_rules = os.path.join(path, 'annotation_rules.xml') 267 | genome_rules = os.path.join(path, 'genome_rules.xml') 268 | if not os.path.isdir(profiles_path) or not os.path.isfile(annotation_rules) \ 269 | or not os.path.isfile(genome_rules): 270 | raise ArgumentTypeError("Path not a valid Priam profiles folder: '%s'" % string) 271 | else: 272 | raise ArgumentTypeError("Unknown path type for: '%s'" % string) 273 | return string 274 | 275 | -------------------------------------------------------------------------------- /src/lib/read.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import re 4 | 5 | from src.definitions import DEFAULT_LOGGER_NAME, DEFAULT_LOGGER_LEVEL, DEFAULT_PTOOLS_CHAR_LIMIT 6 | from src.lib.process import logging_helper 7 | 8 | 9 | def read_delim_itr(fp, key_idx=0, val_indices=None, delim=None, skip=None): 10 | """Iterator to read delimited files 11 | Args: 12 | fp: Opened file object 13 | key_idx: The index for the key value 14 | val_indices: The indices for the values to retrieve 15 | delim: The list of substrings for delimiters 16 | skip: The list of
substrings that will be skipped 17 | Raises: 18 | Yields: A key and its values 19 | """ 20 | if val_indices is None: 21 | val_indices = [1] 22 | if delim is None: 23 | delim = ["\t"] 24 | if skip is None: 25 | skip = ["!", "#"] 26 | delim_regex = '|'.join(map(re.escape, delim)) 27 | for line in fp: 28 | if line.startswith(tuple(skip)): 29 | continue 30 | line = line.rstrip('\n') 31 | info = re.split(delim_regex, line) 32 | try: 33 | yield info[key_idx], [i for idx, i in enumerate(info) if idx in val_indices] 34 | except IndexError: 35 | continue 36 | 37 | 38 | def read_e2p2_maps(ef_map_path, key_idx=0, val_idx=1, logging_level=DEFAULT_LOGGER_LEVEL, 39 | logger_name=DEFAULT_LOGGER_NAME): 40 | """Read in mapping files of E2P2 41 | Args: 42 | ef_map_path: Path to an E2P2 mapping file 43 | key_idx: Index of the key 44 | val_idx: Index of the value 45 | logging_level: The logging level set for read map 46 | logger_name: The name of the logger for read map 47 | Raises: 48 | Returns: 49 | map_dict: Key to value of the E2P2 map 50 | """ 51 | map_dict = {} 52 | logging_helper("Loading map: \"" + ef_map_path + "\"", logging_level=logging_level, logger_name=logger_name) 53 | with open(ef_map_path, 'r') as fp: 54 | for info in read_delim_itr(fp, key_idx=key_idx, val_indices=[val_idx]): 55 | if info: 56 | key = info[0] 57 | val = info[1] 58 | try: 59 | map_dict[key] += val 60 | except KeyError: 61 | map_dict.setdefault(key, val) 62 | return map_dict 63 | 64 | 65 | def read_groups_by_start_itr(fp, start=None, skip=None): 66 | """Iterator to group file strings by their starting string 67 | Args: 68 | fp: Opened file object 69 | start: The list of substrings that indicate new groups 70 | skip: The list of substrings that will be skipped 71 | Raises: 72 | Yields: Group of lines based on 'start' 73 | """ 74 | if start is None: 75 | start = ['>'] 76 | if skip is None: 77 | skip = ["!", "#"] 78 | header, group = None, [] 79 | for line in fp: 80 | line = line.rstrip('\n') 81 | if
line.startswith(tuple(skip)): 82 | continue 83 | elif line.startswith(tuple(start)): 84 | if header: 85 | yield header, group 86 | header, group = line, [] 87 | else: 88 | group.append(line) 89 | if header: 90 | yield header, group 91 | 92 | 93 | def read_fasta(fp): 94 | """Iterator for reading fasta files. Source: Biopython 95 | Args: 96 | fp: file pointer to fasta file 97 | Raises: 98 | Yields: 99 | header: fasta header 100 | seq: fasta sequence 101 | """ 102 | header, seq = None, [] 103 | for line in fp: 104 | line = line.rstrip() 105 | if line.startswith('>'): 106 | if header: 107 | yield header, '\n'.join(seq) 108 | header, seq = line, [] 109 | else: 110 | seq.append(line) 111 | if header: 112 | yield header, '\n'.join(seq) 113 | 114 | 115 | def check_fasta_header(fasta_path, logger_name=DEFAULT_LOGGER_NAME): 116 | """Warn if a fasta sequence ID's length exceeds the Pathway Tools character limit 117 | Args: 118 | fasta_path: The path to fasta input 119 | logger_name: The name of the logger for checking fasta header 120 | Raises: IndexError, KeyError 121 | Returns: 122 | """ 123 | existing_headers = [] 124 | with open(fasta_path, 'r') as fp: 125 | for header, seq in read_fasta(fp): 126 | try: 127 | header_info = re.split(r'[\s|]+', header) 128 | header_id = header_info[0].replace('>', '', 1) 129 | if len(header_id) > DEFAULT_PTOOLS_CHAR_LIMIT: 130 | logging_helper("ID exceeds Pathway-Tools character limit: " + header_id, 131 | logging_level="WARNING", logger_name=logger_name) 132 | if header_id in existing_headers: 133 | logging_helper("Duplicate IDs: " + header_id, 134 | logging_level="ERROR", logger_name=logger_name) 135 | break 136 | else: 137 | existing_headers.append(header_id) 138 | except (IndexError, KeyError): 139 | logging_helper("Cannot Parse Header: " + header, 140 | logging_level="ERROR", logger_name=logger_name) 141 | continue 142 | 143 | 144 | def remove_splice_variants_from_fasta(fasta_path, output_dir, prot_gene_map, logger_name=DEFAULT_LOGGER_NAME):
145 | """Remove splice variants from input fasta 146 | Args: 147 | fasta_path: Path to fasta input 148 | output_dir: Path to splice variants removed fasta output 149 | prot_gene_map: Path to Mapping file of protein IDs to gene IDs. 150 | logger_name: The name of the logger for remove splice variants 151 | Raises: IndexError, KeyError 152 | Returns: 153 | """ 154 | logger = logging.getLogger(logger_name) 155 | fasta_dict = {} 156 | # The input's header should already be formatted 157 | file_name, file_extension = os.path.splitext(os.path.basename(fasta_path)) 158 | prot_gene_map_dict = read_e2p2_maps(prot_gene_map, 0, 1) 159 | output_path = os.path.join(output_dir, file_name + '.rmspl' + file_extension) 160 | if os.path.isfile(output_path): 161 | logger.log(logging.WARNING, "Output path %s exists, will overwrite..." % output_path) 162 | with open(fasta_path, 'r') as fp: 163 | for header, seq in read_fasta(fp): 164 | try: 165 | header_info = re.split('[\s|]+', header) 166 | header_id = header_info[0].replace('>', '', 1) 167 | try: 168 | locus = prot_gene_map_dict[header_id][0] 169 | except KeyError: 170 | locus = header_id 171 | fasta_tuple = fasta_dict.setdefault(locus, (header, seq)) 172 | if len(seq) > len(fasta_tuple[1]): 173 | fasta_dict[locus] = (header, seq) 174 | except (IndexError, KeyError): 175 | logging_helper("Cannot Parse Header: " + header, 176 | logging_level="WARNING", logger_name=logger_name) 177 | continue 178 | logging_helper("Removing splice variants from: \"" + fasta_path + "\"", 179 | logging_level="INFO", logger_name=logger_name) 180 | with open(output_path, 'w') as op: 181 | for locus in sorted(fasta_dict.keys()): 182 | try: 183 | header = fasta_dict[locus][0] 184 | seq = fasta_dict[locus][1] 185 | op.write(header + '\n' + seq + '\n') 186 | except (IndexError, KeyError): 187 | continue 188 | return output_path 189 | 190 | 191 | def get_all_seq_ids_from_fasta(fasta_path, logger_name=DEFAULT_LOGGER_NAME): 192 | """Get all sequence IDs from a 
fasta file 193 | Args: 194 | fasta_path: Path to fasta input 195 | logger_name: The name of the logger 196 | Raises: IndexError, KeyError 197 | Returns: List of sequence IDs 198 | """ 199 | seq_ids = [] 200 | with open(fasta_path, 'r') as fp: 201 | for header, seq in read_fasta(fp): 202 | try: 203 | header_info = re.split(r'[|\s]+', header) 204 | header_id = header_info[0].replace('>', '', 1) 205 | seq_ids.append(header_id) 206 | except (IndexError, KeyError): 207 | logging_helper("Cannot Parse Header: " + header, 208 | logging_level="WARNING", logger_name=logger_name) 209 | continue 210 | return seq_ids 211 | -------------------------------------------------------------------------------- /src/lib/write.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import re 3 | from datetime import datetime 4 | 5 | from src.definitions import DEFAULT_LOGGER_LEVEL, DEFAULT_LOGGER_NAME, DEFAULT_LONG_OUTPUT_SUFFIX, \ 6 | DEFAULT_PF_OUTPUT_SUFFIX, DEFAULT_ORXN_PF_OUTPUT_SUFFIX, DEFAULT_FINAL_PF_OUTPUT_SUFFIX 7 | from src.lib.classifier import Classifier, FunctionClass 8 | from src.lib.ensemble import Ensemble 9 | from src.lib.process import logging_helper 10 | from src.lib.read import read_e2p2_maps 11 | 12 | 13 | class PfFiles(object): 14 | def __init__(self, cls_to_write, input_proteins=None, logger_name=DEFAULT_LOGGER_NAME): 15 | """Initialize class 16 | Args: 17 | cls_to_write: Input can be an Ensemble class, a Classifier class, or a prediction dictionary 18 | input_proteins: List of input protein IDs 19 | logger_name: The name of the logger 20 | Raises: SystemError 21 | Returns: 22 | """ 23 | if isinstance(cls_to_write, Ensemble): 24 | self.final_prediction = cls_to_write.prediction.res 25 | elif isinstance(cls_to_write, Classifier): 26 | self.final_prediction = cls_to_write.res 27 | elif type(cls_to_write) is dict: 28 | self.final_prediction = cls_to_write 29 | else: 30 | logging_helper("PfFiles initialization failure", logging_level="ERROR", 
logger_name=logger_name) 31 | raise SystemError 32 | if input_proteins is not None: 33 | for prot in input_proteins: 34 | if prot not in self.final_prediction: 35 | self.final_prediction.setdefault(prot, []) 36 | 37 | def write_short_results(self, ensemble_name, output_path, logging_level=DEFAULT_LOGGER_LEVEL, 38 | logger_name=DEFAULT_LOGGER_NAME): 39 | """Write E2P2 short version of result to output 40 | Args: 41 | ensemble_name: Name of the ensemble method 42 | output_path: Path to output for short version of result 43 | logging_level: The logging level set for write short results 44 | logger_name: The name of the logger for write short results 45 | Raises: AttributeError, NotImplementedError 46 | Returns: 47 | """ 48 | cur_time = datetime.now() 49 | header = "# Result Generation time: %s\n# Ensemble method used: %s\n" % (cur_time, ensemble_name) 50 | with open(output_path, 'w') as op: 51 | op.write(header) 52 | try: 53 | for query in sorted(self.final_prediction.keys()): 54 | predictions = self.final_prediction[query] 55 | if len(predictions) == 0: 56 | op.write('\t'.join([query, 'NA']) + '\n') 57 | else: 58 | predicted_classes = list(set([fc.name for fc in predictions])) 59 | op.write('\t'.join([query, '|'.join(predicted_classes)]) + '\n') 60 | except (AttributeError, NotImplementedError) as e: 61 | logging_helper( 62 | "Error when writing results: " + str(e), logging_level="ERROR", logger_name=logger_name) 63 | logging_helper( 64 | "Results written to: \"" + output_path + "\"", logging_level=logging_level, logger_name=logger_name) 65 | 66 | def write_long_results(self, list_of_classifiers, ensemble_name, output_path, 67 | logging_level=DEFAULT_LOGGER_LEVEL, logger_name=DEFAULT_LOGGER_NAME): 68 | """Write E2P2 long version of result to output 69 | Args: 70 | list_of_classifiers: List of classifiers used in ensemble 71 | ensemble_name: Name of the ensemble method 72 | output_path: Path to output for long version of result 73 | logging_level: The logging level 
set for write long results 74 | logger_name: The name of the logger for write long results 75 | Raises: AttributeError, NotImplementedError 76 | Returns: 77 | """ 78 | cur_time = datetime.now() 79 | header = "# Result Generation time: %s\n# Ensemble method used: %s\n" % (cur_time, ensemble_name) 80 | with open(output_path, 'w') as op: 81 | op.write(header) 82 | try: 83 | for query in sorted(self.final_prediction.keys()): 84 | predictions = self.final_prediction[query] 85 | if len(predictions) == 0: 86 | op.write('\t'.join(['>' + query, 'NA']) + '\n') 87 | else: 88 | predicted_classes = list(set([fc.name for fc in predictions])) 89 | op.write('\t'.join(['>' + query, '|'.join(predicted_classes)]) + '\n') 90 | for classifier in list_of_classifiers: 91 | if isinstance(classifier, Classifier): 92 | classifier_name = classifier.name 93 | classifier_res = classifier.res 94 | output_list = [] 95 | try: 96 | classifier_classes = classifier_res[query] 97 | for function_cls in classifier_classes: 98 | if isinstance(function_cls, FunctionClass): 99 | output_list.append('|'.join( 100 | [function_cls.name, str(function_cls.weight), str(function_cls.score)])) 101 | except KeyError: 102 | continue 103 | op.write('\t'.join([classifier_name + ':'] + output_list) + '\n') 104 | except (AttributeError, NotImplementedError) as e: 105 | logging_helper( 106 | "Error when writing results: " + str(e), logging_level="ERROR", logger_name=logger_name) 107 | logging_helper( 108 | "Results written to: \"" + output_path + "\"", logging_level=logging_level, logger_name=logger_name) 109 | 110 | def write_pf_results(self, ef_map_path, output_path, logging_level=DEFAULT_LOGGER_LEVEL, 111 | logger_name=DEFAULT_LOGGER_NAME): 112 | """Write pf file of result 113 | Args: 114 | ef_map_path: Path to EF to EC/RXN mapping file 115 | output_path: Path to the pf output file 116 | logging_level: The logging level set for write pf results 117 | logger_name: The name of the logger for write pf 
results 118 | Raises: AttributeError, KeyError 119 | Returns: 120 | """ 121 | ef_map_dict = read_e2p2_maps(ef_map_path, 0, 1) 122 | with open(output_path, 'w') as op: 123 | try: 124 | for query in sorted(self.final_prediction.keys()): 125 | predictions = self.final_prediction[query] 126 | if len(predictions) == 0: 127 | continue 128 | else: 129 | op.write("ID\t%s\nNAME\t%s\nPRODUCT-TYPE\tP\n" % (query, query)) 130 | predicted_classes = list(set([fc.name for fc in predictions])) 131 | for ef_class in sorted(predicted_classes): 132 | try: 133 | mapped_ids = ["METACYC\t" + i if "RXN" in i else "EC\t" + i for i in 134 | ef_map_dict[ef_class]] 135 | op.write('\n'.join(mapped_ids) + '\n') 136 | except KeyError: 137 | logging_helper( 138 | "EF Class: \"" + ef_class + "\" assigned to \"" + query + "\" not found in map.", 139 | logging_level="ERROR", logger_name=logger_name) 140 | op.write("//\n") 141 | except (AttributeError, NotImplementedError) as e: 142 | logging_helper( 143 | "Error when writing results: " + str(e), logging_level="ERROR", logger_name=logger_name) 144 | logging_helper( 145 | "Results written to: \"" + output_path + "\"", logging_level=logging_level, logger_name=logger_name) 146 | 147 | @staticmethod 148 | def map_ec_to_rxns(list_of_ecs, ec_superseded_dict, metacyc_rxn_ec_dict, official_ec_metacyc_rxn_dict, 149 | to_remove_metabolism_list): 150 | """Map EC numbers to official and unofficial MetaCyc RXN IDs 151 | Args: 152 | list_of_ecs: List of EC numbers to map 153 | ec_superseded_dict: EC superseded mapping 154 | metacyc_rxn_ec_dict: MetaCyc RXN to EC mapping (EC -> RXN) 155 | official_ec_metacyc_rxn_dict: Official EC to MetaCyc RXN mapping 156 | to_remove_metabolism_list: List of non-small-molecule-metabolism reactions to exclude 157 | Raises: 158 | Returns: 159 | List of official MetaCyc RXN IDs 160 | List of unofficial MetaCyc RXN IDs 161 | """ 162 | metacyc_official = set() 163 | metacyc_unofficial = set() 164 | ec_ids_updated = set() 165 | for ec in sorted(list_of_ecs): 166 | try: 167 | 
ec_superseded_ids = [e.replace('EC-', '') for e in ec_superseded_dict["EC-" + ec]] 168 | ec_ids_updated.update(ec_superseded_ids) 169 | except KeyError: 170 | ec_ids_updated.add(ec) 171 | for ec_updated in sorted(ec_ids_updated): 172 | if ec_updated in official_ec_metacyc_rxn_dict: 173 | metacyc_official.update(official_ec_metacyc_rxn_dict[ec_updated]) 174 | elif ec_updated in metacyc_rxn_ec_dict: 175 | metacyc_unofficial.update(metacyc_rxn_ec_dict[ec_updated]) 176 | return \ 177 | sorted([rxn for rxn in metacyc_official 178 | if rxn not in to_remove_metabolism_list]), \ 179 | sorted([rxn for rxn in metacyc_unofficial 180 | if rxn not in to_remove_metabolism_list]) 181 | 182 | def write_orxn_results(self, ef_map_path, ec_superseded_path, metacyc_rxn_ec_path, official_ec_metacyc_rxn_path, 183 | to_remove_metabolism_path, output_path, prot_gene_map_path=None, 184 | logging_level=DEFAULT_LOGGER_LEVEL, logger_name=DEFAULT_LOGGER_NAME): 185 | """Write orxn pf file of result 186 | Args: 187 | ef_map_path: Path to EF to EC/RXN mapping file 188 | ec_superseded_path: Path to EC superseded mapping 189 | metacyc_rxn_ec_path: Path to MetaCyc RXN to EC mapping 190 | official_ec_metacyc_rxn_path: Path to official EC to MetaCyc RXN mapping 191 | to_remove_metabolism_path: Path to list of non-small molecule metabolisms 192 | output_path: Path to the orxn pf output file 193 | prot_gene_map_path: Path to protein to gene ID mapping 194 | logging_level: The logging level set for write orxn results 195 | logger_name: The name of the logger for write orxn results 196 | Raises: AttributeError, KeyError 197 | Returns: 198 | """ 199 | ef_map_dict = read_e2p2_maps(ef_map_path, 0, 1) 200 | ec_superseded_dict = read_e2p2_maps(ec_superseded_path, 2, 0) 201 | metacyc_rxn_ec_dict = read_e2p2_maps(metacyc_rxn_ec_path, 1, 0) 202 | official_ec_metacyc_rxn_dict = read_e2p2_maps(official_ec_metacyc_rxn_path, 0, 1) 203 | to_remove_metabolism_list = 
sorted(read_e2p2_maps(to_remove_metabolism_path, 0, 0).keys()) 204 | if prot_gene_map_path is not None: 205 | prot_gene_map_dict = read_e2p2_maps(prot_gene_map_path, 0, 1) 206 | else: 207 | prot_gene_map_dict = {} 208 | with open(output_path, 'w') as op: 209 | try: 210 | for query in sorted(self.final_prediction.keys()): 211 | metacyc_ids = set() 212 | metacyc_unofficial = set() 213 | predictions = self.final_prediction[query] 214 | if len(predictions) == 0: 215 | continue 216 | else: 217 | predicted_classes = list(set([fc.name for fc in predictions])) 218 | for ef_class in sorted(predicted_classes): 219 | try: 220 | metacyc_ids.update([i for i in ef_map_dict[ef_class] if "RXN" in i and 221 | i not in to_remove_metabolism_list]) 222 | ec_ids = [i for i in ef_map_dict[ef_class] if "RXN" not in i and 223 | i not in to_remove_metabolism_list] 224 | metacyc_from_ecs = self.map_ec_to_rxns( 225 | ec_ids, ec_superseded_dict, metacyc_rxn_ec_dict, official_ec_metacyc_rxn_dict, 226 | to_remove_metabolism_list) 227 | metacyc_ids.update(metacyc_from_ecs[0]) 228 | metacyc_unofficial.update(metacyc_from_ecs[1]) 229 | except KeyError: 230 | logging_helper( 231 | "EF Class: \"" + ef_class + "\" assigned to \"" + query + "\" not found in map.", 232 | logging_level="ERROR", logger_name=logger_name) 233 | if len(metacyc_ids) > 0 or len(metacyc_unofficial) > 0: 234 | try: 235 | gene_id = prot_gene_map_dict[query][0] 236 | op.write("ID\t%s\nNAME\t%s\nPRODUCT-ACCESSION\t%s\nPRODUCT-TYPE\tP\n" % 237 | (gene_id, gene_id, query)) 238 | except KeyError: 239 | if len(prot_gene_map_dict) == 0: 240 | op.write("ID\t%s\nNAME\t%s\nPRODUCT-TYPE\tP\n" % (query, query)) 241 | else: 242 | logging_helper("No Gene found for protein: " + query, logging_level="ERROR", 243 | logger_name=logger_name) 244 | raise SystemError 245 | if len(metacyc_ids) > 0: 246 | op.write('\n'.join(['METACYC\t' + m for m in sorted(metacyc_ids)]) + '\n') 247 | if len(metacyc_unofficial) > 0: 248 | 
op.write('\n'.join(['METACYC\t' + m + '\n#unofficial' 249 | for m in sorted(metacyc_unofficial)]) + '\n') 250 | op.write("//\n") 251 | except (AttributeError, NotImplementedError) as e: 252 | logging_helper( 253 | "Error when writing results: " + str(e), logging_level="ERROR", logger_name=logger_name) 254 | logging_helper( 255 | "Results written to: \"" + output_path + "\"", logging_level=logging_level, logger_name=logger_name) 256 | 257 | 258 | def write_ensemble_outputs(ensemble_cls, all_query_ids, output_path, ef_map_path, ec_superseded_path, 259 | metacyc_rxn_ec_path, official_ec_metacyc_rxn_path, to_remove_metabolism_path, 260 | prot_gene_map_path=None, logging_level=DEFAULT_LOGGER_LEVEL, 261 | logger_name=DEFAULT_LOGGER_NAME): 262 | """Write all ensemble results 263 | Args: 264 | ensemble_cls: class of the ensemble that was used 265 | all_query_ids: list of all query IDs 266 | output_path: output path to the short output file 267 | ef_map_path: path to efclasses.mapping 268 | ec_superseded_path: path to EC-superseded.mapping 269 | metacyc_rxn_ec_path: path to metacyc-RXN-EC.mapping 270 | official_ec_metacyc_rxn_path: path to official-EC-metacyc-RXN.mapping 271 | to_remove_metabolism_path: path to to-remove-non-small-molecule-metabolism.mapping 272 | prot_gene_map_path: Path to protein to gene ID mapping 273 | logging_level: The logging level set for write orxn results 274 | logger_name: The name of the logger for write orxn results 275 | Raises: 276 | Returns: 277 | """ 278 | logging_helper("Writing outputs...", logging_level=logging_level, logger_name=logger_name) 279 | ensemble_name = re.sub(r'[^\w\-_\. 
]', '_', ensemble_cls.prediction.name) 280 | ensemble_classifiers = ensemble_cls.list_of_classifiers 281 | 282 | output_name, output_ext = os.path.splitext(output_path) 283 | 284 | ensemble_output = PfFiles(ensemble_cls, all_query_ids) 285 | short_output_path = '.'.join([output_name, ensemble_name, output_ext.lstrip(".")]) 286 | ensemble_output.write_short_results(ensemble_name, short_output_path, logging_level="INFO", logger_name=logger_name) 287 | 288 | long_output_path = '.'.join([output_name, ensemble_name, DEFAULT_LONG_OUTPUT_SUFFIX]) 289 | ensemble_output.write_long_results(ensemble_classifiers, ensemble_name, long_output_path, logging_level="INFO", 290 | logger_name=logger_name) 291 | 292 | pf_output_path = '.'.join([output_name, ensemble_name, DEFAULT_PF_OUTPUT_SUFFIX]) 293 | ensemble_output.write_pf_results(ef_map_path, pf_output_path, logging_level="INFO", logger_name=logger_name) 294 | 295 | orxn_output_path = '.'.join([output_name, ensemble_name, DEFAULT_ORXN_PF_OUTPUT_SUFFIX]) 296 | ensemble_output.write_orxn_results(ef_map_path, ec_superseded_path, metacyc_rxn_ec_path, 297 | official_ec_metacyc_rxn_path, to_remove_metabolism_path, orxn_output_path, 298 | prot_gene_map_path=None, logging_level="INFO", 299 | logger_name=logger_name) 300 | if prot_gene_map_path is not None: 301 | final_output_path = '.'.join([output_name, ensemble_name, DEFAULT_FINAL_PF_OUTPUT_SUFFIX]) 302 | ensemble_output.write_orxn_results(ef_map_path, ec_superseded_path, metacyc_rxn_ec_path, 303 | official_ec_metacyc_rxn_path, to_remove_metabolism_path, final_output_path, 304 | prot_gene_map_path=prot_gene_map_path, logging_level="INFO", 305 | logger_name=logger_name) 306 | 307 | -------------------------------------------------------------------------------- /version: -------------------------------------------------------------------------------- 1 | E2P2 5.0 2 | RPSD 5.2 3 | 4 | --------------------------------------------------------------------------------
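To make the EC handling in `src/lib/write.py` above concrete, here is a minimal standalone sketch of the resolution step performed by `PfFiles.map_ec_to_rxns`: superseded EC numbers are first replaced by their current IDs, the official EC-to-RXN mapping takes priority over the MetaCyc RXN-EC pairs, and non-small-molecule-metabolism reactions are filtered out. The dictionary shapes and sample IDs are assumptions inferred from how `read_e2p2_maps` is called, not the pipeline's actual code or data.

```python
# Standalone sketch (assumed dictionary shapes, hypothetical IDs):
#   superseded: "EC-<old>" -> ["EC-<new>", ...]
#   official / unofficial: "<ec>" -> [RXN IDs]
def map_ec_to_rxns(ecs, superseded, official, unofficial, to_remove):
    updated = set()
    for ec in ecs:
        key = "EC-" + ec
        if key in superseded:
            # Superseded EC numbers are replaced by their current IDs.
            updated.update(e.replace('EC-', '') for e in superseded[key])
        else:
            updated.add(ec)
    official_rxns, unofficial_rxns = set(), set()
    for ec in updated:
        if ec in official:
            official_rxns.update(official[ec])      # official mapping wins
        elif ec in unofficial:
            unofficial_rxns.update(unofficial[ec])  # fall back to RXN-EC pairs
    # Drop non-small-molecule-metabolism reactions from both result lists.
    keep = lambda rxns: sorted(r for r in rxns if r not in to_remove)
    return keep(official_rxns), keep(unofficial_rxns)
```

This mirrors why the orxn output distinguishes official from unofficial MetaCyc reactions: only ECs absent from the official mapping fall through to the unofficial RXN-EC pairing.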