├── .gitignore ├── LICENSE ├── README.md ├── examples ├── dip.sif ├── huri.sif ├── string.sif └── tnfa_active_genes_file.txt ├── setup.py └── src ├── __init__.py ├── constants.py ├── core ├── __init__.py ├── domino.py ├── network_builder.py └── preprocess_slices.py ├── data ├── __init__.py ├── ensg2gene_symbol.txt ├── ensmusg2gene_symbol.txt └── graph.html.format ├── runner.py └── utils ├── __init__.py ├── ensembl2gene_symbol.py ├── graph_influence_linear_th.py ├── scripts.py └── visualize_modules.py /.gitignore: -------------------------------------------------------------------------------- 1 | /python 2 | /domino-env 3 | /domino_hagai.egg-info 4 | /.idea 5 | /build 6 | /config.js 7 | /dist 8 | /*.egg-info/ 9 | *__pycache__* 10 | /examples/* 11 | /domino4web-env 12 | *.whl 13 | !/examples/huri.sif 14 | !/examples/string.sif 15 | !/examples/dip.sif 16 | !/examples/tnfa_active_genes_file.txt 17 | *.swp 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 Hagai Levi 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DOMINO 2 | 3 | DOMINO: Discovery of Modules In Networks using Omic. 4 | 5 | DOMINO is an active module identification (AMI) algorithm. It receives a gene network and nodes' activity scores as input and reports sub-networks (modules) that are putatively biologically meaningful in the context of the activity data. 6 | 7 | 8 | In an extensive evaluation conducted on gene expression and genome-wide association study data, we discovered that AMI algorithms tended to over-report enrichment: GO terms enriched in the modules on real data were often also enriched when the algorithms were run on randomly permuted activity scores. 9 | 10 | In contrast, modules retrieved by DOMINO had a high rate of empirically validated GO terms. 11 | 12 | The study is available at https://www.embopress.org/doi/full/10.15252/msb.20209593. 
13 | 14 | - [Requirements](#requirements) 15 | - [Installation](#installation) 16 | - [From pip](#from-pip) 17 | - [From conda](#from-conda-bioconda) 18 | - [From source](#from-source) 19 | - [Input File Formats](#input-file-formats) 20 | - [Basic Usage](#basic-usage) 21 | - [Advanced usage](#advanced-usage) 22 | - [Main output files](#main-output-files) 23 | - [Example files](#example-files) 24 | 25 | 26 | 27 | ## Requirements 28 | DOMINO was tested under the following settings: 29 | - Python 3.8 (Note that for later versions of Python some dependency packages are currently not available via pip) 30 | - Linux OS (Ubuntu 14.04 LTS, Ubuntu 18.04.4 LTS) 31 | 32 | ## Installation 33 | 34 | ### From pip 35 | 36 | We recommend using a virtual environment. For example: 37 | ``` 38 | python3 -m venv domino-env 39 | source domino-env/bin/activate 40 | ``` 41 | Then, install domino via pip: 42 | ``` 43 | pip install domino-python 44 | ``` 45 | 46 | ### From conda (Bioconda) 47 | 48 | Make sure the Bioconda repository and its dependencies are available: 49 | ``` 50 | conda config --add channels defaults 51 | conda config --add channels conda-forge 52 | conda config --add channels bioconda 53 | ``` 54 | 55 | Create a virtual environment in conda. For example: 56 | ``` 57 | conda create --name domino-env 58 | conda activate domino-env 59 | ``` 60 | 61 | Then, install domino via conda: 62 | ``` 63 | conda install domino 64 | ``` 65 | 66 | ### From source 67 | Download the source files and install according to the following: 68 | 69 | Clone the repo from GitHub: 70 | ``` 71 | git clone https://github.com/Shamir-Lab/DOMINO.git 72 | cd DOMINO 73 | ``` 74 | 75 | DOMINO is written in Python3. The necessary libraries will all be installed by the `setup.py` script. 76 | We recommend using a virtual environment. 
For example: 77 | ``` 78 | python3 -m venv domino-env 79 | source domino-env/bin/activate 80 | ``` 81 | Then, run setup.py: 82 | ``` 83 | python setup.py install 84 | ``` 85 | 86 | ## Input File Formats 87 | 88 | - A network file should be in a simplified sif format: 89 | * Only a single node should appear in each of the first and last columns. 90 | * The first row is a header row. 91 | 92 | - An active gene file contains the gene ids in Ensembl format, separated by a newline char 93 | 94 | - The slices file format is automatically generated by the `slicer` command. 95 | 96 | 97 | For examples, see files in "examples" folder 98 | 99 | ## Basic Usage 100 | 101 | To run preprocessing step 0 (partitioning network using Louvain algorithm): 102 | ``` 103 | slicer --network_file <network_file> --output_file <output_file> 104 | ``` 105 | 106 | 107 | `-n/--network_file`: A path to network file (sif format). e.g., /path/to/network_file.sif. 108 | 109 | `-o/--output_file`: A path to the output slices file. e.g., /path/to/output/slices_file.txt. 110 | 111 | To run DOMINO: 112 | ``` 113 | domino --active_genes_files <active_genes_files> --network_file <network_file> --slices_file <slices_file> --output_folder <output_folder> [-sth <slices_threshold> -mth <module_threshold>] 114 | ``` 115 | 116 | The common command line options are: 117 | 118 | `-a/--active_genes_files`: Comma delimited list of absolute paths to files, each containing a list of active genes, separated by a new line char (\n). e.g. /path/to/active_genes_files_1,/path/to/active_genes_files_2. 119 | 120 | `-n/--network_file`: A path to network file (sif format). e.g., /path/to/network_file.sif. 121 | 122 | `-s/--slices_file`: A path to slices file (i.e. the output of "slicer" script). e.g., /path/to/slices_file.txt. 123 | 124 | 125 | ## Advanced usage 126 | 127 | `-c/--use_cache`: Use auto-generated cache network files (*.pkl) from previous executions with the same network. NOTE: (1) THIS IS NOT THE SLICES FILE! 
(2) If the content of the file has changed, you should set this option to "false" 128 | 129 | `-p/--parallelization`: The number of threads allocated to the run (usually single thread is enough) 130 | 131 | `-v/--visualization`: Indicates whether a visualization of the modules ought to be generated 132 | 133 | `-sth/--slices_threshold`: The threshold for considering a slice as relevant 134 | 135 | `-mth/--module_threshold`: The threshold for considering a putative module as final module. 136 | 137 | 138 | ## Main output files 139 | 140 | `output_folder/active_gene_file_name/modules.out`: list of final modules 141 | `output_folder/active_gene_file_name/module_i.html`: visualization of the i'th module 142 | 143 | 144 | 145 | ## Example files 146 | 147 | Example files of networks in simplified sif format and an active gene file are available under "examples" folder 148 | -------------------------------------------------------------------------------- /examples/huri.sif: -------------------------------------------------------------------------------- 1 | # of cc after modularity optimization: 258 2 | cc #0: n=168 3 || cc #1: n=28 5 | [ENSG00000099968, ENSG00000089356, ENSG00000088682, ENSG00000151117, ENSG00000074660, ENSG00000109084, ENSG00000120742, ENSG00000165685, ENSG00000049249, ENSG00000124508, ENSG00000092931, ENSG00000170075, ENSG00000171861, ENSG00000142188, ENSG00000143641, ENSG00000182557, ENSG00000188910, ENSG00000176087, ENSG00000152377, ENSG00000177710, ENSG00000172159, ENSG00000214160, ENSG00000137806, ENSG00000135077, ENSG00000164530, ENSG00000189430, ENSG00000145248, ENSG00000149534] 6 | cc #2: n=39 7 || cc #3: n=174 9 || cc #4: n=51 11 || cc #5: n=54 13 || cc #6: n=113 15 || cc #7: n=26 17 | [ENSG00000000457, ENSG00000198420, ENSG00000112130, ENSG00000076604, ENSG00000162946, ENSG00000172053, ENSG00000143368, ENSG00000166526, ENSG00000067334, ENSG00000130695, ENSG00000079805, ENSG00000149289, ENSG00000187690, ENSG00000111711, ENSG00000160688, 
ENSG00000087365, ENSG00000140600, ENSG00000182183, ENSG00000112208, ENSG00000143320, ENSG00000119242, ENSG00000170374, ENSG00000137074, ENSG00000198887, ENSG00000171916, ENSG00000171984] 18 | cc #8: n=43 19 || cc #9: n=35 21 || cc #10: n=30 23 || cc #11: n=117 25 || cc #12: n=36 27 | [ENSG00000105576, ENSG00000070808, ENSG00000244025, ENSG00000212938, ENSG00000101331, ENSG00000148660, ENSG00000121068, ENSG00000058404, ENSG00000064419, ENSG00000136319, ENSG00000145349, ENSG00000169718, ENSG00000174547, ENSG00000108578, ENSG00000131979, ENSG00000150722, ENSG00000163596, ENSG00000198721, ENSG00000239672, ENSG00000149328, ENSG00000186792, ENSG00000204394, ENSG00000072195, ENSG00000126457, ENSG00000156574, ENSG00000196152, ENSG00000111218, ENSG00000163098, ENSG00000183010, ENSG00000130313, ENSG00000132481, ENSG00000109208, ENSG00000135316, ENSG00000158321, ENSG00000168961, ENSG00000183617] 28 | cc #13: n=17 29 | [ENSG00000105991, ENSG00000155367, ENSG00000187583, ENSG00000172482, ENSG00000110031, ENSG00000064205, ENSG00000167257, ENSG00000123178, ENSG00000116005, ENSG00000117122, ENSG00000124593, ENSG00000160271, ENSG00000184613, ENSG00000188293, ENSG00000215454, ENSG00000278224, ENSG00000214215] 30 | cc #14: n=36 31 || cc #15: n=14 33 | [ENSG00000138867, ENSG00000108091, ENSG00000007384, ENSG00000107187, ENSG00000020633, ENSG00000067955, ENSG00000106261, ENSG00000166484, ENSG00000177932, ENSG00000183918, ENSG00000156170, ENSG00000137764, ENSG00000215343, ENSG00000175063] 34 | cc #16: n=117 35 || cc #17: n=126 37 || cc #18: n=72 39 || cc #19: n=39 41 || cc #20: n=73 43 | [ENSG00000001461, ENSG00000158813, ENSG00000160131, ENSG00000265107, ENSG00000135211, ENSG00000005483, ENSG00000105726, ENSG00000204427, ENSG00000006327, ENSG00000158014, ENSG00000133065, ENSG00000204979, ENSG00000165794, ENSG00000009790, ENSG00000015592, ENSG00000099625, ENSG00000147654, ENSG00000178826, ENSG00000185862, ENSG00000135749, ENSG00000134812, ENSG00000163378, ENSG00000108960, 
ENSG00000027697, ENSG00000155755, ENSG00000167323, ENSG00000198689, ENSG00000152078, ENSG00000040487, ENSG00000122557, ENSG00000139514, ENSG00000128699, ENSG00000106366, ENSG00000070081, ENSG00000139133, ENSG00000137331, ENSG00000179029, ENSG00000176402, ENSG00000112715, ENSG00000127774, ENSG00000132517, ENSG00000144057, ENSG00000164124, ENSG00000184276, ENSG00000186480, ENSG00000243279, ENSG00000089057, ENSG00000164707, ENSG00000182938, ENSG00000188820, ENSG00000165349, ENSG00000128973, ENSG00000180871, ENSG00000106080, ENSG00000111452, ENSG00000111666, ENSG00000160752, ENSG00000119048, ENSG00000163624, ENSG00000183607, ENSG00000182450, ENSG00000130037, ENSG00000237110, ENSG00000170266, ENSG00000167614, ENSG00000135298, ENSG00000140505, ENSG00000145217, ENSG00000179855, ENSG00000150556, ENSG00000185008, ENSG00000226979, ENSG00000188404] 44 | cc #21: n=53 45 || cc #22: n=21 47 | [ENSG00000105369, ENSG00000121073, ENSG00000162366, ENSG00000099785, ENSG00000117758, ENSG00000185105, ENSG00000148985, ENSG00000158769, ENSG00000205213, ENSG00000103375, ENSG00000111181, ENSG00000139508, ENSG00000161091, ENSG00000163914, ENSG00000175548, ENSG00000179820, ENSG00000188921, ENSG00000159884, ENSG00000145087, ENSG00000163251, ENSG00000121775] 48 | cc #23: n=32 49 || cc #24: n=19 51 | [ENSG00000164294, ENSG00000137404, ENSG00000010932, ENSG00000104660, ENSG00000178719, ENSG00000085552, ENSG00000137819, ENSG00000102076, ENSG00000163646, ENSG00000105983, ENSG00000139187, ENSG00000171302, ENSG00000172602, ENSG00000172461, ENSG00000163517, ENSG00000173200, ENSG00000198569, ENSG00000213185, ENSG00000197822] 52 | cc #25: n=36 53 || cc #26: n=57 55 || cc #27: n=30 57 || cc #28: n=67 59 || cc #29: n=37 61 || cc #30: n=45 63 || cc #31: n=45 65 || cc #32: n=28 67 | [ENSG00000278023, ENSG00000242689, ENSG00000167302, ENSG00000145982, ENSG00000038274, ENSG00000184381, ENSG00000049656, ENSG00000162975, ENSG00000102900, ENSG00000100379, ENSG00000100300, ENSG00000088280, ENSG00000172367, 
ENSG00000089050, ENSG00000136161, ENSG00000159871, ENSG00000121380, ENSG00000180785, ENSG00000183715, ENSG00000242802, ENSG00000198865, ENSG00000113205, ENSG00000117643, ENSG00000121314, ENSG00000184162, ENSG00000181409, ENSG00000185674, ENSG00000198198] 68 | cc #33: n=42 69 || cc #34: n=37 71 || cc #35: n=22 73 | [ENSG00000094796, ENSG00000168710, ENSG00000086506, ENSG00000130656, ENSG00000175449, ENSG00000188536, ENSG00000196565, ENSG00000206172, ENSG00000213931, ENSG00000223609, ENSG00000244734, ENSG00000110811, ENSG00000128789, ENSG00000131966, ENSG00000136720, ENSG00000137831, ENSG00000243543, ENSG00000183527, ENSG00000206177, ENSG00000132912, ENSG00000164944, ENSG00000171680] 74 | cc #36: n=52 75 || cc #37: n=48 77 || cc #38: n=59 79 || cc #39: n=33 81 || cc #40: n=120 83 || cc #41: n=63 85 || cc #42: n=68 87 || cc #43: n=23 89 | [ENSG00000164438, ENSG00000100105, ENSG00000028277, ENSG00000120149, ENSG00000170276, ENSG00000254445, ENSG00000212901, ENSG00000148584, ENSG00000178996, ENSG00000149532, ENSG00000099139, ENSG00000170689, ENSG00000134516, ENSG00000198881, ENSG00000100121, ENSG00000106123, ENSG00000110719, ENSG00000149256, ENSG00000179348, ENSG00000157353, ENSG00000169567, ENSG00000197857, ENSG00000259431] 90 | cc #44: n=91 91 || cc #45: n=39 93 | [ENSG00000002822, ENSG00000078114, ENSG00000103528, ENSG00000131375, ENSG00000137699, ENSG00000157514, ENSG00000166170, ENSG00000185264, ENSG00000168070, ENSG00000159289, ENSG00000204713, ENSG00000260220, ENSG00000124429, ENSG00000032742, ENSG00000198736, ENSG00000177685, ENSG00000122644, ENSG00000072818, ENSG00000156467, ENSG00000215251, ENSG00000084676, ENSG00000173928, ENSG00000100814, ENSG00000166266, ENSG00000128276, ENSG00000101421, ENSG00000254505, ENSG00000105393, ENSG00000107185, ENSG00000114125, ENSG00000115290, ENSG00000173674, ENSG00000188315, ENSG00000129317, ENSG00000277791, ENSG00000140577, ENSG00000185015, ENSG00000214941, ENSG00000167014] 94 | cc #46: n=27 95 | [ENSG00000012779, 
ENSG00000141543, ENSG00000100462, ENSG00000108786, ENSG00000154781, ENSG00000203907, ENSG00000165898, ENSG00000065548, ENSG00000185721, ENSG00000153721, ENSG00000145911, ENSG00000164325, ENSG00000133119, ENSG00000165995, ENSG00000181852, ENSG00000108349, ENSG00000108591, ENSG00000111832, ENSG00000163918, ENSG00000175215, ENSG00000154945, ENSG00000136271, ENSG00000198455, ENSG00000150593, ENSG00000145293, ENSG00000156976, ENSG00000167720] 96 | cc #47: n=25 97 | [ENSG00000048540, ENSG00000188566, ENSG00000164591, ENSG00000164746, ENSG00000188803, ENSG00000116752, ENSG00000141946, ENSG00000143367, ENSG00000162373, ENSG00000162458, ENSG00000177842, ENSG00000189367, ENSG00000165863, ENSG00000075856, ENSG00000087095, ENSG00000174306, ENSG00000162511, ENSG00000112514, ENSG00000112110, ENSG00000204316, ENSG00000186166, ENSG00000213638, ENSG00000140043, ENSG00000142507, ENSG00000240344] 98 | cc #48: n=23 99 | [ENSG00000057663, ENSG00000103852, ENSG00000179627, ENSG00000008130, ENSG00000121671, ENSG00000137504, ENSG00000102081, ENSG00000152348, ENSG00000160799, ENSG00000205356, ENSG00000241837, ENSG00000168255, ENSG00000088930, ENSG00000099992, ENSG00000268629, ENSG00000183066, ENSG00000164871, ENSG00000143450, ENSG00000115758, ENSG00000119042, ENSG00000155096, ENSG00000172534, ENSG00000142920] 100 | cc #49: n=59 101 || cc #50: n=29 103 | [ENSG00000072201, ENSG00000163935, ENSG00000010803, ENSG00000198879, ENSG00000134594, ENSG00000106785, ENSG00000047634, ENSG00000102032, ENSG00000102098, ENSG00000163161, ENSG00000119965, ENSG00000125818, ENSG00000134287, ENSG00000167216, ENSG00000169710, ENSG00000170854, ENSG00000176774, ENSG00000176979, ENSG00000197275, ENSG00000203867, ENSG00000213047, ENSG00000196369, ENSG00000085978, ENSG00000172007, ENSG00000102547, ENSG00000119906, ENSG00000120992, ENSG00000204130, ENSG00000175931] 104 | cc #51: n=39 105 || cc #52: n=71 107 || cc #53: n=51 109 || cc #54: n=44 111 || cc #55: n=39 113 || cc #56: n=47 115 || cc #57: n=40 117 || cc #58: 
n=21 119 | [ENSG00000140259, ENSG00000081377, ENSG00000114107, ENSG00000178175, ENSG00000185730, ENSG00000174652, ENSG00000137760, ENSG00000186660, ENSG00000112096, ENSG00000129347, ENSG00000129657, ENSG00000131115, ENSG00000176256, ENSG00000197385, ENSG00000197647, ENSG00000115053, ENSG00000181638, ENSG00000264668, ENSG00000139233, ENSG00000198298, ENSG00000139146] 120 | cc #59: n=67 121 | [ENSG00000140416, ENSG00000143549, ENSG00000185359, ENSG00000132570, ENSG00000178988, ENSG00000105619, ENSG00000181404, ENSG00000142686, ENSG00000205363, ENSG00000134138, ENSG00000104375, ENSG00000139496, ENSG00000165495, ENSG00000177238, ENSG00000188428, ENSG00000068028, ENSG00000070031, ENSG00000171560, ENSG00000132286, ENSG00000283977, ENSG00000196544, ENSG00000230989, ENSG00000096872, ENSG00000175390, ENSG00000153044, ENSG00000187268, ENSG00000196553, ENSG00000164972, ENSG00000127399, ENSG00000122970, ENSG00000170955, ENSG00000173013, ENSG00000152944, ENSG00000179010, ENSG00000101265, ENSG00000103121, ENSG00000119559, ENSG00000107551, ENSG00000120162, ENSG00000151748, ENSG00000173542, ENSG00000104892, ENSG00000162413, ENSG00000169122, ENSG00000105479, ENSG00000160199, ENSG00000154832, ENSG00000122692, ENSG00000164104, ENSG00000125975, ENSG00000127824, ENSG00000166046, ENSG00000164326, ENSG00000136738, ENSG00000176531, ENSG00000147885, ENSG00000149636, ENSG00000241852, ENSG00000165714, ENSG00000165752, ENSG00000183785, ENSG00000254999, ENSG00000167797, ENSG00000185689, ENSG00000185261, ENSG00000204991, ENSG00000222011] 122 | cc #60: n=20 123 | [ENSG00000141013, ENSG00000183323, ENSG00000136936, ENSG00000012061, ENSG00000156050, ENSG00000135604, ENSG00000100122, ENSG00000092531, ENSG00000169740, ENSG00000189132, ENSG00000178602, ENSG00000165671, ENSG00000109061, ENSG00000113328, ENSG00000159882, ENSG00000134308, ENSG00000145191, ENSG00000136603, ENSG00000150676, ENSG00000228623] 124 | cc #61: n=77 125 || cc #62: n=45 127 || cc #63: n=48 129 || cc #64: n=39 131 || cc #65: n=12 
133 | [ENSG00000176155, ENSG00000183628, ENSG00000091483, ENSG00000100056, ENSG00000172578, ENSG00000154134, ENSG00000120802, ENSG00000132801, ENSG00000148153, ENSG00000169715, ENSG00000170390, ENSG00000182077] 134 | cc #66: n=48 135 || cc #67: n=32 137 || cc #68: n=15 139 | [ENSG00000064218, ENSG00000204889, ENSG00000183621, ENSG00000119335, ENSG00000186272, ENSG00000159720, ENSG00000132702, ENSG00000117155, ENSG00000166261, ENSG00000105750, ENSG00000132591, ENSG00000163535, ENSG00000189057, ENSG00000214022, ENSG00000154734] 140 | cc #69: n=15 141 | [ENSG00000064999, ENSG00000050820, ENSG00000035403, ENSG00000133246, ENSG00000167106, ENSG00000106554, ENSG00000109107, ENSG00000111859, ENSG00000136848, ENSG00000176293, ENSG00000187555, ENSG00000112877, ENSG00000100347, ENSG00000159685, ENSG00000141293] 142 | cc #70: n=50 143 || cc #71: n=36 145 || cc #72: n=38 147 || cc #73: n=29 149 | [ENSG00000111077, ENSG00000146281, ENSG00000169330, ENSG00000162757, ENSG00000167779, ENSG00000027847, ENSG00000137955, ENSG00000037897, ENSG00000160193, ENSG00000198919, ENSG00000154305, ENSG00000068438, ENSG00000113163, ENSG00000117560, ENSG00000100934, ENSG00000105323, ENSG00000086289, ENSG00000099994, ENSG00000122386, ENSG00000101310, ENSG00000107651, ENSG00000150961, ENSG00000100949, ENSG00000102218, ENSG00000213762, ENSG00000117148, ENSG00000133275, ENSG00000128346, ENSG00000159788] 150 | cc #74: n=62 151 || cc #75: n=16 153 | [ENSG00000115507, ENSG00000144810, ENSG00000203786, ENSG00000040608, ENSG00000104408, ENSG00000116691, ENSG00000174808, ENSG00000102100, ENSG00000187678, ENSG00000104415, ENSG00000175121, ENSG00000181652, ENSG00000110237, ENSG00000188610, ENSG00000237441, ENSG00000157540] 154 | cc #76: n=38 155 || cc #77: n=41 157 || cc #78: n=35 159 || cc #79: n=106 161 || cc #80: n=25 163 | [ENSG00000143013, ENSG00000162148, ENSG00000135363, ENSG00000204822, ENSG00000169744, ENSG00000172379, ENSG00000049449, ENSG00000068796, ENSG00000187650, ENSG00000153140, 
ENSG00000168004, ENSG00000069188, ENSG00000131730, ENSG00000103363, ENSG00000154582, ENSG00000121101, ENSG00000108840, ENSG00000112246, ENSG00000134864, ENSG00000147065, ENSG00000163380, ENSG00000137941, ENSG00000139780, ENSG00000143536, ENSG00000280071] 164 | cc #81: n=44 165 | [ENSG00000156735, ENSG00000007255, ENSG00000100842, ENSG00000109101, ENSG00000153130, ENSG00000169217, ENSG00000181938, ENSG00000010810, ENSG00000116285, ENSG00000156463, ENSG00000242173, ENSG00000134253, ENSG00000051620, ENSG00000249915, ENSG00000183166, ENSG00000205078, ENSG00000151967, ENSG00000188368, ENSG00000154642, ENSG00000212122, ENSG00000101901, ENSG00000155087, ENSG00000206203, ENSG00000169136, ENSG00000113597, ENSG00000118271, ENSG00000128609, ENSG00000119574, ENSG00000120616, ENSG00000126467, ENSG00000126803, ENSG00000213619, ENSG00000133398, ENSG00000179163, ENSG00000142396, ENSG00000171055, ENSG00000153820, ENSG00000167996, ENSG00000179021, ENSG00000182950, ENSG00000204388, ENSG00000204389, ENSG00000240303, ENSG00000203778] 166 | cc #82: n=15 167 | [ENSG00000163060, ENSG00000124191, ENSG00000064655, ENSG00000185002, ENSG00000101336, ENSG00000087903, ENSG00000171747, ENSG00000124602, ENSG00000129654, ENSG00000139445, ENSG00000167740, ENSG00000178397, ENSG00000171314, ENSG00000180008, ENSG00000181513] 168 | cc #83: n=43 169 || cc #84: n=25 171 | [ENSG00000167123, ENSG00000179455, ENSG00000186153, ENSG00000139714, ENSG00000159214, ENSG00000215187, ENSG00000140718, ENSG00000175946, ENSG00000257365, ENSG00000101282, ENSG00000103254, ENSG00000163406, ENSG00000102145, ENSG00000119431, ENSG00000185742, ENSG00000115392, ENSG00000184160, ENSG00000153066, ENSG00000133619, ENSG00000160188, ENSG00000164219, ENSG00000168522, ENSG00000221838, ENSG00000170242, ENSG00000257727] 172 | cc #85: n=41 173 || cc #86: n=39 175 || cc #87: n=37 177 || cc #88: n=11 179 | [ENSG00000186930, ENSG00000111701, ENSG00000176723, ENSG00000164951, ENSG00000109775, ENSG00000118762, ENSG00000163909, 
ENSG00000139180, ENSG00000186143, ENSG00000215475, ENSG00000248483] 180 | cc #89: n=51 181 || cc #90: n=149 183 || cc #91: n=260 185 || cc #92: n=46 187 || cc #93: n=29 189 | [ENSG00000004838, ENSG00000089225, ENSG00000119650, ENSG00000130950, ENSG00000149100, ENSG00000239779, ENSG00000006453, ENSG00000115365, ENSG00000175866, ENSG00000020129, ENSG00000111196, ENSG00000197024, ENSG00000163848, ENSG00000175879, ENSG00000162066, ENSG00000132122, ENSG00000170260, ENSG00000152433, ENSG00000186017, ENSG00000132341, ENSG00000173575, ENSG00000184575, ENSG00000142065, ENSG00000163281, ENSG00000164548, ENSG00000165861, ENSG00000204946, ENSG00000130227, ENSG00000138768] 190 | cc #94: n=51 191 || cc #95: n=37 193 || cc #96: n=94 195 | [ENSG00000141699, ENSG00000196329, ENSG00000012660, ENSG00000166257, ENSG00000105701, ENSG00000008283, ENSG00000100292, ENSG00000101558, ENSG00000102007, ENSG00000105829, ENSG00000108433, ENSG00000113734, ENSG00000123353, ENSG00000124164, ENSG00000147155, ENSG00000151778, ENSG00000164983, ENSG00000166900, ENSG00000171045, ENSG00000188167, ENSG00000198833, ENSG00000213689, ENSG00000103496, ENSG00000106089, ENSG00000079950, ENSG00000103415, ENSG00000135185, ENSG00000220205, ENSG00000170310, ENSG00000118402, ENSG00000171729, ENSG00000049245, ENSG00000125814, ENSG00000186115, ENSG00000168899, ENSG00000171135, ENSG00000105518, ENSG00000130545, ENSG00000111450, ENSG00000213203, ENSG00000099940, ENSG00000124098, ENSG00000126950, ENSG00000137824, ENSG00000145107, ENSG00000148344, ENSG00000174132, ENSG00000273331, ENSG00000053501, ENSG00000162236, ENSG00000071967, ENSG00000189077, ENSG00000073737, ENSG00000219200, ENSG00000117533, ENSG00000129422, ENSG00000134490, ENSG00000100568, ENSG00000099365, ENSG00000135823, ENSG00000139190, ENSG00000169605, ENSG00000175893, ENSG00000181585, ENSG00000186007, ENSG00000186501, ENSG00000244045, ENSG00000265354, ENSG00000091947, ENSG00000104915, ENSG00000170906, ENSG00000118596, ENSG00000166562, ENSG00000124222, 
ENSG00000100593, ENSG00000151229, ENSG00000157570, ENSG00000127324, ENSG00000139921, ENSG00000166069, ENSG00000214253, ENSG00000125726, ENSG00000155158, ENSG00000136631, ENSG00000163823, ENSG00000145936, ENSG00000155659, ENSG00000150687, ENSG00000151176, ENSG00000162706, ENSG00000169347, ENSG00000170615, ENSG00000171864, ENSG00000181826] 196 | cc #97: n=87 197 | [ENSG00000148110, ENSG00000213593, ENSG00000170509, ENSG00000188001, ENSG00000006007, ENSG00000100652, ENSG00000211456, ENSG00000080189, ENSG00000213463, ENSG00000215712, ENSG00000168476, ENSG00000179363, ENSG00000204070, ENSG00000074416, ENSG00000119820, ENSG00000136986, ENSG00000158869, ENSG00000113811, ENSG00000140931, ENSG00000079459, ENSG00000026652, ENSG00000160856, ENSG00000185176, ENSG00000188822, ENSG00000198189, ENSG00000029534, ENSG00000164756, ENSG00000240045, ENSG00000121680, ENSG00000156738, ENSG00000176410, ENSG00000184986, ENSG00000215717, ENSG00000052802, ENSG00000171954, ENSG00000174567, ENSG00000134198, ENSG00000171928, ENSG00000144468, ENSG00000072042, ENSG00000115446, ENSG00000144035, ENSG00000115194, ENSG00000160216, ENSG00000166278, ENSG00000265808, ENSG00000100036, ENSG00000107819, ENSG00000145569, ENSG00000156398, ENSG00000164209, ENSG00000164466, ENSG00000167656, ENSG00000170271, ENSG00000183160, ENSG00000087253, ENSG00000132563, ENSG00000168701, ENSG00000127564, ENSG00000100196, ENSG00000100532, ENSG00000100612, ENSG00000157131, ENSG00000185201, ENSG00000197296, ENSG00000110628, ENSG00000133318, ENSG00000115310, ENSG00000145014, ENSG00000121207, ENSG00000144648, ENSG00000141424, ENSG00000135452, ENSG00000151353, ENSG00000147383, ENSG00000154518, ENSG00000163959, ENSG00000168389, ENSG00000182326, ENSG00000250361, ENSG00000172292, ENSG00000177291, ENSG00000186562, ENSG00000186599, ENSG00000206013, ENSG00000196407, ENSG00000204655] 198 | cc #98: n=46 199 || cc #99: n=54 201 || cc #100: n=46 203 || cc #101: n=46 205 || cc #102: n=43 207 | [ENSG00000106153, ENSG00000066185, 
ENSG00000006015, ENSG00000184033, ENSG00000268651, ENSG00000079134, ENSG00000159377, ENSG00000114735, ENSG00000257218, ENSG00000133134, ENSG00000025708, ENSG00000113441, ENSG00000052723, ENSG00000161180, ENSG00000165643, ENSG00000064489, ENSG00000137478, ENSG00000137473, ENSG00000146857, ENSG00000151365, ENSG00000165943, ENSG00000066735, ENSG00000127914, ENSG00000276234, ENSG00000092020, ENSG00000100219, ENSG00000244687, ENSG00000237190, ENSG00000254901, ENSG00000111011, ENSG00000270379, ENSG00000275835, ENSG00000122678, ENSG00000130559, ENSG00000179593, ENSG00000132681, ENSG00000133393, ENSG00000156876, ENSG00000141084, ENSG00000149357, ENSG00000164305, ENSG00000175556, ENSG00000214944] 208 | cc #103: n=61 209 | [ENSG00000051128, ENSG00000161010, ENSG00000088986, ENSG00000264364, ENSG00000101751, ENSG00000111554, ENSG00000129116, ENSG00000149657, ENSG00000163492, ENSG00000165588, ENSG00000171201, ENSG00000182795, ENSG00000170074, ENSG00000070495, ENSG00000121579, ENSG00000072182, ENSG00000138794, ENSG00000072832, ENSG00000092964, ENSG00000113657, ENSG00000185009, ENSG00000138439, ENSG00000127529, ENSG00000140548, ENSG00000166847, ENSG00000168734, ENSG00000171033, ENSG00000177947, ENSG00000180116, ENSG00000181013, ENSG00000187569, ENSG00000189319, ENSG00000189401, ENSG00000116990, ENSG00000157851, ENSG00000156206, ENSG00000198901, ENSG00000103942, ENSG00000124067, ENSG00000116991, ENSG00000112599, ENSG00000142875, ENSG00000181284, ENSG00000167080, ENSG00000135973, ENSG00000144451, ENSG00000137996, ENSG00000169288, ENSG00000149196, ENSG00000151224, ENSG00000168906, ENSG00000151418, ENSG00000165275, ENSG00000171481, ENSG00000173464, ENSG00000189159, ENSG00000205330, ENSG00000170370, ENSG00000241935, ENSG00000182223, ENSG00000213397] 210 | cc #104: n=34 211 || cc #105: n=26 213 | [ENSG00000100109, ENSG00000179046, ENSG00000005436, ENSG00000167604, ENSG00000205002, ENSG00000068024, ENSG00000198920, ENSG00000182173, ENSG00000066379, ENSG00000137960, ENSG00000068305, 
ENSG00000081189, ENSG00000106246, ENSG00000115138, ENSG00000147905, ENSG00000205899, ENSG00000109832, ENSG00000166887, ENSG00000176714, ENSG00000196453, ENSG00000111906, ENSG00000162438, ENSG00000137265, ENSG00000160695, ENSG00000174206, ENSG00000182315] 214 | cc #106: n=46 215 | [ENSG00000103154, ENSG00000117625, ENSG00000104147, ENSG00000168404, ENSG00000166681, ENSG00000120289, ENSG00000197747, ENSG00000043143, ENSG00000111653, ENSG00000163875, ENSG00000168395, ENSG00000047579, ENSG00000104164, ENSG00000143553, ENSG00000186628, ENSG00000136146, ENSG00000112290, ENSG00000160307, ENSG00000188015, ENSG00000136811, ENSG00000154839, ENSG00000116017, ENSG00000174450, ENSG00000099399, ENSG00000101997, ENSG00000171643, ENSG00000101746, ENSG00000165480, ENSG00000188283, ENSG00000163993, ENSG00000126953, ENSG00000135441, ENSG00000182606, ENSG00000123307, ENSG00000132589, ENSG00000120708, ENSG00000196754, ENSG00000165584, ENSG00000166669, ENSG00000139684, ENSG00000176177, ENSG00000205744, ENSG00000160678, ENSG00000196154, ENSG00000172785, ENSG00000188425] 216 | cc #107: n=20 217 | [ENSG00000104964, ENSG00000163155, ENSG00000198807, ENSG00000120875, ENSG00000126003, ENSG00000071242, ENSG00000100030, ENSG00000079277, ENSG00000102882, ENSG00000153233, ENSG00000162734, ENSG00000136826, ENSG00000118495, ENSG00000130164, ENSG00000149634, ENSG00000152669, ENSG00000170561, ENSG00000140280, ENSG00000148386, ENSG00000175793] 218 | cc #108: n=15 219 | [ENSG00000105717, ENSG00000155754, ENSG00000158435, ENSG00000107816, ENSG00000151338, ENSG00000179271, ENSG00000242220, ENSG00000164949, ENSG00000159495, ENSG00000162526, ENSG00000065609, ENSG00000130159, ENSG00000170166, ENSG00000146067, ENSG00000159592] 220 | cc #109: n=22 221 | [ENSG00000116044, ENSG00000112182, ENSG00000042062, ENSG00000067560, ENSG00000146285, ENSG00000155366, ENSG00000111913, ENSG00000116584, ENSG00000204052, ENSG00000078246, ENSG00000100124, ENSG00000178718, ENSG00000105732, ENSG00000222046, ENSG00000120051, 
ENSG00000149150, ENSG00000182156, ENSG00000188916, ENSG00000197063, ENSG00000198517, ENSG00000164967, ENSG00000172336] 222 | cc #110: n=25 223 | [ENSG00000121058, ENSG00000008324, ENSG00000104412, ENSG00000162188, ENSG00000163374, ENSG00000186889, ENSG00000187123, ENSG00000204406, ENSG00000170903, ENSG00000183873, ENSG00000185666, ENSG00000178809, ENSG00000100908, ENSG00000134690, ENSG00000131148, ENSG00000166598, ENSG00000151388, ENSG00000113448, ENSG00000115211, ENSG00000119718, ENSG00000127928, ENSG00000147894, ENSG00000198839, ENSG00000144224, ENSG00000176994] 224 | cc #111: n=21 225 | [ENSG00000123349, ENSG00000204438, ENSG00000204344, ENSG00000042753, ENSG00000103591, ENSG00000155959, ENSG00000101132, ENSG00000101134, ENSG00000152056, ENSG00000166747, ENSG00000183020, ENSG00000104218, ENSG00000116035, ENSG00000113578, ENSG00000143256, ENSG00000178403, ENSG00000138629, ENSG00000155368, ENSG00000213967, ENSG00000176261, ENSG00000206562] 226 | cc #112: n=59 227 | [ENSG00000136997, ENSG00000004799, ENSG00000175895, ENSG00000087338, ENSG00000172432, ENSG00000113272, ENSG00000082126, ENSG00000121741, ENSG00000143409, ENSG00000143851, ENSG00000144736, ENSG00000145715, ENSG00000155622, ENSG00000160226, ENSG00000163346, ENSG00000171402, ENSG00000176302, ENSG00000205777, ENSG00000215269, ENSG00000215274, ENSG00000224659, ENSG00000236362, ENSG00000274274, ENSG00000088538, ENSG00000159674, ENSG00000283632, ENSG00000138621, ENSG00000100804, ENSG00000144395, ENSG00000101222, ENSG00000104852, ENSG00000105963, ENSG00000111845, ENSG00000109685, ENSG00000176635, ENSG00000111231, ENSG00000112699, ENSG00000170049, ENSG00000196850, ENSG00000243477, ENSG00000120337, ENSG00000125952, ENSG00000128908, ENSG00000129682, ENSG00000132274, ENSG00000141376, ENSG00000143032, ENSG00000164978, ENSG00000145919, ENSG00000147419, ENSG00000148908, ENSG00000166164, ENSG00000167941, ENSG00000170234, ENSG00000173302, ENSG00000173578, ENSG00000184110, ENSG00000185101, ENSG00000197713] 228 | cc #113: 
n=20 229 | [ENSG00000139722, ENSG00000154127, ENSG00000119725, ENSG00000163156, ENSG00000011332, ENSG00000112511, ENSG00000035115, ENSG00000109686, ENSG00000269699, ENSG00000169756, ENSG00000127220, ENSG00000152076, ENSG00000173917, ENSG00000146243, ENSG00000138764, ENSG00000142546, ENSG00000163743, ENSG00000149927, ENSG00000162639, ENSG00000239474] 230 | cc #114: n=23 231 | [ENSG00000140859, ENSG00000141965, ENSG00000141570, ENSG00000060971, ENSG00000135457, ENSG00000204209, ENSG00000146830, ENSG00000175203, ENSG00000087263, ENSG00000186468, ENSG00000136152, ENSG00000180332, ENSG00000182040, ENSG00000168434, ENSG00000112137, ENSG00000115137, ENSG00000142661, ENSG00000213079, ENSG00000125885, ENSG00000145780, ENSG00000132475, ENSG00000172175, ENSG00000163041] 232 | cc #115: n=42 233 | [ENSG00000143514, ENSG00000160972, ENSG00000079257, ENSG00000007923, ENSG00000204619, ENSG00000117751, ENSG00000121766, ENSG00000144036, ENSG00000091640, ENSG00000111775, ENSG00000118620, ENSG00000119938, ENSG00000144847, ENSG00000144857, ENSG00000164879, ENSG00000167645, ENSG00000178460, ENSG00000197123, ENSG00000203747, ENSG00000213639, ENSG00000143375, ENSG00000172531, ENSG00000101605, ENSG00000176058, ENSG00000173281, ENSG00000102055, ENSG00000186298, ENSG00000213719, ENSG00000110925, ENSG00000112782, ENSG00000115685, ENSG00000187164, ENSG00000121769, ENSG00000123569, ENSG00000231989, ENSG00000182676, ENSG00000141971, ENSG00000182175, ENSG00000275713, ENSG00000144655, ENSG00000155962, ENSG00000169504] 234 | cc #116: n=39 235 || cc #117: n=46 237 || cc #118: n=31 239 || cc #119: n=26 241 | [ENSG00000167110, ENSG00000075539, ENSG00000064607, ENSG00000114209, ENSG00000079432, ENSG00000102572, ENSG00000135932, ENSG00000179115, ENSG00000115808, ENSG00000111707, ENSG00000115694, ENSG00000134602, ENSG00000148426, ENSG00000148735, ENSG00000116120, ENSG00000119707, ENSG00000120158, ENSG00000155868, ENSG00000182199, ENSG00000182791, ENSG00000144283, ENSG00000164867, ENSG00000176692, 
ENSG00000165805, ENSG00000186976, ENSG00000189046] 242 | cc #120: n=20 243 | [ENSG00000170264, ENSG00000234616, ENSG00000112578, ENSG00000006659, ENSG00000130475, ENSG00000160062, ENSG00000062194, ENSG00000204628, ENSG00000130818, ENSG00000158805, ENSG00000075568, ENSG00000139174, ENSG00000089876, ENSG00000145016, ENSG00000107929, ENSG00000135521, ENSG00000147573, ENSG00000115289, ENSG00000116809, ENSG00000149273] 244 | cc #121: n=35 245 || cc #122: n=41 247 || cc #123: n=20 249 | [ENSG00000104756, ENSG00000117475, ENSG00000099812, ENSG00000205643, ENSG00000104388, ENSG00000129472, ENSG00000104872, ENSG00000144485, ENSG00000092820, ENSG00000113851, ENSG00000249481, ENSG00000141682, ENSG00000153303, ENSG00000162771, ENSG00000175606, ENSG00000205085, ENSG00000122484, ENSG00000152767, ENSG00000198522, ENSG00000142751] 250 | cc #124: n=42 251 || cc #125: n=44 253 || cc #126: n=16 255 | [ENSG00000119705, ENSG00000132938, ENSG00000020577, ENSG00000164331, ENSG00000060339, ENSG00000121297, ENSG00000152207, ENSG00000160447, ENSG00000181035, ENSG00000196793, ENSG00000146457, ENSG00000196182, ENSG00000169188, ENSG00000127314, ENSG00000168792, ENSG00000197780] 256 | cc #127: n=61 257 || cc #128: n=87 259 | [ENSG00000184515, ENSG00000136305, ENSG00000005486, ENSG00000008517, ENSG00000050426, ENSG00000069535, ENSG00000075415, ENSG00000076258, ENSG00000120329, ENSG00000125995, ENSG00000126768, ENSG00000132313, ENSG00000134321, ENSG00000135093, ENSG00000139973, ENSG00000140905, ENSG00000143158, ENSG00000161055, ENSG00000165948, ENSG00000167721, ENSG00000169599, ENSG00000173269, ENSG00000175229, ENSG00000177673, ENSG00000178726, ENSG00000196312, ENSG00000197102, ENSG00000204183, ENSG00000204370, ENSG00000270170, ENSG00000275663, ENSG00000276410, ENSG00000170178, ENSG00000128335, ENSG00000130208, ENSG00000224916, ENSG00000267467, ENSG00000060762, ENSG00000066855, ENSG00000123609, ENSG00000102466, ENSG00000203667, ENSG00000071243, ENSG00000072501, ENSG00000143740, ENSG00000187559, 
ENSG00000149930, ENSG00000125875, ENSG00000169020, ENSG00000157927, ENSG00000102312, ENSG00000144712, ENSG00000278540, ENSG00000103423, ENSG00000143443, ENSG00000106009, ENSG00000109851, ENSG00000109919, ENSG00000156411, ENSG00000111664, ENSG00000119927, ENSG00000136155, ENSG00000144524, ENSG00000152086, ENSG00000161277, ENSG00000125462, ENSG00000272414, ENSG00000205138, ENSG00000162972, ENSG00000167914, ENSG00000133028, ENSG00000134108, ENSG00000134339, ENSG00000135537, ENSG00000148965, ENSG00000152700, ENSG00000162368, ENSG00000171421, ENSG00000173141, ENSG00000173436, ENSG00000186493, ENSG00000241878, ENSG00000250486, ENSG00000284194, ENSG00000165949, ENSG00000205560, ENSG00000174928] 260 | cc #129: n=26 261 | [ENSG00000185869, ENSG00000100395, ENSG00000198586, ENSG00000244607, ENSG00000079337, ENSG00000187796, ENSG00000036549, ENSG00000100721, ENSG00000112312, ENSG00000146587, ENSG00000124596, ENSG00000197961, ENSG00000125046, ENSG00000130024, ENSG00000197226, ENSG00000100483, ENSG00000118412, ENSG00000119772, ENSG00000166166, ENSG00000173273, ENSG00000179295, ENSG00000116863, ENSG00000163795, ENSG00000152439, ENSG00000183048, ENSG00000168028] 262 | cc #130: n=39 263 || cc #131: n=63 265 || cc #132: n=26 267 | [ENSG00000248712, ENSG00000180938, ENSG00000197279, ENSG00000197905, ENSG00000109089, ENSG00000215271, ENSG00000102870, ENSG00000196150, ENSG00000169981, ENSG00000171970, ENSG00000186352, ENSG00000104957, ENSG00000140743, ENSG00000069956, ENSG00000089022, ENSG00000165887, ENSG00000100276, ENSG00000101745, ENSG00000166135, ENSG00000164749, ENSG00000180787, ENSG00000167394, ENSG00000183475, ENSG00000151458, ENSG00000158552, ENSG00000171962] 268 | cc #133: n=46 269 || cc #134: n=44 271 || cc #135: n=15 273 | [ENSG00000138964, ENSG00000052850, ENSG00000240583, ENSG00000071575, ENSG00000095209, ENSG00000099860, ENSG00000144580, ENSG00000177556, ENSG00000103522, ENSG00000105664, ENSG00000108342, ENSG00000186086, ENSG00000116717, ENSG00000166333, 
ENSG00000197702] 274 | cc #136: n=19 275 | [ENSG00000139946, ENSG00000162924, ENSG00000095059, ENSG00000163818, ENSG00000114115, ENSG00000132507, ENSG00000204930, ENSG00000167333, ENSG00000073282, ENSG00000160087, ENSG00000087245, ENSG00000163577, ENSG00000138136, ENSG00000181778, ENSG00000130779, ENSG00000154114, ENSG00000196632, ENSG00000188000, ENSG00000142619] 276 | cc #137: n=29 277 | [ENSG00000140553, ENSG00000179456, ENSG00000138698, ENSG00000181472, ENSG00000172680, ENSG00000181467, ENSG00000133818, ENSG00000066027, ENSG00000116667, ENSG00000134874, ENSG00000204978, ENSG00000078061, ENSG00000126934, ENSG00000213281, ENSG00000176490, ENSG00000205517, ENSG00000122783, ENSG00000128585, ENSG00000182150, ENSG00000123728, ENSG00000126458, ENSG00000132155, ENSG00000165023, ENSG00000174775, ENSG00000173838, ENSG00000141524, ENSG00000143622, ENSG00000152214, ENSG00000221909] 278 | cc #138: n=39 279 || cc #139: n=21 281 | [ENSG00000150636, ENSG00000101298, ENSG00000174007, ENSG00000047249, ENSG00000158480, ENSG00000076201, ENSG00000079974, ENSG00000114268, ENSG00000110200, ENSG00000154274, ENSG00000165501, ENSG00000119328, ENSG00000144134, ENSG00000154133, ENSG00000198682, ENSG00000161956, ENSG00000162928, ENSG00000163040, ENSG00000163938, ENSG00000197779, ENSG00000213066] 282 | cc #140: n=49 283 || cc #141: n=40 285 || cc #142: n=28 287 | [ENSG00000173207, ENSG00000108395, ENSG00000164287, ENSG00000088808, ENSG00000100211, ENSG00000105122, ENSG00000138101, ENSG00000129083, ENSG00000183092, ENSG00000165325, ENSG00000174015, ENSG00000128185, ENSG00000103449, ENSG00000111879, ENSG00000124159, ENSG00000176715, ENSG00000177380, ENSG00000095139, ENSG00000162194, ENSG00000100083, ENSG00000158163, ENSG00000183578, ENSG00000188343, ENSG00000132970, ENSG00000107937, ENSG00000164299, ENSG00000168930, ENSG00000157869] 288 | cc #143: n=25 289 | [ENSG00000174788, ENSG00000015133, ENSG00000065135, ENSG00000178935, ENSG00000213654, ENSG00000137218, ENSG00000186451, ENSG00000122733, 
ENSG00000169220, ENSG00000204669, ENSG00000164924, ENSG00000187288, ENSG00000132879, ENSG00000174370, ENSG00000261796, ENSG00000162377, ENSG00000165383, ENSG00000114353, ENSG00000127955, ENSG00000128928, ENSG00000153086, ENSG00000131910, ENSG00000132825, ENSG00000183421, ENSG00000175202] 290 | cc #144: n=35 291 || cc #145: n=31 293 || cc #146: n=41 295 || cc #147: n=30 297 || cc #148: n=22 299 | [ENSG00000197256, ENSG00000167306, ENSG00000106400, ENSG00000072121, ENSG00000108953, ENSG00000146809, ENSG00000142961, ENSG00000151790, ENSG00000123411, ENSG00000107560, ENSG00000126368, ENSG00000169093, ENSG00000114978, ENSG00000103769, ENSG00000175137, ENSG00000185236, ENSG00000132698, ENSG00000181026, ENSG00000127837, ENSG00000136169, ENSG00000152219, ENSG00000243943] 300 | cc #149: n=36 301 || cc #150: n=45 303 | [ENSG00000093010, ENSG00000134531, ENSG00000039987, ENSG00000184349, ENSG00000182093, ENSG00000011638, ENSG00000211584, ENSG00000057019, ENSG00000143119, ENSG00000213316, ENSG00000276070, ENSG00000126251, ENSG00000164081, ENSG00000110057, ENSG00000114638, ENSG00000134873, ENSG00000104381, ENSG00000173214, ENSG00000185897, ENSG00000067113, ENSG00000068615, ENSG00000159176, ENSG00000100100, ENSG00000143147, ENSG00000165269, ENSG00000213694, ENSG00000143603, ENSG00000103249, ENSG00000111897, ENSG00000104894, ENSG00000179796, ENSG00000135960, ENSG00000107159, ENSG00000241399, ENSG00000125255, ENSG00000142046, ENSG00000125734, ENSG00000160791, ENSG00000232258, ENSG00000187824, ENSG00000145194, ENSG00000152689, ENSG00000156959, ENSG00000185896, ENSG00000188782] 304 | cc #151: n=31 305 || cc #152: n=50 307 || cc #153: n=30 309 || cc #154: n=17 311 | [ENSG00000131737, ENSG00000077348, ENSG00000162390, ENSG00000197050, ENSG00000148655, ENSG00000100330, ENSG00000188386, ENSG00000157322, ENSG00000118508, ENSG00000125533, ENSG00000128045, ENSG00000151276, ENSG00000152147, ENSG00000136243, ENSG00000144381, ENSG00000242616, ENSG00000142252] 312 | cc #155: n=48 313 || cc 
#156: n=20 315 | [ENSG00000005448, ENSG00000149761, ENSG00000179930, ENSG00000198429, ENSG00000048545, ENSG00000102781, ENSG00000131068, ENSG00000223953, ENSG00000088876, ENSG00000137171, ENSG00000100246, ENSG00000095574, ENSG00000176953, ENSG00000100325, ENSG00000164708, ENSG00000141527, ENSG00000144635, ENSG00000172331, ENSG00000187860, ENSG00000254004] 316 | cc #157: n=33 317 || cc #158: n=48 319 || cc #159: n=19 321 | [ENSG00000006042, ENSG00000069849, ENSG00000148175, ENSG00000099194, ENSG00000180398, ENSG00000109099, ENSG00000066405, ENSG00000115602, ENSG00000133027, ENSG00000135750, ENSG00000143786, ENSG00000144040, ENSG00000104783, ENSG00000182107, ENSG00000256660, ENSG00000105223, ENSG00000170956, ENSG00000171217, ENSG00000175264] 322 | cc #160: n=47 323 || cc #161: n=47 325 || cc #162: n=34 327 || cc #163: n=31 329 || cc #164: n=95 331 || cc #165: n=35 333 || cc #166: n=41 335 || cc #167: n=44 337 || cc #168: n=24 339 | [ENSG00000185418, ENSG00000145414, ENSG00000144591, ENSG00000205143, ENSG00000134996, ENSG00000197283, ENSG00000132109, ENSG00000074054, ENSG00000147642, ENSG00000121570, ENSG00000173548, ENSG00000273559, ENSG00000179111, ENSG00000105997, ENSG00000111229, ENSG00000108590, ENSG00000143537, ENSG00000131023, ENSG00000184451, ENSG00000113407, ENSG00000154016, ENSG00000173540, ENSG00000135723, ENSG00000141582] 340 | cc #169: n=28 341 | [ENSG00000121454, ENSG00000145287, ENSG00000066651, ENSG00000102931, ENSG00000138814, ENSG00000154319, ENSG00000091651, ENSG00000101849, ENSG00000138175, ENSG00000164484, ENSG00000213465, ENSG00000228075, ENSG00000185305, ENSG00000109103, ENSG00000117602, ENSG00000120910, ENSG00000154102, ENSG00000157379, ENSG00000167523, ENSG00000221823, ENSG00000122741, ENSG00000124532, ENSG00000172725, ENSG00000175279, ENSG00000197930, ENSG00000251503, ENSG00000156973, ENSG00000146476] 342 | cc #170: n=18 343 | [ENSG00000144579, ENSG00000143772, ENSG00000111665, ENSG00000196233, ENSG00000119801, ENSG00000198963, 
ENSG00000067840, ENSG00000142182, ENSG00000137802, ENSG00000147162, ENSG00000182866, ENSG00000173611, ENSG00000144677, ENSG00000131653, ENSG00000137876, ENSG00000197971, ENSG00000161526, ENSG00000171056] 344 | cc #171: n=28 345 | [ENSG00000090487, ENSG00000118894, ENSG00000099256, ENSG00000205838, ENSG00000075643, ENSG00000143862, ENSG00000179922, ENSG00000105372, ENSG00000108384, ENSG00000132646, ENSG00000134255, ENSG00000138735, ENSG00000163840, ENSG00000165704, ENSG00000182568, ENSG00000119865, ENSG00000147601, ENSG00000163947, ENSG00000102471, ENSG00000173660, ENSG00000186001, ENSG00000129295, ENSG00000113318, ENSG00000174348, ENSG00000130962, ENSG00000131507, ENSG00000166803, ENSG00000259316] 346 | cc #172: n=42 347 || cc #173: n=70 349 || cc #174: n=16 351 | [ENSG00000188761, ENSG00000100353, ENSG00000184436, ENSG00000168002, ENSG00000206560, ENSG00000090659, ENSG00000180964, ENSG00000094841, ENSG00000107833, ENSG00000121350, ENSG00000173163, ENSG00000187024, ENSG00000110442, ENSG00000188243, ENSG00000174469, ENSG00000168263] 352 | cc #175: n=21 353 | [ENSG00000013392, ENSG00000129675, ENSG00000197771, ENSG00000178773, ENSG00000065989, ENSG00000158571, ENSG00000164185, ENSG00000154957, ENSG00000096060, ENSG00000152133, ENSG00000171540, ENSG00000126522, ENSG00000137077, ENSG00000124795, ENSG00000134779, ENSG00000140365, ENSG00000169019, ENSG00000143627, ENSG00000170525, ENSG00000163508, ENSG00000166508] 354 | cc #176: n=33 355 || cc #177: n=30 357 || cc #178: n=21 359 | [ENSG00000008394, ENSG00000138798, ENSG00000146469, ENSG00000254521, ENSG00000162300, ENSG00000033627, ENSG00000163995, ENSG00000064601, ENSG00000185567, ENSG00000106049, ENSG00000114646, ENSG00000133874, ENSG00000164023, ENSG00000198851, ENSG00000213996, ENSG00000099721, ENSG00000147123, ENSG00000174871, ENSG00000156284, ENSG00000197140, ENSG00000169550] 360 | cc #179: n=12 361 | [ENSG00000011523, ENSG00000116918, ENSG00000135775, ENSG00000133250, ENSG00000102313, ENSG00000072133, 
ENSG00000198369, ENSG00000204613, ENSG00000211460, ENSG00000092978, ENSG00000166326, ENSG00000186577] 362 | cc #180: n=20 363 | [ENSG00000171953, ENSG00000028310, ENSG00000158428, ENSG00000163666, ENSG00000221818, ENSG00000102802, ENSG00000079335, ENSG00000134030, ENSG00000153714, ENSG00000100413, ENSG00000125457, ENSG00000130811, ENSG00000164330, ENSG00000108001, ENSG00000110955, ENSG00000113838, ENSG00000157349, ENSG00000168872, ENSG00000132664, ENSG00000170298] 364 | cc #181: n=29 365 | [ENSG00000015153, ENSG00000114446, ENSG00000116141, ENSG00000178934, ENSG00000186074, ENSG00000205076, ENSG00000028839, ENSG00000106355, ENSG00000139343, ENSG00000204392, ENSG00000166889, ENSG00000074201, ENSG00000100028, ENSG00000125835, ENSG00000143977, ENSG00000164167, ENSG00000165694, ENSG00000167088, ENSG00000170860, ENSG00000104055, ENSG00000100324, ENSG00000175324, ENSG00000130332, ENSG00000213231, ENSG00000106028, ENSG00000182004, ENSG00000182628, ENSG00000130703, ENSG00000161654] 366 | cc #182: n=39 367 | [ENSG00000118946, ENSG00000130347, ENSG00000023330, ENSG00000034510, ENSG00000106078, ENSG00000112186, ENSG00000132613, ENSG00000135722, ENSG00000163961, ENSG00000166432, ENSG00000215203, ENSG00000079102, ENSG00000078699, ENSG00000094880, ENSG00000173588, ENSG00000126461, ENSG00000167491, ENSG00000170365, ENSG00000169592, ENSG00000129993, ENSG00000108244, ENSG00000123329, ENSG00000128016, ENSG00000103365, ENSG00000129055, ENSG00000133083, ENSG00000163607, ENSG00000166925, ENSG00000095637, ENSG00000105270, ENSG00000141646, ENSG00000174106, ENSG00000175832, ENSG00000108854, ENSG00000181915, ENSG00000120693, ENSG00000143322, ENSG00000136541, ENSG00000170473] 368 | cc #183: n=39 369 || cc #184: n=25 371 | [ENSG00000196792, ENSG00000068971, ENSG00000105568, ENSG00000119383, ENSG00000112640, ENSG00000156509, ENSG00000172346, ENSG00000113558, ENSG00000108306, ENSG00000110429, ENSG00000112029, ENSG00000112146, ENSG00000116663, ENSG00000118564, ENSG00000127452, ENSG00000147364, 
ENSG00000151876, ENSG00000153558, ENSG00000161243, ENSG00000167196, ENSG00000171823, ENSG00000177051, ENSG00000197361, ENSG00000204923, ENSG00000116783] 372 | cc #185: n=32 373 || -------------------------------------------------------------------------------- /examples/tnfa_active_genes_file.txt: -------------------------------------------------------------------------------- 1 | ENSG00000118503 2 | ENSG00000125347 3 | ENSG00000163874 4 | ENSG00000137331 5 | ENSG00000164949 6 | ENSG00000169429 7 | ENSG00000081041 8 | ENSG00000123610 9 | ENSG00000185215 10 | ENSG00000100906 11 | ENSG00000090339 12 | ENSG00000023445 13 | ENSG00000144802 14 | ENSG00000146232 15 | ENSG00000163739 16 | ENSG00000163734 17 | ENSG00000162692 18 | ENSG00000108691 19 | ENSG00000115009 20 | ENSG00000168386 21 | ENSG00000243649 22 | ENSG00000077150 23 | ENSG00000232810 24 | ENSG00000162924 25 | ENSG00000104312 26 | ENSG00000184898 27 | ENSG00000136244 28 | ENSG00000163661 29 | ENSG00000128342 30 | ENSG00000198342 31 | ENSG00000138771 32 | ENSG00000164761 33 | ENSG00000158859 34 | ENSG00000131979 35 | ENSG00000118257 36 | ENSG00000237499 37 | ENSG00000174059 38 | ENSG00000117228 39 | ENSG00000160326 40 | ENSG00000171223 41 | ENSG00000144655 42 | ENSG00000151014 43 | ENSG00000115008 44 | ENSG00000134070 45 | ENSG00000137393 46 | ENSG00000145365 47 | ENSG00000005889 48 | ENSG00000125538 49 | ENSG00000139289 50 | ENSG00000108387 51 | ENSG00000141682 52 | ENSG00000167910 53 | ENSG00000087074 54 | ENSG00000107968 55 | ENSG00000162825 56 | ENSG00000112149 57 | ENSG00000253522 58 | ENSG00000112096 59 | ENSG00000185650 60 | ENSG00000169242 61 | ENSG00000183696 62 | ENSG00000277462 63 | ENSG00000007908 64 | ENSG00000253837 65 | ENSG00000159200 66 | ENSG00000184371 67 | ENSG00000145632 68 | ENSG00000165507 69 | ENSG00000161010 70 | ENSG00000139211 71 | ENSG00000183655 72 | ENSG00000006210 73 | ENSG00000196466 74 | ENSG00000137193 75 | ENSG00000154734 76 | ENSG00000136997 77 | ENSG00000163545 78 | 
ENSG00000171786 79 | ENSG00000146457 80 | ENSG00000139618 81 | ENSG00000184557 82 | ENSG00000167604 83 | ENSG00000165891 84 | ENSG00000073756 85 | ENSG00000115963 86 | ENSG00000168389 87 | ENSG00000155090 88 | ENSG00000125618 89 | ENSG00000178860 90 | ENSG00000148154 91 | ENSG00000158050 92 | ENSG00000125898 93 | ENSG00000146112 94 | ENSG00000115137 95 | ENSG00000219607 96 | ENSG00000056558 97 | ENSG00000172602 98 | ENSG00000158615 99 | ENSG00000122691 100 | ENSG00000124145 101 | ENSG00000062716 102 | ENSG00000183337 103 | ENSG00000173334 104 | ENSG00000146278 105 | ENSG00000134954 106 | ENSG00000254004 107 | ENSG00000120738 108 | ENSG00000143067 109 | ENSG00000118849 110 | ENSG00000111276 111 | ENSG00000142961 112 | ENSG00000104635 113 | ENSG00000173846 114 | ENSG00000173548 115 | ENSG00000149596 116 | ENSG00000185947 117 | ENSG00000224057 118 | ENSG00000077514 119 | ENSG00000266094 120 | ENSG00000117318 121 | ENSG00000104856 122 | ENSG00000169085 123 | ENSG00000105327 124 | ENSG00000180801 125 | ENSG00000141232 126 | ENSG00000215417 127 | ENSG00000101665 128 | ENSG00000143514 129 | ENSG00000139318 130 | ENSG00000106003 131 | ENSG00000160325 132 | ENSG00000069399 133 | ENSG00000197632 134 | ENSG00000265972 135 | ENSG00000214022 136 | ENSG00000116574 137 | ENSG00000173166 138 | ENSG00000251136 139 | ENSG00000166387 140 | ENSG00000008517 141 | ENSG00000198346 142 | ENSG00000124875 143 | ENSG00000137936 144 | ENSG00000158092 145 | ENSG00000267882 146 | ENSG00000162772 147 | ENSG00000113369 148 | ENSG00000198053 149 | ENSG00000187479 150 | ENSG00000166592 151 | ENSG00000161011 152 | ENSG00000177045 153 | ENSG00000125657 154 | ENSG00000083799 155 | ENSG00000121671 156 | ENSG00000112715 157 | ENSG00000185246 158 | ENSG00000152049 159 | ENSG00000117226 160 | ENSG00000023608 161 | ENSG00000167625 162 | ENSG00000169991 163 | ENSG00000128016 164 | ENSG00000176597 165 | ENSG00000157557 166 | ENSG00000162645 167 | ENSG00000204209 168 | ENSG00000280852 169 | ENSG00000113448 
170 | ENSG00000109320 171 | ENSG00000196227 172 | ENSG00000143772 173 | ENSG00000137502 174 | ENSG00000131797 175 | ENSG00000159128 176 | ENSG00000175155 177 | ENSG00000148926 178 | ENSG00000175066 179 | ENSG00000113739 180 | ENSG00000168398 181 | ENSG00000177606 182 | ENSG00000132823 183 | ENSG00000175592 184 | ENSG00000173120 185 | ENSG00000125931 186 | ENSG00000165494 187 | ENSG00000168209 188 | ENSG00000145780 189 | ENSG00000243753 190 | ENSG00000104825 191 | ENSG00000180530 192 | ENSG00000113504 193 | ENSG00000136867 194 | ENSG00000231574 195 | ENSG00000173926 196 | ENSG00000171522 197 | ENSG00000010818 198 | ENSG00000164430 199 | ENSG00000184545 200 | ENSG00000173281 201 | ENSG00000170961 202 | ENSG00000123609 203 | ENSG00000134569 204 | ENSG00000125398 205 | ENSG00000267519 206 | ENSG00000171163 207 | ENSG00000188647 208 | ENSG00000164308 209 | ENSG00000126778 210 | ENSG00000149289 211 | ENSG00000130775 212 | ENSG00000070404 213 | ENSG00000189367 214 | ENSG00000047410 215 | ENSG00000272695 216 | ENSG00000177542 217 | ENSG00000131669 218 | ENSG00000219200 219 | ENSG00000177374 220 | ENSG00000125430 221 | ENSG00000165312 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | 7 | setup( 8 | name='domino-python', 9 | version="0.1.1", 10 | author="Hagai Levi", 11 | author_email="hagai.levi.007@gmail.com", 12 | description='DOMINO: Discovery of Modules In Networks using Omic', 13 | url='https://github.com/Shamir-Lab/DOMINO', 14 | classifiers=[ 15 | "Programming Language :: Python :: 3.6", 16 | "License :: OSI Approved :: MIT License", 17 | "Operating System :: POSIX :: Linux", 18 | ], 19 | packages = find_packages(), 20 | package_data={'': ['*']}, 21 | include_package_data=True, 22 | install_requires=[ 23 | 
'networkx==2.4', 24 | 'numpy==1.22.0', 25 | 'scipy==1.10.0', 26 | 'pandas==1.5.1', 27 | 'pcst-fast==1.0.7', 28 | 'statsmodels==0.11.0', 29 | 'python-louvain==0.14'], 30 | entry_points = { 31 | "console_scripts": [ 32 | "domino=src.runner:main_domino", 33 | "slicer=src.runner:main_slicer", 34 | ] 35 | } 36 | 37 | ) 38 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shamir-Lab/DOMINO/85dad1515717b425b17f58f92b13a063ccccb85d/src/__init__.py -------------------------------------------------------------------------------- /src/constants.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import multiprocessing 4 | 5 | USE_CACHE=False 6 | N_OF_THREADS=40 # int(np.ceil(multiprocessing.cpu_count()*0.9)) 7 | dir_path = os.path.dirname(os.path.realpath(__file__)) 8 | PATH_TO_CONF = "env/config/conf.json" 9 | 10 | REPO_DIR = os.path.dirname(os.path.realpath(__file__)) 11 | SH_DIR = os.path.join(REPO_DIR, "sh","scripts") 12 | 13 | 14 | LABEL_ID = "sample_type.samples" 15 | PRIMARY_TUMOR = "Primary Tumor" 16 | METASTATIC = "Metastatic" 17 | 18 | LABELS_NORMAL = "labels_normal" 19 | LABELS_SHUFFLE = "labels_shuffle" 20 | LABELS_RANDOM = "labels_random" 21 | LABELS_ALTERNATED = "labels_alternated" 22 | LABELS_INVERTED = "labels_inverted" 23 | 24 | ENSG_TO_GENE_SYMBOLS = "ensg2gene_symbol.txt" 25 | ENSMUSG_TO_GENE_SYMBOLS = "ensmusg2gene_symbol.txt" 26 | ENSEMBL_TO_ENTREZ = "ensembl2entrez.txt" 27 | 28 | GO_OBO_URL = 'http://purl.obolibrary.org/obo/go/go-basic.obo' 29 | GO_ASSOCIATION_GENE2GEO_URL = 'https://ftp.ncbi.nlm.nih.gov/gene/DATA/gene2go.gz' 30 | GO_FILE_NAME = 'go_bp.obo' #'go-basic.obo' 31 | GO_ASSOCIATION_FILE_NAME = "gene2go" 32 | 33 | -------------------------------------------------------------------------------- 
/src/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shamir-Lab/DOMINO/85dad1515717b425b17f58f92b13a063ccccb85d/src/core/__init__.py -------------------------------------------------------------------------------- /src/core/domino.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, "../") 3 | 4 | import random 5 | import os 6 | 7 | import pandas as pd 8 | import numpy as np 9 | import pickle 10 | import multiprocessing 11 | from scipy.stats import hypergeom 12 | from statsmodels.sandbox.stats.multicomp import fdrcorrection0 13 | 14 | import pcst_fast 15 | import networkx as nx 16 | from networkx.algorithms.community.quality import modularity 17 | from networkx.algorithms.community.centrality import girvan_newman 18 | from networkx.algorithms.components import connected_components 19 | 20 | from functools import reduce 21 | from src.utils.graph_influence_linear_th import linear_threshold 22 | from src.core.preprocess_slices import read_preprocessed_slices 23 | from src.core.network_builder import build_network 24 | import src.constants as constants 25 | 26 | G_modularity = None 27 | 28 | 29 | def extract_scores(scores_file): 30 | """""" 31 | scores = pd.read_csv(scores_file, sep='\t', index_col=0, header=None, dtype=str) 32 | if "pval" in scores.columns: 33 | scores["score"] = scores["pval"] 34 | else: 35 | scores["score"] = 1 36 | return scores 37 | 38 | 39 | def add_scores_to_nodes(G, scores): 40 | """""" 41 | inds = [] 42 | for nd in G.nodes: 43 | G.nodes[nd]["pertubed_node"] = False 44 | G.nodes[nd]["score"] = 0 45 | 46 | for ind, row in scores.iterrows(): 47 | if ind in G.nodes: 48 | inds.append(ind) 49 | G.nodes[ind]["score"] = row["score"] 50 | G.nodes[ind]["pertubed_node"] = row["score"] > 0 # binarizing the activeness 51 | 52 | return G 53 | 54 | 55 | def create_subgraph(params): 56 | cur_module = params 
57 | global G_modularity 58 | nodes = set(cur_module) 59 | res = G_modularity.subgraph(list(nodes)) 60 | return res 61 | 62 | 63 | def prune_network_by_modularity(G, modules, cache_file): 64 | global G_modularity 65 | if os.path.exists(cache_file) and constants.USE_CACHE: 66 | print(f'fetch cache file for subnetworks {cache_file}') 67 | G_modularity = pickle.load(open(cache_file, 'rb')) 68 | for n in G_modularity: 69 | G_modularity.nodes[n]['pertubed_node'] = G.nodes[n]['pertubed_node'] 70 | print('pkl is loaded') 71 | return G_modularity 72 | 73 | print(f'generating subgraphs...') 74 | G_modularity = G 75 | print( 76 | f"Before slicing: n of cc:{len(list(connected_components(G_modularity)))}, n of nodes: {len(G_modularity.nodes)}, n of edges, {len(G_modularity.edges)}") 77 | p = multiprocessing.Pool(constants.N_OF_THREADS) 78 | 79 | G_modules = p.map(create_subgraph, [m for m in modules]) 80 | p.close() 81 | # print(f'{modules}') 82 | print(f'# of modules after extraction: {len(G_modules)}') 83 | G_modularity = nx.algorithms.operators.union_all(G_modules) 84 | print( 85 | f"After slicing: n of cc:{len(list(connected_components(G_modularity)))}, n of nodes: {len(G_modularity.nodes)}, n of edges, {len(G_modularity.edges)}") 86 | pickle.dump(G_modularity, open(cache_file, 'wb+')) 87 | print('subgraphs\' pkl is saved') 88 | 89 | 90 | def prune_network_by_modularity_old(G, modules, dummy): 91 | G_modularity = G.copy() 92 | edges_to_remove = [] 93 | for cur_edge in G_modularity.copy().edges: 94 | in_cc = False 95 | for cur_module in modules: 96 | if cur_edge[0] in cur_module and cur_edge[1] in cur_module: 97 | in_cc = True 98 | if not in_cc: 99 | edges_to_remove.append(cur_edge) 100 | 101 | G_modularity.remove_edges_from(edges_to_remove) 102 | return G_modularity 103 | 104 | 105 | def get_pcst_prize(G_cc, prize_factor, n_steps): 106 | prizes = {} 107 | p_cc = linear_threshold(G_cc, [n for n in G_cc.nodes if G_cc.nodes[n]['pertubed_node'] > 0], steps=n_steps) 108 | for 
p_node in G_cc.nodes: 109 | prizes[p_node] = 0 110 | for i_cur_layer, cur_layer in enumerate(p_cc): 111 | for cur_node in cur_layer: 112 | prizes[cur_node] += prize_factor ** i_cur_layer 113 | 114 | return prizes 115 | 116 | def run_pcst(G_cc, i_cc, labels, n_steps, nodes, prize_factor): 117 | ## set prize ## 118 | prizes = get_pcst_prize(G_cc, prize_factor, n_steps) 119 | vertices_prizes = [] 120 | for cur_node in nodes: 121 | vertices_prizes.append( 122 | G_cc.nodes[cur_node]["pertubed_node"] if G_cc.nodes[cur_node]["pertubed_node"] else prizes[cur_node]) 123 | 124 | ## set cost ## 125 | edges_grid = [] 126 | for cur_edge in G_cc.edges: 127 | edges_grid.append([nodes.index(cur_edge[0]), nodes.index(cur_edge[1])]) 128 | 129 | edges_costs = [] 130 | for cur_edge in edges_grid: 131 | u_score = 0 if G_cc.nodes[nodes[cur_edge[0]]]["pertubed_node"] else 0.9999 132 | v_score = 0 if G_cc.nodes[nodes[cur_edge[1]]]["pertubed_node"] else 0.9999 133 | 134 | edges_costs.append(np.min([u_score, v_score])) 135 | 136 | ## find pcst component by running pcst fast## 137 | root = -1 138 | num_clusters = 1 139 | pruning = 'strong' # 'none' 140 | verbosity_level = 0 141 | vertices, edges = pcst_fast.pcst_fast(edges_grid, vertices_prizes, edges_costs, root, num_clusters, pruning, 142 | verbosity_level) 143 | 144 | return edges, edges_grid 145 | 146 | 147 | def split_subslice_into_putative_modules(G_optimized, improvement_delta, modularity_score_objective, best_modularity): 148 | cur_components = [G_optimized.subgraph(c) for c in connected_components(G_optimized)] 149 | cur_modularity = modularity(G_optimized, cur_components, weight='weight') 150 | if cur_modularity >= modularity_score_objective: 151 | return True, best_modularity 152 | 153 | if len(n_nodes) < 4: 154 | G_optimized.remove_nodes_from(n_nodes) 155 | 156 | cur_components = [G_optimized.subgraph(c) for c in connected_components(G_optimized)] 157 | if len(cur_components) == 0: 158 | return True, best_modularity 159 | 160 | 
optimized_connected_components = girvan_newman(G_optimized) 161 | cur_components = sorted(next(optimized_connected_components)) 162 | cur_modularity = modularity(G_optimized, cur_components, weight='weight') 163 | if cur_modularity <= best_modularity + improvement_delta: 164 | return True, best_modularity 165 | 166 | else: 167 | optimal_components = cur_components 168 | 169 | edges_to_remove = [] 170 | for cur_edge in G_optimized.edges: 171 | included = False 172 | for n_nodes in optimal_components: 173 | if cur_edge[0] in n_nodes and cur_edge[1] in n_nodes: 174 | included = True 175 | if not included: 176 | edges_to_remove.append(cur_edge) 177 | 178 | G_optimized.remove_edges_from(edges_to_remove) 179 | 180 | return False, cur_modularity 181 | 182 | 183 | def get_putative_modules(G, full_G=None, improvement_delta=0, modularity_score_objective=1, module_threshold=0.05, 184 | n_cc=1.0): 185 | """""" 186 | 187 | if full_G == None: 188 | full_G = G 189 | G_optimized = G.copy() 190 | 191 | # clean subslice from cycles and isolated nodes 192 | G_optimized.remove_edges_from(list(nx.selfloop_edges(G_optimized))) 193 | G_optimized.remove_nodes_from(list(nx.isolates(G_optimized))) 194 | 195 | # check subslice enrichment for active nodes 196 | pertubed_nodes = [cur_node for cur_node in full_G.nodes if full_G.nodes[cur_node]["pertubed_node"]] 197 | pertubed_nodes_in_cc = [n for n in G_optimized.nodes if G_optimized.nodes[n]["pertubed_node"]] 198 | n_nodes = list(G_optimized.nodes) 199 | sig_score = hypergeom.sf(len(pertubed_nodes_in_cc), len(full_G.nodes), len(pertubed_nodes), 200 | len(n_nodes)) \ 201 | + hypergeom.pmf(len(pertubed_nodes_in_cc), len(full_G.nodes), len(pertubed_nodes), 202 | len(n_nodes)) 203 | 204 | sig_score = sig_score / n_cc 205 | 206 | # if subslice is not enriched for active nodes split in into putative modules. 
otherwise, report it as a single putative module 207 | # print(f'{sig_score}<{module_threshold} and {len(G_optimized.nodes)}<30') 208 | is_enriched_sublice = (len(G_optimized.nodes) < 100) or len( 209 | G_optimized.nodes) == 0 # sig_score 0 else nx.Graph(), [list(m.nodes) 262 | for m in 263 | passed_modules], \ 264 | fdr_bh_results[1] 265 | 266 | 267 | def pf_filter(params): 268 | global G_modularity 269 | n_G_original, cur_cc, i_cur_cc, n_pertubed_nodes, perturbation_factor = params 270 | pertubed_nodes_in_cc = [cur_node for cur_node in cur_cc if G_modularity.nodes[cur_node]["pertubed_node"]] 271 | if len(cur_cc) < 4 or n_pertubed_nodes == 0 or not ( 272 | len(pertubed_nodes_in_cc) / float(len(cur_cc)) >= perturbation_factor or len(pertubed_nodes_in_cc) / float( 273 | n_pertubed_nodes) >= 0.1): 274 | return None 275 | else: 276 | score = hypergeom.sf(len(pertubed_nodes_in_cc), n_G_original, n_pertubed_nodes, 277 | len(cur_cc)) \ 278 | + hypergeom.pmf(len(pertubed_nodes_in_cc), n_G_original, n_pertubed_nodes, 279 | len(cur_cc)) 280 | return (cur_cc, score) 281 | 282 | 283 | def analyze_slice(params): 284 | G, cc, i_cc, n_steps, relevant_slices, prize_factor, module_threshold = params 285 | G_cc = nx.subgraph(G, cc) 286 | nodes = list(G_cc.nodes) 287 | labels = {n: G_cc.nodes[n] for n in nodes} 288 | n_pertubed_nodes = sum([G.nodes[a]["pertubed_node"] for a in G.nodes]) 289 | prize_factor = max(0, 1 - 3 * n_pertubed_nodes / float(len(G.nodes))) 290 | # print(f'active gene ratio: {n_pertubed_nodes}/{len(G_cc.nodes)}') 291 | # print(f"prize factor: {prize_factor}") 292 | edges, edges_grid = run_pcst(G_cc, i_cc, labels, n_steps, nodes, prize_factor) 293 | G_subslice = nx.Graph() 294 | G_subslice.add_edges_from([(nodes[edges_grid[e][0]], nodes[edges_grid[e][1]]) for e in edges]) 295 | nx.set_node_attributes(G_subslice, {n: labels[n] for n in G_subslice.nodes}) 296 | modularity_score_objective = np.log(len(G_subslice.nodes)) / np.log(len(G.nodes)) if len( 297 | 
def get_final_modules(G, G_putative_modules, module_threshold):
    """Keep the putative modules that are enriched for perturbed nodes.

    For every putative module the probability of drawing at least as many
    perturbed nodes as it contains is computed with a hypergeometric test
    (population: all nodes of ``G``; successes: perturbed nodes of ``G``;
    draws: the module size).  A module is retained when that probability is
    not larger than ``module_threshold`` divided by the number of putative
    modules.

    :param G: graph whose nodes carry a truthy/falsy ``"pertubed_node"``
        attribute (read as ``G.nodes[n]["pertubed_node"]``).
    :param G_putative_modules: sized collection of module graphs; only their
        ``.nodes`` are read, and every node must also exist in ``G``.
    :param module_threshold: significance level before the division by the
        number of putative modules.
    :return: the retained modules, most significant first.
    """
    n_modules = len(G_putative_modules)
    if n_modules == 0:
        # Nothing to test; also keeps the threshold division below safe.
        return []

    # These three values do not depend on the module being tested, so they are
    # computed once instead of once per module.
    n_nodes = len(G.nodes)
    n_pertubed = sum(1 for cur_node in G.nodes if G.nodes[cur_node]["pertubed_node"])
    final_module_threshold = module_threshold / n_modules

    module_sigs = []
    for cur_G_module in G_putative_modules:
        module_size = len(cur_G_module.nodes)
        n_pertubed_in_module = sum(
            1 for cur_node in cur_G_module.nodes if G.nodes[cur_node]["pertubed_node"])

        # sf(k) is P(X > k); adding pmf(k) turns it into P(X >= k).
        sig_score = hypergeom.sf(n_pertubed_in_module, n_nodes, n_pertubed, module_size) \
                    + hypergeom.pmf(n_pertubed_in_module, n_nodes, n_pertubed, module_size)

        if sig_score <= final_module_threshold:
            module_sigs.append((cur_G_module, sig_score / n_modules))

    module_sigs.sort(key=lambda a: a[1])
    return [a[0] for a in module_sigs]
def build_network(network_file):
    """Build an undirected graph from a tab-separated, SIF-style edge list.

    The file is read without a header row and with every value as a string.
    The first and third columns of each row are the two endpoints of an edge;
    the middle column is ignored.  Self-loops are kept.  Every node gets a
    ``score`` attribute initialised to 0.

    :param network_file: path to the tab-separated network file.
    :return: a ``networkx.Graph`` holding one edge per row.
    """
    edges_dataset = pd.read_csv(network_file, sep='\t', header=None, dtype=str)

    G = nx.Graph()
    # Pair the endpoint columns directly; iterating with iterrows() creates a
    # Series object per edge and is needlessly slow on large networks.
    G.add_edges_from(zip(edges_dataset.iloc[:, 0], edges_dataset.iloc[:, 2]))
    nx.set_node_attributes(G, 0, 'score')

    return G
def read_preprocessed_slices(file_path):
    """Read the slices written by ``create_slices``.

    Every line that starts with ``cc`` announces a slice; the line right after
    it holds the members as ``[a, b, c]``.  All other lines (such as the
    leading ``# of cc ...`` summary) are ignored.

    :param file_path: path to the slices file.
    :return: list of slices, each a list of node-name strings, in file order.
    """
    modules = []

    with open(file_path, 'r') as f:
        for line in f:
            if not line.startswith("cc"):
                continue
            # The member list is on the line following the "cc" header.  The
            # same iterator is advanced so that line is not re-examined.
            members_line = next(f, None)
            if members_line is None:
                # Truncated file: a header without a member line is dropped
                # instead of producing a bogus [''] slice.
                break
            members = members_line.strip()[1:-1]
            modules.append(members.split(', ') if members else [])

    return modules
32 | 33 | 34 | 35 | 36 | 59 | 60 | 89 | 90 | 122 | 123 | 344 | 345 | 346 | 347 | 348 | 375 | 376 | 377 | 378 | 379 | 382 |
383 |
384 |
385 |
386 |
387 |
388 |
389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 |
idnamemodules
399 | 400 |
401 |
402 |
403 |
404 |
405 |
406 |
407 |
408 |
409 |

Total number of genes: {NUM_OF_GENES}

410 |
411 |
412 |
413 |
414 |
415 |
416 |
417 |
418 |
419 |
420 |
421 |
422 |
423 |
424 |
425 |
426 |
427 | 428 | 438 | 439 | 440 | 441 | -------------------------------------------------------------------------------- /src/runner.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from src.core.domino import main as domino_main 4 | from src.core.preprocess_slices import create_slices 5 | from src.utils.visualize_modules import visualize_modules 6 | import src.constants as constants 7 | def main_domino(): 8 | 9 | parser = argparse.ArgumentParser(description='DOMINO: An active module identification algorithm with reduce rate of false.\n NOTE YOU SHOULD RUN THE SLICES SCRIPT FIRST! (more info, type slicer -h) \n Example input files are available @ https://github.com/Shamir-Lab/DOMINO/tree/master/examples') 10 | parser.add_argument('-a', '--active_genes_files', dest='active_genes_files', help='Comma delimited list of absolute paths to files, each containing a list of active genes, separated by a new line char (\\n). e.g. /path/to/active_genes_files_1,/path/to/active_genes_files_2.', default="examples/tnfa_active_genes_file.txt") 11 | parser.add_argument('-n', '--network_file', dest='network_file', help='A path to network file (sif format). e.g. /path/to/network_file.sif', default="examples/huri.sif") 12 | parser.add_argument('-s', '--slices_file', dest='slices_file', help='A path to slices file (i.e. the output of "slicer" script). e.g., /path/to/slices_file.txt', default="examples/huri_slices.txt") 13 | parser.add_argument('-o', '--output_folder', dest='output_folder', help='A folder where output files will be written e.g., /path/to/output', default="examples/output") 14 | parser.add_argument('-c', '--use_cache', dest='use_cache', help='Use auto-generated cache network files (*.pkl) from previous executions with the same network. NOTE: (1) THIS IS NOT THE SLICES FILE! 
(2) If the content of the file has changed, you should set this option to "false"', default="true") 15 | parser.add_argument('-p', '--parallelization', dest='parallelization', help='The number of threads allocated to the run (usually single thread is enough)', default="1") 16 | parser.add_argument('-v', '--visualization', dest='visualization', help='Indicates whether a visualization of the modules ought to be generated', default="true") 17 | parser.add_argument('-sth', '--slice_threshold', dest='slice_threshold', default="0.3", help='The threshold for considering a slice as relevant') 18 | parser.add_argument('-mth', '--module_threshold', dest='module_threshold', default="0.05", help='The threshold for considering a putative module as final module') 19 | 20 | 21 | args = parser.parse_args() 22 | active_genes_files = args.active_genes_files.split(",") 23 | output_folder = args.output_folder 24 | network_file = args.network_file 25 | slices_file = args.slices_file 26 | slice_threshold = float(args.slice_threshold) 27 | module_threshold = float(args.module_threshold) 28 | use_cache = args.use_cache=="true" 29 | parallelization = int(args.parallelization) 30 | visualization = args.visualization=="true" 31 | 32 | constants.N_OF_THREADS=parallelization 33 | constants.USE_CACHE=use_cache 34 | 35 | for cur_ag in active_genes_files: 36 | G_final_modules=domino_main(active_genes_file=cur_ag, network_file=network_file, slices_file=slices_file, slice_threshold=slice_threshold, module_threshold=module_threshold) 37 | activity_name=os.path.splitext(os.path.split(cur_ag)[-1])[0] 38 | report_folder=os.path.join(output_folder,activity_name) 39 | try: 40 | os.makedirs(report_folder) 41 | except: 42 | pass 43 | 44 | out_file=os.path.join(report_folder, "modules.out") 45 | if len(G_final_modules) !=0: 46 | open(out_file, 'w+').write("\n".join(['[%s]' % ', '.join(list(m.nodes)) for m in G_final_modules])+"\n") 47 | else: 48 | open(out_file, 'w+').write("") 49 | 50 | 
def main_slicer():
    """Command-line entry point of the slicer (preprocessing step #0).

    Parses ``--network_file`` and ``--output_file`` from the command line and
    hands them to ``create_slices``.  Exits with an argparse usage error when
    both options point at the same file.
    """
    parser = argparse.ArgumentParser(description='Slicer for DOMINO (step #0): A preprocessing step for the network')
    parser.add_argument('-n', '--network_file', dest='network_file', help='A path to network file (sif format). e.g. /path/to/network_file.sif', default="examples/huri.sif")
    # The default used to be "examples/huri.sif", i.e. the default input
    # network itself, so running the slicer without arguments replaced the
    # network with the slices.  The new default is the file main_domino looks
    # for by default.
    parser.add_argument('-o', '--output_file', dest='output_file', default="examples/huri_slices.txt", help='A path to the output slices file. e.g., /path/to/output/slices_file.txt')

    args = parser.parse_args()
    network_file = args.network_file
    output_file = args.output_file

    # create_slices opens the output for writing, so identical paths would
    # destroy the input.
    if os.path.abspath(network_file) == os.path.abspath(output_file):
        parser.error("--output_file must differ from --network_file")

    create_slices(network_file, output_file)
def _read_ensembl_symbol_pairs(cur_dict_type):
    """Return ``(ensembl_id, gene_symbol)`` tuples for one identifier family.

    :param cur_dict_type: ``'ENSG'`` or ``'ENSMUSG'``.
    :raises ValueError: for any other identifier family.
    """
    if cur_dict_type == 'ENSMUSG':
        lines_dict = load_gene_dictionary(constants.ENSMUSG_TO_GENE_SYMBOLS)
    elif cur_dict_type == 'ENSG':
        lines_dict = load_gene_dictionary(constants.ENSG_TO_GENE_SYMBOLS)
    else:
        # Raising a bare string is itself a TypeError on Python 3.
        raise ValueError("unknown gene identifiers: {}".format(cur_dict_type))

    pairs = []
    for cur in lines_dict:
        splited_line = cur.split()
        if len(splited_line) < 2:
            # Blank or malformed line: nothing to map.
            continue
        ensembl_id = splited_line[0]
        # Drop the version suffix ("ENSG0001.5" -> "ENSG0001").
        limit = ensembl_id.find('.')
        if limit > 0:
            ensembl_id = ensembl_id[:limit]
        pairs.append((ensembl_id, splited_line[1]))
    return pairs


def get_g2e_dictionary(cur_dict_type='ENSG'):
    """Return a dict mapping gene symbol -> Ensembl id (version stripped).

    :param cur_dict_type: ``'ENSG'`` (default) or ``'ENSMUSG'``.
    :raises ValueError: for any other identifier family.
    """
    return {symbol: ensembl_id
            for ensembl_id, symbol in _read_ensembl_symbol_pairs(cur_dict_type)}


def get_e2g_dictionary(cur_dict_type):
    """Return a dict mapping Ensembl id (version stripped) -> gene symbol.

    Also records ``cur_dict_type`` in the module-level ``dict_type``.

    :param cur_dict_type: ``'ENSG'`` or ``'ENSMUSG'``.
    :raises ValueError: for any other identifier family.
    """
    global dict_type
    ensembl2gene_symbols = {ensembl_id: symbol
                            for ensembl_id, symbol in _read_ensembl_symbol_pairs(cur_dict_type)}
    dict_type = cur_dict_type
    return ensembl2gene_symbols


def e2g_convertor(e_ids):
    """Translate Ensembl gene ids to gene symbols.

    Ids that are missing from the table are returned unchanged except for the
    removal of anything after the first ``.``.

    :param e_ids: a single id string or a list of id strings.
    :return: list of gene symbols (or stripped ids), one per input id.
    """
    if isinstance(e_ids, str):
        e_ids = [e_ids]

    # The previous code tested `g2e_dict` but stored the table in a local
    # `e2g_dict`, so the "ENSG" file was re-read on every call.  It also
    # sniffed the id prefix with `index('0')`, which raised for ids without a
    # "0" and for an empty list, although the result never influenced which
    # table was loaded.  The table is now loaded once and cached.
    global e2g_dict
    if e2g_dict is None:
        e2g_dict = get_e2g_dictionary("ENSG")

    results = []
    for cur in e_ids:
        unversioned = cur.split(".")[0]
        results.append(e2g_dict.get(unversioned, unversioned))
    return results
/src/utils/graph_influence_linear_th.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implement linear threshold models 3 | """ 4 | #!/usr/bin/env python 5 | # Copyright (C) 2004-2010 by 6 | # Hung-Hsuan Chen 7 | # All rights reserved. 8 | # BSD license. 9 | # NetworkX:http://networkx.lanl.gov/. 10 | __author__ = """Hung-Hsuan Chen (hhchen@psu.edu)""" 11 | 12 | import sys 13 | sys.setrecursionlimit(100000000) 14 | import copy 15 | import networkx as nx 16 | 17 | __all__ = ['linear_threshold'] 18 | 19 | def linear_threshold(G, seeds, steps=0): 20 | """Return the active nodes of each diffusion step by linear threshold model 21 | Parameters 22 | ---------- 23 | G : networkx graph 24 | The number of nodes. 25 | seeds: list of nodes 26 | The seed nodes of the graph 27 | steps: int 28 | The number of steps to diffuse 29 | When steps <= 0, the model diffuses until no more nodes 30 | can be activated 31 | Return 32 | ------ 33 | layer_i_nodes : list of list of activated nodes 34 | layer_i_nodes[0]: the seeds 35 | layer_i_nodes[k]: the nodes activated at the kth diffusion step 36 | Notes 37 | ----- 38 | 1. Each node is supposed to have an attribute "threshold". If not, the 39 | default value is given (0.5). 40 | 2. Each edge is supposed to have an attribute "influence". If not, the 41 | default value is given (1/in_degree) 42 | References 43 | ---------- 44 | [1] GranovetterMark. Threshold models of collective behavior. 45 | The American journal of sociology, 1978. 
46 | Examples 47 | -------- 48 | >>> DG = nx.DiGraph() 49 | >>> DG.add_edges_from([(1,2), (1,3), (1,5), (2,1), (3,2), (4,2), (4,3), \ 50 | >>> (4,6), (5,3), (5,4), (5,6), (6,4), (6,5)]) 51 | >>> layers = networkx_addon.information_propagation.linear_threshold(DG, [1]) 52 | """ 53 | if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph: 54 | raise Exception( \ 55 | "linear_threshold() is not defined for graphs with multiedges.") 56 | 57 | # make sure the seeds are in the graph 58 | for s in seeds: 59 | if s not in G.nodes(): 60 | raise Exception("seed", s, "is not in graph") 61 | 62 | # change to directed graph 63 | if not G.is_directed(): 64 | DG = G.to_directed() 65 | else: 66 | DG = copy.deepcopy(G) 67 | 68 | # init thresholds 69 | for n in DG.nodes(): 70 | if 'threshold' not in DG.nodes[n]: 71 | DG.nodes[n]['threshold'] = 0.5 72 | elif DG.nodes[n]['threshold'] > 1: 73 | raise Exception("node threshold:", DG.nodes[n]['threshold'], \ 74 | "cannot be larger than 1") 75 | 76 | # init influences 77 | in_deg = DG.in_degree() 78 | for e in DG.edges(): 79 | if 'influence' not in DG[e[0]][e[1]]: 80 | DG[e[0]][e[1]]['influence'] = 1.0 / in_deg[e[1]] 81 | elif DG[e[0]][e[1]]['influence'] > 1: 82 | raise Exception("edge influence:", DG[e[0]][e[1]]['influence'], \ 83 | "cannot be larger than 1") 84 | 85 | # perform diffusion 86 | A = copy.deepcopy(seeds) 87 | if steps <= 0: 88 | # perform diffusion until no more nodes can be activated 89 | return _diffuse_all(DG, A) 90 | # perform diffusion for at most "steps" rounds only 91 | return _diffuse_k_rounds(DG, A, steps) 92 | 93 | def _diffuse_all(G, A): 94 | layer_i_nodes = [ ] 95 | layer_i_nodes.append([i for i in A]) 96 | while True: 97 | len_old = len(A) 98 | A, activated_nodes_of_this_round = _diffuse_one_round(G, A) 99 | layer_i_nodes.append(activated_nodes_of_this_round) 100 | if len(A) == len_old: 101 | break 102 | return layer_i_nodes 103 | 104 | def _diffuse_k_rounds(G, A, steps): 105 | layer_i_nodes = [ ] 106 | 
def format_script(file_path, uid=True, **kwargs):
    """Render the template ``<file_path>.format`` with ``str.format``.

    :param file_path: path of the rendered file; the template is read from
        the same path with ``.format`` appended.
    :param uid: when true, ``_<random float>`` is inserted before the
        extension of ``file_path`` so repeated or parallel renderings do not
        write to the same file; when false the result is written to
        ``file_path`` itself.
    :param kwargs: values substituted into the template's ``{NAME}`` fields.
    :return: path of the file that was written.
    """
    # Context managers guarantee the handles are closed (and the output is
    # flushed) before the caller moves or reads the rendered file.
    with open(file_path + ".format") as template_file:
        formatted_script = template_file.read().format(**kwargs)

    if uid:
        base_name, extension = os.path.splitext(file_path)
        exec_file_name = "{}_{}{}".format(base_name, random.random(), extension)
    else:
        exec_file_name = file_path

    with open(exec_file_name, "w+") as exec_file:
        exec_file.write(formatted_script)
    return exec_file_name
def remove_subgraph_self_loops(nodes_to_remove, network_file_name):
    """Write a copy of the network file without self-loop edges.

    The file is read as tab-separated text whose first line is a header.  Rows
    whose first and third columns are equal are dropped, and the result is
    written to ``<network file without extension>_no_loops.sif``.

    :param nodes_to_remove: only checked for emptiness; when empty nothing is
        filtered and the original path is returned.
    :param network_file_name: path to the tab-separated network file.
    :return: path of the file to use afterwards (the new file, or
        ``network_file_name`` when ``nodes_to_remove`` is empty).
    """
    if len(nodes_to_remove) == 0:
        return network_file_name
    network_df = pd.read_csv(network_file_name, sep="\t")
    # Columns are addressed by position: with a header row their labels are
    # the header strings, so the label-based `.loc[:, 0]` raised KeyError.
    filtered_network = network_df[network_df.iloc[:, 0] != network_df.iloc[:, 2]]
    # splitext returns a (root, ext) tuple; only the root is wanted here.
    new_file_name = os.path.splitext(network_file_name)[0] + "_no_loops" + ".sif"
    filtered_network.to_csv(new_file_name, sep="\t", index=False)
    # Return a path in both branches, like remove_subgraph_by_nodes does.
    return new_file_name
def disease_algo_report(algo_name, disease_name, expected_genes, module_genes, modules_summary, report_file_name, dataset_name):
    """Write a one-row TSV comparing reported module genes with expected genes.

    The row holds the number of modules, the sizes of both gene sets, their
    overlap, precision (overlap / reported genes), recall (overlap / expected
    genes), F1, and the mean and standard deviation of the module sizes.  All
    metrics stay 0 when ``modules_summary`` is empty.  The file is written to
    ``<constants.OUTPUT_GLOBAL_DIR>/<dataset_name>/<algo_name>/<report_file_name>_disease.tsv``.

    :param algo_name: sub-directory name of the algorithm.
    :param disease_name: value stored in the ``disease_name`` column.
    :param expected_genes: collection of genes considered true.
    :param module_genes: collection of genes reported in the modules.
    :param modules_summary: sized collection of per-module rows, each holding
        at least the ``SH_NUM_GENES`` field.
    :param report_file_name: prefix of the output file name.
    :param dataset_name: sub-directory name of the dataset.
    """
    # The column names are part of the output format and are kept verbatim.
    precision_key = "TP/(TP+TN)_(_precision_)"
    recall_key = "TP/(TP+FN)_(_recall_)"

    disease_data = {
        "disease_name": disease_name,
        "num_of_modules": len(modules_summary),
        "TP+FN_(_true_)": len(expected_genes),
        "TP+TN_(_retrieved_)": len(module_genes),
        precision_key: 0,
        recall_key: 0,
        "F1": 0,
        "TP": 0,
        "module_size_avg": 0,
        "module_size_std": 0
    }
    if len(modules_summary) > 0:
        modules_summary = pd.DataFrame(modules_summary)
        disease_genes_extracted = float(len(set(module_genes).intersection(expected_genes)))
        disease_data["TP"] = disease_genes_extracted

        # An empty gene set used to raise ZeroDivisionError; the metric now
        # keeps its initial value of 0 in that case.
        precision = disease_genes_extracted / len(module_genes) if len(module_genes) > 0 else 0
        recall = disease_genes_extracted / len(expected_genes) if len(expected_genes) > 0 else 0
        disease_data[precision_key] = precision
        disease_data[recall_key] = recall

        if (precision + recall) == 0:
            disease_data["F1"] = 0
        else:
            disease_data["F1"] = 2 * ((precision * recall) / (precision + recall))

        disease_data["module_size_avg"] = modules_summary[SH_NUM_GENES].mean()
        disease_data["module_size_std"] = modules_summary[SH_NUM_GENES].std()

    pd.DataFrame([disease_data]).to_csv(
        os.path.join(constants.OUTPUT_GLOBAL_DIR, dataset_name, algo_name,
                     "{}_disease.tsv".format(report_file_name)), sep="\t", index=False)
df_summary = pd.DataFrame(modules_summary) 100 | data = {"num_of_modules": df_summary.index.size, 101 | "module_size_avg": df_summary[SH_NUM_GENES].mean(), 102 | "module_size_std": df_summary[SH_NUM_GENES].std(), 103 | "total_num_genes": len(module_genes) 104 | } 105 | 106 | df = pd.DataFrame() 107 | if len(data) >0: 108 | df = pd.DataFrame([data]) 109 | 110 | df.to_csv( 111 | os.path.join(constants.OUTPUT_GLOBAL_DIR, dataset_name, algo_name, 112 | "{}_general.tsv".format(report_file_name)), sep="\t", index=False) 113 | 114 | 115 | 116 | # def output_modules(output_file_name, modules, score_file_name, output_base_dir=""): 117 | # output_data = create_modules_output(modules, score_file_name) 118 | # file(output_file_name, 'w+').write(output_base_dir + "\n") 119 | # json.dump(output_data, file(output_file_name, 'a+')) 120 | # sys.stdout.write(output_file_name) 121 | 122 | def reduce_to_dict(x,y): 123 | if y["id"] in x: 124 | x[y["id"]]["modules"] = x[y["id"]]["modules"] + y["modules"] 125 | else: 126 | x[y["id"]]=y 127 | return x 128 | 129 | def merge_two_dicts(x, y): 130 | 131 | z = x.copy() 132 | z.update(y) 133 | return z 134 | 135 | def create_modules_output(G_modules, score_file_name): 136 | scores=None 137 | if score_file_name is not None: 138 | print("score_file_name: {}".format(score_file_name)) 139 | print(pd.read_csv(score_file_name,sep="\t").columns) 140 | scores = pd.read_csv(score_file_name,sep="\t").set_index("id") 141 | 142 | if constants.IS_PVAL_SCORES: 143 | scores["score"] = scores["pval"].apply(lambda x: -np.log10(x)) 144 | 145 | zero_scores = [ {"score" : 0, "id" : gene} for G_module in G_modules for gene in G_module.nodes if scores is None or gene not in scores.index] 146 | if len(zero_scores) !=0: 147 | zero_scores = pd.DataFrame(zero_scores).set_index("id") 148 | zero_scores=zero_scores[~zero_scores.index.duplicated(keep='first')] 149 | scores = pd.concat([scores, zero_scores],axis=0) 150 | return [merge_two_dicts({"id" : k}, v) for k,v in 
reduce(reduce_to_dict, [{"eid": gene, "modules": [i], "id": gene, "gene_symbol": e2g_convertor([gene])[0], "score" : float(scores.loc[gene,"score"])} for i, G_module in enumerate(G_modules) for gene in G_module],\ 151 | {}).items()] 152 | 153 | def draw_network(G_modules, score_file_name, network_file_name): 154 | output = [{"data" : x, "label" : x["eid"], "selected" : True } for x in create_modules_output(G_modules, score_file_name)] 155 | # active_genes = [y for x in G_modules for y in x.nodes] 156 | active_edges = [y for x in G_modules for y in x.edges] 157 | # active_edges = [[x.iloc[0], x.iloc[2]] for i, x in pd.read_csv(network_file_name, sep="\t").iterrows() if x.iloc[0] in active_genes and x.iloc[2] in active_genes] 158 | additional_edges = [] # [[x.iloc[0], x.iloc[2]] for i, x in pd.read_csv(network_file_name, sep="\t").iterrows() if not (x.iloc[0] in active_genes and x.iloc[2] in active_genes) and (x.iloc[0] in active_genes or x.iloc[2] in active_genes)] 159 | # additional_nodes = [] # [y for x in (active_edges + additional_edges) for y in x if y if y not in active_genes] 160 | additional_nodes = [] # list(set(additional_nodes)) 161 | 162 | return output + [{"data" : {"id" : x, "eid" : x, "modules" : []}, "label" : ""} for x in additional_nodes] + [{"data": {"id" : x[0]+"_"+x[1], "source":x[0], "target":x[1]}, "label" : ""} for x in additional_edges] + [{"data": {"id" : x[0]+"_"+x[1], "source":x[0], "target":x[1]}, "label" : "-"} for x in active_edges] 163 | 164 | 165 | 166 | def generate_report_from_template(cy, output_base_dir, output_file_name): 167 | 168 | len([x for x in cy if not "source" in x["data"] and len(x["data"]["modules"])>0]) 169 | report_file_name=format_script(os.path.join(os.path.dirname(os.path.abspath(__file__)),'../data', "graph.html"), NUM_OF_GENES=len([x for x in cy if not "source" in x["data"] and len(x["data"]["modules"])>0]), HG_REPORT=[], MODULES_SUMMARY=[], DISEASE_GENES=[], DATA=json.dumps(cy)) 170 | 171 | 
def visualize_modules(dataset_name, G_modules, score_file_name, network_file_name, output_base_dir):
    """Render one HTML report per module by running ``module_report`` in a pool.

    :param dataset_name: name passed through to ``module_report``.
    :param G_modules: iterable of module graphs, one report each.
    :param score_file_name: path of a score file, or ``None``; passed through.
    :param network_file_name: path of the network file; passed through.
    :param output_base_dir: directory for the reports; created when missing.
    """
    print("visualizing modules...")
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_base_dir, exist_ok=True)

    # The manager runs a helper process; the context manager shuts it down
    # once all reports are done instead of leaving it alive.
    with multiprocessing.Manager() as manager:
        modules_summary = manager.list()

        params = [[i, G_module, score_file_name, network_file_name, dataset_name, modules_summary, output_base_dir]
                  for i, G_module in enumerate(G_modules)]

        # map() blocks until every report is written, so leaving the block
        # only releases the workers - also when a worker raised.
        with multiprocessing.Pool(constants.N_OF_THREADS) as p:
            p.map(module_report, params)