├── .gitignore
├── CHANGELOG.md
├── Dockerfile
├── LICENSE
├── README.md
├── bin
    ├── iterativeWGCNA
    └── iterativeWGCNA_merge
├── dist
    ├── iterativeWGCNA-1.1.6-py3-none-any.whl
    └── iterativeWGCNA-1.1.6.tar.gz
├── iterativeWGCNA
    ├── __init__.py
    ├── __main__.py
    ├── analysis.py
    ├── cmlargs.py
    ├── colors.py
    ├── eigengenes.py
    ├── expression.py
    ├── genes.py
    ├── io
    │   ├── __init__.py
    │   └── utils.py
    ├── iterativeWGCNA.py
    ├── network.py
    ├── r
    │   ├── __init__.py
    │   ├── imports.py
    │   ├── manager.py
    │   └── snippets.py
    └── wgcna.py
├── merge_close_modules.py
├── run_iterative_wgcna.py
├── setup.cfg
└── setup.py


/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | *.pyc
3 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | ### 1.1.6 / 2018-03-19
 2 |   * added `--gzipTOMs` option to gzip TOM .RData files
 3 |   * new release on PyPI 
 4 |   * bug fix: saveTOMs disabled by default; documentation updated
 5 |   * bug fix: issue parsing boolean WGCNA parameters (e.g. saveTOMs=FALSE or cosineCorrelation=TRUE) resolved 
 6 |   
 7 | ###  1.1.3
 8 |   * added script to adjust final module merge
 9 | 	* see [Add-ons](/README.md#add-ons) and updated [Output Files](/README.md#output-files) for more information
10 |   * fixed Python 3.3+ bug with converting odict_values to ro.StrVector
11 |   * added `--debug` option; currently only prints extensive debugging statements for module merge stage
12 | 
13 | ### 1.1.0 / 2017-06-28
14 | * Fix final module merge to improve efficiency and recalculate eigengenes after each merge 
15 | * Change program output to generate iteration-specific output files in easily accessible directory structure
16 | 
17 | ### 1.0.0 / 2017-02-06
18 | * First numbered release
19 | * Version names will follow "Semantic Versioning" of X.Y.Z where:
20 |     * Increment Z when you fix something
21 |     * Increment Y when you add a new feature
22 |     * Increment X when you break backwards-compatibility or add major features
23 | * Tags/Releases will be against Master, as a general guideline to keep it simple
24 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM rocker/r-ver:3.3.3
 2 | 
 3 | RUN apt-get update && apt-get install -y build-essential wget unzip curl python python-dev python-matplotlib libicu-dev libssl-dev libffi-dev libxml2-dev libxslt1-dev zlib1g-dev libreadline-dev && apt-get clean && apt-get purge && rm -rf /var/lib/apt/lists/* /tmp/*
 4 | 
 5 | ENV LD_LIBRARY_PATH="/lib/x86_64-linux-gnu:/lib/x86_64-linux-gnu/:/usr/local/lib/R/lib"
 6 | ENV LDFLAGS="-L/lib/x86_64-linux-gnu/:/usr/lib/x86_64-linux-gnu/:/usr/local/lib/R/lib"
 7 | 
 8 | RUN R -e "source('http://bioconductor.org/biocLite.R');biocLite(c('GO.db', 'preprocessCore', 'impute', 'AnnotationDbi'));install.packages(c('data.table','matrixStats', 'checkmate', 'htmlTable', 'Hmisc', 'WGCNA'))"
 9 | 
10 | COPY . /usr/local/iterativeWGCNA
11 | 
12 | # For some reason the sym links for these are missing so adding ....
13 | WORKDIR /lib/x86_64-linux-gnu
14 | RUN ln -s libpcre.so.3 libpcre.so; ln -s liblzma.so.5 liblzma.so; ln -s libbz2.so.1 libbz2.so
15 | 
16 | WORKDIR /usr/local/iterativeWGCNA
17 | RUN curl https://bootstrap.pypa.io/pip/2.7/get-pip.py -o get-pip.py
18 | RUN python get-pip.py
19 | RUN pip install rpy2==2.7.9 --force-reinstall
20 | RUN pip install iterativeWGCNA
21 | 
22 | WORKDIR /home/docker
23 | 
24 | ENTRYPOINT ["iterativeWGCNA"]
25 | 
26 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     {description}
294 |     Copyright (C) {year}  {fullname}
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   {signature of Ty Coon}, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 
341 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # iterativeWGCNA: a WGCNA extension
  2 | 
  3 | ## New Release Available
  4 | 
  5 | * __iterativeWGCNA 1.1.6 now available__
  6 |   * bug fix: saveTOMs disabled by default but can be enabled with the option `--wgcnaParameters "saveTOMs=TRUE"`
  7 |   * bug fix: issue parsing boolean WGCNA parameters (e.g. `saveTOMs=FALSE` or `cosineCorrelation=TRUE`) resolved 
  8 |   * new parameter added: `--gzipTOMs` which will gzip TOM .RData files as generated to save space
  9 |   * new dist available on PyPI
 10 |   
 11 | * __iterativeWGCNA 1.1.3 now available__
 12 |   * added script to adjust final module merge
 13 | 	* see [Add-ons](#add-ons) and updated [Output Files](#output-files) for more information
 14 |   * fixed Python 3.3+ bug with converting odict_values to ro.StrVector
 15 |   * added `--debug` option; currently only prints extensive debugging statements for module merge stage
 16 | 
 17 | ## Synopsis
 18 | 
 19 | iterativeWGCNA provides a Python-wrapped extension for the R program [Weighted Gene Correlation Network Analysis](https://github.com/cran/WGCNA) (WGCNA) that improves the robustness of network-based classifications (modules) inferred from whole-transcriptome gene expression datasets.
 20 | 
 21 | ## How to Cite
 22 | 
 23 | When citing iterativeWGCNA, please use:
 24 | 
 25 | Greenfest-Allen et al. 2017. iterativeWGCNA: iterative refinement to improve module detection from WGCNA co-expression networks. [bioRxiv doi:10.1101/234062](https://doi.org/10.1101/234062)
 26 | 
 27 | 
 28 | ## Contents
 29 | 
 30 | ### Setup and Installation
 31 | 
 32 | * [Dependencies](#dependencies)
 33 | * [Installation](#installation)
 34 |   
 35 | ### Usage
 36 | 
 37 | * [Running iterativeWGCNA](#running-iterativewgcna)
 38 | * [Add-ons](#add-ons)
 39 |   
 40 | ### Troubleshooting
 41 | 
 42 | * [libreadline.so.6: undefined symbol](#libreadlineso6-undefined-symbol)
 43 | * [Cannot install rpy2 on OSX](#cannot-install-rpy2-with-latest-r-version-34x-on-macos)
 44 | * [Segmentation Faults, missing C libs, etc](#segmentation-faults-missing-c-libs-etc)
 45 | 
 46 | 
 47 | ## Setup and Installation
 48 | 
 49 | ### Dependencies
 50 | 
 51 | iterativeWGCNA has the following dependencies:
 52 | 
 53 | #### R language for statistical computing
 54 | 
 55 | [R](https://cran.r-project.org/) version 3.* must be available on the system and the binary executable in the system PATH.
 56 | 
 57 | > NOTE: the most recent version of R that supports WGCNA is 3.3.x
 58 | 
 59 | iterativeWGCNA requires that the following R packages be installed:
 60 | 
 61 | * [WGCNA](https://labs.genetics.ucla.edu/horvath/CoexpressionNetwork/Rpackages/WGCNA/#cranInstall): Weighted Gene Co-expression Network Analysis package and Bioconductor dependencies
 62 | 
 63 | #### Python
 64 | 
 65 | iterativeWGCNA requires Python version 2.7 or higher.  It is designed to be future compatible with Python 3+.  iterativeWGCNA requires the following Python packages:
 66 | 
 67 | * [rpy2](https://pypi.python.org/pypi/rpy2): a Python interface for R (v. 2.7.9+)
 68 | * [matplotlib](https://matplotlib.org/)
 69 | 
 70 | > NOTE: the most recent version of rpy2 requires python 3.x
 71 | 
 72 | If missing, rpy2 will be installed by the iterativeWGCNA installer.  See below.
 73 | 
 74 | ### Installation
 75 | 
 76 | iterativeWGCNA can be run without installing the package as long as the requisite Python dependencies (rpy2) and R are already present on the system.  Installing the package will install any missing *Python* dependencies.
 77 | 
 78 | > iterativeWGCNA is hosted on the Python Package Index (PyPI) and can be installed via `pip` or `easy_install`.
 79 | 
 80 | ```bash
 81 | pip install iterativeWGCNA
 82 | ```
 83 | 
 84 | This package is tied to the tagged releases on GitHub.
 85 | 
 86 | To install the iterativeWGCNA package from the git master, clone the repository and then run `python setup.py install` as follows:
 87 | 
 88 | ```bash
 89 | git clone https://github.com/cstoeckert/iterativeWGCNA.git
 90 | cd iterativeWGCNA
 91 | python setup.py install
 92 | ```
 93 | 
 94 | > NOTE: depending on your system this may require administrative (e.g., sudo) permissions.
 95 | 
 96 | As a workaround, specify the `--user` switch to install iterativeWGCNA and its dependencies to a local (user) library (e.g., `.local/bin` on a Linux system) as follows:
 97 | 
 98 | ```sh
 99 | git clone https://github.com/cstoeckert/iterativeWGCNA.git
100 | cd iterativeWGCNA
101 | python setup.py install --user
102 | ```
103 | 
104 | Alternatively, you can also use `pip` to install from the git master:
105 | 
106 | ```bash
107 | pip install git+https://github.com/cstoeckert/iterativeWGCNA.git
108 | ```
109 | 
110 | 
111 | ## Usage
112 | 
113 | ### Running iterativeWGCNA
114 | 
115 | 1. [Quick Start](#quick-start)
116 | 1. [Command Line Options](#command-line-options)
117 | 1. [WGCNA Parameters](#wgcna-parameters)
118 | 1. [Input File Format](#input-file-format)
119 | 1. [Output Files](#output-files)
120 | 
121 | #### Quick Start
122 | 
123 | If installed via `pip` or `easy_install`, iterativeWGCNA can be run using the `iterativeWGCNA` command.  At minimum, the `-i` option (`--inputFile`) denoting the full path to the input file must be specified:
124 | 
125 | ```sh
126 | iterativeWGCNA -i <input_file_path>
127 | ```
128 | 
129 | iterativeWGCNA can also be run without installing the iterativeWGCNA package by executing the wrapper script `run_iterative_wgcna.py` in the iterativeWGCNA directory. At a minimum, the `-i` option (`--inputFile`) denoting the full path to the input file must be specified:
130 | 
131 | ```sh
132 | python run_iterative_wgcna.py -i <input_file_path> 
133 | ```
134 | 
135 | If the iterativeWGCNA package was installed, iterativeWGCNA can also be run at the package level using the `-m` switch:
136 | 
137 | ```sh
138 | python -m iterativeWGCNA -i <input_file_path> 
139 | ```
140 | 
141 | #### Command Line Options
142 | 
143 | Execute `run_iterative_wgcna.py` with the `-h` (`--help`) switch to see all command line options and additional usage information, including details on file formats.
144 | 
145 | ```sh
146 | python run_iterative_wgcna.py -h
147 | ```
148 | 
149 | ```diff
150 | -h, --help
151 |    show help message and exit
152 |     
153 | -i <gene expression file>, --inputFile <gene expression file>
154 |    full path to input gene expression file; if full path is not provided,
155 |    assumes the file is in the working (output) directory
156 | + required
157 |    
158 | -o <output dir>, --workingDir <output dir>
159 |    R working directory; where output will be saved
160 |    default: current directory
161 |    
162 | -v, --verbose
163 |    print status messages
164 | 
165 | -p <param list>, --wgcnaParameters <param list>
166 |    comma separated list of parameters to be passed to WGCNA's blockwiseModules function
167 |    e.g., power=6,randomSeed=1234875
168 |    see 'blockwiseModules' section of the WGCNA manual for more information
169 |    
170 | --enableWGCNAThreads
171 |     enable WGCNA to use threads
172 |     
173 | --skipSaveBlocks
174 |     do not save WGCNA blockwise modules for each iteration
175 | 	also will not save TOMs
176 | 	NOTE: blocks are necessary to generate summary graphics
177 | 	
178 | --gzipTOMs
179 |     if the WGCNA parameter saveTOMs is set to TRUE, this will
180 | 	gzip the TOM .RData files
181 | 	NOTE: R is not able to read the .RData.gz files; uncompress
182 | 	first
183 | 	
184 | -f, --finalMergeCutHeight <cut height>
185 | 	cut height (max dissimilarity) for final module merge
186 | 	(after algorithm convergence); [0, 1.0], default=0.05
187 |     
188 | ```
189 | 
190 | #### WGCNA Parameters
191 | 
192 | iterativeWGCNA can accept any parameter valid for the WGCNA blockwiseModules function.  See https://www.rdocumentation.org/packages/WGCNA/versions/1.41-1/topics/blockwiseModules for full details
193 | 
194 | > To specify these parameters use the `--wgcnaParameters` flag followed by a comma separated list of parameter=value pairs.
195 | 
196 | For example:
197 | 
198 | `--wgcnaParameters maxBlockSize=5000,corType=bicor,power=10`
199 | 
200 | sets the maximum block size to 5000 genes, the correlation type to the biweight correlation, and the power-law scaling factor (beta) to 10
201 | 
202 | > WGCNA's `blockwiseModules` function partitions the gene set into a set of blocks each containing at most `maxBlockSize` genes.
203 | 
204 | *To run iterativeWGCNA in a single block, set `maxBlockSize` to a value greater than the number of genes in your gene set*.
205 | 
206 | > NOTE: for large datasets (>10,000 genes or probes), adjacency and TOM matrix calculations done in a single block may fail due to memory allocation issues 
207 | 
208 | see the [WGCNA large dataset tutorial, section 2.c.2](https://labs.genetics.ucla.edu/horvath/CoexpressionNetwork/Rpackages/WGCNA/Tutorials/FemaleLiver-02-networkConstr-blockwise.pdf) for more details
209 | 
210 | If WGCNA parameters are not specified, iterativeWGCNA uses the default WGCNA settings for the `blockwiseModules` function, except for the following:
211 | 
212 | ```python
213 | minModuleSize = 20 # minimum number of genes in a detected module
214 | saveTOMs = FALSE # save the topological overlap matrices for each block in the block data structure
215 | minKMEtoStay = 0.8 # minimum eigengene connectivity (kME) required for a gene to be retained in its assigned module
216 | minCoreKME = 0.8 # if the module does not have minModuleSize genes with eigengene connectivity at least minCoreKME, the module is disbanded
217 | reassignThreshold = 0.05 # per-set p-value ratio threshold for reassigning genes between modules
218 | networkType = 'signed' # character string specifying network type. Allowed values are "unsigned", "signed", and "signed hybrid"
219 | numericLabels = TRUE # label modules by numbers (e.g., 0,1,2) instead of colors
220 | ```
221 | 
222 | #### Input File Format
223 | 
224 | iterativeWGCNA expects a `tab-delimited` text file containing gene expression data arranged such that there is one row per gene and one column per sample.  The first column should contain `unique` gene identifiers.  For example:
225 | 
226 | | Gene | Sample1 | Sample2 | Sample3 |
227 | | --- | --- | --- | --- |
228 | | Gata1 | 500 | 715 | 1000 |
229 | | Phtf2 | 60 | 1000 | 1600 |
230 | 
231 | > NOTE: We recommend using numeric gene identifiers to uniquely label genes in the input file as R will do some character substitutions (e.g., '.' for '-') and concatenate 'X' to gene symbols starting with a number, leading to erroneous mapping between data frames and potential loss of data.
232 | 
233 | 
234 | > iterativeWGCNA will accept `gzipped` input files.
235 | 
236 | 
237 | #### Output Files
238 | 
239 | An **iteration** of iterativeWGCNA comprises one run of blockwiseWGCNA followed by an eigengene-connectivity (kME) goodness of fit assessment.  A **pass** of iterativeWGCNA comprises multiple iterations applied to an expression dataset until no more residuals to the kME-fit are found.  A new pass is initiated by creating a new expression dataset from all residuals to the kME-fit found during the previous pass.
240 | 
241 | > Modules are uniquely identified by the numerical assignment and the iteration in which they were first detected: e.g., `P1_I2_M1` is module 1, detected in the second iteration of the first pass.  **Unclassified genes are labeled UNCLASSIFIED with a kME of NA**.
242 | 
243 | Results from each pass and iteration are saved in a series of directories, labeled as:
244 | 
245 | > passM: results from the numbered (M) pass
246 | > iN: results from the numbered (N) iteration
247 | 
248 | The directory structure and output files are as follows:
249 | 
250 | ```
251 | ├── output_directory
252 | │   ├── iterativeWGCNA.log: main log file for the iterativeWGCNA run
253 | │   ├── iterativeWGCNA-R.log: log file for R; catches R errors and R warning messages
254 | │   ├── gene-counts.txt: tally of number of genes fit and residual to the fit with each iteration
255 | │   ├── final-eigengenes.txt: eigengenes for final modules after final network assembly (before merge)
256 | │   ├── final-kme-histogram.pdf: histogram of eigengene connectivities (kME) in the final classification (before merge)
257 | │   ├── final-membership.txt: gene-module assignments and kME after final iteration (before merge)
258 | │   ├── merge-<finalMergeCutHeight>-eigengenes.txt: recalculated eigengenes for modules retained after merging close modules
259 | │   ├── merge-<finalMergeCutHeight>-kme-histogram.pdf: histogram of eigengene connectivities (kME) after merging close modules
260 | │   ├── merge-<finalMergeCutHeight>-membership.txt: gene-module assignments and kME after merging close modules
261 | │   ├── passM
262 | │   │   ├── initial-pass-expression-set.txt: pass input
263 | │   │   ├── kme_histogram.pdf: histogram of eigengene connectivities for genes classified during pass
264 | │   │   ├── membership.txt: gene-module assignments and kME for genes classified during pass
265 | │   │   ├── iN
266 | │   │   │   ├── eigengenes.txt: eigengenes of modules detected during the iteration
267 | │   │   │   ├── kme_histogram.pdf: kME histogram after pruning of WGCNA result based on kME
268 | │   │   │   ├── membership.txt: gene membership after kME-based goodness of fit (Pruning)
269 | │   │   │   ├── summary.txt: summarizes pass (number of genes input, classified, residual, and number of detected modules)
270 | │   │   │   ├── wgcna-blocks.RData: R data object containing input expression data (expression) and results from blockwise WGCNA (blocks)
271 | │   │   │   ├── wgcna-kme_histogram.pdf: kME histogram based on WGCNA classification
272 | │   │   │   ├── wgcna-membership.txt: gene membership from WGCNA classification
273 | │   │   │   ├── passM_iN-TOM.block.X.RData(.gz): TOM for block X generated in passM, iN (if saveTOMs=TRUE; gzipped if --gzipTOMs option specified)
274 | ```
275 | 
276 | > Note: as of release 1.1.3, iterativeWGCNA outputs two sets of files containing the final classification.  Those prefixed with `final-` report the penultimate module membership assignments and eigengenes; i.e., the result at algorithm convergence.  Those prefixed with `merge-` report the final module assignments determined after merging close modules and reassessing module memberships after the merge.
277 | 
278 | > Note: TOMs are only saved if the wgcnaParameter `saveTOMs` is set to `TRUE`.  With large gene sets (>10,000 genes), these can be very large and take a while to write to file, dramatically slowing down the performance of the algorithm in the early iterations.  To save disk space, specify the option `--gzipTOMs` to gzip the .RData files as they are generated.  Again, this i/o operation may slow down the performance of the algorithm in the early iterations.
279 | 
280 | ### Add-ons
281 | 
282 | 1. [Merge Close Modules](#merge-close-modules)
283 | 
284 | #### Merge Close Modules
285 | 
286 | Script for running the final-module merge. Allows users to choose a different merge-threshold without having to rerun the entire iterativeWGCNA classification.
287 | 
288 | The merge script depends on the following options:
289 | 
290 | ```diff
291 | -i <gene expression file>, --inputFile <gene expression file>
292 |    full path to input gene expression file; if full path is not provided,
293 |    assumes the file is in the working (output) directory
294 | + required
295 |    
296 | -o <output dir>, --workingDir <output dir>
297 |    R working directory; where output from the iterativeWGCNA run is stored
298 |    default: current directory
299 | + at minimum files final-membership.txt and final-eigengenes.txt must be in the directory
300 | 
301 |    	
302 | -f, --finalMergeCutHeight <cut height>
303 | 	cut height (max dissimilarity) for final module merge
304 | 	(after algorithm convergence); [0, 1.0], default=0.05
305 | 
306 | -p <param list>, --wgcnaParameters <param list>
307 |    comma separated list of parameter=value pairs required to assess module similarity and gene reassignment
308 |    The following parameters are required (defaults will be used if not specified):
309 | + minKMEtoStay --> should be the same as used when running iterativeWGCNA; default 0.8
310 | + reassignThreshold --> p-value cut-off for shifting a gene to module assignment; default 0.05
311 | ```
312 | 
313 | If the iterativeWGCNA package was installed, run as follows:
314 | 
315 | ```sh
316 | iterativeWGCNA_merge -i <input_file_path> -o <iterativeWGCNA_output_dir> --finalMergeCutHeight <float> -p minKMEtoStay=<float;same_as_iterativeWGCNA_run>
317 | ```
318 | 
319 | Alternatively, it can be run using the wrapper script in the iterativeWGCNA directory:
320 | 
321 | 
322 | ```sh
323 | python merge_close_modules.py -i <input_file_path> -o <iterativeWGCNA_output_dir> --finalMergeCutHeight <float> -p minKMEtoStay=<float;same_as_iterativeWGCNA_run>
324 | ```
325 | 
326 | 
327 | ## Troubleshooting
328 | 
329 | ### libreadline.so.6 undefined symbol
330 | 
331 | Access to the `readline` library in the context of the `rpy2` library and an Anaconda install can be problematic and has been [reported elsewhere](https://github.com/ContinuumIO/anaconda-issues/issues/152). When trying to run iterativeWGCNA, an error like the following may be observed:
332 | ```
333 | Traceback (most recent call last):
334 |   File "../iterativeWGCNA-master/run_iterative_wgcna.py", line 7, in <module>
335 |     from iterativeWGCNA.iterativeWGCNA import IterativeWGCNA
336 |   File "../iterativeWGCNA-master/iterativeWGCNA/iterativeWGCNA.py", line 17, in <module>
337 |     import rpy2.robjects as ro
338 |   File "../lib/python2.7/site-packages/rpy2/robjects/__init__.py", line 15, in <module>
339 |     import rpy2.rinterface as rinterface
340 |   File "../lib/python2.7/site-packages/rpy2/rinterface/__init__.py", line 100, in <module>
341 |     from rpy2.rinterface._rinterface import *
342 | ImportError: ../lib/python2.7/site-packages/rpy2/rinterface/../../../../libreadline.so.6: undefined symbol: PC
343 | ```
344 | 
345 | The workaround is to uncomment the readline import in the `run_iterative_wgcna.py` script:
346 | 
347 | ```python
348 | # import readline
349 | ```
350 | 
351 | ### Cannot install rpy2 with latest R (version 3.4.x) on macOS
352 | 
353 | #### error: command 'gcc' failed with exit status 1
354 | 
355 | The build process for many R libraries explicitly uses ```gcc```, instead of the system default, which on OSX is ```clang```.  To override the default compiler, set the CC environment variable as follows:
356 | 
357 | ```bash
358 | export CC=clang
359 | ```
360 | 
361 | #### clang: error: unsupported option '-fopenmp'
362 | 
363 | This is a known issue with an open ticket in the rpy2 project (see issue [#403](https://bitbucket.org/rpy2/rpy2/issues/403/cannot-pip-install-rpy2-with-latest-r-340)).  R 3.4.0 was built using the ```-fopenmp``` flag with Clang 4.0.0, which is not supplied by Apple.  There are several suggested workarounds (e.g., installing the LLVM library via homebrew) that do not work for all system configurations.  We recommend [downloading the rpy2 source](https://bitbucket.org/rpy2/rpy2/src), unpacking, and editing the ```setup.py``` file after line 268 (the line above the comment ```# OS X's frameworks need special attention```) as follows:
364 | 
365 | ```python
366 |   if "-fopenmp" in unknown:  # remove linker argument
367 |         unknown.remove("-fopenmp")
368 | ```
369 | 
370 | With this fix you should be able to build rpy2 from the downloaded source as follows:
371 | 
372 | ```bash 
373 | python setup.py install
374 | ```
375 | 
376 | ### Segmentation Faults, missing C libs, etc
377 | 
378 | iterativeWGCNA is written in Python but has dependencies on R and the rpy2 Python-R interface that both rely on C libraries.
379 | 
380 | If iterativeWGCNA is crashing as soon as it starts due to a segmentation fault, or you get an error along the lines of 
381 | 
382 | ```bash
383 | ImportError: <some C library>.so.0: cannot open shared object file: No such file or directory
384 | ```
385 | 
386 | then you are having C-related troubles.
387 | 
388 | Most likely, you are using the Anaconda package and environment system which has known issues with R and R-interfaces such as rpy2.
389 | 
390 | Many of these issues have already been addressed in user groups/issue trackers for [Anaconda](https://groups.google.com/a/anaconda.com/forum/#!forum/anaconda), [conda-forge](https://github.com/conda-forge/conda-forge.github.io/issues/) and [rpy2](https://bitbucket.org/rpy2/rpy2/issues).
391 | 


--------------------------------------------------------------------------------
/bin/iterativeWGCNA:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | '''Run iterativeWGCNA'''
 4 | 
 5 | # Installation workaround - see README
 6 | # import readline
 7 | 
 8 | from iterativeWGCNA.cmlargs import parse_command_line_args
 9 | from iterativeWGCNA.iterativeWGCNA import IterativeWGCNA
10 | 
if __name__ == '__main__':
    # parse the command line options and hand them straight to the
    # algorithm driver, then start the run
    IterativeWGCNA(parse_command_line_args()).run()
15 | 
16 | __author__ = 'Emily Greenfest-Allen'
17 | __copyright__ = 'Copyright 2016, University of Pennsylvania'
18 | 


--------------------------------------------------------------------------------
/bin/iterativeWGCNA_merge:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | '''Rerun final module merge'''
 4 | 
 5 | # Installation workaround - see README
 6 | # import readline
 7 | #pylint: disable=invalid-name
 8 | 
 9 | from iterativeWGCNA.iterativeWGCNA import IterativeWGCNA
10 | from iterativeWGCNA.cmlargs import parse_command_line_args
11 | 
if __name__ == '__main__':
    # same command line options as the main program, with a program
    # name and description specific to the merge add-on
    mergeArgs = parse_command_line_args(
        program='iterativeWGCNA: Adjust Merge',
        description='recompute final module merge from existing output')

    # report="merge" is passed through to the driver; only the final
    # module merge is invoked here (not the full run)
    IterativeWGCNA(mergeArgs, report="merge").merge_close_modules_from_output()
17 | 
18 | __author__ = 'Emily Greenfest-Allen'
19 | __copyright__ = 'Copyright 2018, University of Pennsylvania'
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/dist/iterativeWGCNA-1.1.6-py3-none-any.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cstoeckert/iterativeWGCNA/c5431f513a18c8564138b87588acd4df76d34d93/dist/iterativeWGCNA-1.1.6-py3-none-any.whl


--------------------------------------------------------------------------------
/dist/iterativeWGCNA-1.1.6.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cstoeckert/iterativeWGCNA/c5431f513a18c8564138b87588acd4df76d34d93/dist/iterativeWGCNA-1.1.6.tar.gz


--------------------------------------------------------------------------------
/iterativeWGCNA/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cstoeckert/iterativeWGCNA/c5431f513a18c8564138b87588acd4df76d34d93/iterativeWGCNA/__init__.py


--------------------------------------------------------------------------------
/iterativeWGCNA/__main__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2.7
 2 | """
 3 | Perform iterative WGCNA analysis
 4 | 
 5 | python dependencies:
 6 |   * rpy2
 7 |   * matplotlib
 8 | 
 9 | R dependencies:
10 |   * WGCNA
11 | """
12 | 
13 | from .cmlargs import parse_command_line_args
14 | from .iterativeWGCNA import IterativeWGCNA
15 | 
if __name__ == '__main__':
    # entry point for package execution: parse the command line options,
    # build the algorithm driver from them and run it
    IterativeWGCNA(parse_command_line_args()).run()
20 | 
21 | __author__ = 'Emily Greenfest-Allen'
22 | __copyright__ = 'Copyright 2016, University of Pennsylvania'
23 | 


--------------------------------------------------------------------------------
/iterativeWGCNA/analysis.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | functions in support of data analysis
 3 | '''
 4 | 
 5 | # TODO move to RManager or wgcnaManager
 6 | 
 7 | from .r.imports import wgcna, stats, base
 8 | 
 9 | def calculate_kME(expr, eigengene, calculateP):
10 |     '''
11 |     calculates eigengene connectivity
12 |     between an eigengene and expression data set
13 |     '''
14 |     if calculateP:
15 |         correlation = wgcna().corAndPvalue(base().t(expr), base().t(eigengene))
16 |     else:
17 |         correlation = base().as_data_frame(stats().cor(base().t(expr), base().t(eigengene)))
18 |     return correlation
19 | 


--------------------------------------------------------------------------------
/iterativeWGCNA/cmlargs.py:
--------------------------------------------------------------------------------
  1 | #pylint: disable=anomalous-backslash-in-string
  2 | #pylint: disable=invalid-name
  3 | 
  4 | '''
  5 | functions for defining, parsing, processing,
  6 | and validating command line arguments
  7 | '''
  8 | 
  9 | import re
 10 | import argparse
 11 | from os import getcwd
 12 | from .io.utils import warning
 13 | 
 14 | def parameter_list(strValue):
 15 |     '''
 16 |     for argument parsing;
 17 |     converts a comma separated list of 'param=value' pairs
 18 |     into a parm:value hash
 19 |     '''
 20 | 
 21 |     params = {}
 22 |     pairs = strValue.split(',')
 23 | 
 24 |     for p in pairs:
 25 |         name, value = p.split('=')
 26 | 
 27 |         # Test and cast for booleans
 28 |         if value.upper() in ['TRUE', 'T']:
 29 |             params[name] = True
 30 |         elif value.upper() in ['FALSE', 'F']:
 31 |             params[name] = False
 32 |         # Test and cast for integer
 33 |         elif value.isdigit():
 34 |             params[name] = int(value)
 35 |         # Test and cast for float
 36 |         elif re.match("^\d+?\.\d+?$", value):
 37 |             params[name] = float(value)
 38 |         else:
 39 |             params[name] = value
 40 | 
 41 |     return params
 42 | 
 43 | 
 44 | def restricted_float(x):
 45 |     '''
 46 |     for argument parsing; restricts float value from 0 to 1
 47 |     '''
 48 |     x = float(x)
 49 |     if x < 0.0 or x > 1.0:
 50 |         raise argparse.ArgumentTypeError("%r not in range [0.0, 1.0]"%(x,))
 51 |     return x
 52 | 
 53 | 
 54 | def summaryHelpEpilog():
 55 |     '''
 56 |     text for help epilog for
 57 |     summary
 58 |     '''
 59 |     # TODO Help for summarize script
 60 |     return "COMING SOON"
 61 | 
 62 | def helpEpilog():
 63 |     '''
 64 |     text for help epilog
 65 |     '''
 66 | 
 67 |     inputFileFormatHelp = '''
 68 | ------------------------------------------------------
 69 | Input File Format
 70 | ------------------------------------------------------
 71 | iterativeWGCNA expects a tab-delimited text file containing
 72 | gene expression data arranged such that there is one
 73 | row per gene and one column per sample.  The first column
 74 | should contain unique gene identifiers.  For example:
 75 | 
 76 | GENE    Sample1    Sample2    Sample3
 77 | Gata1    500    715    1000
 78 | Phtf2    60    1000    1600
 79 | '''
 80 |     wgcnaParametersHelp = '''
 81 | ------------------------------------------------------
 82 | WGCNA Parameters
 83 | ------------------------------------------------------
 84 | iterativeWGCNA can accept any parameter valid for the WGCNA
 85 | blockwiseModules function.
 86 | 
 87 | See http://www.inside-r.org/packages/cran/wgcna/docs/blockwiseModules
 88 | 
 89 | To specify these parameters use the --wgcnaParameters flag followed by
 90 | a comma separated list of parameter=value pairs.  For example:
 91 | 
 92 | --wgcnaParameters maxBlockSize=5000,corType=bicor,power=10
 93 | 
 94 | sets the maximum block size to 5000 genes,
 95 | the correlation type to the biweight correlation,
 96 | and the power-law scaling factor (beta) to 10
 97 | 
 98 | If parameters are not specified, iterativeWGCNA uses the default WGCNA settings,
 99 | except for the following:
100 | 
101 | minModuleSize=20
102 | saveTOMs=FALSE
103 | minKMEtoStay=0.8
104 | minCoreKME=0.8
105 | networkType=signed
106 | numericLabels=TRUE
107 | 
108 | '''
109 | 
110 |     return inputFileFormatHelp + '\n\n' + wgcnaParametersHelp
111 | 
112 | 
def parse_command_line_args(program='iterativeWGCNA', description='perform iterativeWGCNA analysis', argv=None):
    '''
    parse command line args

    program -- program name displayed in the usage / help message
    description -- program description displayed in the help message
    argv -- optional list of argument strings to parse instead of
            the process command line (argparse falls back to
            sys.argv[1:] when None)

    returns the argparse namespace; its wgcnaParameters attribute
    is always a dict completed with the iterativeWGCNA defaults
    (see set_wgcna_parameter_defaults)
    '''

    parser = argparse.ArgumentParser(prog=program,
                                     description=description,
                                     epilog=helpEpilog(),
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-i', '--inputFile',
                        metavar='<gene expression file>',
                        help="full path to input gene expression file; "
                        + "if full path is not provided,\n"
                        + "assumes the file is in the working directory;\n"
                        + "see below for input file format",
                        required=True)

    parser.add_argument('-o', '--workingDir',
                        help="R working directory; where output will be saved",
                        metavar='<output dir>',
                        default=getcwd())

    parser.add_argument('-v', '--verbose',
                        help="print status messages",
                        action='store_true')

    parser.add_argument('--debug',
                        help="print debugging messages",
                        action='store_true')

    parser.add_argument('-p', '--wgcnaParameters',
                        metavar='<param list>',
                        help="comma separated list of parameters to be passed to WGCNA's "
                        + "blockwiseModules function\n"
                        + "e.g., power=6,randomSeed=1234875\n"
                        + "see WGCNA manual & more info below",
                        type=parameter_list)

    parser.add_argument('--enableWGCNAThreads',
                        help="enable WGCNA to use threading;\nsee WGCNA manual",
                        action='store_true')

    parser.add_argument('--skipSaveBlocks',
                        help="do not save WGCNA blockwise modules for each iteration;\n"
                        + "NOTE: without blocks summary graphics cannot be generated.\n"
                        + "Also will not saveTOMs.",
                        action='store_true')

    parser.add_argument('--gzipTOMs',
                        help="gzip TOM RData files\n",
                        action='store_true')

    parser.add_argument('-f', '--finalMergeCutHeight',
                        help="cut height for final merge (after iterations are assembled)",
                        default=0.05,
                        metavar='<cut height>',
                        type=restricted_float)

    args = parser.parse_args(argv)

    # wgcnaParameters is None when -p is not given; the defaults are
    # filled in either way, and saveTOMs is forced off with --skipSaveBlocks
    args.wgcnaParameters = set_wgcna_parameter_defaults(args.wgcnaParameters, args.skipSaveBlocks)

    return args
176 | 
177 | 
def set_wgcna_parameter_defaults(params, skipSaveBlocks):
    '''
    complete the WGCNA blockwiseModules parameters with the
    iterativeWGCNA defaults

    params -- dict of user supplied parameters, or None;
              a supplied dict is updated in place and returned
    skipSaveBlocks -- when true, saveTOMs is forced to False

    always set (user choice is overridden):
      numericLabels = TRUE
    set only when not supplied by the user:
      networkType = signed
      minKMEtoStay = 0.8
      minCoreKME = value of minKMEtoStay
      minModuleSize = 20
      reassignThreshold = 0.05
      power = 6
      saveTOMs = FALSE
    '''

    settings = {} if params is None else params

    settings['numericLabels'] = True # override user choice

    settings.setdefault('networkType', 'signed')
    settings.setdefault('minKMEtoStay', 0.8)
    # must follow minKMEtoStay: defaults to whatever value is in effect
    settings.setdefault('minCoreKME', settings['minKMEtoStay'])
    settings.setdefault('minModuleSize', 20)
    settings.setdefault('reassignThreshold', 0.05) # 0.000001 # 1e-6
    settings.setdefault('power', 6)

    # if blocks are not saved; TOMs are not saved
    if skipSaveBlocks or 'saveTOMs' not in settings:
        settings['saveTOMs'] = False

    return settings
208 | 
209 | 
def parse_summary_command_line_args(argv=None):
    '''
    parse command line args for summary

    argv -- optional list of argument strings to parse instead of
            the process command line (argparse falls back to
            sys.argv[1:] when None)

    returns the argparse namespace
    '''

    parser = argparse.ArgumentParser(prog='iterativeWGCNA network summary',
                                     description="generate graphical results from iterative WGCNA analysis",
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-i', '--inputFile',
                        metavar='<gene expression file>',
                        help="full path to input gene expression file; "
                        + "if full path is not provided,\n"
                        + "assumes the file is in the working directory;\n"
                        + "see below for input file format",
                        required=True)

    parser.add_argument('-o', '--workingDir',
                        help="R working directory; where output will be saved",
                        metavar='<output dir>',
                        default=getcwd())

    parser.add_argument('-v', '--verbose',
                        help="print status messages",
                        action='store_true')

    parser.add_argument('-p', '--power',
                        metavar='<power law beta>',
                        help="power law beta for weighting the adjacency matrix",
                        default=6,
                        type=int)

    parser.add_argument('--signed',
                        help="generate signed adjacency matrix?",
                        action='store_true')

    parser.add_argument('--minKMEtoStay',
                        help="provide minKMEtoStay used for network generation",
                        default=0.80,
                        metavar='<minKMEtoStay>',
                        type=restricted_float)

    parser.add_argument('--enableWGCNAThreads',
                        help="enable WGCNA to use threading;\nsee WGCNA manual",
                        action='store_true')

    parser.add_argument('--generateNetworkSummary',
                        metavar='<view type>',
                        choices=['all', 'network', 'input'],
                        help="generate summary overview of the network (dendrogram & heatmap):\n"
                        + "network - network comprised only of classified genes\n"
                        + "input - all genes, with classified highlighted by module assignments\n"
                        + "all - both views\n"
                        + "NOTE: all adjacency matrix calculations are\n"
                        + "done in one block and may fail due to memory allocation\n"
                        + "issues for large gene-sets")

    parser.add_argument('-e', '--edgeWeight',
                        metavar='<min edge weight>',
                        default=0.5,
                        help="min edge weight for network summary; filters for\n"
                        + "connections supported by a correlation >= threshold",
                        type=restricted_float)

    return parser.parse_args(argv)
276 | 
277 | 
278 | 


--------------------------------------------------------------------------------
/iterativeWGCNA/colors.py:
--------------------------------------------------------------------------------
 1 | # pylint: disable=invalid-name
 2 | '''
 3 | manage colors
 4 | '''
 5 | 
 6 | import logging
 7 | from random import randint
 8 | from matplotlib import colors as ref_colors
 9 | 
10 | 
class Colors(object):
    '''
    hands out colors for modules and keeps track
    of every color handed out so far
    '''
    def __init__(self):
        self.logger = logging.getLogger('iterativeWGCNA.Colors')

        # base colors taken from WGCNA standard coloring for modules
        self.base_colors = [
            'turquoise', 'blue', 'brown', 'yellow', 'green',
            'red', 'black', 'pink', 'magenta', 'purple',
            'greenyellow', 'tan', 'salmon', 'cyan', 'midnightblue',
            'lightcyan', 'lightgreen', 'lightyellow', 'royalblue',
            'darkred', 'darkgreen', 'darkturquoise', 'orange',
            'darkorange', 'skyblue', 'saddlebrown', 'steelblue',
            'paleturquoise', 'violet', 'darkolivegreen',
            'darkmagenta',
        ]

        # every color returned by assign_color, in order of assignment
        self.used_colors = []


    def assign_color(self, n):
        '''
        returns a color for n and records it as used

        while n does not exceed the number of base colors, the
        base color at index n - 1 is returned (looked up in the
        matplotlib named colors to get its hex representation);
        beyond that a random, not yet used, color is generated
        '''
        if n > len(self.base_colors):
            assigned = self.__generate_random_color()
        else:
            assigned = ref_colors.cnames[self.base_colors[n - 1]]

        self.used_colors.append(assigned)

        return assigned


    def __generate_random_color(self):
        '''
        draws random '#RRGGBB' colors until one is found
        that has not been used yet
        '''
        while True:
            candidate = '#{:06X}'.format(randint(0, 0xFFFFFF))
            if candidate not in self.used_colors:
                return candidate
56 | 


--------------------------------------------------------------------------------
/iterativeWGCNA/eigengenes.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=invalid-name
  2 | # pylint: disable=unused-import
  3 | '''
  4 | manage eigengenes
  5 | '''
  6 | 
  7 | from __future__ import print_function
  8 | 
  9 | import logging
 10 | 
 11 | import rpy2.robjects as ro
 12 | from .r.imports import base, stats, rsnippets
 13 | from .io.utils import write_data_frame
 14 | from .wgcna import WgcnaManager
 15 | 
 16 | class Eigengenes(object):
 17 |     '''
 18 |     manage and manipulate eigengene matrices
 19 |     '''
 20 | 
 21 |     def __init__(self, matrix=None, debug=False):
 22 |         self.debug = debug
 23 |         self.logger = logging.getLogger('iterativeWGCNA.Eigengenes')
 24 |         self.matrix = matrix
 25 | 
 26 | 
 27 |     def extract_from_blocks(self, iteration, blocks, samples):
 28 |         '''
 29 |         extract eigenenges from blockwise WGCNA results
 30 |         '''
 31 |         self.matrix = rsnippets.extractEigengenes(iteration, blocks, samples)
 32 | 
 33 | 
 34 |     def samples(self):
 35 |         '''
 36 |         return sample names
 37 |         '''
 38 |         return self.matrix.names
 39 | 
 40 | 
 41 |     def nrows(self):
 42 |         '''
 43 |         wrapper for returning number of rows in
 44 |         the eigengene matrix
 45 |         '''
 46 | 
 47 |         return self.matrix.nrow
 48 | 
 49 | 
 50 |     def load_matrix_from_file(self, fileName):
 51 |         '''
 52 |         loads eigengenes from file into an R DataFrame
 53 |         '''
 54 |         self.matrix = ro.DataFrame.from_csvfile(fileName, sep='\t',
 55 |                                                 header=True, row_names=1)
 56 | 
 57 | 
 58 |     def write(self, prefix=''):
 59 |         '''
 60 |         writes the eigengene matrix to file
 61 |         '''
 62 |         fileName = prefix + 'eigengenes.txt'
 63 |         write_data_frame(self.matrix, fileName, 'Module')
 64 | 
 65 | 
 66 |     def similarity(self, module=None):
 67 |         '''
 68 |         calculate similarity between eigengene for a specific
 69 |         module and all the other eigengenes
 70 | 
 71 |         if no module is specified, calculate the similarity matrix
 72 |         between all eigengenes
 73 |         '''
 74 |         if module is None:
 75 |             sim = base().as_data_frame(stats().cor(base().t(self.matrix)))
 76 |         else:
 77 |             sim = base().as_data_frame(stats().cor(base().t(self.matrix), \
 78 |                                             base().t(self.matrix.rx(module, True))))
 79 |         return sim
 80 | 
 81 | 
 82 |     def correlation(self, m1, m2):
 83 |         '''
 84 |         calculate correlation between two module eigengenes
 85 |         '''
 86 |         e1 = self.get_module_eigengene(m1)
 87 |         e2 = self.get_module_eigengene(m2)
 88 |         cor = base().as_data_frame(stats().cor(base().t(e1), base().t(e2)))
 89 |         cor = round(cor.rx(1, 1)[0], 1)
 90 |         return cor
 91 | 
 92 | 
 93 |     def equal(self, m1, m2, threshold=0.0):
 94 |         '''
 95 |         check if 2 module eigengenes are "equivalent"
 96 |         (1 - correlation <= threshold)
 97 |         '''
 98 |         cor = self.correlation(m1, m2)
 99 |         return 1.0 - cor <= threshold
100 | 
101 | 
102 |     def get_module_eigengene(self, module):
103 |         '''
104 |         return a module eigengene
105 |         '''
106 |         return self.matrix.rx(module, True)
107 | 
108 | 
109 |     def extract_subset(self, modules):
110 |         '''
111 |         return a submatrix
112 |         '''
113 |         if self.debug:
114 |             self.logger.debug("Extracting eigengenes for the following modules:")
115 |             self.logger.debug(modules)
116 | 
117 |         if self.debug:
118 |             self.logger.debug("Converting module list to ro.StrVector; see R-log")
119 |             ro.r("print('Converting module list to ro.StrVector to extract eigengenes:')")
120 | 
121 |         vector = ro.StrVector(modules)
122 | 
123 |         if self.debug:
124 |             self.logger.debug(vector)
125 | 
126 |         if self.debug:
127 |             self.logger.debug("Extracted submatrix, see R-log")
128 |             ro.r("print('Extracted eigengene submatrix:')")
129 | 
130 | 
131 |         newMatrix = self.matrix.rx(vector, True)
132 | 
133 |         if self.debug:
134 |             self.logger.debug(newMatrix)
135 | 
136 |         return newMatrix
137 | 
138 | 
139 | 
140 |     def is_empty(self):
141 |         '''
142 |         return True if matrix is empty
143 |         '''
144 |         return self.matrix.nrow == 0
145 | 
146 | 
147 |     def update_to_subset(self, modules):
148 |         '''
149 |         update matrix to subset specified by modules
150 |         '''
151 |         self.matrix = self.extract_subset(modules)
152 | 
153 | 
154 |     def recalculate(self, profiles, membership, power=6):
155 |         '''
156 |         recalculate eigengenes given membership
157 |         and profiles
158 |         '''
159 |         manager = WgcnaManager(profiles, {'power':power}, debug=self.debug)
160 | 
161 |         self.matrix = rsnippets.extractRecalculatedEigengenes(
162 |             manager.module_eigengenes(membership.values()),
163 |             self.samples())
164 | 


--------------------------------------------------------------------------------
/iterativeWGCNA/expression.py:
--------------------------------------------------------------------------------
 1 | # pylint: disable=invalid-name
 2 | 
 3 | '''
 4 | functions for manipulating expression profile matrices
 5 | '''
 6 | 
 7 | import rpy2.robjects as ro
 8 | 
 9 | class Expression(object):
10 |     '''
11 |     store and manipulate expression profile matrix
12 |     '''
13 |     def __init__(self, data):
14 |         self.profiles = data
15 |         self.size = len(self.profiles)
16 |         return None
17 | 
18 | 
19 |     def genes(self):
20 |         '''
21 |         return genes (row names)
22 |         '''
23 |         return self.profiles.rownames
24 | 
25 | 
26 |     def nrow(self):
27 |         '''
28 |         return number of rows
29 |         '''
30 |         return self.profiles.nrow
31 | 
32 | 
33 |     def ncol(self):
34 |         '''
35 |         return number of columns
36 |         '''
37 |         return self.profiles.ncol
38 | 
39 | 
40 |     def samples(self):
41 |         '''
42 |         return column names (samples)
43 |         '''
44 |         return self.profiles.colnames
45 | 
46 | 
47 |     def expression(self):
48 |         '''
49 |         wrapper for accessing self.profiles
50 |         '''
51 |         return self.profiles
52 | 
53 | 
54 |     def gene_expression(self, genes):
55 |         '''
56 |         subsets expression data
57 |         returning expression for list of genes
58 |         '''
59 |         return self.profiles.rx(ro.StrVector(genes), True)
60 | 
61 | 
62 |     def residual_expression(self, unclassifiedGenes):
63 |         '''
64 |         subsets expression data
65 |         returning expression for only residuals to the fit
66 |         '''
67 |         if unclassifiedGenes is None:
68 |             return None
69 |         else:
70 |             return self.gene_expression(unclassifiedGenes)
71 | 
72 | 
73 |     def fit_expression(self, fitGenes):
74 |         '''
75 |         subsets expression data
76 |         return only genes that passed the
77 |         goodness of fit assessment
78 |         '''
79 |         if fitGenes is None:
80 |             return None
81 |         else:
82 |             return self.gene_expression(fitGenes)
83 | 


--------------------------------------------------------------------------------
/iterativeWGCNA/genes.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=invalid-name
  2 | # pylint: disable=unused-import
  3 | '''
  4 | manage genes
  5 | '''
  6 | from __future__ import print_function
  7 | 
  8 | import logging
  9 | from collections import OrderedDict
 10 | from collections import Counter
 11 | 
 12 | import rpy2.robjects as ro
 13 | 
 14 | # from .expression import Expression
 15 | from .analysis import calculate_kME
 16 | from .eigengenes import Eigengenes
 17 | from .r.imports import wgcna, stats, base, rsnippets, grdevices
 18 | from .io.utils import xstr
 19 | from .r.manager import RManager
 20 | 
 21 | class Genes(object):
 22 |     '''
 23 |     track input genes and their properties, including
 24 |     expression profiles, module membership, eigengene
 25 |     connectivity
 26 |     '''
 27 | 
 28 |     def __init__(self, exprData, debug=False):
 29 |         '''
 30 |         initialize an OrderedDict of genes
 31 |         from the row.names of the expression
 32 |         data set
 33 |         '''
 34 |         self.logger = logging.getLogger('iterativeWGCNA.Genes')
 35 |         self.profiles = exprData
 36 |         self.genes = OrderedDict((geneId, {'module': 'UNCLASSIFIED',
 37 |                                            'kME':float('NaN'),
 38 |                                            'iteration': None})
 39 |                                  for geneId in self.profiles.genes())
 40 | 
 41 |         self.size = len(self.genes)
 42 |         self.iteration = None
 43 |         self.debug = debug
 44 | 
 45 | 
 46 |     def get_module(self, gene):
 47 |         '''
 48 |         returns the assigned module for a gene
 49 |         '''
 50 |         return self.genes[gene]['module']
 51 | 
 52 | 
 53 |     def __is_classified(self, gene):
 54 |         '''
 55 |         returns true if the feature is classified
 56 |         '''
 57 |         return self.get_module(gene) != 'UNCLASSIFIED'
 58 | 
 59 | 
 60 |     def __update_module(self, gene, module):
 61 |         '''
 62 |         update gene module
 63 |         do not add new genes
 64 |         '''
 65 |         if gene in self.genes:
 66 |             self.genes[gene]['module'] = module
 67 |             return True
 68 |         else:
 69 |             return False
 70 | 
 71 | 
 72 |     def __update_classified_iteration(self, gene, iteration):
 73 |         '''
 74 |         set the iteration during which
 75 |         a gene was first classified
 76 |         '''
 77 |         if gene in self.genes:
 78 |             self.genes[gene]['iteration'] = iteration
 79 |             return True
 80 |         else:
 81 |             return False
 82 | 
 83 | 
 84 |     def update_membership(self, genes, blocks):
 85 |         '''
 86 |         fetches new module membership from WGCNA
 87 |         blocks and updates relevant genes
 88 |         '''
 89 |         modules = rsnippets.extractModules(blocks, ro.StrVector(genes))
 90 |         # if the feature is in the subset
 91 |         # update, otherwise leave as is
 92 |         for gene in genes:
 93 |             # .rx returns a FloatVector which introduces
 94 |             # a .0 to the numeric labels when converted to string
 95 |             # which needs to be removed
 96 |             # note: R array starts at index 1, python at 0
 97 |             module = str(modules.rx(gene, 1)[0]).replace('.0', '')
 98 |             if module in ('0', 'grey'):
 99 |                 module = 'UNCLASSIFIED'
100 |             else:
101 |                 module = self.iteration + '_' + 'M' + str(module)
102 |                 self.__update_classified_iteration(gene, self.iteration)
103 |             self.__update_module(gene, module)
104 | 
105 |         return None
106 | 
107 | 
108 |     def copy_membership(self, source):
109 |         '''
110 |         updates membership from another Genes object
111 |         '''
112 |         sourceMembership = source.get_gene_membership()
113 |         for gene, module in sourceMembership.items():
114 |             self.__update_module(gene, module)
115 | 
116 | 
117 |     def __extract_iteration_genes(self, targetIteration):
118 |         '''
119 |         get genes classified during specified interation
120 |         '''
121 |         assignedIterations = self.__extract_classified_iteration()
122 |         return [gene for gene, iteration in assignedIterations.items()
123 |                 if iteration == targetIteration]
124 | 
125 | 
126 |     def __extract_classified_iteration(self):
127 |         '''
128 |         get classified iteration as an ordered dict
129 |         '''
130 |         return OrderedDict((gene, membership['iteration'])
131 |                            for gene, membership in self.genes.items())
132 | 
133 | 
134 |     def __extract_modules(self):
135 |         '''
136 |         extract module membership as an ordered dict
137 |         '''
138 |         return OrderedDict((gene, membership['module']) for gene, membership in self.genes.items())
139 | 
140 | 
141 |     def get_gene_membership(self, genes=None):
142 |         '''
143 |         public facing method for getting gene membership; returns a
144 |         gene -> membership hash
145 | 
146 |         if gene list is provided, return only the membership assignment
147 |         for the provided genes
148 |         '''
149 |         if genes is None:
150 |             return self.__extract_modules()
151 |         else:
152 |             return OrderedDict((gene, module) for gene, module in self.__extract_modules().items() if gene in genes)
153 | 
154 | 
155 |     def get_gene_kME(self):
156 |         '''
157 |         public facing method for getting all gene kMEs
158 |         '''
159 |         return self.__extract_kME()
160 | 
161 | 
162 |     def get_iteration_kME(self, iteration):
163 |         '''
164 |         return kME for all assignments made
165 |         during current iteration
166 |         '''
167 |         iterationGenes = self.__extract_iteration_genes(iteration)
168 |         geneKME = self.__extract_kME()
169 |         return [kME for gene, kME in geneKME.items() if gene in iterationGenes]
170 | 
171 | 
172 |     def get_module_kME(self, targetModule):
173 |         '''
174 |         get all kME values in a module
175 |         '''
176 |         membership = self.get_module_members(targetModule)
177 |         memberKME = self.__extract_kME()
178 |         return [kME for gene, kME in memberKME.items() if gene in membership]
179 | 
180 | 
181 |     def get_kME(self, gene):
182 |         '''
183 |         returns the assigned kME for a gene
184 |         '''
185 |         return self.genes[gene]['kME']
186 | 
187 | 
188 |     def __extract_kME(self):
189 |         '''
190 |         extract eigengene connectivity (kME)
191 |         as an ordered dict
192 |         '''
193 |         return OrderedDict((gene, membership['kME']) for gene, membership in self.genes.items())
194 | 
195 | 
196 |     def __update_kME(self, gene, kME):
197 |         '''
198 |         update gene eigengene connectivity (kME)
199 |         do not add new genes
200 |         '''
201 |         if gene in self.genes:
202 |             self.genes[gene]['kME'] = kME
203 |             return True
204 |         else:
205 |             return False
206 | 
207 | 
208 |     def __update_module_kME(self, module, eigengene, genes=None):
209 |         '''
210 |         update member gene eigengene connectivity (kME)
211 |         for specified module and eigengene
212 |         '''
213 |         members = self.get_module_members(module)
214 |         memberKME = calculate_kME(self.profiles.gene_expression(members),
215 |                                   eigengene, False)
216 | 
217 |         for gene in memberKME.rownames:
218 |             if genes is not None:
219 |                 if gene in genes:
220 |                     self.__update_kME(gene, round(memberKME.rx(gene, 1)[0], 2))
221 | 
222 | 
223 |     def update_kME(self, eigengenes, genes=None):
224 |         '''
225 |         update module kME given its eigengene
226 |         '''
227 |         modules = self.get_modules()
228 |         for m in modules:
229 |             moduleEigengene = eigengenes.get_module_eigengene(m)
230 |             self.__update_module_kME(m, moduleEigengene, genes)
231 | 
232 | 
233 |     def write(self, prefix='', iteration=None):
234 |         '''
235 |         writes the membership and eigengene connectivity
236 |         to files
237 |         filtering for specific iteration if specified
238 |         '''
239 |         summaryGenes = None
240 |         if iteration is None:
241 |             summaryGenes = self.genes
242 |         else:
243 |             iterationGenes = self.__extract_iteration_genes(iteration)
244 |             summaryGenes = OrderedDict((gene, membership) for gene, membership
245 |                                        in self.genes.items()
246 |                                        if gene in iterationGenes
247 |                                        and membership['module'] != 'UNCLASSIFIED')
248 | 
249 |         with open(prefix + 'membership.txt', 'w') as f:
250 |             print('\t'.join(('Gene', 'Module', 'kME')), file=f)
251 |             for g in summaryGenes:
252 |                 print('\t'.join((g, self.genes[g]['module'], xstr(self.genes[g]['kME']))), file=f)
253 |         return None
254 | 
255 | 
256 |     def write_iteration_counts(self, prefix=''):
257 |         '''
258 |         print iteration summary
259 |         '''
260 |         with open(prefix + 'summary.txt', 'w') as f:
261 |             print('\t'.join(('N Input Genes', 'N Classified Genes',
262 |                              'N Residual Genes', 'N Detected Modules')), file=f)
263 |             numClassifiedGenes = self.count_classified_genes()
264 |             print('\t'.join((str(self.size),
265 |                              str(numClassifiedGenes),
266 |                              str(self.size - numClassifiedGenes),
267 |                              str(self.count_modules(self.get_classified_genes())))), file=f)
268 | 
269 | 
270 |     def plot_kme_histogram(self, iteration, prefix='', vline=0.80):
271 |         '''
272 |         generate kme histogram for genes classified in
273 |         current iteration
274 |         '''
275 |         kmeVector = None
276 |         if 'final' in prefix or 'merge' in prefix:
277 |             classifiedGenes = self.get_classified_genes()
278 |             geneKME = self.__extract_kME()
279 |             kmeVector = [kME for gene, kME in geneKME.items() if gene in classifiedGenes]
280 |         else:
281 |             kmeVector = self.get_iteration_kME(iteration)
282 | 
283 |         if kmeVector is not None:
284 |             if len(kmeVector) != 0:
285 |                 manager = RManager(kmeVector)
286 |                 grdevices().pdf(prefix + "kme_histogram.pdf")
287 |                 manager.histogram(vline, {'main': 'Gene -> Assigned Module kME for iteration ' + iteration,
288 |                                           'xlab': 'kME', 'ylab':'Gene Count'})
289 |                 grdevices().dev_off()
290 | 
291 | 
292 |     def count_module_members(self, genes=None):
293 |         '''
294 |         counts the number of genes per module
295 |         and returns a dict of module -> gene count
296 |         if a list of genes is provided, only counts within
297 |         the specified gene list
298 |         '''
299 |         membership = self.__extract_modules()
300 |         if genes is not None:
301 |             membership = {gene:module for gene, module in membership.items() if gene in genes}
302 |         return Counter(membership.values())
303 | 
304 | 
305 |     def count_classified_genes(self, genes=None):
306 |         '''
307 |         counts and return the number of classified genes
308 |         if a list of genes is provided, only counts within
309 |         the specified gene list
310 |         '''
311 |         membership = self.__extract_modules()
312 |         if genes is not None:
313 |             membership = {gene:module for gene, module in membership.items() if gene in genes}
314 |         classified = [gene for gene, module in membership.items()
315 |                       if module != 'UNCLASSIFIED']
316 | 
317 |         return len(classified)
318 | 
319 | 
320 | 
321 |     def get_classified_genes(self, genes=None):
322 |         '''
323 |         gets the list of classified genes
324 |         if a list of genes is provided, only returns
325 |         genes within the specified list
326 |         '''
327 |         membership = self.__extract_modules()
328 |         if genes is not None:
329 |             membership = OrderedDict((gene, module) for gene, module in membership.items()
330 |                                      if gene in genes)
331 |         classifiedGenes = [gene for gene, module in membership.items()
332 |                            if module != 'UNCLASSIFIED']
333 | 
334 |         return classifiedGenes
335 | 
336 | 
337 | 
338 |     def get_unclassified_genes(self):
339 |         '''
340 |         get unclassified genes
341 |         '''
342 |         membership = self.__extract_modules()
343 |         unclassifiedGenes = [gene for gene, module in membership.items()
344 |                              if module == 'UNCLASSIFIED']
345 |         return unclassifiedGenes
346 | 
347 | 
348 | 
349 |     def count_modules(self, genes=None):
350 |         '''
351 |         counts the number of modules (excluding unclassified)
352 |         if a list of genes is provided, only counts within
353 |         the specified gene list
354 |         '''
355 |         moduleCount = self.count_module_members(genes)
356 |         return len(moduleCount) - 1 if 'UNCLASSIFIED' in moduleCount else len(moduleCount)
357 | 
358 | 
359 |     def remove_small_modules(self, minModuleSize):
360 |         '''
361 |         checks membership counts and removes
362 |         any modules that are too small
363 |         by updating gene membership to UNCLASSIFIED and
364 |         setting eigengene connectivity (kME) to NaN
365 |         '''
366 |         memberCount = self.count_module_members()
367 | 
368 |         for g in self.genes:
369 |             geneModule = self.get_module(g)
370 |             if memberCount[geneModule] < minModuleSize:
371 |                 self.__update_module(g, 'UNCLASSIFIED')
372 |                 self.__update_kME(g, float('NaN'))
373 |                 self.__update_classified_iteration(g, None)
374 | 
375 | 
376 |     def get_modules(self, genes=None):
377 |         '''
378 |         gets list of unique modules from gene membership assignments
379 |         '''
380 |         # get unique members by converting values to a set
381 |         membership = set(self.__extract_modules().values())
382 |         membership.discard('UNCLASSIFIED')
383 |         return list(membership)
384 | 
385 |     
386 |     def get_module_members(self, targetModule):
387 |         '''
388 |         get list of module member genes
389 |         '''
390 |         membership = self.__extract_modules()
391 |         return [gene for gene, module in membership.items() if module == targetModule]
392 | 
393 | 
394 |     def get_genes(self):
395 |         '''
396 |         return list of all genes
397 |         '''
398 |         return [gene for gene in self.genes]
399 | 
400 | 
401 |     def evaluate_fit(self, minKMEtoStay, genes=None):
402 |         '''
403 |         evaluate fit of each gene to its assigned
404 |         module, unclassifying if the fit is below the
405 |         minimum KME to stay
406 |         if a gene list is provided, only evaluates the
407 |         specified genes
408 |         '''
409 | 
410 |         if genes is None:
411 |             genes = self.profiles.genes()
412 | 
413 |         for g in genes:
414 |             module = self.get_module(g)
415 |             kME = self.get_kME(g)
416 | 
417 |             if module == 'UNCLASSIFIED':
418 |                 self.__update_kME(g, float('NaN'))
419 |                 self.__update_classified_iteration(g, None)
420 | 
421 |             if kME < minKMEtoStay:
422 |                 self.__update_module(g, 'UNCLASSIFIED')
423 |                 self.__update_kME(g, float('NaN'))
424 |                 self.__update_classified_iteration(g, None)
425 | 
426 | 
427 |     def merge_close_modules(self, eigengenes, cutHeight):
428 |         '''
429 |         merge close modules based on similarity between
430 |         eigengenes
431 | 
432 |         return updated eigengene object
433 |         '''
434 | 
435 |         # repeat until no more merges are possible
436 |         noMergesFound = False
437 |         mergeCount = 0
438 |         modules = self.get_modules()
439 |         classifiedGenes = self.get_classified_genes()
440 |         classifiedGeneProfiles = self.profiles.gene_expression(classifiedGenes)
441 |         while not noMergesFound:
442 |             # compare modules, finding min dissimilarity
443 |             similarity = eigengenes.similarity(None)
444 |             closeModules = rsnippets.findCloseModules(similarity, cutHeight)
445 |             if closeModules != ro.NULL:
446 |                 m1 = closeModules.rx2('m1')[0]
447 |                 m2 = closeModules.rx2('m2')[0]
448 |                 dissimilarity = closeModules.rx2('dissimilarity')[0]
449 |                 mergeCount = mergeCount + 1
450 |                 self.logger.info("Merging " + m1 + " into " + m2
451 |                                  + " (D = " + str(dissimilarity) + ")")
452 | 
453 |                 memberGenes = self.get_module_members(m1)
454 |                 for g in memberGenes:
455 |                     self.__update_module(g, m2)
456 |                     self.__update_classified_iteration(g, 'FINAL_MERGE')
457 |                 self.__update_module_kME(m1, eigengenes.get_module_eigengene(m2))
458 | 
459 |                 modules = self.get_modules()
460 |                 classifiedGeneMembership = self.get_gene_membership(classifiedGenes)
461 |                 if self.debug:
462 |                     self.logger.debug("Getting module assignments for classified genes")
463 |                     self.logger.debug(classifiedGeneMembership)
464 | 
465 |                 eigengenes.recalculate(classifiedGeneProfiles,
466 |                                        classifiedGeneMembership)
467 | 
468 |             else:
469 |                 noMergesFound = True
470 | 
471 |         self.logger.info("Done merging close modules: " + str(mergeCount) + " modules merged.")
472 |         self.logger.info("Retained " + str(len(modules)) + " modules after merge.")
473 | 
474 |         return eigengenes
475 | 
476 | 
477 |     def reassign_to_best_fit(self, eigengenes, reassignThreshold, minKMEtoStay):
478 |         '''
479 |         Evaluate eigengene connectivity (kME)
480 |         for each feature against the eigengenes for each
481 |         of the final modules found by iWGCNA.
482 |         If kME(module) > kME(assigned_module)
483 |         and the p-value <= the reassignThreshold (of WGCNA
484 |         parameters) then reassign the module
485 |         membership of the feature.
486 | 
487 |         returns a count of the number of reassigned genes
488 |         '''
489 |         count = 0
490 |         modules = self.get_modules()
491 |         for m in modules:
492 |             # calculate kME of all genes to the module eigengene
493 |             moduleEigengene = eigengenes.get_module_eigengene(m)
494 |             moduleKME = calculate_kME(self.profiles.expression(), moduleEigengene, True)
495 | 
496 |             # for each gene not assigned to the current module, test fit
497 |             for g in self.genes:
498 |                 currentModule = self.get_module(g)
499 |                 if currentModule != m:
500 |                     kME = self.get_kME(g)
501 |                     newKME = round(moduleKME.rx2('cor').rx(g, 1)[0], 2)
502 |                     pvalue = moduleKME.rx2('p').rx(g, 1)[0]
503 | 
504 |                     if (currentModule == "UNCLASSIFIED" \
505 |                         and newKME >= minKMEtoStay) \
506 |                         or (newKME > kME \
507 |                         and pvalue < reassignThreshold):
508 | 
509 |                         self.__update_module(g, m)
510 |                         self.__update_kME(g, newKME)
511 |                         count = count + 1
512 | 
513 |         return count
514 | 
515 | 
516 |     def load_membership(self, fileName=None):
517 |         '''
518 |         loads membership
519 |         '''
520 |         if fileName is None:
521 |             fileName = "final-membership.txt"
522 | 
523 |         membership = ro.DataFrame.from_csvfile(fileName, sep='\t',
524 |                                                header=True, row_names=1, as_is=True)
525 | 
526 |         if self.debug:
527 |             self.logger.debug("Loaded membership from file " + fileName + "; see R-log")
528 |             ro.r("print('Loaded membership from file -- head of file:')")
529 |             self.logger.debug(membership.head())
530 | 
531 |         index = membership.names.index('Module') + 1 # add 1 b/c of python/rpy2/R inconsistency
532 | 
533 |         if self.debug:
534 |             self.logger.debug("Adjusted index of Module column: " + str(index))
535 | 
536 |         classifiedCount = 0
537 |         unclassifiedCount = 0
538 |         for g in self.genes:
539 |             gStr = ro.StrVector([str(g)])
540 |             # strange, but necessary so that rpy2 will treat numeric gene ids as strings
541 |             # python str() conversion did not work
542 | 
543 |             module = membership.rx(gStr[0], index)[0]
544 | 
545 |             if module == 'UNCLASSIFIED':
546 |                 unclassifiedCount = unclassifiedCount + 1
547 |             else:
548 |                 classifiedCount = classifiedCount + 1
549 |             self.__update_module(g, module)
550 | 
551 |         self.logger.info("Loaded " + str(classifiedCount) + " classified genes")
552 |         self.logger.info("Loaded " + str(unclassifiedCount) + " unclassified genes")
553 | 


--------------------------------------------------------------------------------
/iterativeWGCNA/io/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cstoeckert/iterativeWGCNA/c5431f513a18c8564138b87588acd4df76d34d93/iterativeWGCNA/io/__init__.py


--------------------------------------------------------------------------------
/iterativeWGCNA/io/utils.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | I/O Utils
  3 | '''
  4 | from __future__ import print_function
  5 | from __future__ import with_statement
  6 | 
  7 | from sys import stderr
  8 | from math import isnan
  9 | import os
 10 | import re
 11 | from subprocess import check_call
 12 | import gzip
 13 | 
 14 | import rpy2.robjects as ro
 15 | from ..r.imports import rsnippets
 16 | 
 17 | def bulk_gzip(directory, pattern):
 18 |     '''
 19 |     gzip all files in the directory that match the pattern
 20 |     '''
 21 |     for filename in os.listdir(directory):
 22 |         if pattern in filename:
 23 |             with open(os.path.join(directory, filename), 'rb') as plain_file:
 24 |                 with gzip.open(os.path.join(directory, filename + '.gz'), 'wb') as zip_file:
 25 |                     zip_file.writelines(plain_file)
 26 | 
 27 |             os.remove(os.path.join(directory, filename)) # remove the old file
 28 | 
 29 | 
 30 | def xstr(value):
 31 |     '''
 32 |     handle nulls/nan in string conversion
 33 |     '''
 34 |     if value is None:
 35 |         return ''
 36 |     if value == 'NULL':
 37 |         return ''
 38 |     if isnan(value):
 39 |         return 'NA'
 40 | 
 41 |     return str(value)
 42 | 
 43 | 
 44 | def warning(*objs):
 45 |     '''
 46 |     wrapper for writing to stderr
 47 |     '''
 48 |     print(*objs, file=stderr)
 49 |     stderr.flush()
 50 | 
 51 | 
 52 | def create_dir(dirName):
 53 |     '''
 54 |     check if directory exists in the path, if not create
 55 |     '''
 56 |     try:
 57 |         os.stat(dirName)
 58 |     except OSError:
 59 |         os.mkdir(dirName)
 60 | 
 61 |     return dirName
 62 | 
 63 | 
 64 | def write_data_frame(df, fileName, rowLabel):
 65 |     '''
 66 |     write data frame to file; creates new file
 67 |     if none exists, otherwise appends new eigengenes
 68 |     to existing file
 69 |     '''
 70 |     try:
 71 |         os.stat(fileName)
 72 |     except OSError:
 73 |         header = (rowLabel,) + tuple(df.colnames)
 74 |         with open(fileName, 'w') as f:
 75 |             print('\t'.join(header), file=f)
 76 |     finally:
 77 |         df.to_csvfile(fileName, quote=False, sep='\t', col_names=False, append=True)
 78 | 
 79 |         
 80 | def read_data(fileName):
 81 |     '''
 82 |     read gene expression data into a data frame
 83 |     and convert numeric (integer) data to real
 84 |     '''
 85 |     data = ro.DataFrame.from_csvfile(fileName, sep='\t', header=True, row_names=1)
 86 |     return rsnippets.numeric2real(data)
 87 | 
 88 | 
 89 | def transpose_file_contents(fileName, rowLabel):
 90 |     '''
 91 |     read in a file to a dataframe, transpose, and output
 92 |     use this instead of R transforms b/c R will concatenate
 93 |     an "X" to gene ids starting with a number
 94 |     '''
 95 |     with open(fileName, 'r') as f:
 96 |         content = [line.rstrip().split() for line in f]
 97 | 
 98 |     header = True
 99 |     with open(fileName, 'w') as f:
100 |         for line in zip(*content):
101 |             if header:
102 |                 line = list(line)
103 |                 line[0] = rowLabel
104 |                 line = tuple(line)
105 |                 header = False
106 | 
107 |             # b/c rpy2 replaces '-' in gene symbols with '.'
108 |             if '.' in line[0]:
109 |                 line = list(line)
110 |                 line[0] = line[0].replace('.', '-')
111 |                 line = tuple(line)
112 | 
113 |             # b/c R tacks an X on to gene names that start
114 |             # with a #
115 |             if re.search('^X\d', line[0]) is not None:
116 |                 print(line[0], file=stderr)
117 |                 line = list(line)
118 |                 line[0] = re.sub('^X', '', line[0])
119 |                 line = tuple(line)
120 | 
121 |             print('\t'.join(line), file=f)
122 | 


--------------------------------------------------------------------------------
/iterativeWGCNA/iterativeWGCNA.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=invalid-name
  2 | # pylint: disable=bare-except
  3 | # pylint: disable=broad-except
  4 | # pylint: disable=too-many-instance-attributes
  5 | '''
  6 | main application
  7 | '''
  8 | 
  9 | from __future__ import print_function
 10 | 
 11 | import logging
 12 | import sys
 13 | import os
 14 | from time import strftime
 15 | 
 16 | import rpy2.robjects as ro
 17 | from .genes import Genes
 18 | from .expression import Expression
 19 | from .eigengenes import Eigengenes
 20 | from .network import Network
 21 | from .wgcna import WgcnaManager
 22 | from .io.utils import create_dir, read_data, warning, write_data_frame, bulk_gzip
 23 | from .r.imports import base, wgcna, rsnippets
 24 | 
 25 | 
 26 | class IterativeWGCNA(object):
 27 |     '''
 28 |     main application
 29 | 
 30 |     flag report = True when generating
 31 |     result from existing output
 32 |     '''
 33 | 
 34 |     def __init__(self, args, report=False):
 35 |         self.args = args
 36 |         create_dir(self.args.workingDir)
 37 |         if not report:
 38 |             self.__verify_clean_working_dir()
 39 | 
 40 |         if report == 'merge':
 41 |             self.args.enableWGCNAThreads = False
 42 | 
 43 |         self.__initialize_log(report)
 44 |         self.logger.info(strftime("%c"))
 45 | 
 46 |         self.__initialize_R(report)
 47 |         if not report:
 48 |             self.__log_parameters()
 49 | 
 50 |         if self.args.debug:
 51 |             warning("Running in DEBUG mode.")
 52 |             warning("Rpy2 will print debugging messages and variable (e.g., matrix/vector) contents to the R log")
 53 |             warning("Thus, empty debug statements 'DEBUG:      ' in the iterativeWGCNA log should have a corresponding output in the R log")
 54 | 
 55 |         # load expression data and
 56 |         # initialize Genes object
 57 |         # to store results
 58 |         self.profiles = None
 59 |         self.__load_expression_profiles()
 60 |         self.__log_input_data()
 61 |         self.genes = Genes(self.profiles, debug=self.args.debug)
 62 |         self.eigengenes = Eigengenes(debug=args.debug)
 63 |         self.modules = None # will be hash of module name to color for plotting
 64 | 
 65 |         if not report:
 66 |             self.passCount = 1
 67 |             self.iterationCount = 1
 68 |             self.iteration = None # unique label for iteration
 69 |             self.algorithmConverged = False
 70 |             self.passConverged = False
 71 | 
 72 | 
 73 |     def __verify_clean_working_dir(self):
 74 |         '''
 75 |         verifies that working directory does not contain
 76 |         iterativeWGCNA output files
 77 |         exits to avoid accidental overwrite of earlier runs
 78 |         '''
 79 |         conflictingFiles = set(('final-eigengenes.txt', 'final-membership.txt', 'eigengenes.txt'))
 80 |         files = set(os.listdir(self.args.workingDir))
 81 |         if len(files.intersection(conflictingFiles)) > 0:
 82 |             warning("Working Directory: " + self.args.workingDir \
 83 |                                + " contains final output from a prior run of iterativeWGCNA.  Exiting...")
 84 |             sys.exit(1)
 85 | 
 86 | 
 87 |     def run_pass(self, passGenes):
 88 |         '''
 89 |         run a single pass of iterative WGCNA
 90 |         (prune data until no more residuals are found)
 91 |         '''
 92 | 
 93 |         passDirectory = 'pass' + str(self.passCount)
 94 |         create_dir(passDirectory)
 95 |         write_data_frame(self.profiles.gene_expression(passGenes),
 96 |                          os.path.join(passDirectory, 'initial-pass-expression-set.txt'),
 97 |                          'Gene')
 98 | 
 99 |         iterationGenes = passGenes
100 | 
101 |         while not self.passConverged:
102 |             self.run_iteration(iterationGenes)
103 | 
104 |             moduleCount = self.genes.count_modules(iterationGenes)
105 |             classifiedGeneCount = self.genes.count_classified_genes(iterationGenes)
106 | 
107 |             self.write_run_summary(len(iterationGenes), classifiedGeneCount)
108 | 
109 |             # if there are no residuals
110 |             # (classified gene count = number of genes input)
111 |             # then the pass has converged
112 |             if classifiedGeneCount == len(iterationGenes):
113 |                 self.passConverged = True
114 |                 self.__summarize_classification(passDirectory + '/')
115 |             else:
116 |                 # run again with genes classified in current pass
117 |                 iterationGenes = self.genes.get_classified_genes(iterationGenes)
118 |                 self.iterationCount = self.iterationCount + 1
119 | 
120 |             # if no modules were detected,
121 |             # then the algorithm has converged
122 |             # exit the pass
123 |             if moduleCount == 0:
124 |                 self.algorithmConverged = True
125 |                 self.passConverged = True
126 |                 self.__log_alogorithm_converged()
127 | 
128 | 
129 |     def run_iterative_wgcna(self):
130 |         '''
131 |         run iterative WGCNA
132 |         '''
133 |         if self.args.verbose:
134 |             warning("Beginning iterations")
135 | 
136 |         # genes involved in current iteration
137 |         passGenes = self.profiles.genes()
138 | 
139 |         while not self.algorithmConverged:
140 |             self.run_pass(passGenes)
141 |             classifiedGeneCount = self.genes.count_classified_genes(passGenes)
142 |             self.__log_pass_completion()
143 |             self.__log_gene_counts(len(passGenes), classifiedGeneCount)
144 | 
145 |             if not self.algorithmConverged:
146 |                 # set residuals as new gene list
147 |                 passGenes = self.genes.get_unclassified_genes()
148 | 
149 |                 # increment pass counter and reset iteration counter
150 |                 self.passCount = self.passCount + 1
151 |                 self.iterationCount = 1
152 | 
153 |                 # reset pass convergence flag
154 |                 self.passConverged = False
155 | 
156 |         self.iteration = 'FINAL'
157 |         self.genes.iteration = self.iteration
158 |         self.__log_gene_counts(self.genes.size, self.genes.count_classified_genes())
159 |         self.__summarize_classification('final-')
160 | 
161 |         # output current eigengenes for all modules, not just ones from last pass
162 |         self.eigengenes.load_matrix_from_file('eigengenes.txt')
163 |         modules = self.genes.get_modules()
164 |         self.eigengenes.update_to_subset(modules)
165 |         self.eigengenes.write('final-')
166 | 
167 |         self.iteration = 'MERGED'
168 |         self.genes.iteration = self.iteration
169 |         self.merge_close_modules()
170 |         self.reassign_genes_to_best_fit_module()
171 | 
172 |         self.__log_gene_counts(self.genes.size, self.genes.count_classified_genes())
173 | 
174 |         self.__summarize_classification('merged-' + str(self.args.finalMergeCutHeight) + '-')
175 |         self.eigengenes.write('merged-' + str(self.args.finalMergeCutHeight) + '-')
176 |         os.remove("eigengenes.txt")
177 | 
178 | 
179 |     def merge_close_modules_from_output(self):
180 |         '''
181 |         load data from output and remerge
182 |         '''
183 |         self.genes.load_membership()
184 |         self.merge_close_modules('final-')
185 |         self.reassign_genes_to_best_fit_module()
186 |         self.__log_gene_counts(self.genes.size, self.genes.count_classified_genes())
187 |         self.genes.write('adjusted-merge-' + str(self.args.finalMergeCutHeight) + '-')
188 |         self.eigengenes.write('adjusted-merge-' + str(self.args.finalMergeCutHeight) + '-')
189 |         # self.transpose_output_files()
190 | 
191 | 
192 |     def summarize_results(self):
193 |         '''
194 |         generate summary output and graphics
195 |         '''
196 |         network = Network(self.args)
197 |         network.build(self.genes, self.eigengenes)
198 |         network.summarize_network()
199 | 
200 | 
201 |     def run(self):
202 |         '''
203 |         main function --> makes calls to run iterativeWGCNA,
204 |         catches errors, and logs time
205 |         '''
206 | 
207 |         try:
208 |             self.run_iterative_wgcna()
209 |             # self.summarize_results() # can cause memory issues so, removing
210 |             self.logger.info('iterativeWGCNA: SUCCESS')
211 |         except Exception:
212 |             if self.logger is not None:
213 |                 self.logger.exception('iterativeWGCNA: FAIL')
214 |             else:
215 |                 raise
216 |         finally:
217 |             if self.logger is not None:
218 |                 self.logger.info(strftime("%c"))
219 | 
220 | 
221 |     def reassign_genes_to_best_fit_module(self):
222 |         '''
223 |         use kME goodness of fit to reassign module
224 |         membership
225 |         '''
226 |         if self.args.verbose:
227 |             warning("Making final goodness of fit assessment")
228 | 
229 |         count = self.genes.reassign_to_best_fit(self.eigengenes,
230 |                                                 self.args.wgcnaParameters['reassignThreshold'],
231 |                                                 self.args.wgcnaParameters['minKMEtoStay'])
232 |         self.logger.info("Reassigned " + str(count) + " genes in final kME review.")
233 |         if self.args.verbose:
234 |             warning("Reassigned " + str(count) + " genes in final kME review.")
235 | 
236 | 
237 |     def merge_close_modules(self, prefix=''):
238 |         '''
239 |         merge close modules based on similiarity in eigengenes
240 |         update membership, kME, and eigengenes accordingly
241 |         '''
242 |         if self.args.verbose:
243 |             warning("Extracting final eigengenes and merging close modules")
244 | 
245 |         modules = self.genes.get_modules()
246 |         self.__log_final_modules(modules)
247 | 
248 |         self.eigengenes.load_matrix_from_file(prefix + 'eigengenes.txt')
249 |         self.eigengenes.update_to_subset(modules)
250 | 
251 |         self.eigengenes = self.genes.merge_close_modules(self.eigengenes,
252 |                                                          self.args.finalMergeCutHeight)
253 | 
254 | 
255 |     def run_iteration(self, iterationGenes):
256 |         '''
257 |         run an iteration of blockwise WGCNA
258 |         '''
259 |         self.__generate_iteration_label()
260 | 
261 |         iterationDir = os.path.join('pass' + str(self.passCount), 'i' + str(self.iterationCount))
262 |         create_dir(iterationDir)
263 | 
264 |         if self.args.verbose:
265 |             warning("Iteration: " + self.iteration)
266 | 
267 |         self.genes.iteration = self.iteration
268 |         iterationProfiles = self.profiles.gene_expression(iterationGenes)
269 | 
270 |         blocks = self.run_blockwise_wgcna(iterationProfiles, iterationDir)
271 |         if not self.args.skipSaveBlocks:
272 |             rsnippets.saveBlockResult(blocks, iterationProfiles,
273 |                                       os.path.join(iterationDir, 'wgcna-blocks.RData'))
274 |             if self.args.gzipTOMs:
275 |                 bulk_gzip(iterationDir, 'TOM')
276 | 
277 | 
278 |         # update eigengenes from blockwise result
279 |         # if eigengenes are present (modules detected), evaluate
280 |         # fitness and update gene module membership
281 |         self.eigengenes.extract_from_blocks(self.iteration, blocks,
282 |                                             self.profiles.samples())
283 | 
284 |         if not self.eigengenes.is_empty():
285 |             self.eigengenes.write() # need to keep single file across all iterations
286 |             self.eigengenes.write(iterationDir + '/')
287 | 
288 |             # extract membership from blocks and calc eigengene connectivity
289 |             self.genes.update_membership(iterationGenes, blocks)
290 |             self.genes.update_kME(self.eigengenes, iterationGenes)
291 |             self.__summarize_classification(os.path.join(iterationDir, 'wgcna-'))
292 | 
293 |             self.genes.evaluate_fit(self.args.wgcnaParameters['minKMEtoStay'],
294 |                                     iterationGenes)
295 |             self.genes.remove_small_modules(self.args.wgcnaParameters['minModuleSize'])
296 |             self.__summarize_classification(os.path.join(iterationDir, ''), True)
297 | 
298 | 
299 |     def __summarize_classification(self, prefix, inclCounts=False):
300 |         '''
301 |         output gene summaries for the iteration
302 |         incl: text summary of iteration, updated gene membership, kme histogram
303 |         '''
304 |         if 'final' in prefix or 'merge' in prefix:
305 |             self.genes.write(prefix)
306 |         else:
307 |             self.genes.write(prefix, self.iteration)
308 |         self.genes.plot_kme_histogram(self.iteration, prefix,
309 |                                       self.args.wgcnaParameters['minKMEtoStay'])
310 |         if inclCounts:
311 |             self.genes.write_iteration_counts(prefix)
312 | 
313 | 
314 |     def run_blockwise_wgcna(self, exprData, workingDir):
315 |         '''
316 |         run WGCNA
317 |         '''
318 |         manager = WgcnaManager(exprData, self.args.wgcnaParameters)
319 |         manager.set_parameter('saveTOMFileBase', os.path.join(workingDir, self.iteration + '-TOM'))
320 |         return manager.blockwise_modules()
321 | 
322 | 
323 |     def __generate_iteration_label(self):
324 |         '''
325 |         generates the unique label for the iteration
326 |         '''
327 |         self.iteration = 'P' + str(self.passCount) + '_I' + str(self.iterationCount)
328 | 
329 | 
330 |     def __load_expression_profiles(self):
331 |         # gives a weird R error that I'm having trouble catching
332 |         # when it fails
333 |         # TODO: identify the exact exception
334 |         try:
335 |             self.profiles = Expression(read_data(self.args.inputFile))
336 |         except:
337 |             self.logger.error("Unable to open input file: " + self.args.inputFile)
338 |             sys.exit(1)
339 | 
340 | 
341 |     def __initialize_R(self, logType='run'):
342 |         '''
343 |         initialize R workspace and logs
344 |         '''
345 |         # set working directory
346 |         base().setwd(self.args.workingDir)
347 | 
348 |         # suppress warnings
349 |         ro.r['options'](warn=-1)
350 | 
351 |         # r log
352 |         logFile = 'iterativeWGCNA-R.log'
353 |         if logType == 'merge':
354 |             logFile = 'adjust-merge-' + str(self.args.finalMergeCutHeight) + '-' + logFile
355 | 
356 |         rLogger = base().file(logFile, open='wt')
357 |         base().sink(rLogger, type=base().c('output', 'message'))
358 | 
359 |         if self.args.enableWGCNAThreads:
360 |             wgcna().enableWGCNAThreads()
361 | 
362 | 
363 |     def __initialize_log(self, logType='run'):
364 |         '''
365 |         initialize log by setting path and file format
366 |         '''
367 |         logName = 'iterativeWGCNA.log'
368 |         if logType == 'summary':
369 |             logName = 'summarize-network-' + logName
370 |         elif logType == 'merge':
371 |             logName = 'adjust-merge-' + str(self.args.finalMergeCutHeight) + '-' + logName
372 | 
373 |         logging.basicConfig(filename=self.args.workingDir + '/' + logName,
374 |                             filemode='w', format='%(levelname)s: %(message)s',
375 |                             level=logging.DEBUG)
376 | 
377 |         logging.captureWarnings(True)
378 |         self.logger = logging.getLogger(__name__)
379 | 
380 | 
381 |     def __log_alogorithm_converged(self):
382 |         '''
383 |         log algorithm convergence
384 |         '''
385 |         message = "No modules detected for iteration " + self.iteration \
386 |                   + ". Classification complete."
387 |         self.logger.info(message)
388 |         if self.args.verbose:
389 |             warning(message)
390 | 
391 | 
392 |     def __log_parameters(self):
393 |         '''
394 |         log WGCNA parameter choices and working
395 |         directory name
396 |         '''
397 | 
398 |         self.logger.info("Working directory: " + self.args.workingDir)
399 |         self.logger.info("Saving blocks for each iteration? "
400 |                          + ("FALSE" if self.args.skipSaveBlocks else "TRUE"))
401 |         self.logger.info("Merging final modules if cutHeight <= " + str(self.args.finalMergeCutHeight))
402 |         self.logger.info("Allowing WGCNA Threads? "
403 |                          + ("TRUE" if self.args.enableWGCNAThreads else "FALSE"))
404 |         self.logger.info("Running WGCNA with the following params:")
405 |         self.logger.info(self.args.wgcnaParameters)
406 | 
407 |         if self.args.verbose:
408 |             warning("Working directory: " + self.args.workingDir)
409 |             warning("Allowing WGCNA Threads? "
410 |                     + ("TRUE" if self.args.enableWGCNAThreads else "FALSE"))
411 |             warning("Merging final modules if cutHeight <= " + str(self.args.finalMergeCutHeight))
412 |             warning("Running WGCNA with the following params:")
413 |             warning(self.args.wgcnaParameters)
414 | 
415 | 
416 |     def __log_input_data(self):
417 |         '''
418 |         log input details
419 |         '''
420 |         self.logger.info("Loaded file: " + self.args.inputFile)
421 |         self.logger.info(str(self.profiles.ncol()) + " Samples")
422 |         self.logger.info(str(self.profiles.nrow()) + " Genes")
423 |         if self.args.verbose:
424 |             warning("Loaded file: " + self.args.inputFile)
425 |             warning(str(self.profiles.ncol()) + " Samples")
426 |             warning(str(self.profiles.nrow()) + " Genes")
427 | 
428 | 
429 |     def __log_pass_completion(self):
430 |         '''
431 |         summarize pass in log when convergence condition is met
432 |         '''
433 |         message = "Pass " + str(self.passCount) + " converged on iteration " \
434 |                   + str(self.iterationCount) + "."
435 |         self.logger.info(message)
436 |         if self.args.verbose:
437 |             warning(message)
438 | 
439 | 
440 |     def __log_gene_counts(self, initialGeneCount, classifiedGeneCount):
441 |         '''
442 |         log classified gene count
443 |         '''
444 |         message = "FIT: " + str(classifiedGeneCount) + "; RESIDUAL: "
445 |         message = message + str(initialGeneCount - classifiedGeneCount)
446 |         self.logger.info(message)
447 |         if self.args.verbose:
448 |             warning(message)
449 | 
450 | 
451 |     def __log_final_modules(self, modules):
452 |         '''
453 |         log modules
454 |         '''
455 |         message = "Found " + str(len(modules)) + " modules."
456 |         self.logger.info(message)
457 |         self.logger.info(modules)
458 |         if self.args.verbose:
459 |             warning(message)
460 |             warning(modules)
461 | 
462 | 
463 |     def write_run_summary(self, initial, fit):
464 |         '''
465 |         writes the number of kept and dropped genes at the end of an iteration
466 |         '''
467 |         fileName = 'iterative-wgcna-run-summary.txt'
468 |         try:
469 |             os.stat(fileName)
470 |         except OSError:
471 |             header = ('Iteration', 'Initial', 'Fit', 'Residual')
472 |             with open(fileName, 'a') as f:
473 |                 print('\t'.join(header), file=f)
474 |         finally:
475 |             with open(fileName, 'a') as f:
476 |                 print('\t'.join((self.iteration, str(initial),
477 |                                  str(fit), str(initial - fit))), file=f)
478 | 


--------------------------------------------------------------------------------
/iterativeWGCNA/network.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=invalid-name
  2 | # pylint: disable=no-self-use
  3 | # pylint: disable=too-many-instance-attributes
  4 | # pylint: disable=redefined-variable-type
  5 | 
  6 | '''
  7 | manage network (for summaries)
  8 | '''
  9 | 
 10 | from __future__ import print_function
 11 | from __future__ import with_statement
 12 | 
 13 | import logging
 14 | 
 15 | from collections import OrderedDict
 16 | 
 17 | import rpy2.robjects as ro
 18 | 
 19 | from .colors import Colors
 20 | from .wgcna import WgcnaManager
 21 | from .r.manager import RManager
 22 | from .r.imports import grdevices, base, rsnippets
 23 | 
 24 | from .eigengenes import Eigengenes
 25 | 
 26 | class Network(object):
 27 |     '''
 28 |     a network contains the classified genes
 29 |     and their assignments
 30 |     hash of modules and properties
 31 |     for summary purposes
 32 |     '''
 33 | 
 34 |     def __init__(self, args):
 35 |         self.logger = logging.getLogger('iterativeWGCNA.Network')
 36 |         self.args = args
 37 |         self.eigengenes = None
 38 | 
 39 |         self.genes = None
 40 |         self.modules = None
 41 |         self.classifiedGenes = None
 42 |         self.profiles = None
 43 |         self.kME = None
 44 |         self.membership = None
 45 | 
 46 |         # self.graph = None
 47 |         self.geneColors = None
 48 |         self.adjacency = None
 49 |         self.weightedAdjacency = None # weighted by shared membership
 50 | 
 51 | 
    def build(self, genes, eigengenes):
        '''
        build from iterativeWGCNA result in memory

        genes supplies the gene list, the classified genes, the
        expression profiles, kME values, membership, and the
        module list; eigengenes is stored as-is

        once the data are in place, module properties and colors
        are initialized and the weighted adjacency is generated
        '''
        self.eigengenes = eigengenes
        self.genes = genes.get_genes()
        self.classifiedGenes = genes.get_classified_genes()
        self.profiles = genes.profiles
        self.kME = genes.get_gene_kME() # TODO -- fix this -- this function has changed
        self.membership = genes.get_gene_membership()

        # module list must be set before it is expanded into
        # the per-module property dict
        self.modules = genes.get_modules()
        self.__initialize_module_properties()

        self.__assign_colors()
        self.__generate_weighted_adjacency()
 68 | 
 69 | 
 70 |     def __assign_colors(self):
 71 |         self.__generate_module_colors()
 72 |         self.geneColors = OrderedDict((gene, None) for gene in self.genes)
 73 |         self.assign_gene_colors()
 74 | 
 75 | 
 76 |     def __initialize_module_properties(self):
 77 |         '''
 78 |         transform module list into dict with placeholders
 79 |         for color, kIn, and kOut
 80 |         and values for size
 81 |         '''
 82 |         self.modules = OrderedDict((module,
 83 |                                     {'color':None,
 84 |                                      'kIn':0,
 85 |                                      'kOut':0,
 86 |                                      'size':self.__get_module_size(module),
 87 |                                      'density':0}) \
 88 |                                        for module in self.modules)
 89 |         self.modules.update({'UNCLASSIFIED': {'color':None, 'kIn': 0,
 90 |                                               'kOut': 0,
 91 |                                               'size': self.__get_module_size('UNCLASSIFIED'),
 92 |                                               'density': 0.0}})
 93 | 
 94 | 
 95 |     def build_from_file(self, profiles, adjacency=True):
 96 |         '''
 97 |         initialize Network from iterativeWGCNA output found in path
 98 |         '''
 99 |         self.profiles = profiles
100 |         self.genes = self.profiles.genes()
101 | 
102 |         # when membership is loaded from file, modules and classified
103 |         # genes are determined as well
104 |         self.__load_membership_from_file(self.args.preMerge)
105 |         self.__load_kme_from_file(self.args.preMerge)
106 |         warning("done")
107 |         
108 |         self.eigengenes = Eigengenes()
109 |         self.eigengenes.load_matrix_from_file("eigengenes-final.txt")
110 |         if adjacency:
111 |             self.__initialize_module_properties()
112 |             self.__assign_colors()
113 |             self.__generate_weighted_adjacency()
114 | 
115 | 
116 |     def __load_membership_from_file(self, preMerge):
117 |         '''
118 |         loads membership assignments from file
119 |         and assembles list of classified genes
120 |         and determines list of unique modules
121 |         '''
122 |         fileName = "membership.txt"
123 |         membership = ro.DataFrame.from_csvfile(fileName, sep='\t',
124 |                                                header=True, row_names=1, as_is=True)
125 | 
126 |         finalIndex = membership.names.index('final')
127 |         if preMerge:
128 |             finalIndex = finalIndex - 1 
129 | 
130 |         self.membership = OrderedDict((gene, None) for gene in self.genes)
131 |         self.classifiedGenes = []
132 |         self.modules = []
133 | 
134 |         for g in self.genes:
135 |             module = membership.rx(g, finalIndex)[0]
136 |             self.modules.append(module)
137 |             self.membership[g] = module
138 |             if module != 'UNCLASSIFIED':
139 |                 if 'p' not in module:
140 |                     self.logger.debug("Module: " + module + "; Gene: " + g)
141 |                 self.classifiedGenes.append(g)
142 | 
143 |         self.modules = list(set(self.modules)) # gets unique list of modules
144 | 
145 | 
146 |     def __load_kme_from_file(self, preMerge):
147 |         '''
148 |         loads kME to assigned module from file
149 |         '''
150 |         fileName = "eigengene-connectivity.txt"
151 |         kME = ro.DataFrame.from_csvfile(fileName, sep='\t',
152 |                                         header=True, row_names=1, as_is=True)
153 | 
154 |         finalIndex = kME.names.index('final')
155 |         if preMerge:
156 |             finalIndex = finalIndex - 1
157 |             
158 |         self.kME = OrderedDict((gene, None) for gene in self.genes)
159 |         for g in self.genes:
160 |             self.kME[g] = kME.rx(g, finalIndex)[0]
161 | 
162 | 
163 |     def summarize_network(self):
164 |         '''
165 |         generate summary figs and data
166 |         '''
167 |         self.__plot_eigengene_overview()
168 |         self.__plot_summary_views()
169 |         self.__summarize_network_modularity()
170 |         self.__write_module_summary()
171 | 
172 | 
    def summarize_module(self, module):
        '''
        generate summary info for the specified module
        (currently the module overview plots)
        '''
        self.__plot_module_overview(module)
178 | 
179 | 
180 |     def __plot_module_overview(self, module):
181 |         '''
182 |         plot heatmap, dendrogram, and eigengene
183 |         '''
184 |         grdevices().pdf(module + "-summary.pdf")
185 |         self.plot_module_eigengene(module)
186 |         self.plot_module_kME(module)
187 |         self.plot_module_heatmap(module)
188 |         grdevices().dev_off()
189 | 
190 | 
191 |     def plot_module_heatmap(self, module):
192 |         '''
193 |         plot module heatmap
194 |         '''
195 | 
196 |         members = self.__get_module_members(module)
197 |         expression = self.profiles.gene_expression(members)
198 |         manager = RManager(expression, None)
199 |         manager.heatmap()
200 | 
201 | 
202 |     def plot_module_eigengene(self, module):
203 |         '''
204 |         barchart illustrating module eigengene
205 |         '''
206 |         eigengene = self.eigengenes.get_module_eigengene(module)
207 | 
208 |         params = {}
209 |         params['height'] = base().as_numeric(eigengene)
210 | 
211 |         limit = max(abs(base().max(eigengene)[0]), abs(base().min(eigengene)[0]))
212 |         ylim = [-1 * limit, limit]
213 |         params['ylim'] = ro.IntVector(ylim)
214 | 
215 |         colors = ["red" if e[0] > 0 else "blue" for e in eigengene]
216 |         params['col'] = ro.StrVector(colors)
217 | 
218 |         params['border'] = ro.NA_Logical
219 |         params['las'] = 2
220 |         params['names.arg'] = ro.StrVector(self.eigengenes.samples())
221 |         params['cex.names'] = 0.6
222 |         params['main'] = "Eigengene: " + module
223 |         manager = RManager(eigengene, params)
224 |         manager.barchart()
225 | 
226 | 
227 |     def plot_module_kME(self, module):
228 |         '''
229 |         plots module eigengene connectivity (kME)
230 |         '''
231 |         members = self.__get_module_members(module)
232 |         kME = [kME for gene, kME in self.kME.items() if gene in members]
233 | 
234 |         manager = RManager(kME, None)
235 |         manager.histogram(self.args.wgcnaParameters['minKMEtoStay'],
236 |                           {'main':"Member kME: " + module,
237 |                            'xlab': "Eigengene Connectivity (kME)",
238 |                            'ylab': "N Genes",
239 |                            'breaks': base().seq(0, 1, 0.1)})
240 | 
241 | 
242 | 
243 |     def __generate_module_colors(self):
244 |         '''
245 |         generate module color map
246 |         using standard colors for first
247 |         few modules then random for anything more than first 25
248 |         '''
249 | 
250 |         colors = Colors()
251 |         n = 0 # counter for module num
252 |         for m in self.modules:
253 |             n = n + 1
254 |             color = colors.assign_color(n)
255 |             self.modules[m]['color'] = color
256 | 
257 |         self.modules["UNCLASSIFIED"]['color'] = '#D3D3D3'
258 | 
259 | 
260 |     def assign_gene_colors(self):
261 |         '''
262 |         assign colors to genes according to module membership
263 |         '''
264 |         for g in self.genes:
265 |             self.geneColors[g] = self.modules[self.membership[g]]['color']
266 | 
267 | 
268 |     def get_gene_colors(self, targetGenes):
269 |         '''
270 |         retrieve colors for specified gene list
271 |         '''
272 |         colors = OrderedDict((gene, color) for gene, color in self.geneColors.items() \
273 |                                  if gene in targetGenes)
274 |         return colors
275 | 
276 | 
277 |     def get_gene_membership(self, targetGenes):
278 |         '''
279 |         retrieve membership for specified gene list
280 |         '''
281 |         colors = OrderedDict((gene, membership) for gene, membership in self.membership.items() \
282 |                                  if gene in targetGenes)
283 |         return colors
284 | 
285 | 
286 |     def __plot_eigengene_overview(self):
287 |         '''
288 |         plots eigengene graphs to single pdf
289 |         '''
290 |         grdevices().pdf("eigengene-overview.pdf")
291 |         self.plot_eigengene_network()
292 |         self.plot_eigengene_heatmap()
293 |         grdevices().dev_off()
294 | 
295 | 
296 |     def plot_eigengene_network(self):
297 |         '''
298 |         wrapper for plotting the eigengene network to pdf
299 |         '''
300 |         manager = WgcnaManager(self.eigengenes.matrix, None)
301 |         manager.plot_eigengene_network()
302 | 
303 | 
304 |     def plot_eigengene_heatmap(self):
305 |         '''
306 |         plot a heatmap of the eigengenes
307 |         '''
308 |         manager = RManager(self.eigengenes.matrix, None)
309 |         # manager.heatmap(params={'scale':'none'})
310 |         manager.heatmap()
311 | 
312 |         
313 |     def plot_network_summary(self, genes, title, filename):
314 |         '''
315 |         wrapper for WGCNA summary network view (heatmap + dendrogram)
316 |         '''
317 | 
318 |         expression = self.profiles.gene_expression(genes)
319 |         membership = self.get_gene_membership(genes)
320 |         # colors = self.get_gene_colors(genes)
321 |         
322 |         manager = WgcnaManager(expression, self.args.wgcnaParameters)
323 |         manager.set_module_colors(self.modules)
324 |         
325 |         grdevices().pdf(filename)
326 |         manager.plot_network_heatmap(membership, title) # plot_network_overview(colors, title)
327 |         grdevices().dev_off()
328 | 
329 | 
330 |     def __plot_summary_views(self):
331 |         '''
332 |         plot summary heatmaps/views
333 |         '''
334 |         if self.args.generateNetworkSummary == 'input' \
335 |            or self.args.generateNetworkSummary == 'all':
336 |             self.plot_network_summary(self.genes,
337 |                                       "All Genes (incl. unclassified)",
338 |                                       "input-block-diagram.pdf")
339 | 
340 |         if self.args.generateNetworkSummary == 'network' \
341 |            or self.args.generateNetworkSummary == 'all':
342 |             self.plot_network_summary(self.classifiedGenes,
343 |                                       "Network (classified genes)",
344 |                                       "network-block-diagram.pdf")
345 | 
346 | 
347 |     def __get_module_members(self, targetModule):
348 |         '''
349 |         get genes in targetModule
350 |         '''
351 |         return [gene for gene, module in self.membership.items() if module == targetModule]
352 | 
353 | 
354 |     def __generate_weighted_adjacency(self):
355 |         '''
356 |         gene x gene weight matrix with matrix[r][c] = 1
357 |         if genes r & c are in the same module; for
358 |         weighted graph viz and to simply
359 |         calc of in/out degree
360 |         '''
361 | 
362 |         manager = WgcnaManager(self.profiles.gene_expression(self.classifiedGenes),
363 |                                self.args.wgcnaParameters)
364 |         manager.adjacency('signed', True, True) # signed, but filter negatives & self-refs
365 |         self.adjacency = base().as_data_frame(manager.adjacencyMatrix)
366 |         self.weightedAdjacency = self.adjacency
367 | 
368 |         for m in self.modules:
369 |             if m == 'UNCLASSIFIED':
370 |                 continue
371 | 
372 |             members = self.__get_module_members(m)
373 |             for r in members:
374 |                 indexR = self.weightedAdjacency.names.index(r)
375 |                 for c in members:
376 |                     if r == c:
377 |                         continue
378 |                     indexC = self.weightedAdjacency.names.index(c)
379 |                     adj = self.adjacency[indexR][indexC]
380 |                     if adj > 0:
381 |                         self.weightedAdjacency[indexR][indexC] = adj + 1
382 | 
383 | 
384 |     def calculate_degree_modularity(self, targetModule):
385 |         '''
386 |         calculates in degree (kIn) and out degree (kOut)
387 |         for the target module
388 |         '''
389 |         members = self.__get_module_members(targetModule)
390 | 
391 |         degree = rsnippets.degree(self.adjacency, ro.StrVector(members),
392 |                                   self.args.edgeWeight)
393 |         self.modules[targetModule]['kIn'] = int(degree.rx2('kIn')[0])
394 |         self.modules[targetModule]['kOut'] = int(degree.rx2('kOut')[0])
395 |         size = self.modules[targetModule]['size']
396 |         self.modules[targetModule]['density'] = float(self.modules[targetModule]['kIn'])/(float(size) * (float(size) - 1.0)/2.0)
397 | 
398 | 
399 |     def __summarize_network_modularity(self):
400 |         '''
401 |         summarize modularity of network
402 |         calculates in degree (kIn) and out degree (kOut) per module
403 |         '''
404 |         for m in self.modules:
405 |             if m != 'UNCLASSIFIED':
406 |                 self.calculate_degree_modularity(m)
407 | 
408 | 
409 |     def __get_module_size(self, targetModule):
410 |         '''
411 |         return # of module members for target module
412 |         '''
413 |         return len(self.__get_module_members(targetModule))
414 | 
415 | 
416 |     def __write_module_summary(self):
417 |         '''
418 |         writes network modularity to a file
419 |         '''
420 |         fileName = 'module-summary.txt'
421 |         with open(fileName, 'w') as f:
422 |             header = ('module', 'size', 'color',
423 |                       'kOut', 'avg_node_kOut',
424 |                       'kIn',
425 |                       'module_density', 'kIn2kOut_ratio')
426 |             print('\t'.join(header), file=f)
427 |             for m in self.modules:
428 |                 kIn = self.modules[m]['kIn']
429 |                 kOut = self.modules[m]['kOut']
430 |                 size = self.modules[m]['size']
431 |                 avg_kOut = "{0:0.0f}".format(round(float(kOut) / float(size)))
432 |                 ratio = "{0:0.2f}".format(float(kIn) / float(kOut)) \
433 |                          if kOut != 0 else 'NA'
434 | 
435 |                 print(m,
436 |                       size,
437 |                       self.modules[m]['color'],
438 |                       kOut, avg_kOut,
439 |                       kIn,
440 |                       "{0:0.1f}".format(self.modules[m]['density']),
441 |                       ratio,
442 |                       sep='\t', file=f)
443 | 
444 | 
445 |     def export_cytoscape_json(self):
446 |         '''
447 |         creates and saves a cytoscape json file
448 |         '''
449 |         filterLevel = self.args.edgeWeight # NOTE(review): not used in the code shown here
450 | 
451 |         if self.weightedAdjacency is None: # build the weighted adjacency on first use
452 |             self.__generate_weighted_adjacency() # NOTE(review): no json is written below this
453 | 
454 | 


--------------------------------------------------------------------------------
/iterativeWGCNA/r/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/iterativeWGCNA/r/imports.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | imports from R; wrapped in functions
 3 | to ensure warning messages go to the R log
 4 | '''
 5 | from rpy2.robjects.packages import importr, SignatureTranslatedAnonymousPackage
 6 | from .snippets import FUNCTIONS
 7 | # R helper functions (snippets.FUNCTIONS) wrapped as a package-like object
 8 | rsnippets = SignatureTranslatedAnonymousPackage(FUNCTIONS, 'rsnippets')
 9 | 
10 | def base(): # R 'base' package; importr runs at call time, not at module import
11 |     return importr('base')
12 | 
13 | 
14 | def wgcna(): # R 'WGCNA' package; importr runs at call time
15 |     return importr('WGCNA')
16 | 
17 | 
18 | def stats(): # R 'stats' package; importr runs at call time
19 |     return importr('stats')
20 | 
21 | 
22 | def graphics(): # R 'graphics' package; importr runs at call time
23 |     return importr('graphics')
24 | 
25 | 
26 | def grdevices(): # R 'grDevices' package; importr runs at call time
27 |     return importr('grDevices')
28 | 
29 | 
30 | def pheatmap(): # R 'pheatmap' package; importr runs at call time
31 |     return importr('pheatmap')
32 | 


--------------------------------------------------------------------------------
/iterativeWGCNA/r/manager.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=invalid-name
  2 | # pylint: disable=no-self-use
  3 | '''
  4 | Wrappers for R functions;
  5 | performs conversions to rpy2.robjects
  6 | where required
  7 | '''
  8 | 
  9 | import logging
 10 | import rpy2.robjects as ro
 11 | from collections import OrderedDict
 12 | 
 13 | # import rpy2.rlike.container as rlc
 14 | from .imports import base, pheatmap, graphics, rsnippets
 15 | 
 16 | class RManager(object):
 17 |     '''
 18 |     wrappers for running R functions
 19 |     '''
 20 |     def __init__(self, data, params=None):
 21 |         self.logger = logging.getLogger('iterativeWGCNA.RManager')
 22 |         self.data = data
 23 |         if params is None:
 24 |             self.params = {}
 25 |         else:
 26 |             self.params = params
 27 | 
 28 |         return None
 29 | 
 30 | 
 31 |     def update_parameters(self, params):
 32 |         '''
 33 |         update/replace all parameters
 34 |         '''
 35 |         self.params = params
 36 | 
 37 | 
 38 |     def set_parameter(self, name, value):
 39 |         '''
 40 |         add or update a single parameter
 41 |         '''
 42 |         self.params[name] = value
 43 | 
 44 | 
 45 |     def remove_parameter(self, name):
 46 |         '''
 47 |         remove named parameter
 48 |         '''
 49 |         del self.params[name]
 50 | 
 51 | 
 52 |     def transpose_data(self):
 53 |         '''
 54 |         transpose the data frame (required for some WGCNA functions)
 55 |         '''
 56 |         return base().t(self.data)
 57 | 
 58 | 
 59 |     def log2(self):
 60 |         '''
 61 |         log2 data
 62 |         '''
 63 |         return base().log2(rsnippets.add(self.data, 1))
 64 | 
 65 | 
 66 |     def row_names(self):
 67 |         '''
 68 |         wrapper for getting
 69 |         row names (usually genes)
 70 |         '''
 71 |         return self.data.rownames
 72 | 
 73 | 
 74 |     def col_names(self):
 75 |         '''
 76 |         wrapper for getting column names
 77 |         (usually samples)
 78 |         '''
 79 |         return self.data.names
 80 | 
 81 | 
 82 |     def heatmap_annotation_data_frame(self, categories, annotation):
 83 |         '''
 84 |         takes a dict of gene->value and creates a data frame
 85 |         data frame
 86 |         assume annotation is an ordered dict
 87 |         updates column names to names
 88 |         '''
 89 |         df = base().as_data_frame(base().t(ro.DataFrame(annotation)))
 90 |         df.colnames = ro.StrVector(categories)
 91 |       
 92 |         return df
 93 | 
 94 | 
 95 |     def heatmap_annotation_key(self, name, colors):
 96 |         '''
 97 |         generates data frame for color key for the annotation
 98 |         from a dict
 99 |         '''
100 |         keyColors = ro.StrVector([c for c in colors.values()])
101 |         keyColors.names = colors.keys()
102 |         key = OrderedDict()
103 |         key[name] = keyColors
104 | 
105 |         return ro.ListVector(key)
106 | 
107 | 
108 |     def heatmap(self, clusterCols=False, params=None):
109 |         '''
110 |         plot a heatmap with options specified in params
111 |         (see pheatmap documentation for all options)
112 |         '''
113 |         self.params['mat'] = base().as_matrix(self.log2())
114 |         self.params['border'] = ro.NA_Logical
115 |         self.params['cluster_cols'] = clusterCols
116 |         if clusterCols:
117 |             self.params['clustering_distance_cols'] = 'correlation'
118 |         self.params['clustering_distance_rows'] = 'correlation'
119 |         self.params['show_rownames'] = True if self.data.nrow <= 50 else False
120 |         self.params['scale'] = 'row'
121 |         self.params['color'] = rsnippets.BlWhRed()
122 | 
123 |         if params is not None:
124 |             self.params.update(params)
125 | 
126 |         pheatmap().pheatmap(**self.params)
127 | 
128 | 
129 | 
130 |     def barchart(self, params=None):
131 |         '''
132 |         barchart
133 |         '''
134 | 
135 |         self.params['height'] = base().as_numeric(self.data)
136 |         if params is not None:
137 |             self.params.update(params)
138 | 
139 |         graphics().barplot(**self.params)
140 | 
141 | 
142 |     def histogram(self, vline=None, params=None):
143 |         '''
144 |         plot histogram with vline at x=vline
145 |         '''
146 |         self.params['x'] = ro.FloatVector(self.data)
147 |         self.params['labels'] = False
148 | 
149 |         if params is not None:
150 |             self.params.update(params)
151 | 
152 |         graphics().hist(**self.params)
153 | 
154 |         if vline is not None:
155 |             lineParams = {'v': vline, 'col': 'red'}
156 |             graphics().abline(**lineParams)
157 | 
158 | 
159 | 
160 | 


--------------------------------------------------------------------------------
/iterativeWGCNA/r/snippets.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2.7
  2 | """
  3 | uses rpy2 python library to create a namespace for R functions underlying iterativeWGCNA
  4 | """
  5 | 
  6 | FUNCTIONS = """
  7 | 
  8 | # convert numeric data frame to real
  9 | numeric2real <- function(df) {
 10 |      df * 1.0
 11 | }
 12 | 
 13 | # return 1-matrix
 14 | dissMatrix <- function(df) {
 15 |     1.0 - df
 16 | }
 17 | 
 18 | # add a constant value
 19 | add <- function(df, value) {
 20 |     df + value
 21 | }
 22 | 
 23 | # return power-weighted matrix
 24 | powerWeightMatrix <- function(df, power) {
 25 |     df^power
 26 | }
 27 | 
 28 | 
 29 | # set values < thresshold to 0
 30 | filterByThreshold <- function(df, threshold) {
 31 |     df[df < threshold] <- 0
 32 |     df
 33 | }
 34 | 
 35 | 
 36 | # set value of matrix diagonal
 37 | diag <- function(df, value) {
 38 |     diag(df) <- value
 39 |     df
 40 | }
 41 | 
 42 | # wrapper for save object b/c doesn't seem to work with rpy2
 43 | saveObject <- function(obj, objName, file) {
 44 |    assign(objName, obj)
 45 |    save(list=c(objName), file = file)
 46 | }
 47 | 
 48 | saveBlockResult <- function(blocks, profiles, file) {
 49 |    assign('blocks', blocks)
 50 |    assign('expression', profiles)
 51 |    save(list=c('blocks', 'expression'), file = file)
 52 | }
 53 | 
 54 | # calculate degree summary for module genes
 55 | degree <- function(adjMatrix, members, threshold) {
 56 |     adjSubset <- adjMatrix[members, members]
 57 |     inDegree = sum(adjSubset >= threshold) / 2
 58 |     adjSubset <- adjMatrix[members,  !names(adjMatrix) %in% members]
 59 |     outDegree <- sum(adjSubset >= threshold)
 60 |     list(kIn=inDegree, kOut=outDegree)
 61 | }
 62 | 
 63 | 
 64 | # find two closest modules given a similarity threshold
 65 | findCloseModules <- function(similarityMatrix, cutHeight) {
 66 |      returnVal <- NULL
 67 |      d <- 1 - similarityMatrix
 68 |      comparison <- d[d > 0 & d <= cutHeight]
 69 |      modulesFound <- sum(comparison) > 0
 70 | print(cutHeight)
 71 | 
 72 |      if (modulesFound) {
 73 |          # indexes of closest modules
 74 |          indexes <- which(d == min(comparison), arr.ind = TRUE)
 75 |          returnVal <- list(m1 = row.names(d)[indexes[1,1]], m2 = row.names(d)[indexes[1,2]], dissimilarity = d[indexes[1,1], indexes[1,2]])
 76 |      }
 77 |     returnVal
 78 | }
 79 | 
 80 | 
 81 | # given WGCNA blocks, extracts and transposes eigengene matrix
 82 | # labels columns (samples)
 83 | # cleans up module names (removes the "ME")
 84 | 
 85 | extractEigengenes <- function(iteration, blocks, sampleNames) {
 86 |     eigengenes <- as.data.frame(t(blocks$MEs))
 87 |     colnames(eigengenes) <- sampleNames
 88 |     eigengenes <- eigengenes[row.names(eigengenes) != "ME0" & row.names(eigengenes) != "MEgrey", ]
 89 |     row.names(eigengenes) <- gsub("ME", paste(iteration, "_M", sep=""), row.names(eigengenes))
 90 |     eigengenes
 91 | }
 92 | 
 93 | 
 94 | # extract eigengens from list object output
 95 | # from moduleEigengenes function
 96 | # label columns (samples)
 97 | # clean up module names (remove the "ME")
 98 | extractRecalculatedEigengenes <- function(elist, sampleNames) {
 99 |    eigengenes <- as.data.frame(t(elist$eigengenes))
100 |    colnames(eigengenes) <- sampleNames
101 |     row.names(eigengenes) <- gsub("ME", "", row.names(eigengenes))
102 |     eigengenes
103 | }
104 | 
105 | # given WGCNA blocks and gene names, returns
106 | # a data frame with modules mapped to gene names
107 | extractModules <- function(blocks, geneNames) {
108 |     as.data.frame(blocks$colors, row.names = geneNames)
109 | }
110 | 
111 | # extract module members
112 | # does not assume same ordering
113 | extractMembers <- function(module, expr, membership) {
114 |     membership <- unlist(membership)
115 |     membership <- membership[row.names(expr)]
116 |     members <- membership == module
117 |     expr[members, ]
118 | }
119 | 
120 | # remove unclassified from expression set
121 | removeUnclassified <- function(expr, membership) {
122 |     membership <- unlist(membership)
123 |     membership <- membership[row.names(expr)]
124 |     classified = membership != "UNCLASSIFIED"
125 |     expr[classified, ]
126 | }
127 | 
128 | # blue, white, red color scale
129 | BlWhRed <- function() {
130 |     colorRampPalette(c("blue", "white", "red"))(100)
131 | }
132 | 
133 | # white, yellow, red color scale
134 | WhYlRed <- function() {
135 |    colorRampPalette(c("white", "yellow", "red"))(100)
136 | }
137 | 
138 | # create color scale by passing a string vector of colors
139 | colorScale <- function(colors) {
140 |    colorRampPalette(colors)(100)
141 | }
142 | 
143 | """
144 | 
145 | __author__ = "Emily Greenfest-Allen"
146 | __copyright__ = "Copyright 2016, University of Pennsylvania"
147 | 


--------------------------------------------------------------------------------
/iterativeWGCNA/wgcna.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=invalid-name
  2 | # pylint: disable=no-self-use
  3 | '''
  4 | wgcna functions
  5 | '''
  6 | 
  7 | import logging
  8 | from collections import OrderedDict
  9 | import rpy2.robjects as ro
 10 | from .r.imports import base, wgcna, rsnippets, stats
 11 | from .r.manager import RManager
 12 | 
 13 | class WgcnaManager(RManager):
 14 |     '''
 15 |     wrappers for running WGCNA functions
 16 |     an extension of the RManager
 17 |     '''
 18 |     def __init__(self, data, params, debug=False):
 19 |         RManager.__init__(self, data, params)
 20 |         self.logger = logging.getLogger('iterativeWGCNA.WgcnaManager')
 21 |         self.adjacencyMatrix = None
 22 |         self.TOM = None
 23 |         self.dissimilarityMatrix = None
 24 |         self.geneTree = None
 25 |         self.moduleColors = None
 26 |         self.debug = debug
 27 |         return None
 28 | 
 29 | 
 30 |     def set_module_colors(self, modules):
 31 |         '''
 32 |         set module colors from dict of module properties
 33 |         expects dict to have a 'color' sub-dict
 34 |         '''
 35 |         self.moduleColors = OrderedDict((module, values['color']) \
 36 |                                             for module, values in modules.items())
 37 | 
 38 | 
 39 |     def blockwise_modules(self):
 40 |         '''
 41 |         run blockwise WGCNA
 42 |         '''
 43 |         self.params['datExpr'] = self.transpose_data()
 44 |         blocks = wgcna().blockwiseModules(**self.params)
 45 |         self.collect_garbage()
 46 |         return blocks
 47 | 
 48 | 
 49 |     def collect_garbage(self):
 50 |         '''
 51 |         run WGCNA garbage collection
 52 |         '''
 53 |         wgcna().collectGarbage()
 54 | 
 55 | 
 56 |     def adjacency(self, networkType='signed', removeNegatives=False, removeSelfReferences=False):
 57 |         '''
 58 |         calculate adjacency matrix; from pearson correlation
 59 |         '''
 60 | 
 61 |         adjParams = {}
 62 |         adjParams['power'] = self.params['power'] if 'power' in self.params else 6
 63 |         adjParams['corFnc'] = 'cor'
 64 |         adjParams['corOptions'] = "use='p'"
 65 |         adjParams['type'] = networkType
 66 |         adjParams['datExpr'] = self.transpose_data()
 67 | 
 68 |         self.adjacencyMatrix = wgcna().adjacency(**adjParams)
 69 |         self.collect_garbage()
 70 |         if removeNegatives:
 71 |             self.adjacencyMatrix = rsnippets.filterByThreshold(self.adjacencyMatrix, 0)
 72 | 
 73 |         if removeSelfReferences:
 74 |             self.adjacencyMatrix = rsnippets.diag(self.adjacencyMatrix, 0)
 75 | 
 76 |         self.collect_garbage()
 77 | 
 78 | 
 79 |     def TOM_dist(self):
 80 |         '''
 81 |         calculate Topological Overlap Matrix from adjacency matrix
 82 |         '''
 83 |         self.TOM = wgcna().TOMdist(self.adjacencyMatrix)
 84 |         self.collect_garbage()
 85 | 
 86 | 
 87 |     def TOM_similarity_from_expr(self):
 88 |         '''
 89 |         calculate Topological Overlap Matrix from expression data
 90 |         '''
 91 |         funcParams = {}
 92 |         funcParams['power'] = self.params['power'] if 'power' in self.params else 6
 93 |         funcParams['datExpr'] = self.transpose_data()
 94 |         self.TOM = wgcna().TOMsimilarityFromExpr(**funcParams)
 95 |         self.collect_garbage()
 96 | 
 97 | 
 98 |     def plot_network_heatmap(self, membership, title, useTOM=False):
 99 |         '''
100 |         plot network heatmap
101 |         recapitulates WGCNA plotNetworkOverview to work around
102 |         cutree issues
103 |         uses pheatmap
104 |         '''
105 | 
106 |         # TODO fix useTOM option for drawing with pheatmap and use of similarity
107 |         # instead of dissimilarity
108 | 
109 |         # if useTOM:
110 |             # self.TOM_similarity_from_expr()
111 |             # self.dissimilarityMatrix = rsnippets.dissMatrix(self.TOM)
112 | 			# raising TOM to power of 7 recommended by WGCNA documentation
113 |             # self.dissimilarityMatrix = rsnippets.powerWeightMatrix(self.TOM, 7)
114 |         #else:
115 | 
116 |         self.adjacency()
117 | 
118 |         annotation = self.heatmap_annotation_data_frame(['Module'], membership)
119 |         annotationKey = self.heatmap_annotation_key('Module', self.moduleColors)
120 |         # manager = RManager(self.dissimilarityMatrix, None)
121 |         self.heatmap(clusterCols=True, params={'scale': 'none',
122 |                                                'mat': self.adjacencyMatrix,
123 |                                                'show_colnames': False,
124 |                                                'color': rsnippets.WhYlRed(),
125 |                                                'annotation_col': annotation,
126 |                                                'annotation_row': annotation,
127 |                                                'annotation_colors': annotationKey})
128 | 
129 | 
130 |     def generate_gene_tree(self):
131 |         '''
132 |         generate hierarchical cluster from dissimilarity matrix
133 |         '''
134 |         distMatrix = stats().as_dist(self.dissimilarityMatrix)
135 |         self.geneTree = stats().hclust(distMatrix, method="average")
136 | 
137 | 
138 |     def plot_network_overview(self, moduleColors, title, useTOM=False):
139 |         '''
140 |         wrapper for TOMplot which provides a graphical representation
141 |         of the Topological Overlap Matrix or correlation matrix using a heatmap
142 |         and hierarchical clustering dendrogram annotated by
143 |         module colors
144 |         '''
145 | 
146 |         if useTOM:
147 |             self.TOM_similarity_from_expr()
148 |             self.dissimilarityMatrix = rsnippets.dissMatrix(self.TOM)
149 |             self.dissimilarityMatrix = rsnippets.powerWeightMatrix(self.TOM, 7)
150 |         else:
151 |             self.adjacency()
152 |             self.dissimilarityMatrix = rsnippets.dissMatrix(self.adjacencyMatrix)
153 | 
154 |         # self.dissimilarityMatrix = rsnippets.diag(self.dissimilarityMatrix, ro.NA_Integer)
155 |         self.generate_gene_tree()
156 | 
157 |         params = {}
158 |         params['dissim'] = self.dissimilarityMatrix
159 |         params['dendro'] = self.geneTree
160 |         params['Colors'] = moduleColors
161 |         params['main'] = title
162 |         wgcna().TOMplot(**params)
163 | 
164 | 
165 |     def module_eigengenes(self, membership):
166 |         '''
167 |         wrapper for moduleEigengenes function
168 |         calculates eigengenes from profiles &
169 |         module membership (gene -> membership dict)
170 |         '''
171 | 
172 |         if self.debug:
173 |             self.logger.debug("Running WGCNA moduleEigengenes function")
174 |             self.logger.debug("Module assignments:")
175 |             self.logger.debug(membership)
176 | 
177 |         params = {}
178 |         params['softPower'] = self.params['power'] if 'power' in self.params else 6
179 |         params['expr'] = base().as_data_frame(self.transpose_data())
180 | 
181 |         if self.debug:
182 |             self.logger.debug("Converting membership list to ro.StrVector; see R-log")
183 |             ro.r("print('Converting membership list to ro.StrVector for WGCNA moduleEigengenes:')")
184 | 
185 |         params['colors'] = ro.StrVector(list(membership))
186 | 
187 |         if self.debug:
188 |             self.logger.debug(params['colors'])
189 | 
190 |         return wgcna().moduleEigengenes(**params)
191 | 
192 | 
193 |     def plot_eigengene_network(self):
194 |         '''
195 |         wrapper for plotEigengeneNetworks function
196 |         plots an eigengene network
197 |         '''
198 |         params = {}
199 |         params['multiME'] = base().as_data_frame(self.transpose_data())
200 |         params['setLabels'] = ''
201 |         params['marDendro'] = ro.IntVector([0, 4, 1, 2])
202 |         params['marHeatmap'] = ro.IntVector([3, 4, 1, 2])
203 |         params['cex.lab'] = 0.8
204 |         params['xLabelsAngle'] = 90
205 |         params['colorLabels'] = False
206 |         params['signed'] = True
207 | 
208 |         wgcna().plotEigengeneNetworks(**params)
209 | 
210 | 


--------------------------------------------------------------------------------
/merge_close_modules.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | '''Rerun final module merge'''
 4 | 
 5 | # Installation workaround - see README
 6 | # import readline
 7 | #pylint: disable=invalid-name
 8 | 
 9 | from iterativeWGCNA.iterativeWGCNA import IterativeWGCNA
10 | from iterativeWGCNA.cmlargs import parse_command_line_args
11 | 
12 | if __name__ == '__main__':
13 |     args = parse_command_line_args(program='iterativeWGCNA: Adjust Merge',
14 |                                    description='recompute final module merge from existing output')
15 |     alg = IterativeWGCNA(args, report="merge")
16 |     alg.merge_close_modules_from_output()
17 | 
18 | __author__ = 'Emily Greenfest-Allen'
19 | __copyright__ = 'Copyright 2018, University of Pennsylvania'
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/run_iterative_wgcna.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | '''Convenience wrapper for running iterativeWGCNA directly from source tree.'''
 4 | 
 5 | # Installation workaround - see README
 6 | # import readline
 7 | 
 8 | from iterativeWGCNA.cmlargs import parse_command_line_args
 9 | from iterativeWGCNA.iterativeWGCNA import IterativeWGCNA
10 | 
11 | if __name__ == '__main__':
12 |     cmlArgs = parse_command_line_args()
13 |     alg = IterativeWGCNA(cmlArgs)
14 |     alg.run()
15 | 
16 | __author__ = 'Emily Greenfest-Allen'
17 | __copyright__ = 'Copyright 2016, University of Pennsylvania'
18 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file=README.md


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | setup(name='iterativeWGCNA',
 4 |       version='1.1.6', # same release as the tag in download_url below
 5 |       description="Iterative application of WGCNA",
 6 |       long_description='''Iterative application of
 7 |       Weighted Gene Correlation Network Analysis (WGCNA)
 8 |       to improve whole-transcriptome gene classification''',
 9 |       url='http://github.com/cstoeckert/iterativeWGCNA',
10 |       download_url='https://github.com/cstoeckert/iterativeWGCNA/archive/v1.1.6.tar.gz',
11 |       author='Emily Greenfest-Allen',
12 |       author_email='allenem@pennmedicine.upenn.edu',
13 |       license='GNU', # NOTE(review): does not name a specific license -- check LICENSE
14 |       packages=find_packages(),
15 |       install_requires=['rpy2','matplotlib'], # Python dependencies installed with the package
16 |       keywords=['network', 'WGCNA', 'gene expression', 'bioinformatics'],
17 |       scripts=['bin/iterativeWGCNA', 'bin/iterativeWGCNA_merge'], # command-line entry scripts
18 |       zip_safe=False)
19 | 


--------------------------------------------------------------------------------