├── COPYING ├── MANIFEST ├── MANIFEST.in ├── README.md ├── bin └── mageck ├── demo ├── demo1 │ ├── run.sh │ └── sample.txt └── demo2 │ ├── library.txt │ ├── runmageck.sh │ ├── test1.fastq │ └── test2.fastq ├── doc ├── MAGeCK_Wiki_Home.html └── MAGeCK_Wiki_Home_files │ ├── b9cc1f9048ed9e437d36915feecd38c5 │ ├── conversion.js │ ├── css │ ├── css(1) │ ├── forge.css │ ├── ga.js │ ├── get │ ├── header.js │ ├── hilite.css │ ├── icon │ ├── icon(1) │ ├── jquery-1.8.0.min.js │ ├── js │ ├── modernizr.custom.90514.js │ ├── project_default.png │ ├── saved_resource.html │ ├── tool_icon_css │ ├── webtracker.js │ └── wiki.css ├── mageck.beta ├── mageck ├── __init__.py ├── argsParser.py ├── crisprFunction.py ├── fastq_template.Rnw ├── fileOps.py ├── mageckCount.py ├── pathwayFunc.py ├── plot_template.RTemplate ├── plot_template.Rnw ├── plot_template_indvgene.RTemplate ├── tags ├── testVisual.py └── testVisualCount.py ├── python_dist.sh ├── rra ├── Makefile ├── include │ ├── math_api.h │ ├── rngs.h │ ├── rvgs.h │ └── words.h └── src │ ├── RRA.cpp │ ├── classdef.h │ ├── fileio.cpp │ ├── fileio.h │ ├── math_api.cpp │ ├── rngs.cpp │ ├── rvgs.cpp │ ├── tags │ └── words.cpp └── setup.py /COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013, Xiaole Shirley Liu lab at DFCI 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 
11 | * Neither the name of Xiaole Shirley Liu lab nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT 19 | HOLDERs AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 22 | OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 24 | TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 25 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 26 | DAMAGE. 
27 | -------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | # file GENERATED by distutils, do NOT edit 2 | README 3 | setup.py 4 | bin/RRA 5 | bin/mageck 6 | demo/demo1/run.sh 7 | demo/demo1/sample.txt 8 | demo/demo2/library.txt 9 | demo/demo2/runmageck.sh 10 | demo/demo2/test1.fastq 11 | demo/demo2/test2.fastq 12 | doc/MAGeCK_Wiki_Home.html 13 | doc/MAGeCK_Wiki_Home_files/b9cc1f9048ed9e437d36915feecd38c5 14 | doc/MAGeCK_Wiki_Home_files/conversion.js 15 | doc/MAGeCK_Wiki_Home_files/css 16 | doc/MAGeCK_Wiki_Home_files/css(1) 17 | doc/MAGeCK_Wiki_Home_files/forge.css 18 | doc/MAGeCK_Wiki_Home_files/ga.js 19 | doc/MAGeCK_Wiki_Home_files/get 20 | doc/MAGeCK_Wiki_Home_files/header.js 21 | doc/MAGeCK_Wiki_Home_files/hilite.css 22 | doc/MAGeCK_Wiki_Home_files/icon 23 | doc/MAGeCK_Wiki_Home_files/icon(1) 24 | doc/MAGeCK_Wiki_Home_files/jquery-1.8.0.min.js 25 | doc/MAGeCK_Wiki_Home_files/js 26 | doc/MAGeCK_Wiki_Home_files/modernizr.custom.90514.js 27 | doc/MAGeCK_Wiki_Home_files/project_default.png 28 | doc/MAGeCK_Wiki_Home_files/saved_resource.html 29 | doc/MAGeCK_Wiki_Home_files/tool_icon_css 30 | doc/MAGeCK_Wiki_Home_files/webtracker.js 31 | doc/MAGeCK_Wiki_Home_files/wiki.css 32 | mageck/__init__.py 33 | mageck/argsParser.py 34 | mageck/crisprFunction.py 35 | mageck/fileOps.py 36 | mageck/mageckCount.py 37 | mageck/pathwayFunc.py 38 | rra/Makefile 39 | rra/include/math_api.h 40 | rra/include/rngs.h 41 | rra/include/rvgs.h 42 | rra/include/words.h 43 | rra/src/CrisprNorm.c 44 | rra/src/RRA.c.backup 45 | rra/src/RRA.cpp 46 | rra/src/math_api.cpp 47 | rra/src/rngs.cpp 48 | rra/src/rvgs.cpp 49 | rra/src/tags 50 | rra/src/words.cpp 51 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include rra * 2 | 
recursive-include demo * 3 | recursive-include doc * 4 | include mageck/*.RTemplate 5 | include mageck/*.Rnw 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # We are moving our code to Bitbucket. Please use the following link for the latest MAGeCK experimental code: 3 | 4 | https://bitbucket.org/davidliwei/mageck/ 5 | 6 | # Stable MAGeCK version 7 | 8 | For a stable version of MAGeCK, please visit: 9 | 10 | https://sourceforge.net/projects/mageck/ 11 | 12 | The doc/ folder includes the documentation of the software. For the latest MAGeCK documentation, please visit: 13 | 14 | http://mageck.sourceforge.net 15 | -------------------------------------------------------------------------------- /bin/mageck: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """MAGeCK main program 3 | Copyright (c) 2014 Wei Li, Han Xu, Xiaole Liu lab 4 | This code is free software; you can redistribute it and/or modify it 5 | under the terms of the BSD License (see the file COPYING included with 6 | the distribution). 
7 | @status: experimental 8 | @version: $Revision$ 9 | @author: Wei Li 10 | @contact: li.david.wei AT gmail.com 11 | """ 12 | 13 | 14 | from __future__ import print_function 15 | import sys 16 | import argparse 17 | import random 18 | import bisect 19 | import logging 20 | 21 | if True: 22 | # for release version of mageck 23 | from mageck import * 24 | from mageck.crisprFunction import * 25 | from mageck.mageckCount import * 26 | from mageck.pathwayFunc import * 27 | from mageck.argsParser import * 28 | from mageck.testVisual import * 29 | else: 30 | # for beta test only 31 | from mageck_db import * 32 | from mageck_db.crisprFunction import * 33 | from mageck_db.mageckCount import * 34 | from mageck_db.pathwayFunc import * 35 | from mageck_db.argsParser import * 36 | from mageck_db.testVisual import * 37 | 38 | 39 | 40 | # main function: parse the command line and dispatch on args.subcmd 41 | def main(): 42 | args=crisprseq_parseargs(); 43 | logging.info('Welcome to MAGeCK. Command: '+args.subcmd); 44 | # get read counts 45 | if args.subcmd == 'run' or args.subcmd == 'count': 46 | mageckcount_main(args); 47 | 48 | # statistical test 49 | if args.subcmd == 'run' or args.subcmd == 'test': 50 | magecktest_main(args); 51 | 52 | # pathway test 53 | if args.subcmd == 'pathway': 54 | mageck_pathwaytest(args); 55 | 56 | # visualization 57 | if args.subcmd == 'plot': 58 | plot_main(args); 59 | 60 | 61 | 62 | 63 | if __name__ == '__main__': 64 | try: 65 | main(); 66 | except KeyboardInterrupt: 67 | sys.stderr.write("User interrupt me! 
;-) Bye!\n") 68 | sys.exit(0) 69 | 70 | -------------------------------------------------------------------------------- /demo/demo1/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | mageck test -k sample.txt -t HL60.final,KBM7.final -c HL60.initial,KBM7.initial -n demo 5 | # or 6 | #mageck test -k sample.txt -t 2,3 -c 0,1 -n demo 7 | 8 | 9 | -------------------------------------------------------------------------------- /demo/demo2/runmageck.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | 5 | mageck run --fastq test1.fastq test2.fastq -l library.txt -n demo --sample-label L1,CTRL -t L1 -c CTRL 6 | 7 | -------------------------------------------------------------------------------- /doc/MAGeCK_Wiki_Home_files/b9cc1f9048ed9e437d36915feecd38c5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidliwei/mageck/8ac6eea1d4bdb0d6e12b6124f8ab77254eaf6efe/doc/MAGeCK_Wiki_Home_files/b9cc1f9048ed9e437d36915feecd38c5 -------------------------------------------------------------------------------- /doc/MAGeCK_Wiki_Home_files/conversion.js: -------------------------------------------------------------------------------- 1 | (function(){var f=this,k=function(a){var b=typeof a;if("object"==b)if(a){if(a instanceof Array)return"array";if(a instanceof Object)return b;var d=Object.prototype.toString.call(a);if("[object Window]"==d)return"object";if("[object Array]"==d||"number"==typeof a.length&&"undefined"!=typeof a.splice&&"undefined"!=typeof a.propertyIsEnumerable&&!a.propertyIsEnumerable("splice"))return"array";if("[object Function]"==d||"undefined"!=typeof a.call&&"undefined"!=typeof a.propertyIsEnumerable&&!a.propertyIsEnumerable("call"))return"function"}else return"null"; 2 | else if("function"==b&&"undefined"==typeof a.call)return"object";return b};var l=function(a){l[" 
"](a);return a};l[" "]=function(){};var m=function(a,b){for(var d in a)Object.prototype.hasOwnProperty.call(a,d)&&b.call(void 0,a[d],d,a)};var n=window;var p=function(a,b,d){a.addEventListener?a.addEventListener(b,d,!1):a.attachEvent&&a.attachEvent("on"+b,d)};var q=function(a){return{visible:1,hidden:2,prerender:3,preview:4}[a.webkitVisibilityState||a.mozVisibilityState||a.visibilityState||""]||0},r=function(a){var b;a.mozVisibilityState?b="mozvisibilitychange":a.webkitVisibilityState?b="webkitvisibilitychange":a.visibilityState&&(b="visibilitychange");return b},t=function(a,b){if(3==q(b))return!1;a();return!0},u=function(a,b){if(!t(a,b)){var d=!1,c=r(b),e=function(){if(!d&&t(a,b)){d=!0;var g=e;b.removeEventListener?b.removeEventListener(c,g,!1):b.detachEvent&& 3 | b.detachEvent("on"+c,g)}};c&&p(b,c,e)}};var v=function(a){a=parseFloat(a);return isNaN(a)||1a?0:a};var w=v("0.06"),x=v("0.01"),y=v("0.05");var z;n:{var A=f.navigator;if(A){var B=A.userAgent;if(B){z=B;break n}}z=""};var C=-1!=z.indexOf("Opera")||-1!=z.indexOf("OPR"),D=-1!=z.indexOf("Trident")||-1!=z.indexOf("MSIE"),E=-1!=z.indexOf("Gecko")&&-1==z.toLowerCase().indexOf("webkit")&&!(-1!=z.indexOf("Trident")||-1!=z.indexOf("MSIE")),F=-1!=z.toLowerCase().indexOf("webkit"); 4 | (function(){var a="",b;if(C&&f.opera)return a=f.opera.version,"function"==k(a)?a():a;E?b=/rv\:([^\);]+)(\)|;)/:D?b=/\b(?:MSIE|rv)[: ]([^\);]+)(\)|;)/:F&&(b=/WebKit\/(\S+)/);b&&(a=(a=b.exec(z))?a[1]:"");return D&&(b=(b=f.document)?b.documentMode:void 0,b>parseFloat(a))?String(b):a})();var G=function(a){this.b=[];this.a={};for(var b=0,d=arguments.length;bMath.random())){var e=Math.random();if(e'};return 0==c.google_conversion_format&&null==c.google_conversion_domain?''+d(b,135,27)+"":1\x3c/script>'):d(b,1,1):'"}function da(){return new Image}function ea(a,b){var d=da;"function"===typeof a.opt_image_generator&&(d=a.opt_image_generator);d=d();b+=P("async","1");d.src=b;d.onload=function(){}}function Z(a,b,d){var 
c;c=W(a)+"//www.google.com/ads/user-lists/"+[N(d.google_conversion_id),"/?random=",Math.floor(1E9*Math.random())].join("");c+=[P("label",d.google_conversion_label),P("fmt","3"),V(a,b,d.google_conversion_page_url)].join("");ea(d,c)} 18 | function fa(a){if("landing"==a.google_conversion_type||!a.google_conversion_id||a.google_remarketing_only&&a.google_disable_viewthrough)return!1;a.google_conversion_date=new Date;a.google_conversion_time=a.google_conversion_date.getTime();a.google_conversion_snippets="number"==typeof a.google_conversion_snippets&&0');u(function(){try{var h=d.getElementById(e);h&&(h.innerHTML=Y(a,b,d,c),c.google_remarketing_for_search&&!c.google_conversion_domain&&Z(a,d,c))}catch(g){}},d)}else d.write(Y(a,b,d,a)),a.google_remarketing_for_search&&!a.google_conversion_domain&& 21 | Z(a,d,a)}catch(g){}ga(a)}})(window,navigator,document);})(); 22 | -------------------------------------------------------------------------------- /doc/MAGeCK_Wiki_Home_files/css: -------------------------------------------------------------------------------- 1 | /* cyrillic-ext */ 2 | @font-face { 3 | font-family: 'Ubuntu'; 4 | font-style: normal; 5 | font-weight: 400; 6 | src: local('Ubuntu'), url(https://fonts.gstatic.com/s/ubuntu/v7/BxfrwvhZBmVnDwajjdTQeH-_kf6ByYO6CLYdB4HQE-Y.woff2) format('woff2'); 7 | unicode-range: U+0460-052F, U+20B4, U+2DE0-2DFF, U+A640-A69F; 8 | } 9 | /* cyrillic */ 10 | @font-face { 11 | font-family: 'Ubuntu'; 12 | font-style: normal; 13 | font-weight: 400; 14 | src: local('Ubuntu'), url(https://fonts.gstatic.com/s/ubuntu/v7/rOHfGaogav5XpJHYhB_YZ3-_kf6ByYO6CLYdB4HQE-Y.woff2) format('woff2'); 15 | unicode-range: U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116; 16 | } 17 | /* greek-ext */ 18 | @font-face { 19 | font-family: 'Ubuntu'; 20 | font-style: normal; 21 | font-weight: 400; 22 | src: local('Ubuntu'), url(https://fonts.gstatic.com/s/ubuntu/v7/zwDIfh8KEInP4WYoM7h0b3-_kf6ByYO6CLYdB4HQE-Y.woff2) format('woff2'); 23 | unicode-range: U+1F00-1FFF; 24 
| } 25 | /* greek */ 26 | @font-face { 27 | font-family: 'Ubuntu'; 28 | font-style: normal; 29 | font-weight: 400; 30 | src: local('Ubuntu'), url(https://fonts.gstatic.com/s/ubuntu/v7/BgwOR-U84B6EluzUITbpkH-_kf6ByYO6CLYdB4HQE-Y.woff2) format('woff2'); 31 | unicode-range: U+0370-03FF; 32 | } 33 | /* latin-ext */ 34 | @font-face { 35 | font-family: 'Ubuntu'; 36 | font-style: normal; 37 | font-weight: 400; 38 | src: local('Ubuntu'), url(https://fonts.gstatic.com/s/ubuntu/v7/Zcmru5bcP_p_TwCNg-F3DH-_kf6ByYO6CLYdB4HQE-Y.woff2) format('woff2'); 39 | unicode-range: U+0100-024F, U+1E00-1EFF, U+20A0-20AB, U+20AD-20CF, U+2C60-2C7F, U+A720-A7FF; 40 | } 41 | /* latin */ 42 | @font-face { 43 | font-family: 'Ubuntu'; 44 | font-style: normal; 45 | font-weight: 400; 46 | src: local('Ubuntu'), url(https://fonts.gstatic.com/s/ubuntu/v7/zvCUQcxqeoKhyOlbifSAaevvDin1pK8aKteLpeZ5c0A.woff2) format('woff2'); 47 | unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2212, U+2215, U+E0FF, U+EFFD, U+F000; 48 | } 49 | -------------------------------------------------------------------------------- /doc/MAGeCK_Wiki_Home_files/css(1): -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, 13 | software distributed under the License is distributed on an 14 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | KIND, either express or implied. 
See the License for the 16 | specific language governing permissions and limitations 17 | under the License. 18 | */ 19 | .markdown_edit textarea{ 20 | height: 200px; 21 | width: 95%; 22 | font-family: Consolas, "Andale Mono", "Lucida Console", monospace; 23 | } 24 | .markdown_edit .btn{ 25 | margin: 5px 5px 5px 0; 26 | display: inline-block; 27 | } 28 | /* 29 | Licensed to the Apache Software Foundation (ASF) under one 30 | or more contributor license agreements. See the NOTICE file 31 | distributed with this work for additional information 32 | regarding copyright ownership. The ASF licenses this file 33 | to you under the Apache License, Version 2.0 (the 34 | "License"); you may not use this file except in compliance 35 | with the License. You may obtain a copy of the License at 36 | 37 | http://www.apache.org/licenses/LICENSE-2.0 38 | 39 | Unless required by applicable law or agreed to in writing, 40 | software distributed under the License is distributed on an 41 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 42 | KIND, either express or implied. See the License for the 43 | specific language governing permissions and limitations 44 | under the License. 45 | */ 46 | .page_list { 47 | float: right; 48 | margin: 0 20px 20px 0; 49 | font-size: 1.1em; 50 | } 51 | 52 | /* 53 | Licensed to the Apache Software Foundation (ASF) under one 54 | or more contributor license agreements. See the NOTICE file 55 | distributed with this work for additional information 56 | regarding copyright ownership. The ASF licenses this file 57 | to you under the Apache License, Version 2.0 (the 58 | "License"); you may not use this file except in compliance 59 | with the License. 
You may obtain a copy of the License at 60 | 61 | http://www.apache.org/licenses/LICENSE-2.0 62 | 63 | Unless required by applicable law or agreed to in writing, 64 | software distributed under the License is distributed on an 65 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 66 | KIND, either express or implied. See the License for the 67 | specific language governing permissions and limitations 68 | under the License. 69 | */ 70 | div.tagsinput { padding:5px; width:300px; height:100px; overflow-y: auto;} 71 | div.tagsinput span.tag { border: 1px solid #0077AA; -moz-border-radius:2px; -webkit-border-radius:2px; display: block; float: left; padding: 5px; text-decoration:none; background: #eeeeff; margin-right: 5px; margin-bottom:5px;font-family: helvetica; font-size:13px;} 72 | div.tagsinput span.tag:hover { background-color: #ebebeb; } 73 | div.tagsinput span.tag a { font-weight: bold; text-decoration:none; font-size: 11px; } 74 | div.tagsinput input { width:80px; margin:0px; font-family: helvetica; font-size: 13px; border:1px solid transparent; padding:5px; color: #000; outline:0px; margin-right:5px; margin-bottom:5px; } 75 | div.tagsinput div { display:block; float: left; } 76 | .tags_clear { clear: both; width: 100%; height: 0px; } 77 | .not_valid {background: #FBD8DB !important; color: #90111A !important;} 78 | -------------------------------------------------------------------------------- /doc/MAGeCK_Wiki_Home_files/get: -------------------------------------------------------------------------------- 1 | var $low=window.$low||(function(){var a="readyState",A="setAttribute",b="onreadystatechange",B="stopPropagation",c="currentScript",d=document,e="addEventListener",f="script",F=false,G="load",g="insertBefore",h=/ng/,H="error",I=null,i=(function(){try{g_3gs}catch(e){return 
e.stack?I:"__loadedModule"}})(),k=window,l={},m="DOMNodeInserted",o="onload",p="parentNode",q="target",r="removeChild",s="exec",J=/it\/53(\d\.\d\d)/[s](navigator.userAgent),t="lastChild",u="DOMNodeRemoved",v="defer",w="stopImmediatePropagation",y=/([^\/\?#]*(\?[^#]*)?)(#.*)?$/,j=function(E){(E[w]&&E[w]())||E[B]();if(!J&&E.type==m){E[q][p][r](E[q]);}else if(d[c])E[q].src=I;},z=function(U,C,x,n){n=y[s](U)[1];if(x=l[n]){return C&&(x.l?C(x.r):x.c.push(C));}x=l[n]={e:d.createElement(f),l:F,c:(C?[C]:[])};x.e[A]("src",U);x.e[A](v,v);C=function(E){var c=x.c.length,f=this||E[q],d=k[n]||k[i];if(E){j(E);x.r=E.type==H?l[n]=I:""}else{if(h.test(f[a]))return C[g](x.e,C[t]);x.e[p]&&C[r](x.e)}try{d&&(x.r=d.load())}catch(e){}while(c-->0)x.c.shift()(x.r);f[o]=f[b]=I;x.l=!!x.r};if(x.e[e]){x.e[e]("load",C,F);x.e[e](H,C,F);x.e[e](m,j,F);x.e[e](u,j,F);}else x.e[o]=x.e[b]=C;C=d.getElementsByTagName(f)[0][p];C[g](x.e,C[t]);J||x.e[p]&&C[r](x.e)};J=J&&J[1]<5.19;return {"$load":z}})(); 2 | //v3.6.7 3 | window.truste=window.truste||{};truste.util=truste.util||{};truste.util.getUniqueID=function(){return"truste_"+Math.random()};truste.util.getScriptElement=function(d,e){var a,b,c,k=d&&d.test?d:RegExp(d);if((a=document.currentScript?document.currentScript:document.scripts&&document.scripts[document.scripts.length-1])&&a.src&&(!d||(e||!a.id)&&k.test(a.src)))return a;for(c=(b=document.getElementsByTagName("script")).length;0window.location.hostname.indexOf("."))for(var d=0;dnew Number(RegExp.$1)&&"complete"!=truste.util.readyState?window.addEventListener("load",truste.eu.initializeIcon,!1):truste.eu.initializeIcon());(d=truste.eu.bindMap)&&(d.apiDefaults&&13window.location.hostname.indexOf(".")},log:function(b){a.tconsole.isDebug()&&window.console&&window.console.log(b)}};a.parseJSON=function(b){if("string"!=typeof b)return b;try{return window.JSON?JSON.parse(b):!/[^,:{}\[\]0-9.\-+Eaeflnr-u \n\r\t]/.test(b.replace(/"(\\.|[^"\\])*"/g,""))&&eval("("+b+")")}catch(c){}return 
null};a.cheapJSON=function(b){return window.truste&&truste.util&&truste.util.getJSON(b)||window.JSON&&JSON.stringify(b)||'{"PrivacyManagerAPI":{"message":"The API needs a JSON parser"}}'}; 15 | a.getStorage=function(b,c){try{null!=c&&(c.charAt||(c=this.cheapJSON(c)));if(window.localStorage)try{if(null==c){if(c=window.localStorage[b]||window.localStorage.getItem(b))return this.parseJSON(c)||c;c=null}else c?window.localStorage.setItem(b,c):delete window.localStorage[b]}catch(a){this.tconsole.log("said was localstorage but wasn't: "+a.stack)}var d;if(null==c){if((d=RegExp("\\s*"+b.replace(".","\\.")+"\\s*=\\s*([^,;\\s]*)").exec(document.cookie))&&1(c=b.consentDecision))){var e="permit";b="";1==c?(b=a+"-"+d,e+=" required"):2==c&&(b=d,e+=" functional");c="; Path=/; Domain=."+(this.binfo&&location.hostname.indexOf(this.binfo.domain)+1&&this.binfo.domain||location.hostname.replace(/([^\.]*\.)?([^\.]+\.[^\.]+)/,"$2"))+"; Max-Age=31536000"; 22 | var f=(f=document.cookie.match(/\bcmapi_cookie_privacy=[\w ]+\b/))&&f[1];f!=b&&(document.cookie="cmapi_gtm_bl="+b+c,document.cookie="cmapi_cookie_privacy="+e+c,c=window[this.binfo&&this.binfo.dl||"dataLayer"])&&(c.push({"gtm.blacklist":b}),!f&&c.push({event:"cookie_prefs_set"}))}};a.apiDo=function(b,c){if(!b||!c)return{error:"Call is missing required parameters or not allowed"};switch(b){case "getConsent":this.loadConsentDecision(this.fake);var a=this.fake.consent.all.value;return a?{source:"asserted", 23 | consent:a,loading:!0}:{source:this.fake.default_source,consent:this.fake.default_consent,loading:!0};case "getConsentDecision":return this.loadConsentDecision(this.fake),a=this.fake.consentDecision||0,{consentDecision:a,source:null!=this.fake.consentDecision?"asserted":"implied"};default:return{message:"The API has not yet loaded"}}};a.processMessage=function(b,a){var d;if(!b||!a||!(d=a.origin||a.domain))return null;b.capabilities=["getConsentDecision"];this.tconsole.log("processing message from "+ 24 | 
d);d=this.apiDo(b.action,1);this.requestors.loading.push({origin:a.origin,domain:a.domain,source:a.source,apiOb:b});return d};a.handleCMMessage=function(a){return null};a.handleMessageError=function(a,c){if(window.console)console.log(a);else throw a;};window.postMessage&&(window.top.addEventListener?window.top.addEventListener("message",a.messageListener,!1):window.top.attachEvent("onmessage",a.messageListener));a.init(a.defaults);return d}(truste.eu.noticeLP); 25 | -------------------------------------------------------------------------------- /doc/MAGeCK_Wiki_Home_files/header.js: -------------------------------------------------------------------------------- 1 | if(typeof SF === 'undefined'){ 2 | SF={}; 3 | } 4 | 5 | 6 | 7 | SF.Popover = function($popover) { 8 | var klass = $popover.attr('id').replace(/-tooltip/, ''); 9 | var $parents = $popover.parents('header'); 10 | var isOpen = false; 11 | 12 | function open(e) { 13 | e.preventDefault(); 14 | e.stopPropagation(); 15 | // close other popovers 16 | $('.tooltip:not(.' + klass + '):visible', $parents).trigger('popover:close'); 17 | $('.tooltip.' 
+ klass, $popover).show(); 18 | $('body').on('click.popover', function(e) { 19 | $popover.trigger('popover:close'); 20 | }); 21 | $(document).on('keydown.popover', function(e) { 22 | if ((e.which || e.keyCode) === 27) { 23 | e.preventDefault(); 24 | $popover.trigger('popover:close'); 25 | } 26 | }); 27 | isOpen = true; 28 | } 29 | 30 | function close(e) { 31 | if(!$(e.target.parentNode).closest('div').hasClass('tooltip')){ 32 | e.preventDefault(); 33 | e.stopPropagation(); 34 | } 35 | $parents.find('.tooltip:visible').hide(); 36 | $('body').off('click.popover'); 37 | $(document).off('keydown.popover'); 38 | isOpen = false; 39 | } 40 | 41 | $popover.on('click', 'a', function(e) { 42 | if (isOpen) { 43 | close(e); 44 | } else { 45 | if(!$(e.target).hasClass('not-available')){ 46 | open(e); 47 | } 48 | } 49 | }); 50 | $popover.on('popover:close', close); 51 | }; 52 | 53 | jQuery(function($) { 54 | // Setup the updater popover 55 | var $updater = $('#updater-tooltip'); 56 | if ($updater.length) { 57 | if ($updater.hasClass('fetch')) { 58 | $.ajax({ 59 | url: '/user/updates/find', 60 | global: false, 61 | success: function(data) { 62 | if (data.length) { 63 | $updater.hide() 64 | .html(data) 65 | .show(); 66 | SF.Popover($updater); 67 | } 68 | } 69 | }); 70 | } else { 71 | SF.Popover($updater); 72 | } 73 | } 74 | // Setup the account popover 75 | var $account_tip = $('#account-tooltip'); 76 | if($account_tip.length) { 77 | SF.Popover($account_tip); 78 | } 79 | }); 80 | 81 | SF.SimplifiedCookieNotice = function () { 82 | return { 83 | overlay: null, 84 | banner: null, 85 | body: null, 86 | msg: "By using the SourceForge site, you agree to our use of cookies.", 87 | win: null, 88 | cookieKey: "tsn", 89 | 90 | init: function () { 91 | this.win = $(window); 92 | this.body = $('body'); 93 | var cookie_value = $.cookie(this.cookieKey); 94 | if (Number(cookie_value) !== 1) { 95 | this._setupBanner(); 96 | this._setupListeners(); 97 | this.show(); 98 | } 99 | 100 | return this; 
101 | }, 102 | 103 | _setupListeners: function () { 104 | var self = this; 105 | 106 | this.body.on('click', '.truste-cookie-accept', function(evt) { 107 | evt.preventDefault(); 108 | // Expires is set to 13 months or 30x13 = 390 days. 109 | $.cookie(self.cookieKey, 1, {path: '/', expires: 390}); 110 | self.hide(); 111 | }); 112 | 113 | this.body.on('click', '.truste-cookie-denied', function (evt) { 114 | evt.preventDefault(); 115 | if (truste.eu && truste.eu.clickListener) { 116 | truste.eu.clickListener(); 117 | } 118 | }); 119 | 120 | this.win.resize(function (evt) { 121 | if (self.win.width() > 1075) { 122 | self.banner.width(self.win.width() - 80); 123 | } 124 | }); 125 | }, 126 | 127 | _setupBanner: function () { 128 | this._createBanner(); 129 | this._populateBanner(); 130 | }, 131 | 132 | _createBanner: function () { 133 | this.banner = $('
'); 134 | }, 135 | 136 | _populateBanner: function() { 137 | this.body.prepend(this.banner); 138 | var content = '
' + 139 | '

'+ this.msg + '

' + 140 | ''; 143 | this.banner.html(content); 144 | 145 | }, 146 | 147 | show: function () { 148 | this.banner.width(this.win.width() - 80); 149 | }, 150 | 151 | hide: function () { 152 | this.banner.animate({height: 'toggle', opacity: 'toggle'}, 1000, 'linear', function () { 153 | $(this).remove(); 154 | }); 155 | } 156 | }; 157 | }; 158 | 159 | -------------------------------------------------------------------------------- /doc/MAGeCK_Wiki_Home_files/hilite.css: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, 13 | software distributed under the License is distributed on an 14 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | KIND, either express or implied. See the License for the 16 | specific language governing permissions and limitations 17 | under the License. 
18 | */ 19 | 20 | .linenodiv pre, code { font-size:127% } 21 | .linenodiv pre { color: gray } 22 | .codehilite .hll { background-color: #ffffcc } 23 | .codehilite { background: #f8f8f8; } 24 | .codehilite .c { color: #408080; font-style: italic } /* Comment */ 25 | .codehilite .k { color: #008000; font-weight: bold } /* Keyword */ 26 | .codehilite .o { color: #666666 } /* Operator */ 27 | .codehilite .cm { color: #408080; font-style: italic } /* Comment.Multiline */ 28 | .codehilite .cp { color: #BC7A00 } /* Comment.Preproc */ 29 | .codehilite .c1 { color: #408080; font-style: italic } /* Comment.Single */ 30 | .codehilite .cs { color: #408080; font-style: italic } /* Comment.Special */ 31 | .codehilite .gd { color: #000; background-color: #fdd; display:inline-block; width:100%; } /* Generic.Deleted */ 32 | .codehilite .ge { font-style: italic } /* Generic.Emph */ 33 | .codehilite .gr { color: #FF0000 } /* Generic.Error */ 34 | .codehilite .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 35 | .codehilite .gi { color: #000; background-color: #dfd; display:inline-block; width:100%; } /* Generic.Inserted */ 36 | .codehilite .go { color: #808080 } /* Generic.Output */ 37 | .codehilite .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ 38 | .codehilite .gs { font-weight: bold } /* Generic.Strong */ 39 | .codehilite .gu { color: #000; background-color: #def; display:inline-block; width:100%; } /* Generic.Subheading */ 40 | .codehilite .gt { color: #0040D0 } /* Generic.Traceback */ 41 | .codehilite .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ 42 | .codehilite .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ 43 | .codehilite .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ 44 | .codehilite .kp { color: #008000 } /* Keyword.Pseudo */ 45 | .codehilite .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ 46 | .codehilite .kt { color: #B00040 } /* Keyword.Type */ 47 | .codehilite .m 
{ color: #666666 } /* Literal.Number */ 48 | .codehilite .s { color: #BA2121 } /* Literal.String */ 49 | .codehilite .na { color: #7D9029 } /* Name.Attribute */ 50 | .codehilite .nb { color: #008000 } /* Name.Builtin */ 51 | .codehilite .nc { color: #0000FF; font-weight: bold } /* Name.Class */ 52 | .codehilite .no { color: #880000 } /* Name.Constant */ 53 | .codehilite .nd { color: #AA22FF } /* Name.Decorator */ 54 | .codehilite .ni { color: #999999; font-weight: bold } /* Name.Entity */ 55 | .codehilite .ne { color: #D2413A; font-weight: bold } /* Name.Exception */ 56 | .codehilite .nf { color: #0000FF } /* Name.Function */ 57 | .codehilite .nl { color: #A0A000 } /* Name.Label */ 58 | .codehilite .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ 59 | .codehilite .nt { color: #008000; font-weight: bold } /* Name.Tag */ 60 | .codehilite .nv { color: #19177C } /* Name.Variable */ 61 | .codehilite .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ 62 | .codehilite .w { color: #bbbbbb } /* Text.Whitespace */ 63 | .codehilite .mf { color: #666666 } /* Literal.Number.Float */ 64 | .codehilite .mh { color: #666666 } /* Literal.Number.Hex */ 65 | .codehilite .mi { color: #666666 } /* Literal.Number.Integer */ 66 | .codehilite .mo { color: #666666 } /* Literal.Number.Oct */ 67 | .codehilite .sb { color: #BA2121 } /* Literal.String.Backtick */ 68 | .codehilite .sc { color: #BA2121 } /* Literal.String.Char */ 69 | .codehilite .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ 70 | .codehilite .s2 { color: #BA2121 } /* Literal.String.Double */ 71 | .codehilite .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ 72 | .codehilite .sh { color: #BA2121 } /* Literal.String.Heredoc */ 73 | .codehilite .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ 74 | .codehilite .sx { color: #008000 } /* Literal.String.Other */ 75 | .codehilite .sr { color: #BB6688 } /* Literal.String.Regex */ 76 | .codehilite .s1 
{ color: #BA2121 } /* Literal.String.Single */ 77 | .codehilite .ss { color: #19177C } /* Literal.String.Symbol */ 78 | .codehilite .bp { color: #008000 } /* Name.Builtin.Pseudo */ 79 | .codehilite .vc { color: #19177C } /* Name.Variable.Class */ 80 | .codehilite .vg { color: #19177C } /* Name.Variable.Global */ 81 | .codehilite .vi { color: #19177C } /* Name.Variable.Instance */ 82 | .codehilite .il { color: #666666 } /* Literal.Number.Integer.Long */ 83 | 84 | .codehilite { overflow: auto; } 85 | .codehilite pre { margin:0; word-wrap: normal; } 86 | .codehilite div { margin:0; padding: 0; } 87 | .codehilite .code_block { width:100%; } 88 | .codehilite .code_block:hover { background-color: #ffff99; } 89 | .codehilite .lineno { background-color: #ebebeb; 90 | display:inline-block; 91 | padding:0 .5em; 92 | border-width: 0 1px 0 0; 93 | border-style: solid; 94 | border-color: #ddd; } 95 | -------------------------------------------------------------------------------- /doc/MAGeCK_Wiki_Home_files/icon: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidliwei/mageck/8ac6eea1d4bdb0d6e12b6124f8ab77254eaf6efe/doc/MAGeCK_Wiki_Home_files/icon -------------------------------------------------------------------------------- /doc/MAGeCK_Wiki_Home_files/icon(1): -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidliwei/mageck/8ac6eea1d4bdb0d6e12b6124f8ab77254eaf6efe/doc/MAGeCK_Wiki_Home_files/icon(1) -------------------------------------------------------------------------------- /doc/MAGeCK_Wiki_Home_files/modernizr.custom.90514.js: -------------------------------------------------------------------------------- 1 | /* Modernizr custom build of 1.7pre: fontface | flexbox | textshadow | rgba | hsla | borderimage | borderradius | boxshadow | opacity | backgroundsize | multiplebgs | cssanimations | csscolumns | cssgradients | cssreflections | 
csstransforms | csstransforms3d | csstransitions | applicationcache | canvas | canvastext | draganddrop | hashchange | history | audio | video | indexeddb | inputtypes | input | localstorage | postmessage | sessionstorage | webworkers | websockets | websqldatabase | geolocation | inlinesvg | svg | smil | svgclippaths | touch | iepp */ 2 | window.Modernizr=function(a,b,c){function G(){e.input=function(a){for(var b=0,c=a.length;b7)},r.history=function(){return a.history&&history.pushState},r.draganddrop=function(){return x("dragstart")&&x("drop")},r.websockets=function(){return"WebSocket"in a},r.rgba=function(){A("background-color:rgba(150,255,150,.5)");return D(k.backgroundColor,"rgba")},r.hsla=function(){A("background-color:hsla(120,40%,100%,.5)");return D(k.backgroundColor,"rgba")||D(k.backgroundColor,"hsla")},r.multiplebgs=function(){A("background:url(//:),url(//:),red url(//:)");return(new RegExp("(url\\s*\\(.*?){3}")).test(k.background)},r.backgroundsize=function(){return F("backgroundSize")},r.borderimage=function(){return F("borderImage")},r.borderradius=function(){return F("borderRadius","",function(a){return D(a,"orderRadius")})},r.boxshadow=function(){return F("boxShadow")},r.textshadow=function(){return b.createElement("div").style.textShadow===""},r.opacity=function(){B("opacity:.55");return/^0.55$/.test(k.opacity)},r.cssanimations=function(){return F("animationName")},r.csscolumns=function(){return F("columnCount")},r.cssgradients=function(){var a="background-image:",b="gradient(linear,left top,right bottom,from(#9f9),to(white));",c="linear-gradient(left top,#9f9, white);";A((a+o.join(b+a)+o.join(c+a)).slice(0,-a.length));return D(k.backgroundImage,"gradient")},r.cssreflections=function(){return F("boxReflect")},r.csstransforms=function(){return!!E(["transformProperty","WebkitTransform","MozTransform","OTransform","msTransform"])},r.csstransforms3d=function(){var 
a=!!E(["perspectiveProperty","WebkitPerspective","MozPerspective","OPerspective","msPerspective"]);a&&"webkitPerspective"in g.style&&(a=w("@media ("+o.join("transform-3d),(")+"modernizr)"));return a},r.csstransitions=function(){return F("transitionProperty")},r.fontface=function(){var a,c,d=h||g,e=b.createElement("style"),f=b.implementation||{hasFeature:function(){return!1}};e.type="text/css",d.insertBefore(e,d.firstChild),a=e.sheet||e.styleSheet;var i=f.hasFeature("CSS2","")?function(b){if(!a||!b)return!1;var c=!1;try{a.insertRule(b,0),c=/src/i.test(a.cssRules[0].cssText),a.deleteRule(a.cssRules.length-1)}catch(d){}return c}:function(b){if(!a||!b)return!1;a.cssText=b;return a.cssText.length!==0&&/src/i.test(a.cssText)&&a.cssText.replace(/\r+|\n+/g,"").indexOf(b.split(" ")[0])===0};c=i('@font-face { font-family: "font"; src: url(data:,); }'),d.removeChild(e);return c},r.video=function(){var a=b.createElement("video"),c=!!a.canPlayType;if(c){c=new Boolean(c),c.ogg=a.canPlayType('video/ogg; codecs="theora"');var d='video/mp4; codecs="avc1.42E01E';c.h264=a.canPlayType(d+'"')||a.canPlayType(d+', mp4a.40.2"'),c.webm=a.canPlayType('video/webm; codecs="vp8, vorbis"')}return c},r.audio=function(){var a=b.createElement("audio"),c=!!a.canPlayType;c&&(c=new Boolean(c),c.ogg=a.canPlayType('audio/ogg; codecs="vorbis"'),c.mp3=a.canPlayType("audio/mpeg;"),c.wav=a.canPlayType('audio/wav; codecs="1"'),c.m4a=a.canPlayType("audio/x-m4a;")||a.canPlayType("audio/aac;"));return c},r.localstorage=function(){try{return!!localStorage.getItem}catch(a){return!1}},r.sessionstorage=function(){try{return!!sessionStorage.getItem}catch(a){return!1}},r.webWorkers=function(){return!!a.Worker},r.applicationcache=function(){return!!a.applicationCache},r.svg=function(){return!!b.createElementNS&&!!b.createElementNS(q.svg,"svg").createSVGRect},r.inlinesvg=function(){var 
a=b.createElement("div");a.innerHTML="";return(a.firstChild&&a.firstChild.namespaceURI)==q.svg},r.smil=function(){return!!b.createElementNS&&/SVG/.test(n.call(b.createElementNS(q.svg,"animate")))},r.svgclippaths=function(){return!!b.createElementNS&&/SVG/.test(n.call(b.createElementNS(q.svg,"clipPath")))};for(var H in r)z(r,H)&&(v=H.toLowerCase(),e[v]=r[H](),u.push((e[v]?"":"no-")+v));e.input||G(),e.crosswindowmessaging=e.postmessage,e.historymanagement=e.history,e.addTest=function(a,b){a=a.toLowerCase();if(!e[a]){b=!!b(),g.className+=" "+(b?"":"no-")+a,e[a]=b;return e}},A(""),j=l=null,f&&a.attachEvent&&function(){var a=b.createElement("div");a.innerHTML="";return a.childNodes.length!==1}()&&function(a,b){function p(a,b){var c=-1,d=a.length,e,f=[];while(++c=0;h--){o=d.charCodeAt(h);a=(a<<6&268435455)+o+(o<<14);c=a&266338304;a=c!==0?a^c>>21:a;}} return a;} // jshint ignore:line 42 | 43 | /****************************************************************** 44 | SF webtracking code 45 | *****************************************************************/ 46 | var doc = document, body = doc.body, 47 | ads = getElementsByClassName('ad'), 48 | qs = parseQueryString(), 49 | url = '/log/webtracker/', 50 | testString = munch('__utmx') || munch('switchboard.test'), 51 | domainHash = munch('__utmc') || hash(window.location.hostname), 52 | data; 53 | // don't track error pages 54 | if (body.id === 'error-content') { 55 | return; 56 | } 57 | // parse json stored in meta element 58 | if (window.JSON && JSON.parse) { 59 | data = JSON.parse(doc.getElementById('webtracker').content); 60 | } else { 61 | data = eval('(' + doc.getElementById('webtracker').content + ')'); 62 | } 63 | // grab url 64 | data.url = location.href; 65 | // begin checking for optional info 66 | if (body.id && !data.action_type) { 67 | data.action_type = body.id; 68 | } 69 | if (body.getAttribute('data-template')) { 70 | data.download_ad_template = body.getAttribute('data-template'); 71 | } 72 | if (ads.length) { 
73 | // loop through all ads and grab their zones 74 | var zones = []; 75 | for (var i = 0, l = ads.length; i < l; i++) { 76 | var zone = ads[i].id; 77 | if (zone) { 78 | zones.push(zone); 79 | } 80 | } 81 | if (zones.length) { 82 | data.ad_zones = zones; 83 | } 84 | } 85 | // check the query string for certain data 86 | if (qs.accel_key) { 87 | data.ticket = qs.accel_key; 88 | } 89 | if (qs.click_id) { 90 | data.click_id = qs.click_id; 91 | } 92 | // include any test data 93 | if (testString && 94 | domainHash && 95 | data.active_tests && data.active_tests.length) { 96 | data.tests = testString; 97 | data.domain_hash = domainHash; 98 | } 99 | // select the correct logging path based on whether this is 100 | // accelerator or not 101 | if (qs.accel_key) { 102 | url += 'accel/'; 103 | } 104 | // Setup default value for referer 105 | data.referer = data.referer || doc.referrer; 106 | // send it all off for data crunching 107 | $.ajax({ 108 | url: url, 109 | data: data, 110 | traditional: true, 111 | global: false 112 | }); 113 | })(); 114 | -------------------------------------------------------------------------------- /doc/MAGeCK_Wiki_Home_files/wiki.css: -------------------------------------------------------------------------------- 1 | /* 2 | Licensed to the Apache Software Foundation (ASF) under one 3 | or more contributor license agreements. See the NOTICE file 4 | distributed with this work for additional information 5 | regarding copyright ownership. The ASF licenses this file 6 | to you under the Apache License, Version 2.0 (the 7 | "License"); you may not use this file except in compliance 8 | with the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, 13 | software distributed under the License is distributed on an 14 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | KIND, either express or implied. 
See the License for the 16 | specific language governing permissions and limitations 17 | under the License. 18 | */ 19 | -------------------------------------------------------------------------------- /mageck.beta: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """MAGeCK main program 3 | Copyright (c) 2014 Wei Li, Han Xu, Xiaole Liu lab 4 | This code is free software; you can redistribute it and/or modify it 5 | under the terms of the BSD License (see the file COPYING included with 6 | the distribution). 7 | @status: experimental 8 | @version: $Revision$ 9 | @author: Wei Li 10 | @contact: li.david.wei AT gmail.com 11 | """ 12 | 13 | 14 | from __future__ import print_function 15 | import sys 16 | import argparse 17 | import random 18 | import bisect 19 | import logging 20 | 21 | if False: 22 | # for release version of mageck 23 | from mageck import * 24 | from mageck.crisprFunction import * 25 | from mageck.mageckCount import * 26 | from mageck.pathwayFunc import * 27 | from mageck.argsParser import * 28 | from mageck.testVisual import * 29 | else: 30 | # for beta test only 31 | from mageck_db import * 32 | from mageck_db.crisprFunction import * 33 | from mageck_db.mageckCount import * 34 | from mageck_db.pathwayFunc import * 35 | from mageck_db.argsParser import * 36 | from mageck_db.testVisual import * 37 | 38 | 39 | 40 | # main function 41 | def main(): 42 | args=crisprseq_parseargs(); 43 | logging.info('Welcome to MAGeCK. 
def crisprseq_parseargs():
    """Parse the mageck command line and set up logging.

    Defines the ``run``, ``count``, ``test``, ``pathway`` and ``plot``
    sub-commands, parses ``sys.argv``, then configures the root logger:
    DEBUG and above go to ``<output-prefix>.log`` (appended for ``pathway``,
    overwritten otherwise) and INFO and above are echoed through a
    ``logging.StreamHandler``.

    Return:
        the ``argparse.Namespace`` of parsed options; ``subcmd`` holds the
        name of the selected sub-command.
    Raises:
        SystemExit: raised by argparse for invalid options, ``--help`` /
        ``--version``, or when no sub-command is given.
    """
    # Help texts shared verbatim by several sub-commands.
    listseq_help = 'A file containing the list of sgRNA names, their sequences and associated genes. Support file format: csv and txt.'
    prefix_help = 'The prefix of the output file(s). Default sample1.'
    trim5_help = 'Length of trimming the 5\' of the reads. Default 0'
    sgrnalen_help = 'Length of the sgRNA. Default 20'
    countn_help = 'Count sgRNAs with Ns. By default, sgRNAs containing N will be discarded.'
    fdr_help = 'FDR threshold for gene test, default 0.25.'
    control_help = 'Sample label or sample index in the count table as control experiments, separated by comma (,). Default is all the samples not specified in treatment experiments.'
    adjust_help = 'Method for p-value adjustment, including false discovery rate (fdr) or holm\'s method (holm). Default fdr.'
    variance_help = 'Estimate the variance from all samples, instead of from only control samples. Use this option only if you believe there are relatively few essential sgRNAs or genes between control and treatment samples.'
    keeptmp_help = 'Keep intermediate files.'
    fastq_help = 'Sample fastq files, separated by space; use comma (,) to indicate technical replicates of the same sample. For example, "--fastq sample1_replicate1.fastq,sample1_replicate2.fastq sample2_replicate1.fastq,sample2_replicate2.fastq" indicates two samples with 2 technical replicates for each sample.'
    counttable_help = 'Provide a tab-separated count table instead of sam files. Each line in the table should include sgRNA name (1st column) and read counts in each sample.'
    sort_help = 'Sorting criteria, either by negative selection (neg) or positive selection (pos). Default negative selection.'
    norm_help = 'Method for normalization, default median.'
    pdf_help = 'Generate pdf report of the analysis.'

    parser = argparse.ArgumentParser(description='mageck: performs sgRNA, gene and pathway analysis on CRISPR-Cas9 screening data.')
    # definition of sub commands
    subparser = parser.add_subparsers(help='commands to run mageck', dest='subcmd')

    parser.add_argument('-v', '--version', action='version', version='%(prog)s 0.5.0')

    # run: execute the whole program
    subp_run = subparser.add_parser('run', help='Run the whole program from fastq files.')
    subp_run.add_argument('-l', '--list-seq', help=listseq_help)
    subp_run.add_argument('-n', '--output-prefix', default='sample1', help=prefix_help)
    subp_run.add_argument('--sample-label', default='', help='Sample labels, separated by comma. Must be equal to the number of samples provided (in --fastq option). Default "sample1,sample2,...".')
    subp_run.add_argument('--trim-5', type=int, default=0, help=trim5_help)
    subp_run.add_argument('--sgrna-len', type=int, default=20, help=sgrnalen_help)
    subp_run.add_argument('--count-n', action='store_true', help=countn_help)
    subp_run.add_argument('--gene-test-fdr-threshold', type=float, default=0.25, help=fdr_help)
    subp_run.add_argument('-t', '--treatment-id', required=True, action='append', help='Sample label or sample index (integer, 0 as the first sample) as treatment experiments, separated by comma (,). If sample label is provided, the labels must match the labels in the first line of the count table; for example, "HL60.final,KBM7.final". For sample index, "0,2" means the 1st and 3rd samples are treatment experiments.')
    subp_run.add_argument('-c', '--control-id', action='append', help=control_help)
    subp_run.add_argument('--adjust-method', choices=['fdr', 'holm'], default='fdr', help=adjust_help)
    subp_run.add_argument('--variance-from-all-samples', action='store_true', help=variance_help)
    subp_run.add_argument('--keep-tmp', action='store_true', help=keeptmp_help)
    subp_run.add_argument('--fastq', nargs='+', help=fastq_help)
    subp_run.add_argument('--pdf-report', action='store_true', help=pdf_help)

    # count: only collect counts
    subp_count = subparser.add_parser('count', help='Collecting read counts from fastq files.')
    subp_count.add_argument('-l', '--list-seq', required=True, help=listseq_help)
    subp_count.add_argument('--sample-label', default='', help='Sample labels, separated by comma (,). Must be equal to the number of samples provided (in --fastq option). Default "sample1,sample2,...".')
    subp_count.add_argument('-n', '--output-prefix', default='sample1', help=prefix_help)
    subp_count.add_argument('--trim-5', type=int, default=0, help=trim5_help)
    subp_count.add_argument('--sgrna-len', type=int, default=20, help=sgrnalen_help)
    subp_count.add_argument('--count-n', action='store_true', help=countn_help)
    subp_count.add_argument('--fastq', nargs='+', help=fastq_help)
    subp_count.add_argument('--pdf-report', action='store_true', help='Generate pdf report of the fastq files.')
    subp_count.add_argument('--keep-tmp', action='store_true', help=keeptmp_help)

    # stat test: only do the statistical test
    subn_stattest = subparser.add_parser('test', help='Perform statistical test from a given count table (generated by count command).')
    subn_stattest.add_argument('-k', '--count-table', required=True, help=counttable_help)
    subn_stattest.add_argument('-t', '--treatment-id', required=True, action='append', help='Sample label or sample index (0 as the first sample) in the count table as treatment experiments, separated by comma (,). If sample label is provided, the labels must match the labels in the first line of the count table; for example, "HL60.final,KBM7.final". For sample index, "0,2" means the 1st and 3rd samples are treatment experiments.')
    subn_stattest.add_argument('-c', '--control-id', action='append', help=control_help)
    subn_stattest.add_argument('-n', '--output-prefix', default='sample1', help=prefix_help)
    subn_stattest.add_argument('--norm-method', choices=['none', 'median', 'total'], default='median', help=norm_help)
    subn_stattest.add_argument('--normcounts-to-file', action='store_true', help='Write normalized read counts to file ([output-prefix].normalized.txt).')
    subn_stattest.add_argument('--gene-test-fdr-threshold', type=float, default=0.25, help=fdr_help)
    subn_stattest.add_argument('--adjust-method', choices=['fdr', 'holm'], default='fdr', help=adjust_help)
    subn_stattest.add_argument('--variance-from-all-samples', action='store_true', help=variance_help)
    subn_stattest.add_argument('--sort-criteria', choices=['neg', 'pos'], default='neg', help=sort_help)
    subn_stattest.add_argument('--keep-tmp', action='store_true', help=keeptmp_help)
    subn_stattest.add_argument('--control-sgrna', help='A list of control sgRNAs for generating null distribution.')
    subn_stattest.add_argument('--remove-zero', choices=['none', 'control', 'treatment', 'both'], default='none', help='Whether to remove zero-count sgRNAs in control and/or treatment experiments. Default: none (do not remove those zero-count sgRNAs).')
    subn_stattest.add_argument('--pdf-report', action='store_true', help=pdf_help)

    # pathway test
    subw_pathway = subparser.add_parser('pathway', help='Perform significant pathway analysis from gene rankings generated by the test command.')
    subw_pathway.add_argument('--gene-ranking', required=True, help='The gene summary file (containing both positive and negative selection tests) generated by the gene test step. Pathway enrichment will be performed in both directions.')
    subw_pathway.add_argument('--single-ranking', action='store_true', help='The provided file is a (single) gene ranking file, either positive or negative selection. Only one enrichment comparison will be performed.')
    subw_pathway.add_argument('--gmt-file', required=True, help='The pathway file in GMT format.')
    subw_pathway.add_argument('-n', '--output-prefix', default='sample1', help=prefix_help)
    subw_pathway.add_argument('--sort-criteria', choices=['neg', 'pos'], default='neg', help=sort_help)
    subw_pathway.add_argument('--keep-tmp', action='store_true', help=keeptmp_help)
    subw_pathway.add_argument('--ranking-column', default='2', help='Column number or label in gene summary file for gene ranking; can be either an integer of column number, or a string of column label. Default "2" (the 3rd column).')
    # The help text previously advertised "5" while the real default is '7'.
    subw_pathway.add_argument('--ranking-column-2', default='7', help='Column number or label in gene summary file for gene ranking; can be either an integer of column number, or a string of column label. This option is used to determine the column for positive selections and is disabled if --single-ranking is specified. Default "7" (the 8th column).')

    # plot
    subp_plot = subparser.add_parser('plot', help='Generating graphics for selected genes.')
    subp_plot.add_argument('-k', '--count-table', required=True, help=counttable_help)
    subp_plot.add_argument('-g', '--gene-summary', required=True, help='The gene summary file generated by the test command.')
    subp_plot.add_argument('--genes', help='A list of genes to be plotted, separated by comma. Default: none.')
    subp_plot.add_argument('-s', '--samples', help='A list of samples to be plotted, separated by comma. Default: using all samples in the count table.')
    subp_plot.add_argument('-n', '--output-prefix', default='sample1', help=prefix_help)
    subp_plot.add_argument('--norm-method', choices=['none', 'median', 'total'], default='median', help=norm_help)
    subp_plot.add_argument('--keep-tmp', action='store_true', help=keeptmp_help)

    args = parser.parse_args()

    # Python 3 argparse does not require a sub-command: without this guard
    # args.subcmd is None and args.output_prefix below raises AttributeError.
    if args.subcmd is None:
        parser.error('too few arguments: a sub-command is required')

    # The pathway step appends to the log of a previous run; every other
    # command starts a fresh log file.
    if args.subcmd == 'pathway':
        logmode = "a"
    else:
        logmode = "w"

    logging.basicConfig(level=logging.DEBUG,
                        format='%(levelname)-5s @ %(asctime)s: %(message)s ',
                        datefmt='%a, %d %b %Y %H:%M:%S',
                        filename=args.output_prefix+'.log',
                        filemode=logmode)
    # Echo INFO and above through a stream handler using the same layout.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(levelname)-5s @ %(asctime)s: %(message)s ', '%a, %d %b %Y %H:%M:%S')
    console.setFormatter(formatter)
    # add the handler to the root logger
    logging.getLogger('').addHandler(console)

    # record the full command line
    logging.info('Parameters: '+' '.join(sys.argv))

    return args
4 | \documentclass{article} 5 | 6 | \usepackage{amsmath} 7 | \usepackage{amscd} 8 | \usepackage[tableposition=top]{caption} 9 | \usepackage{ifthen} 10 | \usepackage{fullpage} 11 | \usepackage[utf8]{inputenc} 12 | 13 | \begin{document} 14 | \setkeys{Gin}{width=0.9\textwidth} 15 | 16 | \title{MAGeCK Count Report} 17 | \author{Wei Li} 18 | 19 | \maketitle 20 | 21 | 22 | \tableofcontents 23 | 24 | \section{Summary} 25 | 26 | %Function definition 27 | <>= 28 | genreporttable<-function(filelist,labellist,reads,mappedreads,zerocounts){ 29 | xtb=data.frame(File=filelist,Label=labellist,Reads=reads,MappedReads=mappedreads,MappedPercentage=mappedreads/reads,ZeroCounts=zerocounts); 30 | colnames(xtb)=c("File","Label","Reads","Mapped","Percentage","ZeroCounts"); 31 | return (xtb); 32 | } 33 | 34 | genboxplot<-function(filename,...){ 35 | #slmed=read.table(filename,header=T) 36 | slmed=read.csv(filename) 37 | slmat=as.matrix(slmed[,c(-1,-2)]) 38 | slmat_log=log2(slmat+1) 39 | 40 | boxplot(slmat_log,pch='.',las=2,ylab='log2(read counts)',cex.axis=0.8,...) 
41 | } 42 | 43 | colors=c( "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#A65628", "#F781BF", 44 | "#999999", "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", 45 | "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", 46 | "#D9D9D9", "#BC80BD", "#CCEBC5", "#FFED6F"); 47 | 48 | ctfit_tx=0; 49 | genpcaplot<-function(filename,...){ 50 | #slmed=read.table(filename,header=T) 51 | slmed=read.csv(filename) 52 | slmat=as.matrix(slmed[,c(-1,-2)]) 53 | slmat_log=log2(slmat+1) 54 | ctfit_tx<<-prcomp(t(slmat_log),center=TRUE) 55 | 56 | # par(mfrow=c(2,1)); 57 | # first 2 PCA 58 | samplecol=colors[((1:ncol(slmat)) %% length(colors)) ] 59 | plot(ctfit_tx$x[,1],ctfit_tx$x[,2],xlab='PC1',ylab='PC2',main='First 2 PCs',col=samplecol,xlim=1.1*range(ctfit_tx$x[,1]),ylim=1.1*range(ctfit_tx$x[,2])); 60 | text(ctfit_tx$x[,1],ctfit_tx$x[,2],rownames(ctfit_tx$x),col=samplecol); 61 | # par(mfrow=c(1,1)); 62 | } 63 | 64 | genpcavar<-function(){ 65 | # % variance 66 | varpca=ctfit_tx$sdev^2 67 | varpca=varpca/sum(varpca)*100; 68 | if(length(varpca)>10){ 69 | varpca=varpca[1:10]; 70 | } 71 | plot(varpca,type='b',lwd=2,pch=20,xlab='PCs',ylab='% Variance explained'); 72 | } 73 | 74 | @ 75 | 76 | The statistics of comparisons is as indicated in the following table. 77 | 78 | <>= 79 | library(xtable) 80 | #__COUNT_SUMMARY_STAT__ 81 | cptable=genreporttable(filelist,labellist,reads,mappedreads,zerocounts); 82 | print(xtable(cptable, caption = "Summary of comparisons", label = "tab:one", 83 | digits = c(0,0, 0, 0, 0, 2,0), 84 | align=c('c', 'p{3cm}', 'c', 'c', 'c', 'c', 'c'), 85 | table.placement = "tbp", 86 | caption.placement = "top")) 87 | @ 88 | 89 | The meanings of the columns are as follows. 
def systemcall(command, cmsg=True):
    """
    Run a shell command and return its output as text
    Parameter:
        command: the command line, handed to the shell as one string
        cmsg: if true, every line of the output is also logged at INFO level
    Return:
        the decoded output (stderr is redirected into stdout)
    """
    logging.info('Running command: '+command)
    # NOTE(review): shell=True lets the shell interpret the string; callers
    # must not build `command` from untrusted input.
    proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
    # "replace" keeps a tool that prints non-UTF-8 bytes from crashing us.
    t = proc.communicate()[0].decode("utf-8", "replace")
    if cmsg:
        logging.info('Command message:')
        for t0 in t.split('\n'):
            logging.info(' '+t0)
        logging.info('End command message.')
    return t


def merge_rank_summary_files(lowfile,highfile,outfile,args,lowfile_prefix='',highfile_prefix=''):
    """
    Merge multiple rank summary files
    Parameter:
        lowfile: first summary file; its row order defines the output order
        highfile: second summary file, joined on the id in the 1st column
        outfile: path of the merged, tab-separated table to write
        args: not used by this function
        lowfile_prefix: prepended to every lowfile header label from the 3rd column on
        highfile_prefix: prepended to every highfile header label from the 3rd column on
    Return:
        None. Only ids present in both files are written; ids found in just
        one file are reported with a warning and skipped.
    """
    merged = {}      # id -> [num, lowfile fields..., highfile fields...]
    loworder = []    # ids in lowfile order
    lowheader = []
    with open(lowfile) as fh:
        for nline, line in enumerate(fh, 1):
            field = line.strip().split()
            if nline == 1:
                lowheader = field[:2]+[lowfile_prefix+t for t in field[2:]]
                continue
            if len(field) < 4:
                logging.error('The number of fields in file '+lowfile+' is <4.')
                sys.exit(-1)
            gid = field[0]
            int(field[1])  # the item number must be an integer (ValueError otherwise)
            merged[gid] = field[1:]
            loworder.append(gid)

    highheader = []
    joined = set()   # ids that received their highfile fields
    with open(highfile) as fh:
        for nline, line in enumerate(fh, 1):
            field = line.strip().split()
            if nline == 1:
                highheader = field[:2]+[highfile_prefix+t for t in field[2:]]
                continue
            if len(field) < 4:
                logging.error('The number of fields in file '+highfile+' is <4.')
                sys.exit(-1)
            gid = field[0]
            gitem = int(field[1])
            if gid not in merged:
                logging.warning('Item '+gid+' appears in '+highfile+', but not in '+lowfile+'. This record will be omitted.')
                continue
            if gid in joined:
                # A second record would produce a row wider than the header.
                logging.warning('Item '+gid+' appears more than once in '+highfile+'. Only the first record is used.')
                continue
            prevgitem = int(merged[gid][0])
            if prevgitem != gitem:
                logging.warning('Item number of '+gid+' does not match previous file: '+str(gitem)+' !='+str(prevgitem)+'.')
            merged[gid] += field[2:]  # don't repeat the item number
            joined.add(gid)

    # Report ids that only the first file contains; they are not written.
    for gid in merged:
        if gid not in joined:
            logging.warning('Item '+gid+' appears in '+lowfile+', but not in '+highfile+'.')

    with open(outfile, 'w') as ofhd:
        ofhd.write('\t'.join(lowheader)+'\t'+'\t'.join(highheader[2:])+'\n')
        for gid in loworder:
            if gid in joined:
                ofhd.write('\t'.join([gid]+merged[gid])+'\n')
def _read_rank_records(fname):
    """
    Read one RRA ranking file, skipping its header line.

    Returns
    -------
    (records, nline)
        records is a list of (id, num, lo, p, fdr, rank, goodsgrna) tuples in
        file order, where rank is the 1-based position among the data lines;
        nline is the total number of lines read, header included.
    """
    records=[]
    nline=0
    with open(fname) as fhd:
        for line in fhd:
            nline+=1
            if nline==1: # skip the first line
                continue
            field=line.strip().split()
            # six columns are parsed below, so require all of them up front
            # instead of failing later with an IndexError
            if len(field)<6:
                logging.error('The number of fields in file '+fname+' is <6.')
                sys.exit(-1)
            records.append((field[0],int(field[1]),float(field[2]),float(field[3]),
                            float(field[4]),nline-1,int(field[5])))
    return (records,nline)


def merge_rank_files(lowfile,highfile,outfile,args):
    """
    Merge neg. and pos. selected files (generated by RRA) into one

    Parameters
    ----------
    lowfile : str
        Ranking file whose values fill the *.neg columns.
    highfile : str
        Ranking file whose values fill the *.pos columns.
    outfile : str
        Path of the tab-separated merged table to write.
    args : object
        Only the optional attribute sort_criteria is read; when it equals
        'pos' rows are sorted by lo.pos, otherwise by lo.neg.

    Notes
    -----
    An item missing from one of the two files is kept, and the missing side is
    filled with lo=p=fdr=1.0, rank=<number of lines of lowfile>, goodsgrna=0.
    The process exits with -1 if a data line has fewer than six fields.
    """
    placeholder_tail=(1.0,1.0,1.0)
    gfile={}
    # read files individually
    (lowrecs,maxnline)=_read_rank_records(lowfile)
    for (gid,gitem,g_lo,g_p,g_fdr,grank,g_goodsgrna) in lowrecs:
        gfile[gid]=[(gitem,g_lo,g_p,g_fdr,grank,g_goodsgrna)]
    (highrecs,_)=_read_rank_records(highfile)
    for (gid,gitem,g_lo,g_p,g_fdr,grank,g_goodsgrna) in highrecs:
        if gid not in gfile:
            logging.warning('Item '+gid+' appears in '+highfile+', but not in '+lowfile+'.')
            # keep the item number in the placeholder so the row still has a 'num' column
            gfile[gid]=[(gitem,)+placeholder_tail+(maxnline,0)]
        elif gfile[gid][0][0]!=gitem:
            logging.warning('Item number of '+gid+' does not match previous file: '+str(gitem)+' !='+str(gfile[gid][0][0])+'.')
        gfile[gid].append((g_lo,g_p,g_fdr,grank,g_goodsgrna)) # don't repeat the gitem
    # check whether some items appear in the first group, but not in the second group
    for (k,v) in gfile.items():
        if len(v)==1:
            # use the loop key here: the last id read from highfile is unrelated
            logging.warning('Item '+k+' appears in '+lowfile+', but not in '+highfile+'.')
            v.append(placeholder_tail+(maxnline,0))

    if hasattr(args,'sort_criteria') and args.sort_criteria=='pos':
        logging.debug('Sorting the merged items by positive selection...')
        skey=sorted(gfile.items(),key=lambda x : x[1][1][0])
    else:
        logging.debug('Sorting the merged items by negative selection...')
        skey=sorted(gfile.items(),key=lambda x : x[1][0][1])

    # write to files
    header=['id','num','lo.neg','p.neg','fdr.neg','rank.neg','goodsgrna.neg',
            'lo.pos','p.pos','fdr.pos','rank.pos','goodsgrna.pos']
    with open(outfile,'w') as ofhd:
        ofhd.write('\t'.join(header)+'\n')
        for (gid,recs) in skey:
            ofhd.write('\t'.join([gid]+[str(t) for t in recs[0]+recs[1]])+'\n')
for x in groupids])); 208 | return (groupids,groupidslabel); 209 | 210 | 211 | -------------------------------------------------------------------------------- /mageck/mageckCount.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ MAGeCK count module 3 | Copyright (c) 2014 Wei Li, Han Xu, Xiaole Liu lab 4 | This code is free software; you can redistribute it and/or modify it 5 | under the terms of the BSD License (see the file COPYING included with 6 | the distribution). 7 | @status: experimental 8 | @version: $Revision$ 9 | @author: Wei Li 10 | @contact: li.david.wei AT gmail.com 11 | """ 12 | from __future__ import print_function 13 | 14 | import sys; 15 | import argparse; 16 | import math; 17 | import logging; 18 | from testVisualCount import *; 19 | 20 | def mageckcount_parseargs(): 21 | """ 22 | Parse arguments. Only used when mageckCount.py is executed directly. 23 | """ 24 | parser=argparse.ArgumentParser(description='Collecting read counts for multiple samples.'); 25 | 26 | parser.add_argument('-l','--list-seq',required=True,help='A file containing the list of sgRNA names, their sequences and associated genes. Support file format: csv and txt.'); 27 | parser.add_argument('--sample-label',default='',help='Sample labels, separated by comma (,). Must be equal to the number of samples provided. Default "sample1,sample2,...".'); 28 | parser.add_argument('-n','--output-prefix',default='sample1',help='The prefix of the output file(s). Default sample1.'); 29 | parser.add_argument('--trim-5',type=int,default=0,help='Length of trimming the 5\' of the reads. Default 0'); 30 | parser.add_argument('--sgrna-len',type=int,default=20,help='Length of the sgRNA. Default 20'); 31 | parser.add_argument('--count-n',action='store_true',help='Count sgRNAs with Ns. 
def normalizeCounts(ctable,method='median'):
    """
    Normalize read counts

    Parameters
    ----------
    ctable : dict
        {sgRNA:[read counts]}; every list holds one count per sample.
    method : str
        'median' (default) scales each sample by the inverse of the median
        ratio between its counts and the per-sgRNA geometric mean of
        (count+1); 'none' applies a factor of 1; any other value keeps the
        total-read-count factor (average sample total / sample total).

    Return value: {sgRNA:[read counts]}
        A new dictionary; ctable itself is not modified. With 'median', if
        any sample has a zero median ratio, all samples fall back to the
        total-read-count factor.
    """
    if len(ctable)==0:
        return ctable.copy()
    n=len(next(iter(ctable.values()))) # samples
    if n==0:
        # no sample columns: nothing to scale
        return {k:[] for k in ctable}
    # calculate the sum
    sumsample=[0]*n
    for v in ctable.values():
        sumsample=[sumsample[i]+v[i] for i in range(n)]
    logging.info('Total read counts of each sample: '+' '.join([str(x) for x in sumsample]))
    logging.debug('Normalization method: '+method)
    # normalizing factor
    avgsample=sum(sumsample)/float(n)
    samplefactor=[]
    for (ni,total) in enumerate(sumsample):
        if total!=0:
            samplefactor.append(avgsample/total)
        else:
            # every count of this sample is zero; any factor leaves it unchanged
            logging.warning('Sample '+str(ni)+' has zero total read count. Its counts are left unscaled.')
            samplefactor.append(1.0)
    logging.debug('Initial (total) size factor: '+' '.join([str(x) for x in samplefactor]))
    if method=='median':
        # geometric mean of (count+1), only for sgRNAs with at least one read
        meanval={}
        for (k,v) in ctable.items():
            if sum(v)>0:
                gmean=math.exp(sum([math.log(v2+1.0) for v2 in v])*1.0/n)
                meanval[k]=gmean if gmean>0 else 1
        usetotalnorm=False
        medianfactor=list(samplefactor)
        for ni in range(n):
            meanfactor=sorted([v[ni]/meanval[k] for (k,v) in ctable.items() if k in meanval])
            # upper median; an empty list (no sgRNA has reads) counts as zero
            xfactor=meanfactor[len(meanfactor)//2] if len(meanfactor)>0 else 0.0
            if xfactor>0.0:
                medianfactor[ni]=1.0/xfactor
            else:
                logging.warning('Sample '+str(ni)+' has zero median count, so median normalization is not possible. Switch to total read count normalization.')
                usetotalnorm=True
        # normalizing factor
        if not usetotalnorm:
            samplefactor=medianfactor
            logging.debug('Median factor: '+' '.join([str(x) for x in samplefactor]))
    elif method=='none':
        samplefactor=[1]*n
    logging.debug('Final factor: '+' '.join([str(x) for x in samplefactor]))

    # normalize the table
    ntable={k:[samplefactor[i]*v[i] for i in range(n)] for (k,v) in ctable.items()}

    return ntable
def mageckcount_mergedict(dict0,dict1):
    '''
    Merge all items in dict1 to dict0.

    Parameters
    ----------
    dict0
        {key:[count per sample]}; modified in place, every list grows by one.
    dict1
        {key:count} of the sample to append.

    Keys missing from dict1 get a 0 for the new sample; keys new to dict0 are
    padded with zeros for the samples already merged. The number of samples
    already merged is taken from an existing entry of dict0, so it is 0
    whenever dict0 is empty.

    Return value
    ----------
    None (dict0 is updated in place)
    '''
    nsample=0
    if len(dict0)>0:
        # any entry will do: all lists are kept at the same length
        nsample=len(next(iter(dict0.values())))
    for (k,v) in dict0.items():
        v.append(dict1.get(k,0))
    for (k,v) in dict1.items():
        if k not in dict0:
            dict0[k]=[0]*nsample+[v]
def mageckcount_checklists(args):
    """
    Read sgRNAs and associated sequences and lists
    format: sgRNAid seq geneid

    Parameters
    ----------
    args
        Only args.list_seq (path of the library file) is read. A name ending
        in 'csv' (case-insensitive) is split on commas, anything else on
        whitespace.

    Return value
    ----------
    {sgRNA id: (upper-case sequence, gene id)}

    Records are skipped, with a warning, when they have fewer than three
    fields, reuse an sgRNA id already loaded, or reuse a sequence already
    loaded (compared in upper case).
    """
    genedict={}
    hascsv=args.list_seq.upper().endswith('CSV')
    n=0
    seenseq=set() # upper-case sequences of the records accepted so far
    with open(args.list_seq) as fhd:
        for line in fhd:
            if hascsv:
                field=line.strip().split(',')
            else:
                field=line.strip().split()
            n+=1
            # check the length first: a blank line has no field[0]
            if len(field)<3:
                logging.warning('Not enough field in line '+str(n)+'. Skip this record.')
                continue
            if field[0] in genedict:
                logging.warning('Duplicated sgRNA label '+field[0]+' in line '+str(n)+'. Skip this record.')
                continue
            sgseq=field[1].upper()
            if sgseq in seenseq:
                logging.warning('Duplicated sgRNA sequence '+field[1]+' in line '+str(n)+'. Skip this record.')
                continue
            seenseq.add(sgseq)
            genedict[field[0]]=(sgseq,field[2])
    logging.info('Loading '+str(len(genedict))+' predefined sgRNAs.')
    return genedict
def getcounttablefromfile(filename):
    """
    read count table from file

    The file has one record per line: sgRNA id, gene id, then one count per
    sample. A name ending in '.csv' (case-insensitive) is split on commas,
    anything else on whitespace. If the counts of the first line cannot be
    parsed as numbers, it is taken as the header and its columns from the
    third on become the sample ids.

    Lines with fewer than two fields, lines (other than the first) with
    non-numeric counts, and repeated sgRNA ids are skipped with a warning.
    The process exits with -1 if two records have a different number of
    counts.

    Returns:
    ---------------
    x: dict
      {sgrna:[read counts]} (counts are floats)
    y: dict
      {sgrna:gene}, only for the records kept in x
    z: dict
      z={sample_id:index}
    """
    gtab={}
    mapptab={}
    sampleids={}
    nline=0
    nfield=-1
    # if it is CSV file
    hascsv=filename.upper().endswith('.CSV')
    logging.info('Loading count table from '+filename+' ')
    with open(filename) as fhd:
        for line in fhd:
            nline+=1
            if nline % 100000 == 1:
                logging.info('Processing '+str(nline)+' lines..')
            if hascsv:
                field=line.strip().split(',')
            else:
                field=line.strip().split()
            if len(field)<2:
                # e.g. a blank line: no id/gene pair to read
                logging.warning('Parsing error in line '+str(nline)+'. Skip this line.')
                continue
            sgid=field[0]
            geneid=field[1]
            # check if duplicate sgRNA IDs are detected
            if sgid in gtab:
                logging.warning('Duplicated sgRNA IDs: '+sgid+' in line '+str(nline)+'. Skip this record.')
                continue
            try:
                sgrecs=[float(x) for x in field[2:]]
            except ValueError:
                if nline!=1:
                    logging.warning('Parsing error in line '+str(nline)+'. Skip this line.')
                else:
                    logging.debug('Parsing error in line '+str(nline)+' (usually the header line). Skip this line.')
                    for (i,sid) in enumerate(field[2:]):
                        sampleids[sid]=i
                continue
            # check the number of fields
            if nfield!=-1 and len(sgrecs)!=nfield:
                logging.error('Error: incorrect number of dimensions in line '+str(nline)+'. Please double-check your read count table file.')
                sys.exit(-1)
            nfield=len(sgrecs)
            gtab[sgid]=sgrecs
            # record the gene only once the counts are known to be valid, so the
            # header and rejected lines never show up as sgRNAs
            mapptab[sgid]=geneid
    logging.info('Loaded '+str(len(gtab))+' records.')
    return (gtab,mapptab,sampleids)
45 | args: argparse object 46 | log2: 47 | whether to log-2 transform the score 48 | columnid: int/string 49 | The column id of the score, default is 2 (3rd score) 50 | """ 51 | geneinfo={}; 52 | nline=0; 53 | columnid_int=-1; 54 | try: 55 | columnid_int=int(columnid); 56 | except ValueError: 57 | pass; 58 | for line in open(fname): 59 | nline=nline+1; 60 | field=line.strip().split(); 61 | if nline<2: # skip the first line 62 | for i in range(len(field)): 63 | if field[i]==columnid: 64 | columnid_int=i; 65 | break; 66 | if columnid_int==-1: 67 | logging.error('Cannot determine the column to be used for ranking. Please double check with the id you provide.'); 68 | sys.exit(-1); 69 | else: 70 | logging.info('Column used for ranking: '+str(columnid)+' ('+str(columnid_int)+')'); 71 | continue; 72 | if len(field)1: 196 | #testval2=sum(random.sample(gdictvals,nsize))/(nsize**0.5); # one direction scale 197 | testval2=sum([x**2 for x in random.sample(gdictvals,nsize)])/2/(nsize-1)-0.5; # two directional scale 198 | else: 199 | testval2=0; 200 | if pname not in pdictqvals: 201 | pdictqvals[pname]=[0.0,0.0]; 202 | if pitem[3]>=testval2: 203 | pdictqvals[pname][0]+=1; 204 | if pitem[3]<=testval2: 205 | pdictqvals[pname][1]+=1; 206 | print('',file=sys.stderr); 207 | pdictqvals={k:(v[0]*1.0/niter,v[1]*1.0/niter) for (k,v) in pdictqvals.iteritems()}; 208 | return pdictqvals; 209 | 210 | 211 | 212 | 213 | def mageck_pathway_ztest(args,gdict,pdict): 214 | ''' 215 | Perform z test on the normalized gene score (gdict) and pathway (pdict) 216 | Return values: 217 | pdict pvals: a dictionary structure, with key the pathway name and value a n-tuple including: 218 | p value (low), pvalue (high), size of the pathway genes in the destination, and test statistics 219 | ''' 220 | pdictpvals={}; 221 | for (pname,pitem) in pdict.iteritems(): 222 | testvec=[gdict[x] for x in pitem if x in gdict]; 223 | testvecsq=[x**2 for x in testvec]; 224 | if len(testvec)>1: 225 | 
#testval2=sum(testvec)/(len(testvec)**0.5); # one direction scale 226 | testval2=sum(testvecsq)/2/(len(testvec)-1)-0.5; # two directional scale 227 | else: 228 | testval2=0; 229 | sq_p_low=getnormcdf(testval2); 230 | sq_p_high=getnormcdf(testval2,lowertail=False); 231 | pdictpvals[pname]=(sq_p_low,sq_p_high,len(testvec),testval2); 232 | return pdictpvals; 233 | 234 | 235 | def mageck_pathwaygsa(args): 236 | ''' 237 | pathway enrichment analysis 238 | ''' 239 | # reading pathway files 240 | pdict=mageck_read_GMT(args); 241 | # reading gene ranking files 242 | ginfo=mageck_readgeneranking(args.gene_ranking,args,log2=True); 243 | if args.gene_ranking_2 is not None: 244 | ginfo2=mageck_readgeneranking(args.gene_ranking_2,args,log2=True); 245 | # merge the scores of ginfo2 into ginfo 246 | for gk in ginfo.keys(): 247 | if gk in ginfo2.keys(): 248 | ginfo[gk]=ginfo[gk]-ginfo2[gk]; 249 | # rank transform 250 | ranktransform=True; 251 | ginfo_rt=ginfo; 252 | if ranktransform: 253 | ginfo_st=sorted(ginfo.iteritems(),key=lambda x: x[1]); 254 | gauss_x=sorted([ random.gauss(0,1) for x in ginfo_st]); 255 | ginfo_rt={ ginfo_st[i][0]:gauss_x[i] for i in range(len(ginfo_st))}; 256 | # standardize the scores 257 | ginfo_sd=mageck_pathway_standardize(ginfo_rt); 258 | 259 | # test for pathways 260 | pathway_pval=mageck_pathway_ztest(args,ginfo_sd,pdict); 261 | #pathway_pval_tup=sorted(pathway_pval.iteritems(),key=lambda x : min(x[1][:2])); 262 | #x_pvalues=[min(t[1][:2]) for t in pathway_pval_tup]; 263 | #x_fdr=pFDR(x_pvalues); 264 | 265 | # permutation? 
266 | pathway_qval=mageck_pathway_ztest_permutation(args,ginfo_sd,pdict,pathway_pval); 267 | pathway_qval_tup=sorted(pathway_qval.iteritems(),key=lambda x:min(x[1])); 268 | x_qvalues=[min(t[1]) for t in pathway_qval_tup]; 269 | x_fdr=pFDR(x_qvalues); 270 | # output file 271 | rsa_path_output_file=args.output_prefix+'.pathway.txt'; 272 | rsa_file=open(rsa_path_output_file,'w'); 273 | # print('\t'.join(['PATHWAY','low','high','size','score','pvalue','q.low','q.high','FDR']),file=rsa_file); 274 | print('\t'.join(['PATHWAY','size','score','q.low','q.high','FDR']),file=rsa_file); 275 | for i in range(len(pathway_qval_tup)): 276 | lx=pathway_qval_tup[i]; 277 | lpadj=x_fdr[i]; 278 | print('\t'.join([lx[0],'\t'.join([str(z) for z in pathway_pval[lx[0]][2:]]), # str(min(lx[1][:2])), 279 | str(pathway_qval[lx[0]][0]), str(pathway_qval[lx[0]][1]), str(lpadj)]),file=rsa_file); 280 | rsa_file.close(); 281 | 282 | 283 | 284 | 285 | 286 | def mageck_pathwaytest(args): 287 | ''' 288 | The main entry for pathway test 289 | ''' 290 | mageck_pathwayrra(args); 291 | #mageck_pathwaygsa(args); 292 | 293 | -------------------------------------------------------------------------------- /mageck/plot_template.RTemplate: -------------------------------------------------------------------------------- 1 | # 2 | # 3 | # parameters 4 | # Do not modify the variables beginning with "__" 5 | 6 | # gstablename='__GENE_SUMMARY_FILE__' 7 | startindex=__INDEX_COLUMN__ 8 | # outputfile='__OUTPUT_FILE__' 9 | targetgenelist=__TARGET_GENE_LIST__ 10 | # samplelabel=sub('.\\w+.\\w+$','',colnames(gstable)[startindex]); 11 | samplelabel='__SAMPLE_LABEL__' 12 | 13 | 14 | # You need to write some codes in front of this code: 15 | # gstable=read.table(gstablename,header=T) 16 | # pdf(file=outputfile,width=6,height=6) 17 | 18 | 19 | # set up color using RColorBrewer 20 | #library(RColorBrewer) 21 | #colors <- brewer.pal(length(targetgenelist), "Set1") 22 | 23 | colors=c( "#E41A1C", "#377EB8", "#4DAF4A", 
"#984EA3", "#FF7F00", "#A65628", "#F781BF", 24 | "#999999", "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", 25 | "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", 26 | "#D9D9D9", "#BC80BD", "#CCEBC5", "#FFED6F") 27 | 28 | ###### 29 | # function definition 30 | 31 | plotrankedvalues<-function(val, tglist, ...){ 32 | 33 | plot(val,log='y',ylim=c(max(val),min(val)),type='l',lwd=2, ...) 34 | if(length(tglist)>0){ 35 | for(i in 1:length(tglist)){ 36 | targetgene=tglist[i]; 37 | tx=which(names(val)==targetgene);ty=val[targetgene]; 38 | points(tx,ty,col=colors[(i %% length(colors)) ],cex=2,pch=20) 39 | # text(tx+50,ty,targetgene,col=colors[i]) 40 | } 41 | legend('topright',tglist,pch=20,pt.cex = 2,cex=1,col=colors) 42 | } 43 | } 44 | 45 | 46 | 47 | plotrandvalues<-function(val,targetgenelist, ...){ 48 | # choose the one with the best distance distribution 49 | 50 | mindiffvalue=0; 51 | randval=val; 52 | for(i in 1:20){ 53 | randval0=sample(val) 54 | vindex=sort(which(names(randval0) %in% targetgenelist)) 55 | if(max(vindex)>0.9*length(val)){ 56 | # print('pass...') 57 | next; 58 | } 59 | mindiffind=min(diff(vindex)); 60 | if (mindiffind > mindiffvalue){ 61 | mindiffvalue=mindiffind; 62 | randval=randval0; 63 | # print(paste('Diff: ',mindiffvalue)) 64 | } 65 | } 66 | plot(randval,log='y',ylim=c(max(randval),min(randval)),pch=20,col='grey', ...) 
67 | 68 | if(length(targetgenelist)>0){ 69 | for(i in 1:length(targetgenelist)){ 70 | targetgene=targetgenelist[i]; 71 | tx=which(names(randval)==targetgene);ty=randval[targetgene]; 72 | points(tx,ty,col=colors[(i %% length(colors)) ],cex=2,pch=20) 73 | text(tx+50,ty,targetgene,col=colors[i]) 74 | } 75 | } 76 | 77 | } 78 | 79 | 80 | 81 | 82 | # set.seed(1235) 83 | 84 | 85 | 86 | pvec=gstable[,startindex] 87 | names(pvec)=gstable[,'id'] 88 | pvec=sort(pvec); 89 | 90 | plotrankedvalues(pvec,targetgenelist,xlab='Genes',ylab='RRA score',main=paste('Distribution of RRA scores in \\n',samplelabel)) 91 | 92 | # plotrandvalues(pvec,targetgenelist,xlab='Genes',ylab='RRA score',main=paste('Distribution of RRA scores in \\n',samplelabel)) 93 | 94 | 95 | pvec=gstable[,startindex+1] 96 | names(pvec)=gstable[,'id'] 97 | pvec=sort(pvec); 98 | 99 | plotrankedvalues(pvec,targetgenelist,xlab='Genes',ylab='p value',main=paste('Distribution of p values in \\n',samplelabel)) 100 | 101 | # plotrandvalues(pvec,targetgenelist,xlab='Genes',ylab='p value',main=paste('Distribution of p values in \\n',samplelabel)) 102 | 103 | 104 | 105 | # you need to write after this code: 106 | # dev.off() 107 | 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /mageck/plot_template.Rnw: -------------------------------------------------------------------------------- 1 | % This is a template file for Sweave used in MAGeCK 2 | % Author: Wei Li, Shirley Liu lab 3 | % Do not modify lines beginning with "#__". 
4 | \documentclass{article} 5 | 6 | \usepackage{amsmath} 7 | \usepackage{amscd} 8 | \usepackage[tableposition=top]{caption} 9 | \usepackage{ifthen} 10 | \usepackage{fullpage} 11 | \usepackage[utf8]{inputenc} 12 | 13 | \begin{document} 14 | \setkeys{Gin}{width=0.9\textwidth} 15 | 16 | \title{MAGeCK Comparison Report} 17 | \author{Wei Li} 18 | 19 | \maketitle 20 | 21 | 22 | \tableofcontents 23 | 24 | \section{Summary} 25 | 26 | %Function definition 27 | <>= 28 | genreporttable<-function(comparisons,ngenes,direction,fdr1,fdr5,fdr25){ 29 | xtb=data.frame(Comparison=comparisons,Genes=ngenes,Selection=direction,FDR1=fdr1,FDR5=fdr5,FDR25=fdr25); 30 | colnames(xtb)=c("Comparison","Genes","Selection","FDR1%","FDR5%","FDR25%"); 31 | return (xtb); 32 | } 33 | @ 34 | 35 | The statistics of comparisons is as indicated in the following table. 36 | 37 | <>= 38 | library(xtable) 39 | #__GENE_SUMMARY_STAT__ 40 | cptable=genreporttable(comparisons,ngenes,direction,fdr1,fdr5,fdr25); 41 | print(xtable(cptable, caption = "Summary of comparisons", label = "tab:one", 42 | digits = c(0, 0, 0, 0, 0, 0, 0), 43 | table.placement = "tbp", 44 | caption.placement = "top")) 45 | @ 46 | 47 | The meanings of the columns are as follows. 48 | 49 | \begin{itemize} 50 | \item \textbf{Comparison}: The label for comparisons; 51 | \item \textbf{Genes}: The number of genes in the library; 52 | \item \textbf{Selection}: The direction of selection, either positive selection or negative selection; 53 | \item \textbf{FDR1\%}: The number of genes with FDR $<$ 1\%; 54 | \item \textbf{FDR5\%}: The number of genes with FDR $<$ 5\%; 55 | \item \textbf{FDR25\%}: The number of genes with FDR $<$ 25\%; 56 | \end{itemize} 57 | 58 | The following figures show: 59 | 60 | \begin{itemize} 61 | \item Individual sgRNA read counts of selected genes in selected samples; 62 | \item The distribution of RRA scores and p values of all genes; and 63 | \item The RRA scores and p values of selected genes. 
64 | \end{itemize} 65 | 66 | %__INDIVIDUAL_PAGE__ 67 | 68 | \end{document} 69 | -------------------------------------------------------------------------------- /mageck/plot_template_indvgene.RTemplate: -------------------------------------------------------------------------------- 1 | 2 | # parameters 3 | # Do not modify the variables beginning with "__" 4 | targetmat=__TARGET_MATRIX__ 5 | targetgene=__TARGET_GENE__ 6 | collabel=__COL_LABEL__ 7 | 8 | # set up color using RColorBrewer 9 | #library(RColorBrewer) 10 | #colors <- brewer.pal(length(targetgenelist), "Set1") 11 | 12 | colors=c( "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#A65628", "#F781BF", 13 | "#999999", "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", 14 | "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", 15 | "#D9D9D9", "#BC80BD", "#CCEBC5", "#FFED6F") 16 | 17 | 18 | ## code 19 | 20 | targetmatvec=unlist(targetmat)+1 21 | yrange=range(targetmatvec[targetmatvec>0]); 22 | # yrange[1]=1; # set the minimum value to 1 23 | for(i in 1:length(targetmat)){ 24 | vali=targetmat[[i]]+1; 25 | if(i==1){ 26 | plot(1:length(vali),vali,type='b',las=1,pch=20,main=paste('sgRNAs in',targetgene),ylab='Read counts',xlab='Samples',xlim=c(0.7,length(vali)+0.3),ylim = yrange,col=colors[(i %% length(colors))],xaxt='n',log='y') 27 | axis(1,at=1:length(vali),labels=(collabel),las=2) 28 | # lines(0:100,rep(1,101),col='black'); 29 | }else{ 30 | lines(1:length(vali),vali,type='b',pch=20,col=colors[(i %% length(colors))]) 31 | } 32 | } 33 | 34 | 35 | -------------------------------------------------------------------------------- /mageck/tags: -------------------------------------------------------------------------------- 1 | !_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/ 2 | !_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/ 3 | !_TAG_PROGRAM_AUTHOR Darren Hiebert /dhiebert@users.sourceforge.net/ 4 | 
!_TAG_PROGRAM_NAME Exuberant Ctags // 5 | !_TAG_PROGRAM_URL http://ctags.sourceforge.net /official site/ 6 | !_TAG_PROGRAM_VERSION 5.8 // 7 | VisualRValue testVisual.py /^class VisualRValue:$/;" c 8 | WriteRTemplate testVisual.py /^ def WriteRTemplate(self):$/;" m class:VisualRValue 9 | argparse argsParser.py /^import argparse$/;" i 10 | argparse mageckCount.py /^import argparse;$/;" i 11 | args mageckCount.py /^ args=mageckcount_parseargs();$/;" v 12 | closeRTemplate testVisual.py /^ def closeRTemplate(self):$/;" m class:VisualRValue 13 | comparisonlabel testVisual.py /^ comparisonlabel=[]; # label for comparison$/;" v class:VisualRValue 14 | cpindex testVisual.py /^ cpindex=[];$/;" v class:VisualRValue 15 | cplabel testVisual.py /^ cplabel=''$/;" v class:VisualRValue 16 | crispr_test crisprFunction.py /^def crispr_test(tab,ctrlg,testg, destfile,sgrna2genelist,args):$/;" f 17 | crisprseq_parseargs argsParser.py /^def crisprseq_parseargs():$/;" f 18 | generatePDF testVisual.py /^ def generatePDF(self,keeptmp=False):$/;" m class:VisualRValue 19 | genesummaryfile testVisual.py /^ genesummaryfile='';$/;" v class:VisualRValue 20 | getGeneSummaryStat testVisual.py /^ def getGeneSummaryStat(self,isplot=True):$/;" m class:VisualRValue 21 | getMeans crisprFunction.py /^def getMeans(matt):$/;" f 22 | getNBPValue crisprFunction.py /^def getNBPValue(mean0,var0,mean1, lower=False,log=False):$/;" f 23 | getNormalPValue crisprFunction.py /^def getNormalPValue(mean0,var0,mean1, lower=False):$/;" f 24 | getVars crisprFunction.py /^def getVars(matt):$/;" f 25 | getadjustvar crisprFunction.py /^def getadjustvar(coef,meanval,method='mixed'):$/;" f 26 | getcounttablefromfile mageckCount.py /^def getcounttablefromfile(filename):$/;" f 27 | getgeomean crisprFunction.py /^def getgeomean(v):$/;" f 28 | getnormcdf crisprFunction.py /^def getnormcdf(x,lowertail=True):$/;" f 29 | leastsquare crisprFunction.py /^def leastsquare(x,y,weight=None):$/;" f 30 | loadGeneExp testVisual.py /^ def 
loadGeneExp(self,genelist,nttab,sgrna2genelist,collabels):$/;" m class:VisualRValue 31 | loadSelGene testVisual.py /^ def loadSelGene(self,targetgene):$/;" m class:VisualRValue 32 | loadSelGeneWithExp testVisual.py /^ def loadSelGeneWithExp(self,targetgene,nttab,sgrna2genelist,collabels,k=10):$/;" m class:VisualRValue 33 | loadTopK testVisual.py /^ def loadTopK(self, filename, k=10):$/;" m class:VisualRValue 34 | loadTopKWithExp testVisual.py /^ def loadTopKWithExp(self,filename,nttab,sgrna2genelist,collabels,k=10):$/;" m class:VisualRValue 35 | logging argsParser.py /^import logging$/;" i 36 | logging crisprFunction.py /^import logging$/;" i 37 | logging fileOps.py /^import logging$/;" i 38 | logging mageckCount.py /^import logging;$/;" i 39 | logging pathwayFunc.py /^import logging;$/;" i 40 | logging testVisual.py /^import logging;$/;" i 41 | mageck_pathway_standardize pathwayFunc.py /^def mageck_pathway_standardize(gdict):$/;" f 42 | mageck_pathway_ztest pathwayFunc.py /^def mageck_pathway_ztest(args,gdict,pdict):$/;" f 43 | mageck_pathway_ztest_permutation pathwayFunc.py /^def mageck_pathway_ztest_permutation(args,gdict,pdict,pdictpvals):$/;" f 44 | mageck_pathwaygsa pathwayFunc.py /^def mageck_pathwaygsa(args):$/;" f 45 | mageck_pathwayrra pathwayFunc.py /^def mageck_pathwayrra(args):$/;" f 46 | mageck_pathwayrra_onedir pathwayFunc.py /^def mageck_pathwayrra_onedir(args,pdict,cid,sourcefile,rra_path_input_file,rra_path_output_file):$/;" f 47 | mageck_pathwaytest pathwayFunc.py /^def mageck_pathwaytest(args):$/;" f 48 | mageck_printdict mageckCount.py /^def mageck_printdict(dict0,args,sgdict,sampledict,sampleids):$/;" f 49 | mageck_read_GMT pathwayFunc.py /^def mageck_read_GMT(args):$/;" f 50 | mageck_readgeneranking pathwayFunc.py /^def mageck_readgeneranking(fname,args,log2=False,columnid=2):$/;" f 51 | mageck_removetmprra pathwayFunc.py /^def mageck_removetmprra(args):$/;" f 52 | mageckcount_checkargs mageckCount.py /^def mageckcount_checkargs(args):$/;" f 
53 | mageckcount_checklists mageckCount.py /^def mageckcount_checklists(args):$/;" f 54 | mageckcount_main mageckCount.py /^def mageckcount_main(args):$/;" f 55 | mageckcount_mergedict mageckCount.py /^def mageckcount_mergedict(dict0,dict1):$/;" f 56 | mageckcount_parseargs mageckCount.py /^def mageckcount_parseargs():$/;" f 57 | mageckcount_printdict mageckCount.py /^def mageckcount_printdict(dict0,args,ofile,sgdict,datastat,sep='\\t'):$/;" f 58 | mageckcount_printstat mageckCount.py /^def mageckcount_printstat(args,datastat):$/;" f 59 | mageckcount_processonefile mageckCount.py /^def mageckcount_processonefile(filename,args,ctab,genedict,datastat):$/;" f 60 | magecktest_main crisprFunction.py /^def magecktest_main(args):$/;" f 61 | magecktest_removetmp crisprFunction.py /^def magecktest_removetmp(prefix):$/;" f 62 | math crisprFunction.py /^import math$/;" i 63 | math fileOps.py /^import math$/;" i 64 | math mageckCount.py /^import math;$/;" i 65 | merge_rank_files fileOps.py /^def merge_rank_files(lowfile,highfile,outfile,args):$/;" f 66 | merge_rank_summary_files fileOps.py /^def merge_rank_summary_files(lowfile,highfile,outfile,args,lowfile_prefix='',highfile_prefix=''):$/;" f 67 | modelmeanvar crisprFunction.py /^def modelmeanvar(ctable,method='edger'):$/;" f 68 | nbinom crisprFunction.py /^ from scipy.stats import nbinom$/;" i 69 | nfdr1 testVisual.py /^ nfdr1=[]; # genes with FDR < 1, 5, 25%$/;" v class:VisualRValue 70 | nfdr25 testVisual.py /^ nfdr25=[];$/;" v class:VisualRValue 71 | nfdr5 testVisual.py /^ nfdr5=[];$/;" v class:VisualRValue 72 | ngenes testVisual.py /^ ngenes=[]; # number of genes$/;" v class:VisualRValue 73 | normalizeCounts mageckCount.py /^def normalizeCounts(ctable,method='median'):$/;" f 74 | os testVisual.py /^import os;$/;" i 75 | outprefix testVisual.py /^ outprefix='sample1';$/;" v class:VisualRValue 76 | outrfh testVisual.py /^ outrfh=None; # file handle for R file$/;" v class:VisualRValue 77 | outrnwfh testVisual.py /^ 
outrnwfh=None;$/;" v class:VisualRValue 78 | outrnwstring testVisual.py /^ outrnwstring='';$/;" v class:VisualRValue 79 | pFDR crisprFunction.py /^def pFDR(pvalues, method='fdr'):$/;" f 80 | parse_sampleids fileOps.py /^def parse_sampleids(samplelabel,ids):$/;" f 81 | plot_main testVisual.py /^def plot_main(args):$/;" f 82 | print_function argsParser.py /^from __future__ import print_function$/;" i 83 | print_function crisprFunction.py /^from __future__ import print_function$/;" i 84 | print_function fileOps.py /^from __future__ import print_function$/;" i 85 | print_function mageckCount.py /^from __future__ import print_function$/;" i 86 | print_function pathwayFunc.py /^from __future__ import print_function$/;" i 87 | print_function testVisual.py /^from __future__ import print_function;$/;" i 88 | random pathwayFunc.py /^import random;$/;" i 89 | rank_association_test crisprFunction.py /^def rank_association_test(file,outfile,cutoff,args):$/;" f 90 | re testVisual.py /^import re;$/;" i 91 | rnwtemplatestr testVisual.py /^ rnwtemplatestr='';$/;" v class:VisualRValue 92 | rtemplate_gene_str testVisual.py /^ rtemplate_gene_str=''; # template string$/;" v class:VisualRValue 93 | rtemplatestr testVisual.py /^ rtemplatestr=''; # template string$/;" v class:VisualRValue 94 | selection testVisual.py /^ selection=[]; # selections$/;" v class:VisualRValue 95 | startRTemplate testVisual.py /^ def startRTemplate(self):$/;" m class:VisualRValue 96 | subprocess fileOps.py /^import subprocess$/;" i 97 | sys argsParser.py /^import sys$/;" i 98 | sys crisprFunction.py /^import sys$/;" i 99 | sys fileOps.py /^import sys$/;" i 100 | sys mageckCount.py /^import sys;$/;" i 101 | sys pathwayFunc.py /^import sys;$/;" i 102 | sys testVisual.py /^import sys;$/;" i 103 | systemcall fileOps.py /^def systemcall(command, cmsg=True):$/;" f 104 | targetgene testVisual.py /^ targetgene=[];$/;" v class:VisualRValue 105 | types crisprFunction.py /^import types$/;" i 106 | 
class VisualRValue:
    '''
    Generate the R script and the Sweave (.Rnw) report for a gene summary file.

    Two outputs are produced side by side:

    * R code, printed directly to ``<outprefix>.R``;
    * report text, buffered in ``outrnwstring`` and written to
      ``<outprefix>_summary.Rnw`` by closeRTemplate().

    Set ``outprefix`` and ``genesummaryfile``, call startRTemplate(), then
    getGeneSummaryStat() and the ``load*`` methods, and finish with
    closeRTemplate() and generatePDF().

    Values (gene names, labels, file names) are inserted into the R and LaTeX
    text verbatim; they are not escaped for either language.
    '''
    # Class-level values are defaults only.  __init__ gives every instance its
    # own lists so that state never leaks between instances.
    outprefix = 'sample1'
    genesummaryfile = ''
    cpindex = []
    targetgene = []
    cplabel = ''

    # internal variables, for the R file
    rtemplatestr = ''        # template for the per-comparison R code
    rtemplate_gene_str = ''  # template for the per-gene R code
    outrfh = None            # file handle of the R file

    # for the Rnw file
    rnwtemplatestr = ''
    outrnwfh = None
    outrnwstring = ''
    # statistics of the gene summary file (two entries per comparison: neg, pos)
    comparisonlabel = []  # label of each comparison
    ngenes = []           # number of genes
    selection = []        # 'negative' / 'positive'
    nfdr1 = []            # number of genes with FDR < 1%
    nfdr5 = []            # number of genes with FDR < 5%
    nfdr25 = []           # number of genes with FDR < 25%

    def __init__(self):
        '''
        Create per-instance copies of all list-valued attributes.
        '''
        self.cpindex = []
        self.targetgene = []
        self.comparisonlabel = []
        self.ngenes = []
        self.selection = []
        self.nfdr1 = []
        self.nfdr5 = []
        self.nfdr25 = []

    def startRTemplate(self):
        '''
        Load the three bundled templates and open the two output files.

        The ``pdf(...)`` and ``read.table(...)`` preamble is written to
        ``<outprefix>.R`` immediately.

        Returns 0 on success, or -1 (after logging an error) when a template
        file is missing from the directory of this module.
        '''
        moduledir = os.path.dirname(__file__)
        templates = [
            ('plot_template.RTemplate', 'R'),
            ('plot_template_indvgene.RTemplate', 'R'),
            ('plot_template.Rnw', 'Rnw'),
        ]
        paths = []
        for (basename, kind) in templates:
            path = os.path.join(moduledir, basename)
            if not os.path.isfile(path):
                logging.error('Cannot find template file: ' + path)
                return -1
            logging.info('Loading ' + kind + ' template file: ' + path + '.')
            paths.append(path)
        logging.debug('Setting up the visualization module...')

        # Read every template before creating any output, so that a read
        # failure cannot leave a half-initialised object with open handles.
        contents = []
        for path in paths:
            with open(path, 'r') as tfile:
                contents.append(tfile.read())
        (self.rtemplatestr, self.rtemplate_gene_str, self.rnwtemplatestr) = contents
        self.outrnwstring = self.rnwtemplatestr

        # R file: open the PDF device and load the gene summary table
        self.outrfh = open(self.outprefix + '.R', 'w')
        outpdffile = self.outprefix + '.pdf'
        print("pdf(file='" + os.path.basename(outpdffile) + "',width=4.5,height=4.5);", file=self.outrfh)
        print("gstable=read.table('" + os.path.basename(self.genesummaryfile) + "',header=T)", file=self.outrfh)
        # Rnw file: only opened here, it is written in closeRTemplate()
        self.outrnwfh = open(self.outprefix + '_summary.Rnw', 'w')
        return 0

    def closeRTemplate(self):
        '''
        Finish the R file, flush the buffered report to the Rnw file and close both.

        The R file ends with the calls that turn the Rnw file into a PDF
        (``Sweave`` followed by ``texi2dvi``).
        '''
        rnwfile_base = os.path.basename(self.outprefix + '_summary.Rnw')
        latexfile_base = os.path.basename(self.outprefix + '_summary.tex')
        try:
            print('dev.off()', file=self.outrfh)
            # code in the R file that generates the PDF report
            print('Sweave("' + rnwfile_base + '");\nlibrary(tools);\n', file=self.outrfh)
            print('texi2dvi("' + latexfile_base + '",pdf=TRUE);\n', file=self.outrfh)
            print(self.outrnwstring, file=self.outrnwfh)
        finally:
            # close both handles even if one of the writes failed
            self.outrnwfh.close()
            self.outrfh.close()

    def WriteRTemplate(self):
        '''
        Emit the R code and the report section of one comparison.

        Reads self.cpindex, self.targetgene and self.cplabel, which must be
        set beforehand.  The filled-in R template is printed to the R file and
        also embedded in a new report section, inserted at the
        ``%__INDIVIDUAL_PAGE__`` marker (the marker is re-appended after the
        section so that later insertions follow it).
        '''
        # Placeholders are replaced with str.replace(): the inserted text is
        # arbitrary R/LaTeX and must not be interpreted as a regular-expression
        # replacement template (where "\c" of "\clearpage" is an invalid escape).
        indexchr = ','.join([str(x) for x in self.cpindex])
        logging.debug('Column index:' + indexchr)
        targetchr = 'c(' + ','.join(['"' + x + '"' for x in self.targetgene]) + ')'
        rcode = self.rtemplatestr
        rcode = rcode.replace('__INDEX_COLUMN__', indexchr)
        rcode = rcode.replace('__TARGET_GENE_LIST__', targetchr)
        rcode = rcode.replace('__SAMPLE_LABEL__', self.cplabel)

        # write to R file
        print(rcode, file=self.outrfh)

        # save to Rnw buffer; "_" is replaced because it is special in LaTeX text
        label = self.cplabel.replace('_', ' ')
        tableadstr = "gstable=read.table('" + os.path.basename(self.genesummaryfile) + "',header=T)"
        # NOTE(review): the chunk headers are reproduced verbatim as "<>=";
        # Sweave chunk headers normally read "<<options>>=" - confirm against
        # the original source.
        section = ''.join([
            '\n\\newpage\\section{Comparison results of ' + label + '}\n',
            '\nThe following figure shows the distribution of RRA score in the comparison '
            + label + ', and the RRA scores of ' + str(len(self.targetgene)) + ' genes.\n',
            '\n<>=\n',
            tableadstr + '\n@\n',
            '%\n\n\n',
            '<>=' + rcode + '@\n',
            '%%\n\\clearpage\n',
            '%__INDIVIDUAL_PAGE__\n\n',
        ])
        self.outrnwstring = self.outrnwstring.replace('%__INDIVIDUAL_PAGE__', section)

    def loadTopK(self, filename, k=10):
        '''
        Load the names of the first k genes of a file and plot them.

        The first line of the file is treated as a header; the gene name is
        the first whitespace-separated field of each following line.  Blank
        lines are ignored.  Always returns 0.
        '''
        self.targetgene = []
        with open(filename) as infile:
            next(infile, None)  # skip the header line
            for line in infile:
                if len(self.targetgene) >= k:
                    break
                field = line.strip().split()
                if not field:
                    continue
                self.targetgene.append(field[0])
        logging.info('Loading top ' + str(k) + ' genes from ' + filename + ': ' + ','.join(self.targetgene))
        self.WriteRTemplate()
        return 0

    def loadTopKWithExp(self, filename, nttab, sgrna2genelist, collabels, k=10):
        '''
        Plot the scores of the top k genes of a file and their individual sgRNA read counts.
        '''
        self.loadTopK(filename, k)
        self.loadGeneExp(self.targetgene, nttab, sgrna2genelist, collabels)

    def loadSelGeneWithExp(self, targetgene, nttab, sgrna2genelist, collabels, k=10):
        '''
        Plot the scores of the given genes and their individual sgRNA read counts.

        The parameter k is accepted for symmetry with loadTopKWithExp() but is not used.
        '''
        self.targetgene = targetgene
        self.WriteRTemplate()
        self.loadGeneExp(self.targetgene, nttab, sgrna2genelist, collabels)

    def loadSelGene(self, targetgene):
        '''
        Plot the scores of the given genes for the current comparison.
        '''
        self.targetgene = targetgene
        self.WriteRTemplate()

    def loadGeneExp(self, genelist, nttab, sgrna2genelist, collabels):
        '''
        Emit the per-gene plots of individual sgRNA read counts.

        genelist       : genes to plot, one plot per gene
        nttab          : {sgRNA id: sequence of read counts}
        sgrna2genelist : {sgRNA id: gene name}
        collabels      : sample labels, in the order of the read counts

        The R code of every gene is printed to the R file; in the report the
        plots are grouped four to a figure chunk (2 x 2 panels).
        '''
        # Group the count rows by gene in a single pass over the table instead
        # of rescanning both dictionaries for every gene.
        wanted = set(genelist)
        rows = {}
        for (sgid, counts) in nttab.items():
            gene = sgrna2genelist.get(sgid)
            if gene in wanted:
                rows.setdefault(gene, []).append(counts)

        clabstr = 'c(' + ','.join(['"' + x + '"' for x in collabels]) + ')'
        pieces = ['\\newpage\n' + 'The following figures show the distribution of sgRNA read counts (normalized) of selected genes in selected samples.\n']
        ntotal = len(genelist)
        for (npl, gene) in enumerate(genelist, 1):
            vstrlist = ['c(' + ','.join([str(vv) for vv in v]) + ')' for v in rows.get(gene, [])]
            valstring = 'list(' + ','.join(vstrlist) + ')'
            rtp = self.rtemplate_gene_str
            rtp = rtp.replace('__TARGET_GENE__', '"' + gene + '"')
            rtp = rtp.replace('__TARGET_MATRIX__', valstring)
            rtp = rtp.replace('__COL_LABEL__', clabstr)
            # save to R file
            print(rtp, file=self.outrfh)
            # save to Rnw buffer: open a chunk before plots 1, 5, 9, ...
            if npl % 4 == 1:
                pieces.append('%\n\n\n' + '<>=\n' + 'par(mfrow=c(2,2));\n')
            pieces.append(rtp)
            # ... and close it after every fourth plot and after the last one
            if npl % 4 == 0 or npl == ntotal:
                pieces.append('\npar(mfrow=c(1,1));\n' + '@\n')
        pieces.append('%__INDIVIDUAL_PAGE__\n\n')
        self.outrnwstring = self.outrnwstring.replace('%__INDIVIDUAL_PAGE__', ''.join(pieces))

    def getGeneSummaryStat(self, isplot=True):
        '''
        Collect the summary statistics of the gene summary file.

        The tab-separated header must hold 2 leading columns followed by 10
        columns per comparison; otherwise an error is logged and the program
        exits with status -1.  For every comparison two entries (negative and
        positive selection) are stored in comparisonlabel, ngenes, selection,
        nfdr1, nfdr5 and nfdr25.

        If isplot is true the statistics are also written to the report
        buffer, see writeGeneSummaryStatToBuffer().
        '''
        thresholds = ((self.nfdr1, 0.01), (self.nfdr5, 0.05), (self.nfdr25, 0.25))
        nlines = 0
        ncomparisons = 0
        # start from a fresh list: repeated calls must not accumulate labels
        self.comparisonlabel = []
        with open(self.genesummaryfile) as infile:
            for line in infile:
                nlines += 1
                field = line.strip().split('\t')
                if nlines == 1:
                    if len(field) % 10 != 2:
                        logging.error('Not enough field in gene summary file: ' + self.genesummaryfile)
                        sys.exit(-1)
                    ncomparisons = (len(field) - 2) // 10
                    # extract comparison labels from the first column of each comparison
                    for i in range(ncomparisons):
                        # NOTE(review): the dots of the pattern are unescaped and
                        # match any character; kept as is because the exact header
                        # format is defined elsewhere.
                        negstr = re.sub('.lo.neg', '', field[i * 10 + 2])
                        self.comparisonlabel += [negstr, negstr]
                    self.nfdr1 = [0] * 2 * ncomparisons
                    self.nfdr5 = [0] * 2 * ncomparisons
                    self.nfdr25 = [0] * 2 * ncomparisons
                    thresholds = ((self.nfdr1, 0.01), (self.nfdr5, 0.05), (self.nfdr25, 0.25))
                    continue
                for i in range(ncomparisons):
                    # FDR columns of comparison i: negative selection, then positive
                    for (slot, column) in ((2 * i, i * 10 + 4), (2 * i + 1, i * 10 + 9)):
                        fdr = float(field[column])
                        for (counter, cutoff) in thresholds:
                            if fdr < cutoff:
                                counter[slot] += 1
        self.ngenes = [nlines - 1] * 2 * ncomparisons
        self.selection = ['negative', 'positive'] * ncomparisons
        if isplot:
            self.writeGeneSummaryStatToBuffer()

    def writeGeneSummaryStatToBuffer(self):
        '''
        Write the statistics of the gene summary file to the report buffer.

        The ``#__GENE_SUMMARY_STAT__`` marker of the report is replaced by R
        assignments of comparisons, ngenes, direction, fdr1, fdr5 and fdr25.
        '''
        def rstrings(values):
            return 'c(' + ','.join(['"' + x + '"' for x in values]) + ');\n'

        def rnumbers(values):
            return 'c(' + ','.join([str(x) for x in values]) + ');\n'

        insertstr = ''.join([
            'comparisons=' + rstrings(self.comparisonlabel),
            'ngenes=' + rnumbers(self.ngenes),
            'direction=' + rstrings(self.selection),
            'fdr1=' + rnumbers(self.nfdr1),
            'fdr5=' + rnumbers(self.nfdr5),
            'fdr25=' + rnumbers(self.nfdr25),
        ])
        self.outrnwstring = self.outrnwstring.replace('#__GENE_SUMMARY_STAT__', insertstr)

    def generatePDF(self, keeptmp=False):
        '''
        Run the generated R script to build the PDF report.

        The script is run with Rscript from the directory of the output
        files.  Unless keeptmp is set, the intermediate
        ``<outprefix>_summary-*.pdf`` figure files are removed afterwards.
        '''
        summaryfile = self.outprefix + '_summary'
        # directory part of the output prefix; all outputs live there
        rnwfile_dir = os.path.dirname(self.outprefix + '_summary.Rnw')
        if rnwfile_dir == '':
            rnwfile_dir = './'
        # NOTE(review): the commands are assembled as shell strings, so an
        # output prefix containing spaces or shell metacharacters is not handled.
        systemcall('cd ' + rnwfile_dir + '; ' + 'Rscript ' + os.path.basename(self.outprefix + '.R'))
        # cleaning the intermediate figure PDFs
        if not keeptmp:
            systemcall('cd ' + rnwfile_dir + '; ' + 'rm -rf ' + os.path.basename(summaryfile) + '-*.pdf')
class VisualRCount:
    '''
    Generate the R driver script and the Sweave (.Rnw) report of the count command.

    The report text is buffered in ``outrnwstring`` and written to
    ``<outprefix>_countsummary.Rnw`` by closeRTemplate(); the R script
    ``<outprefix>_countsummary.R`` only contains the calls that turn the
    report into a PDF.

    File names and labels are inserted into the R code verbatim; they are not
    escaped for R.
    '''
    # Class-level values are defaults only.  __init__ gives every instance its
    # own lists so that statistics never leak between instances.
    outprefix = 'sample1'

    # internal variable, for the R file
    outrfh = None  # file handle of the R file

    # for the Rnw file
    rnwtemplatestr = ''
    outrnwfh = None
    outrnwstring = ''
    # statistics of the counts, one entry per fastq file
    fastqfile = []    # fastq files
    fastqlabels = []  # fastq labels
    reads = []        # read counts
    mappedreads = []  # mapped read counts
    zerocounts = []   # zero counts

    def __init__(self):
        '''
        Create per-instance copies of all list-valued attributes.
        '''
        self.fastqfile = []
        self.fastqlabels = []
        self.reads = []
        self.mappedreads = []
        self.zerocounts = []

    def startRTemplate(self):
        '''
        Load the bundled report template and open the two output files.

        Returns 0 on success, or -1 (after logging an error) when
        ``fastq_template.Rnw`` is missing from the directory of this module.
        '''
        filename_rnw = os.path.join(os.path.dirname(__file__), 'fastq_template.Rnw')
        if not os.path.isfile(filename_rnw):
            logging.error('Cannot find template file: ' + filename_rnw)
            return -1
        logging.info('Loading Rnw template file: ' + filename_rnw + '.')
        logging.debug('Setting up the visualization module...')

        # Read the template before creating any output, so that a read failure
        # cannot leave an open, never-closed output handle behind.
        with open(filename_rnw, 'r') as rtfile:
            self.rnwtemplatestr = rtfile.read()
        self.outrnwstring = self.rnwtemplatestr

        self.outrfh = open(self.outprefix + '_countsummary.R', 'w')
        self.outrnwfh = open(self.outprefix + '_countsummary.Rnw', 'w')
        return 0

    def closeRTemplate(self):
        '''
        Write the R driver code and the buffered report, then close both files.

        The R file receives the calls that turn the Rnw file into a PDF
        (``Sweave`` followed by ``texi2dvi``).
        '''
        rnwfile_base = os.path.basename(self.outprefix + '_countsummary.Rnw')
        latexfile_base = os.path.basename(self.outprefix + '_countsummary.tex')
        try:
            # code in the R file that generates the PDF report
            print('Sweave("' + rnwfile_base + '");\nlibrary(tools);\n', file=self.outrfh)
            print('texi2dvi("' + latexfile_base + '",pdf=TRUE);\n', file=self.outrfh)
            print(self.outrnwstring, file=self.outrnwfh)
        finally:
            # close both handles even if one of the writes failed
            self.outrnwfh.close()
            self.outrfh.close()

    def writeCountSummary(self):
        '''
        Write the count statistics to the report buffer.

        The ``#__COUNT_SUMMARY_STAT__`` marker of the report is replaced by R
        assignments of filelist, labellist, reads, mappedreads and zerocounts.
        '''
        def rstrings(values):
            return 'c(' + ','.join(['"' + x + '"' for x in values]) + ');\n'

        def rnumbers(values):
            return 'c(' + ','.join([str(x) for x in values]) + ');\n'

        insertstr = ''.join([
            'filelist=' + rstrings(self.fastqfile),
            'labellist=' + rstrings(self.fastqlabels),
            'reads=' + rnumbers(self.reads),
            'mappedreads=' + rnumbers(self.mappedreads),
            'zerocounts=' + rnumbers(self.zerocounts),
        ])
        # str.replace(): the inserted text must be taken literally, not as a
        # regular-expression replacement template (backslashes in file names).
        self.outrnwstring = self.outrnwstring.replace('#__COUNT_SUMMARY_STAT__', insertstr)

    def insertReadCountBoxPlot(self, filename):
        '''
        Insert a report section with the box plot of normalized read counts.

        filename is passed to the R function ``genboxplot``.  The section is
        inserted at the ``%__INDIVIDUAL_PAGE__`` marker, which is re-appended
        so that later sections follow it.
        '''
        # inserted R code
        rtp = '\ngenboxplot("' + filename + '");\n'
        # NOTE(review): the chunk header is reproduced verbatim as "<>=";
        # Sweave chunk headers normally read "<<options>>=" - confirm against
        # the original source.
        insertstr = ''.join([
            '\n\\newpage\\section{Normalized read count distribution of all samples}\n',
            'The following figure shows the distribution of median-normalized read counts in all samples.\n\n\n',
            '<>=' + rtp + '@\n',
            '\n%__INDIVIDUAL_PAGE__\n',
        ])
        self.outrnwstring = self.outrnwstring.replace('%__INDIVIDUAL_PAGE__', insertstr)

    def insertPCAPlot(self, filename):
        '''
        Insert a report section with the PCA plots.

        filename is passed to the R function ``genpcaplot``; a second chunk
        calls ``genpcavar()``.  The section is inserted at the
        ``%__INDIVIDUAL_PAGE__`` marker, which is re-appended so that later
        sections follow it.
        '''
        # inserted R code
        rtp = '\ngenpcaplot("' + filename + '");\n'
        rtp2 = '\ngenpcavar(' + ');\n'
        insertstr = ''.join([
            '\n\\newpage\\section{Principle Component Analysis}\n',
            'The following figure shows the first 2 principle components (PCs) from the Principle Component Analysis (PCA), and the percentage of variances explained by the top PCs.\n\n\n',
            '\n<>=' + rtp + '@\n',
            '\n<>=' + rtp2 + '@\n',
            '\n%__INDIVIDUAL_PAGE__\n',
        ])
        self.outrnwstring = self.outrnwstring.replace('%__INDIVIDUAL_PAGE__', insertstr)

    def generatePDF(self, keeptmp=False):
        '''
        Run the generated R script to build the PDF report.

        The script is run with Rscript from the directory of the output
        files.  Unless keeptmp is set, the intermediate
        ``<outprefix>_countsummary-*.pdf`` figure files are removed afterwards.
        '''
        summaryfile = self.outprefix + '_countsummary'
        # directory part of the output prefix; all outputs live there
        rnwfile_dir = os.path.dirname(self.outprefix + '_countsummary.Rnw')
        if rnwfile_dir == '':
            rnwfile_dir = './'
        # NOTE(review): the commands are assembled as shell strings, so an
        # output prefix containing spaces or shell metacharacters is not handled.
        systemcall('cd ' + rnwfile_dir + '; ' + 'Rscript ' + os.path.basename(self.outprefix + '_countsummary.R'))
        # cleaning the intermediate figure PDFs
        if not keeptmp:
            systemcall('cd ' + rnwfile_dir + '; ' + 'rm -rf ' + os.path.basename(summaryfile) + '-*.pdf')
used to 35 | # build any executable just by changing the definitions above and by 36 | # deleting dependencies appended to the file from 'make depend' 37 | # 38 | 39 | # all: $(MAIN1_APP) $(MAIN2_APP) 40 | all: $(MAIN1_APP) 41 | 42 | $(MAIN1_APP): $(API_OBJS) $(MAIN1_OBJS) 43 | $(CC) $(CFLAGS) $(INCLUDES) -o $(MAIN1_APP) $(API_OBJS) $(MAIN1_OBJS) -lm 44 | 45 | # $(MAIN2_APP): $(API_OBJS) $(MAIN2_OBJS) 46 | # $(CC) $(CFLAGS) $(INCLUDES) -o $(MAIN2_APP) $(API_OBJS) $(MAIN2_OBJS) -lm 47 | 48 | # this is a suffix replacement rule for building .o's from .c's 49 | # it uses automatic variables $<: the name of the prerequisite of 50 | # the rule(a .c file) and $@: the name of the target of the rule (a .o file) 51 | # (see the gnu make manual section about automatic variables) 52 | .c.o: 53 | $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ 54 | .cpp.o: 55 | $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ 56 | 57 | clean: 58 | $(RM) $(API_OBJS) $(MAIN1_OBJS) $(MAIN2_OBJS) $(MAIN1_APP) 59 | 60 | depend: $(SRCS) 61 | makedepend $(INCLUDES) $^ 62 | -------------------------------------------------------------------------------- /rra/include/math_api.h: -------------------------------------------------------------------------------- 1 | /* 2 | * math_api.h 3 | * RegulatorPrediction 4 | * 5 | * Created by Han Xu on 3/4/13. 6 | * Copyright 2013 Dana Farber Cancer Institute. All rights reserved. 
7 | * 8 | */ 9 | 10 | typedef struct 11 | { 12 | double value; 13 | int index; 14 | }INDEXED_FLOAT; 15 | 16 | //Quicksort an array in real values, in ascending order 17 | void QuicksortF(double *a, int lo, int hi); 18 | 19 | //Quicksort an indexed array, in ascending order 20 | void QuicksortIndexedArray(INDEXED_FLOAT *a, int lo, int hi); 21 | 22 | //BTreeSearchingF: Searching value in array, which was organized in ascending order previously 23 | int bTreeSearchingF(double value, double *a, int lo, int hi); 24 | 25 | //Rank the values in a float array and store the rank values in an integer array 26 | void Ranking(int *rank, double *values, int sampleNum); 27 | 28 | //Normal score transform 29 | int NormalTransform(double *destA, int *rank, int sampleNum); 30 | 31 | //Compute distance correlation. dim: number of samples; inputNum: number of variables in the input; input: the input array with inputNum*dim items; output: the output array 32 | double ComputeDistanceCorrelation(double *input, double *output, int inputNum, int dim); 33 | 34 | //Pearson correlation 35 | double PearsonCorrel(double *a, double *b, int dim); 36 | 37 | //Partial correlation 38 | double PartialCorrel(double *a, double *b, double *control, int dim); 39 | 40 | //Randomly permute an array of float values 41 | void PermuteFloatArrays(double *a, int size); 42 | 43 | //Compute CDF of a non-central beta distribution. 
When lambda is 0.0, this is the CDF of the beta distribution 44 | double BetaNoncentralCdf(double a, double b, double lambda, double x, double error_max); 45 | -------------------------------------------------------------------------------- /rra/include/rngs.h: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------------- 2 | * Name : rngs.h (header file for the library file rngs.c) 3 | * Author : Steve Park & Dave Geyer 4 | * Language : ANSI C 5 | * Latest Revision : 09-22-98 6 | * ----------------------------------------------------------------------- 7 | */ 8 | 9 | #if !defined( _RNGS_ ) 10 | #define _RNGS_ 11 | 12 | double Random(void); 13 | void PlantSeeds(long x); 14 | void GetSeed(long *x); 15 | void PutSeed(long x); 16 | void SelectStream(int index); 17 | void TestRandom(void); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /rra/include/rvgs.h: -------------------------------------------------------------------------------- 1 | /* ------------------------------------------------------------- 2 | * Name : rvgs.h (header file for the library rvgs.c) 3 | * Author : Steve Park & Dave Geyer 4 | * Language : ANSI C 5 | * Latest Revision : 11-03-96 6 | * -------------------------------------------------------------- 7 | */ 8 | 9 | #if !defined( _RVGS_ ) 10 | #define _RVGS_ 11 | 12 | long Bernoulli(double p); 13 | long Binomial(long n, double p); 14 | long Equilikely(long a, long b); 15 | long Geometric(double p); 16 | long Pascal(long n, double p); 17 | long Poisson(double m); 18 | 19 | double Uniform(double a, double b); 20 | double Exponential(double m); 21 | double Erlang(long n, double b); 22 | double Normal(double m, double s); 23 | double Lognormal(double a, double b); 24 | double Chisquare(long n); 25 | double Student(long n); 26 | 27 | #endif 28 | 29 | 
-------------------------------------------------------------------------------- /rra/include/words.h: -------------------------------------------------------------------------------- 1 | /* 2 | * words.h 3 | * Word manipulations 4 | * 5 | * Created by Han Xu on 10/12/12. 6 | * Copyright 2012 Dana Farber Cancer Institute. All rights reserved. 7 | * 8 | */ 9 | 10 | //Allocate a 2D array of characters. Returns the pointer to the array, or NULL on failure 11 | char **AllocWords(int wordNum, int wordLen); 12 | 13 | //Free 2d array of characters 14 | void FreeWords(char **ptr, int wordNum); 15 | 16 | //Extract words from a string. Words are separated by the delimiters. Returns the number of words extracted, or -1 on failure. 17 | //Example: wordNum = StringToWords(words, string, maxWordLen, maxWordNum, " \t\r\n\v\f"); 18 | int StringToWords(char **words, char *str, int maxWordLen, int maxWordNum, const char *delim); 19 | 20 | //Read a list of file names from a directory into the word structure. Returns the number of files read, or -1 on failure. 21 | //Only files whose names end with ext are read. If ext=NULL, all files are read.
22 | int DirToWords(char **words, char *dirName, int maxWordLen, int maxWordNum, const char *ext); 23 | 24 | -------------------------------------------------------------------------------- /rra/src/classdef.h: -------------------------------------------------------------------------------- 1 | #ifndef CLASSDEF_H 2 | #define CLASSDEF_H 3 | 4 | 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #define NDEBUG 11 | #include 12 | 13 | #define MAX_NAME_LEN 10000 //maximum length of item name, group name or list name 14 | #define CDF_MAX_ERROR 1E-10 //maximum error in Cumulative Distribution Function estimation in beta statistics 15 | #define MAX_GROUP_NUM 100000 //maximum number of groups 16 | #define MAX_LIST_NUM 1000 //maximum number of list 17 | #define RAND_PASS_NUM 100 //number of passes in random simulation for computing FDR 18 | 19 | #define MAX_WORD_NUM 1000 //maximum number of word 20 | 21 | //WL: define boolean variables 22 | //typedef int bool; 23 | //#define true 1 24 | //#define false 0 25 | 26 | 27 | typedef struct // item definition; i.e., sgRNA 28 | { 29 | char name[MAX_NAME_LEN]; //name of the item 30 | int listIndex; //index of list storing the item 31 | double value; //value of measurement 32 | double percentile; //percentile in the list 33 | double prob; //The probability of each sgRNA; added by Wei 34 | int isChosen; //whether this sgRNA should be considered in calculation 35 | } ITEM_STRUCT; 36 | 37 | typedef struct // group definition; i.e., gene 38 | { 39 | char name[MAX_NAME_LEN]; //name of the group 40 | ITEM_STRUCT *items; //items in the group 41 | int itemNum; //number of items in the group 42 | int maxItemNum; // max number of items 43 | double loValue; //lo-value in RRA 44 | double pvalue; //p value for permutation 45 | double fdr; //false discovery rate 46 | int isbad; //if the lovalue is too low (i.e., higher than the given percentile) 47 | int goodsgrnas; //sgRNAs with significant changes 48 | } GROUP_STRUCT; 49 | 50 | typedef struct 
//list definition; i.e., gene groups 51 | { 52 | char name[MAX_NAME_LEN]; //name of the list 53 | double *values; //values of items in the list, used for sorting 54 | int itemNum; //number of items in the list 55 | int maxItemNum; //max item number 56 | } LIST_STRUCT; 57 | 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /rra/src/fileio.cpp: -------------------------------------------------------------------------------- 1 | //C++ functions 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | using namespace std; 8 | 9 | #include "fileio.h" 10 | 11 | 12 | //split strings 13 | int stringSplit(string str, string delim, vector & v){ 14 | int start=0; 15 | int pos=str.find_first_of(delim,start); 16 | v.clear(); 17 | while(pos!=str.npos){ 18 | if(pos!=start) 19 | v.push_back(str.substr(start,pos-start)); 20 | start=pos+1; 21 | pos=str.find_first_of(delim,start); 22 | } 23 | if(start& groupNames, map& listNames){ 29 | // determine the group number and list number 30 | // if success, return 0; else, return -1; 31 | 32 | int i,k; 33 | //char **words, *tmpS, *tmpS2; 34 | int wordNum; 35 | int tmpGroupNum, tmpListNum; 36 | 37 | ifstream fh; 38 | fh.open(fileName); 39 | if(!fh.is_open()){ 40 | cerr<<"Error opening "< vwords; 46 | vector vsubwords; 47 | wordNum=stringSplit(oneline," \t\r\n\v",vwords); 48 | //Read the header row to get the sample number 49 | //fgets(tmpS, MAX_WORD_NUM*(MAX_NAME_LEN+1)*sizeof(char), fh); 50 | //wordNum = StringToWords(words, tmpS,MAX_WORD_NUM, MAX_NAME_LEN+1, " \t\r\n\v\f"); 51 | 52 | 53 | if (wordNum < 4 || wordNum > 6){ 54 | cerr<<"Error: incorrect input file format: [] [iscounted]\n"; 55 | fh.close(); 56 | return -1; 57 | } 58 | 59 | //read records of items 60 | 61 | tmpGroupNum = 0; 62 | tmpListNum = 0; 63 | 64 | //fgets(tmpS, MAX_WORD_NUM*(MAX_NAME_LEN+1)*sizeof(char), fh); 65 | //wordNum = StringToWords(words, tmpS, MAX_NAME_LEN+1, MAX_WORD_NUM, " \t\r\n\v\f"); 66 | 
getline(fh,oneline); 67 | wordNum=stringSplit(oneline," \t\r\f\v",vwords); 68 | 69 | 70 | //WL 71 | int subWordNum; 72 | subWordNum=0; 73 | 74 | while ((wordNum>=4 )&&(!fh.eof())) 75 | { 76 | 77 | // separate the group name by "," 78 | subWordNum=stringSplit(vwords[1],",",vsubwords); 79 | assert(subWordNum>0); 80 | 81 | for(k=0;k= maxGroupNum){ 91 | printf("Error: too many groups. maxGroupNum = %d\n", maxGroupNum); 92 | return -1; 93 | } 94 | } 95 | else{ 96 | i=groupNames[subwstr]; 97 | groups[i].itemNum++; 98 | } 99 | } 100 | 101 | //for (i=0;i= maxListNum){ 109 | printf("Error: too many lists. maxListNum = %d\n", maxListNum); 110 | return -1; 111 | } 112 | } 113 | else{ 114 | i=listNames[thisliststr]; 115 | lists[i].itemNum++; 116 | } 117 | 118 | 119 | getline(fh,oneline); 120 | wordNum=stringSplit(oneline," \t\r\f\v",vwords); 121 | //fgets(tmpS, MAX_WORD_NUM*(MAX_NAME_LEN+1)*sizeof(char), fh); 122 | //wordNum = StringToWords(words, tmpS, MAX_NAME_LEN+1, MAX_WORD_NUM, " \t\r\n\v\f"); 123 | } 124 | 125 | fh.close(); 126 | 127 | groupNum=tmpGroupNum; 128 | listNum=tmpListNum; 129 | 130 | return 0; 131 | } 132 | 133 | //Read input file. File Format: . 
Return 1 if success, -1 if failure 134 | int ReadFile(char *fileName, GROUP_STRUCT *groups, int maxGroupNum, int *groupNum, 135 | LIST_STRUCT *lists, int maxListNum, int *listNum) 136 | { 137 | //FILE *fh; 138 | int i,j,k; 139 | //char **words, *tmpS, *tmpS2; 140 | int wordNum=0; 141 | int totalItemNum=0; 142 | int tmpGroupNum=0, tmpListNum=0; 143 | //char tmpGroupName[MAX_NAME_LEN], tmpListName[MAX_NAME_LEN], tmpItemName[MAX_NAME_LEN]; 144 | double tmpValue=0; 145 | string oneline; 146 | 147 | //char **subwords; 148 | double sgrnaProbValue=0.0; 149 | int sgrnaChosen=1; 150 | 151 | ifstream fh; 152 | vector vwords; 153 | vector vsubwords; 154 | int subWordNum=0; 155 | 156 | map groupNames; 157 | map listNames; 158 | //assert(tmpS!=NULL && tmpS2!=NULL); 159 | if(getGroupListNum(fileName, groups, lists, maxGroupNum, maxListNum, tmpGroupNum, tmpListNum, groupNames, listNames)!=0){ 160 | return -1; 161 | } 162 | // construct the structure 163 | for (i=0;i=4)&&(!fh.eof())){ 195 | //strcpy(tmpListName, words[2]); 196 | tmpValue = atof(vwords[3].c_str()); 197 | //WL 198 | //parsing prob column, if available 199 | if (wordNum > 4){ 200 | sgrnaProbValue=atof(vwords[4].c_str()); 201 | }else{ 202 | sgrnaProbValue=1.0; 203 | } 204 | if (wordNum > 5){ 205 | sgrnaChosen=atoi(vwords[5].c_str()); 206 | if(sgrnaChosen==0) skippedsgrna+=1; 207 | }else{ 208 | sgrnaChosen=1; 209 | } 210 | 211 | 212 | 213 | //WL: now, search for corresponding list index 214 | //for (j=0;j0); 217 | j=listNames[thislistname]; 218 | assert(j0); 230 | //for(k=0;k0); 235 | i=groupNames[vsubwords[k]]; 236 | assert(i 275 | int SaveGroupInfo(char *fileName, GROUP_STRUCT *groups, int groupNum) 276 | { 277 | FILE *fh; 278 | int i; 279 | 280 | fh = (FILE *)fopen(fileName, "w"); 281 | 282 | if (!fh){ 283 | printf("Cannot open %s.\n", fileName); 284 | return -1; 285 | } 286 | 287 | fprintf(fh, "group_id\titems_in_group\tlo_value\tp\tFDR\tgoodsgrna\n"); 288 | 289 | for (i=0;i . 
Return 1 if success, -1 if failure 7 | int ReadFile(char *fileName, GROUP_STRUCT *groups, int maxGroupNum, int *groupNum, LIST_STRUCT *lists, int maxListNum, int *listNum); 8 | 9 | //Save group information to output file. Format 10 | int SaveGroupInfo(char *fileName, GROUP_STRUCT *groups, int groupNum); 11 | 12 | 13 | 14 | 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /rra/src/math_api.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * math_api.c 3 | * RegulatorPrediction 4 | * 5 | * Created by Han Xu on 3/4/13. 6 | * Copyright 2013 Dana Farber Cancer Institute. All rights reserved. 7 | * 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include "math_api.h" 14 | #include "rvgs.h" 15 | 16 | // normalInv: from Ziegler's code 17 | double normalInv(double p); 18 | 19 | //compute Euclidean distance 20 | double EucliDist(double *a, double *b, int dim); 21 | 22 | //Compute logarithm of Gamma function. 
flag=0, no error; flag=1, x<=0 23 | double LogGamma(double x, int *flag); 24 | 25 | //Compute incomplete beta function ratio 26 | double betain (double x, double p, double q, double beta, int *ifault); 27 | 28 | //BTreeSearchingF: Searching value in array, which was organized in ascending order previously 29 | int bTreeSearchingF(double value, double *a, int lo, int hi) 30 | { 31 | if (value<=a[lo]) 32 | { 33 | return lo; 34 | } 35 | 36 | if (value>=a[hi]) 37 | { 38 | return hi; 39 | } 40 | 41 | if (hi-lo<=1) 42 | { 43 | if (fabs(a[hi]-value)>fabs(a[lo]-value)) 44 | { 45 | return lo; 46 | } 47 | else 48 | { 49 | return hi; 50 | } 51 | } 52 | 53 | if (value>=a[(lo+hi)/2]) 54 | { 55 | return bTreeSearchingF(value, a, (lo+hi)/2, hi); 56 | } 57 | else 58 | { 59 | return bTreeSearchingF(value, a, lo, (lo+hi)/2); 60 | } 61 | } 62 | 63 | //Quicksort an array in real values, in ascending order 64 | void QuicksortF(double *a, int lo, int hi) 65 | { 66 | int i=lo, j=hi; 67 | double x=a[(lo+hi)/2]; 68 | double h; 69 | 70 | if (hix)&&(i<=j)) 83 | { 84 | j--; 85 | } 86 | if (i<=j) 87 | { 88 | h = a[i]; 89 | a[i] = a[j]; 90 | a[j] = h; 91 | i++; j--; 92 | } 93 | } 94 | 95 | // recursion 96 | if (lox)&&(i<=j)) 120 | { 121 | j--; 122 | } 123 | if (i<=j) 124 | { 125 | memcpy(&h, a+i, sizeof(INDEXED_FLOAT)); 126 | memcpy(a+i,a+j,sizeof(INDEXED_FLOAT)); 127 | memcpy(a+j, &h, sizeof(INDEXED_FLOAT)); 128 | i++; j--; 129 | } 130 | } 131 | 132 | // recursion 133 | if (lo=size)) 338 | { 339 | continue; 340 | } 341 | 342 | tmp = a[i]; 343 | a[i] = a[index]; 344 | a[index] = tmp; 345 | } 346 | } 347 | 348 | //Pearson correlation 349 | double PearsonCorrel(double *a, double *b, int dim) 350 | { 351 | int i; 352 | double mean1, mean2, sumAB, sumAA, sumBB; 353 | 354 | mean1 = 0; 355 | mean2 = 0; 356 | 357 | for (i=0;i 0 then x is the state 15 | * if x < 0 then the state is obtained from the system clock 16 | * if x = 0 then the state is to be supplied interactively. 
17 | * 18 | * The generator used in this library is a so-called 'Lehmer random number 19 | * generator' which returns a pseudo-random number uniformly distributed 20 | * between 0.0 and 1.0. The period is (m - 1) where m = 2,147,483,647 and the 21 | * smallest and largest possible values are (1 / m) and 1 - (1 / m) 22 | * respectively. For more details see: 23 | * 24 | * "Random Number Generators: Good Ones Are Hard To Find" 25 | * Steve Park and Keith Miller 26 | * Communications of the ACM, October 1988 27 | * 28 | * Name : rngs.c (Random Number Generation - Multiple Streams) 29 | * Authors : Steve Park & Dave Geyer 30 | * Language : ANSI C 31 | * Latest Revision : 09-22-98 32 | * ------------------------------------------------------------------------- 33 | */ 34 | 35 | #include 36 | #include 37 | #include "rngs.h" 38 | 39 | #define MODULUS 2147483647 /* DON'T CHANGE THIS VALUE */ 40 | #define MULTIPLIER 48271 /* DON'T CHANGE THIS VALUE */ 41 | #define CHECK 399268537 /* DON'T CHANGE THIS VALUE */ 42 | #define STREAMS 256 /* # of streams, DON'T CHANGE THIS VALUE */ 43 | #define A256 22925 /* jump multiplier, DON'T CHANGE THIS VALUE */ 44 | #define DEFAULT 123456789 /* initial seed, use 0 < DEFAULT < MODULUS */ 45 | 46 | static long seed[STREAMS] = {DEFAULT}; /* current state of each stream */ 47 | static int stream = 0; /* stream index, 0 is the default */ 48 | static int initialized = 0; /* test for stream initialization */ 49 | 50 | 51 | double Random(void) 52 | /* ---------------------------------------------------------------- 53 | * Random returns a pseudo-random real number uniformly distributed 54 | * between 0.0 and 1.0.
55 | * ---------------------------------------------------------------- 56 | */ 57 | { 58 | const long Q = MODULUS / MULTIPLIER; 59 | const long R = MODULUS % MULTIPLIER; 60 | long t; 61 | 62 | t = MULTIPLIER * (seed[stream] % Q) - R * (seed[stream] / Q); 63 | if (t > 0) 64 | seed[stream] = t; 65 | else 66 | seed[stream] = t + MODULUS; 67 | return ((double) seed[stream] / MODULUS); 68 | } 69 | 70 | 71 | void PlantSeeds(long x) 72 | /* --------------------------------------------------------------------- 73 | * Use this function to set the state of all the random number generator 74 | * streams by "planting" a sequence of states (seeds), one per stream, 75 | * with all states dictated by the state of the default stream. 76 | * The sequence of planted states is separated one from the next by 77 | * 8,367,782 calls to Random(). 78 | * --------------------------------------------------------------------- 79 | */ 80 | { 81 | const long Q = MODULUS / A256; 82 | const long R = MODULUS % A256; 83 | int j; 84 | int s; 85 | 86 | initialized = 1; 87 | s = stream; /* remember the current stream */ 88 | SelectStream(0); /* change to stream 0 */ 89 | PutSeed(x); /* set seed[0] */ 90 | stream = s; /* reset the current stream */ 91 | for (j = 1; j < STREAMS; j++) { 92 | x = A256 * (seed[j - 1] % Q) - R * (seed[j - 1] / Q); 93 | if (x > 0) 94 | seed[j] = x; 95 | else 96 | seed[j] = x + MODULUS; 97 | } 98 | } 99 | 100 | 101 | void PutSeed(long x) 102 | /* --------------------------------------------------------------- 103 | * Use this function to set the state of the current random number 104 | * generator stream according to the following conventions: 105 | * if x > 0 then x is the state (unless too large) 106 | * if x < 0 then the state is obtained from the system clock 107 | * if x = 0 then the state is to be supplied interactively 108 | * --------------------------------------------------------------- 109 | */ 110 | { 111 | char ok = 0; 112 | 113 | if (x > 0) 114 | x = x % 
MODULUS; /* correct if x is too large */ 115 | if (x < 0) 116 | x = ((unsigned long) time((time_t *) NULL)) % MODULUS; 117 | if (x == 0) 118 | while (!ok) { 119 | printf("\nEnter a positive integer seed (9 digits or less) >> "); 120 | scanf("%ld", &x); 121 | ok = (0 < x) && (x < MODULUS); 122 | if (!ok) 123 | printf("\nInput out of range ... try again\n"); 124 | } 125 | seed[stream] = x; 126 | } 127 | 128 | 129 | void GetSeed(long *x) 130 | /* --------------------------------------------------------------- 131 | * Use this function to get the state of the current random number 132 | * generator stream. 133 | * --------------------------------------------------------------- 134 | */ 135 | { 136 | *x = seed[stream]; 137 | } 138 | 139 | 140 | void SelectStream(int index) 141 | /* ------------------------------------------------------------------ 142 | * Use this function to set the current random number generator 143 | * stream -- that stream from which the next random number will come. 144 | * ------------------------------------------------------------------ 145 | */ 146 | { 147 | stream = ((unsigned int) index) % STREAMS; 148 | if ((initialized == 0) && (stream != 0)) /* protect against */ 149 | PlantSeeds(DEFAULT); /* un-initialized streams */ 150 | } 151 | 152 | 153 | void TestRandom(void) 154 | /* ------------------------------------------------------------------ 155 | * Use this (optional) function to test for a correct implementation. 
156 | * ------------------------------------------------------------------ 157 | */ 158 | { 159 | long i; 160 | long x; 161 | double u; 162 | char ok = 0; 163 | 164 | SelectStream(0); /* select the default stream */ 165 | PutSeed(1); /* and set the state to 1 */ 166 | for(i = 0; i < 10000; i++) 167 | u = Random(); 168 | GetSeed(&x); /* get the new state value */ 169 | ok = (x == CHECK); /* and check for correctness */ 170 | 171 | SelectStream(1); /* select stream 1 */ 172 | PlantSeeds(1); /* set the state of all streams */ 173 | GetSeed(&x); /* get the state of stream 1 */ 174 | ok = ok && (x == A256); /* x should be the jump multiplier */ 175 | if (ok) 176 | printf("\n The implementation of rngs.c is correct.\n\n"); 177 | else 178 | printf("\n\a ERROR -- the implementation of rngs.c is not correct.\n\n"); 179 | } 180 | -------------------------------------------------------------------------------- /rra/src/rvgs.cpp: -------------------------------------------------------------------------------- 1 | /* -------------------------------------------------------------------------- 2 | * This is an ANSI C library for generating random variates from six discrete 3 | * distributions 4 | * 5 | * Generator Range (x) Mean Variance 6 | * 7 | * Bernoulli(p) x = 0,1 p p*(1-p) 8 | * Binomial(n, p) x = 0,...,n n*p n*p*(1-p) 9 | * Equilikely(a, b) x = a,...,b (a+b)/2 ((b-a+1)*(b-a+1)-1)/12 10 | * Geometric(p) x = 0,... p/(1-p) p/((1-p)*(1-p)) 11 | * Pascal(n, p) x = 0,... n*p/(1-p) n*p/((1-p)*(1-p)) 12 | * Poisson(m) x = 0,... 
m m 13 | * 14 | * and seven continuous distributions 15 | * 16 | * Uniform(a, b) a < x < b (a + b)/2 (b - a)*(b - a)/12 17 | * Exponential(m) x > 0 m m*m 18 | * Erlang(n, b) x > 0 n*b n*b*b 19 | * Normal(m, s) all x m s*s 20 | * Lognormal(a, b) x > 0 see below 21 | * Chisquare(n) x > 0 n 2*n 22 | * Student(n) all x 0 (n > 1) n/(n - 2) (n > 2) 23 | * 24 | * For the a Lognormal(a, b) random variable, the mean and variance are 25 | * 26 | * mean = exp(a + 0.5*b*b) 27 | * variance = (exp(b*b) - 1) * exp(2*a + b*b) 28 | * 29 | * Name : rvgs.c (Random Variate GeneratorS) 30 | * Author : Steve Park & Dave Geyer 31 | * Language : ANSI C 32 | * Latest Revision : 10-28-98 33 | * -------------------------------------------------------------------------- 34 | */ 35 | 36 | #include 37 | #include "rngs.h" 38 | #include "rvgs.h" 39 | 40 | 41 | long Bernoulli(double p) 42 | /* ======================================================== 43 | * Returns 1 with probability p or 0 with probability 1 - p. 44 | * NOTE: use 0.0 < p < 1.0 45 | * ======================================================== 46 | */ 47 | { 48 | return ((Random() < (1.0 - p)) ? 0 : 1); 49 | } 50 | 51 | long Binomial(long n, double p) 52 | /* ================================================================ 53 | * Returns a binomial distributed integer between 0 and n inclusive. 54 | * NOTE: use n > 0 and 0.0 < p < 1.0 55 | * ================================================================ 56 | */ 57 | { 58 | long i, x = 0; 59 | 60 | for (i = 0; i < n; i++) 61 | x += Bernoulli(p); 62 | return (x); 63 | } 64 | 65 | long Equilikely(long a, long b) 66 | /* =================================================================== 67 | * Returns an equilikely distributed integer between a and b inclusive. 
68 | * NOTE: use a < b 69 | * =================================================================== 70 | */ 71 | { 72 | return (a + (long) ((b - a + 1) * Random())); 73 | } 74 | 75 | long Geometric(double p) 76 | /* ==================================================== 77 | * Returns a geometric distributed non-negative integer. 78 | * NOTE: use 0.0 < p < 1.0 79 | * ==================================================== 80 | */ 81 | { 82 | return ((long) (log(1.0 - Random()) / log(p))); 83 | } 84 | 85 | long Pascal(long n, double p) 86 | /* ================================================= 87 | * Returns a Pascal distributed non-negative integer. 88 | * NOTE: use n > 0 and 0.0 < p < 1.0 89 | * ================================================= 90 | */ 91 | { 92 | long i, x = 0; 93 | 94 | for (i = 0; i < n; i++) 95 | x += Geometric(p); 96 | return (x); 97 | } 98 | 99 | long Poisson(double m) 100 | /* ================================================== 101 | * Returns a Poisson distributed non-negative integer. 102 | * NOTE: use m > 0 103 | * ================================================== 104 | */ 105 | { 106 | double t = 0.0; 107 | long x = 0; 108 | 109 | while (t < m) { 110 | t += Exponential(1.0); 111 | x++; 112 | } 113 | return (x - 1); 114 | } 115 | 116 | double Uniform(double a, double b) 117 | /* =========================================================== 118 | * Returns a uniformly distributed real number between a and b. 119 | * NOTE: use a < b 120 | * =========================================================== 121 | */ 122 | { 123 | return (a + (b - a) * Random()); 124 | } 125 | 126 | double Exponential(double m) 127 | /* ========================================================= 128 | * Returns an exponentially distributed positive real number. 
129 | * NOTE: use m > 0.0 130 | * ========================================================= 131 | */ 132 | { 133 | return (-m * log(1.0 - Random())); 134 | } 135 | 136 | double Erlang(long n, double b) 137 | /* ================================================== 138 | * Returns an Erlang distributed positive real number. 139 | * NOTE: use n > 0 and b > 0.0 140 | * ================================================== 141 | */ 142 | { 143 | long i; 144 | double x = 0.0; 145 | 146 | for (i = 0; i < n; i++) 147 | x += Exponential(b); 148 | return (x); 149 | } 150 | 151 | double Normal(double m, double s) 152 | /* ======================================================================== 153 | * Returns a normal (Gaussian) distributed real number. 154 | * NOTE: use s > 0.0 155 | * 156 | * Uses a very accurate approximation of the normal idf due to Odeh & Evans, 157 | * J. Applied Statistics, 1974, vol 23, pp 96-97. 158 | * ======================================================================== 159 | */ 160 | { 161 | const double p0 = 0.322232431088; const double q0 = 0.099348462606; 162 | const double p1 = 1.0; const double q1 = 0.588581570495; 163 | const double p2 = 0.342242088547; const double q2 = 0.531103462366; 164 | const double p3 = 0.204231210245e-1; const double q3 = 0.103537752850; 165 | const double p4 = 0.453642210148e-4; const double q4 = 0.385607006340e-2; 166 | double u, t, p, q, z; 167 | 168 | u = Random(); 169 | if (u < 0.5) 170 | t = sqrt(-2.0 * log(u)); 171 | else 172 | t = sqrt(-2.0 * log(1.0 - u)); 173 | p = p0 + t * (p1 + t * (p2 + t * (p3 + t * p4))); 174 | q = q0 + t * (q1 + t * (q2 + t * (q3 + t * q4))); 175 | if (u < 0.5) 176 | z = (p / q) - t; 177 | else 178 | z = t - (p / q); 179 | return (m + s * z); 180 | } 181 | 182 | double Lognormal(double a, double b) 183 | /* ==================================================== 184 | * Returns a lognormal distributed positive real number. 
185 | * NOTE: use b > 0.0 186 | * ==================================================== 187 | */ 188 | { 189 | return (exp(a + b * Normal(0.0, 1.0))); 190 | } 191 | 192 | double Chisquare(long n) 193 | /* ===================================================== 194 | * Returns a chi-square distributed positive real number. 195 | * NOTE: use n > 0 196 | * ===================================================== 197 | */ 198 | { 199 | long i; 200 | double z, x = 0.0; 201 | 202 | for (i = 0; i < n; i++) { 203 | z = Normal(0.0, 1.0); 204 | x += z * z; 205 | } 206 | return (x); 207 | } 208 | 209 | double Student(long n) 210 | /* =========================================== 211 | * Returns a student-t distributed real number. 212 | * NOTE: use n > 0 213 | * =========================================== 214 | */ 215 | { 216 | return (Normal(0.0, 1.0) / sqrt(Chisquare(n) / n)); 217 | } 218 | 219 | -------------------------------------------------------------------------------- /rra/src/tags: -------------------------------------------------------------------------------- 1 | !_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/ 2 | !_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/ 3 | !_TAG_PROGRAM_AUTHOR Darren Hiebert /dhiebert@users.sourceforge.net/ 4 | !_TAG_PROGRAM_NAME Exuberant Ctags // 5 | !_TAG_PROGRAM_URL http://ctags.sourceforge.net /official site/ 6 | !_TAG_PROGRAM_VERSION 5.8 // 7 | A256 rngs.cpp 43;" d file: 8 | AdjustMR CrisprNorm.c /^int AdjustMR(ITEM_STRUCT *items, int itemNum, int winSize)$/;" f 9 | AllocWords words.cpp /^char **AllocWords(int wordNum, int wordLen)$/;" f 10 | Bernoulli rvgs.cpp /^ long Bernoulli(double p)$/;" f 11 | BetaNoncentralCdf math_api.cpp /^double BetaNoncentralCdf ( double a, double b, double lambda, double x, double error_max )$/;" f 12 | Binomial rvgs.cpp /^ long Binomial(long n, double p)$/;" f 13 | CDF_MAX_ERROR RRA.cpp 31;" d file: 14 | CHECK rngs.cpp 41;" d file: 15 | Chisquare rvgs.cpp /^ double 
Chisquare(long n)$/;" f 16 | ComputeDistanceCorrelation math_api.cpp /^double ComputeDistanceCorrelation(double *input, double *output, int inputNum, int dim)$/;" f 17 | ComputeFDR RRA.cpp /^int ComputeFDR(GROUP_STRUCT *groups, int groupNum, double maxPercentile, int numOfRandPass)$/;" f 18 | ComputeLoValue RRA.cpp /^int ComputeLoValue(double *percentiles, \/\/array of percentiles$/;" f 19 | ComputeLoValue_Prob RRA.cpp /^int ComputeLoValue_Prob(double *percentiles, \/\/array of percentiles$/;" f 20 | ComputeMR CrisprNorm.c /^int ComputeMR(ITEM_STRUCT *items, int itemNum)$/;" f 21 | DEFAULT rngs.cpp 44;" d file: 22 | DirToWords words.cpp /^int DirToWords(char **words, char *dirName, int maxWordLen, int maxWordNum, const char *ext)$/;" f 23 | Equilikely rvgs.cpp /^ long Equilikely(long a, long b)$/;" f 24 | Erlang rvgs.cpp /^ double Erlang(long n, double b)$/;" f 25 | EucliDist math_api.cpp /^double EucliDist(double *a, double *b, int dim)$/;" f 26 | Exponential rvgs.cpp /^ double Exponential(double m)$/;" f 27 | FreeWords words.cpp /^void FreeWords(char **ptr, int wordNum)$/;" f 28 | GROUP_STRUCT RRA.cpp /^} GROUP_STRUCT;$/;" t typeref:struct:__anon3 file: 29 | Geometric rvgs.cpp /^ long Geometric(double p)$/;" f 30 | GetSeed rngs.cpp /^ void GetSeed(long *x)$/;" f 31 | ITEM_STRUCT CrisprNorm.c /^} ITEM_STRUCT;$/;" t typeref:struct:__anon1 file: 32 | ITEM_STRUCT RRA.cpp /^} ITEM_STRUCT;$/;" t typeref:struct:__anon2 file: 33 | LIST_STRUCT RRA.cpp /^} LIST_STRUCT;$/;" t typeref:struct:__anon4 file: 34 | LogGamma math_api.cpp /^double LogGamma(double x, int *flag)$/;" f 35 | Lognormal rvgs.cpp /^ double Lognormal(double a, double b)$/;" f 36 | MAX_GROUP_NUM RRA.cpp 32;" d file: 37 | MAX_LIST_NUM RRA.cpp 33;" d file: 38 | MAX_NAME_LEN CrisprNorm.c 22;" d file: 39 | MAX_NAME_LEN RRA.cpp 30;" d file: 40 | MAX_WORD_IN_LINE CrisprNorm.c 23;" d file: 41 | MAX_WORD_NUM RRA.cpp 36;" d file: 42 | MODULUS rngs.cpp 39;" d file: 43 | MULTIPLIER rngs.cpp 40;" d file: 44 | NDEBUG 
CrisprNorm.c 14;" d file: 45 | NDEBUG RRA.cpp 16;" d file: 46 | Normal rvgs.cpp /^ double Normal(double m, double s)$/;" f 47 | NormalTransform math_api.cpp /^int NormalTransform(double *destA, int *rank, int sampleNum)$/;" f 48 | PRINT_DEBUG RRA.cpp /^int PRINT_DEBUG=0;$/;" v 49 | PartialCorrel math_api.cpp /^double PartialCorrel(double *a, double *b, double *control, int dim)$/;" f 50 | Pascal rvgs.cpp /^ long Pascal(long n, double p)$/;" f 51 | PearsonCorrel math_api.cpp /^double PearsonCorrel(double *a, double *b, int dim)$/;" f 52 | PermuteFloatArrays math_api.cpp /^void PermuteFloatArrays(double *a, int size)$/;" f 53 | PlantSeeds rngs.cpp /^ void PlantSeeds(long x)$/;" f 54 | Poisson rvgs.cpp /^ long Poisson(double m)$/;" f 55 | PrintCommandUsage CrisprNorm.c /^void PrintCommandUsage(const char *command)$/;" f 56 | PrintCommandUsage RRA.cpp /^void PrintCommandUsage(const char *command)$/;" f 57 | ProcessGroups RRA.cpp /^int ProcessGroups(GROUP_STRUCT *groups, int groupNum, LIST_STRUCT *lists, int listNum, double maxPercentile)$/;" f 58 | PutSeed rngs.cpp /^ void PutSeed(long x)$/;" f 59 | QuickSortGroupByLoValue RRA.cpp /^void QuickSortGroupByLoValue(GROUP_STRUCT *groups, int lo, int hi)$/;" f 60 | QuickSortItemByM CrisprNorm.c /^void QuickSortItemByM(ITEM_STRUCT *items, int lo, int hi)$/;" f 61 | QuicksortF math_api.cpp /^void QuicksortF(double *a, int lo, int hi)$/;" f 62 | QuicksortIndexedArray math_api.cpp /^void QuicksortIndexedArray(INDEXED_FLOAT *a, int lo, int hi)$/;" f 63 | RAND_PASS_NUM RRA.cpp 34;" d file: 64 | Random rngs.cpp /^ double Random(void)$/;" f 65 | Ranking math_api.cpp /^void Ranking(int *rank, double *values, int sampleNum)$/;" f 66 | ReadFile CrisprNorm.c /^int ReadFile(char *fileName, ITEM_STRUCT **pItems)$/;" f 67 | ReadFile RRA.cpp /^int ReadFile(char *fileName, GROUP_STRUCT *groups, int maxGroupNum, int *groupNum, LIST_STRUCT *lists, int maxListNum, int *listNum)$/;" f 68 | STREAMS rngs.cpp 42;" d file: 69 | SaveGroupInfo RRA.cpp 
/^int SaveGroupInfo(char *fileName, GROUP_STRUCT *groups, int groupNum)$/;" f 70 | SaveToOuput CrisprNorm.c /^int SaveToOuput(char *fileName, ITEM_STRUCT *items, int itemNum)$/;" f 71 | SelectStream rngs.cpp /^ void SelectStream(int index)$/;" f 72 | StringToWords words.cpp /^int StringToWords(char **words, char *str, int maxWordLen, int maxWordNum, const char *delim)$/;" f 73 | Student rvgs.cpp /^ double Student(long n)$/;" f 74 | TestRandom rngs.cpp /^ void TestRandom(void)$/;" f 75 | Uniform rvgs.cpp /^ double Uniform(double a, double b)$/;" f 76 | adjustedR CrisprNorm.c /^ double adjustedR; \/\/adjusted log-ratio$/;" m struct:__anon1 file: 77 | bTreeSearchingF math_api.cpp /^int bTreeSearchingF(double value, double *a, int lo, int hi)$/;" f 78 | betain math_api.cpp /^double betain ( double x, double p, double q, double beta, int *ifault )$/;" f 79 | fdr RRA.cpp /^ double fdr; \/\/false discovery rate$/;" m struct:__anon3 file: 80 | geneName CrisprNorm.c /^ char geneName[MAX_NAME_LEN]; \/\/name of the gene$/;" m struct:__anon1 file: 81 | initialized rngs.cpp /^static int initialized = 0; \/* test for stream initialization *\/$/;" v file: 82 | itemNum RRA.cpp /^ int itemNum; \/\/number of items in the group$/;" m struct:__anon3 file: 83 | itemNum RRA.cpp /^ int itemNum; \/\/number of items in the list$/;" m struct:__anon4 file: 84 | items RRA.cpp /^ ITEM_STRUCT *items; \/\/items in the group$/;" m struct:__anon3 file: 85 | listIndex RRA.cpp /^ int listIndex; \/\/index of list storing the item$/;" m struct:__anon2 file: 86 | loValue RRA.cpp /^ double loValue; \/\/lo-value in RRA$/;" m struct:__anon3 file: 87 | m CrisprNorm.c /^ double m; \/\/log-mean$/;" m struct:__anon1 file: 88 | main CrisprNorm.c /^int main (int argc, const char * argv[]) $/;" f 89 | main RRA.cpp /^int main (int argc, const char * argv[]) $/;" f 90 | maxItemNum RRA.cpp /^ int maxItemNum; \/\/ max number of items$/;" m struct:__anon3 file: 91 | maxItemNum RRA.cpp /^ int maxItemNum; \/\/max item 
number$/;" m struct:__anon4 file: 92 | name RRA.cpp /^ char name[MAX_NAME_LEN]; \/\/name of the group$/;" m struct:__anon3 file: 93 | name RRA.cpp /^ char name[MAX_NAME_LEN]; \/\/name of the item$/;" m struct:__anon2 file: 94 | name RRA.cpp /^ char name[MAX_NAME_LEN]; \/\/name of the list$/;" m struct:__anon4 file: 95 | normalInv math_api.cpp /^double normalInv(double p)$/;" f 96 | percentile RRA.cpp /^ double percentile; \/\/percentile in the list$/;" m struct:__anon2 file: 97 | prob RRA.cpp /^ double prob; \/\/The probability of each sgRNA; added by Wei$/;" m struct:__anon2 file: 98 | r CrisprNorm.c /^ double r; \/\/log-ratio$/;" m struct:__anon1 file: 99 | seed rngs.cpp /^static long seed[STREAMS] = {DEFAULT}; \/* current state of each stream *\/$/;" v file: 100 | sgName CrisprNorm.c /^ char sgName[MAX_NAME_LEN]; \/\/name of the sgRNA$/;" m struct:__anon1 file: 101 | stream rngs.cpp /^static int stream = 0; \/* stream index, 0 is the default *\/$/;" v file: 102 | stringSplit RRA.cpp /^int stringSplit(string str, string delim, vector & v){$/;" f 103 | value RRA.cpp /^ double value; \/\/value of measurement$/;" m struct:__anon2 file: 104 | values RRA.cpp /^ double *values; \/\/values of items in the list, used for sorting$/;" m struct:__anon4 file: 105 | x1 CrisprNorm.c /^ double x1; \/\/value of first measure$/;" m struct:__anon1 file: 106 | x2 CrisprNorm.c /^ double x2; \/\/value of second measure$/;" m struct:__anon1 file: 107 | -------------------------------------------------------------------------------- /rra/src/words.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * words.c 3 | * Word manipulations 4 | * 5 | * Created by Han Xu on 10/12/12. 6 | * Copyright 2012 Dana Farber Cancer Institute. All rights reserved. 7 | * 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include "words.h" 14 | #include "stdlib.h" 15 | 16 | //allocate 2d array of characters. 
Return the pointer to the array, and NULL if failure 17 | char **AllocWords(int wordNum, int wordLen) 18 | { 19 | char **ptr; 20 | int i; 21 | 22 | ptr = (char **)malloc(wordNum*sizeof(char *)); 23 | 24 | if (!ptr) 25 | { 26 | return NULL; 27 | } 28 | 29 | for (i=0;i=maxWordLen) 71 | { 72 | free(tmpStr); 73 | return -1; 74 | } 75 | 76 | strcpy(words[wordNum], pch); 77 | wordNum++; 78 | 79 | if (wordNum >=maxWordNum) 80 | { 81 | break; 82 | } 83 | pch = strtok(NULL, delim); 84 | } 85 | 86 | free(tmpStr); 87 | 88 | return wordNum; 89 | } 90 | 91 | //Read a list of file names from a directory to word structure. Return the number of files read. Return -1 if failure. 92 | //Read files end with ext. If ext=NULL, read all files. 93 | int DirToWords(char **words, char *dirName, int maxWordLen, int maxWordNum, const char *ext) 94 | { 95 | DIR *dp; 96 | struct dirent *ep; 97 | int count = 0; 98 | 99 | dp = opendir(dirName); 100 | 101 | if (dp==NULL) 102 | { 103 | return -1; 104 | } 105 | 106 | while (ep = readdir(dp)) 107 | { 108 | if ((ext != NULL)&&(strcmp(ext, ep->d_name+strlen(ep->d_name)-strlen(ext)))) 109 | { 110 | continue; 111 | } 112 | 113 | assert(strlen(ep->d_name)<=maxWordLen); 114 | assert(countd_name)>maxWordLen) 117 | { 118 | continue; 119 | } 120 | 121 | if (count>=maxWordNum) 122 | { 123 | break; 124 | } 125 | 126 | strcpy(words[count], ep->d_name); 127 | count++; 128 | } 129 | 130 | closedir(dp); 131 | 132 | return count; 133 | } 134 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | ''' 3 | MAGeCK set up script 4 | ''' 5 | 6 | 7 | from __future__ import print_function; 8 | 9 | import os 10 | import sys 11 | from distutils.core import setup, Extension 12 | from subprocess import call as subpcall 13 | from distutils.command.install import install as DistutilsInstall 14 | 15 | def compile_rra(): 16 | # 17 | 
os.chdir('rra'); 18 | subpcall('make',shell=True); 19 | rev=subpcall('../bin/RRA',shell=True); 20 | os.chdir('../'); 21 | return rev; 22 | 23 | 24 | class RRAInstall(DistutilsInstall): 25 | def run(self): 26 | # compile RRA 27 | if(compile_rra()!=0): 28 | print("CRITICAL: error compiling the RRA source code. Please check your c compilation environment.",file=sys.stderr); 29 | sys.exit(1); 30 | DistutilsInstall.run(self) 31 | 32 | 33 | 34 | def main(): 35 | # check python version 36 | if float(sys.version[:3])<2.7 or float(sys.version[:3])>=2.8: 37 | sys.stderr.write("CRITICAL: Python version must be 2.7!\n") 38 | sys.exit(1); 39 | 40 | setup(name='mageck', 41 | version='0.5.0', 42 | description='Model-based Analysis of Genome-wide CRISPR-Cas9 Knockout', 43 | author='Wei Li, Han Xu', 44 | author_email='li.david.wei@gmail.com', 45 | url='http://mageck.sourceforge.net', 46 | packages=['mageck'], 47 | scripts=['bin/RRA','bin/mageck'], 48 | package_dir={'mageck':'mageck'}, 49 | cmdclass={'install':RRAInstall}, 50 | package_data={'mageck':['*.Rnw','*.RTemplate']} 51 | #package_data={'mageck':['mageck/Makefile','mageck/src/*.c','include/*','utils/*']} 52 | #data_files=[('',['Makefile','src/*.c','include/*','utils/*'])] 53 | ); 54 | 55 | 56 | if __name__ == '__main__': 57 | main(); 58 | 59 | 60 | --------------------------------------------------------------------------------