├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── batch ├── run.sh ├── run_combined.py └── run_combined.sh ├── bin ├── lisa ├── lisa_baseline ├── lisa_bw2hdf ├── lisa_combine_ranks ├── lisa_list_data.py ├── lisa_model ├── lisa_postmodel_background_selection ├── lisa_predict_tfbs ├── lisa_premodel_background_selection ├── lisa_rank_tfs ├── lisa_show_ranks └── lisa_update_conf ├── conda.recipe ├── build.sh └── meta.yaml ├── demo ├── AR.symbol ├── lisa_results_meta_table_human_with_gene_sets.xls ├── lisa_results_meta_table_mouse_with_gene_sets.xls └── run.sh ├── environment.yml ├── lisa ├── __init__.py ├── data.py ├── dependent_data.txt ├── lisa.ini ├── lisa.ini.bak ├── lisa.ini.latest ├── lisa.ini.latest.bak ├── lisa.ini.old ├── lisa.ini.updated ├── model.py ├── mouse.tfs ├── rank.py ├── regpotential │ ├── __init__.py │ ├── aliType.c │ ├── aliType.h │ ├── asParse.c │ ├── asParse.h │ ├── bPlusTree.c │ ├── bPlusTree.h │ ├── base64.c │ ├── base64.h │ ├── basicBed.c │ ├── basicBed.h │ ├── bbiFile.h │ ├── bbiRead.c │ ├── bbiWrite.c │ ├── bigBed.h │ ├── bigBedSummary.c │ ├── bigWig.h │ ├── bigWigRegPotential.c │ ├── bigWigSummary.c │ ├── binRange.c │ ├── binRange.h │ ├── bits.c │ ├── bits.h │ ├── bwgInternal.h │ ├── bwgQuery.c │ ├── bwgValsOnChrom.c │ ├── cheapcgi.c │ ├── cheapcgi.h │ ├── cirTree.c │ ├── cirTree.h │ ├── colHash.c │ ├── colHash.h │ ├── common.c │ ├── common.h │ ├── dlist.c │ ├── dlist.h │ ├── dnaseq.c │ ├── dnaseq.h │ ├── dnautil.c │ ├── dnautil.h │ ├── dystring.c │ ├── dystring.h │ ├── errAbort.c │ ├── errAbort.h │ ├── ffAli.c │ ├── ffAliHelp.c │ ├── ffScore.c │ ├── fuzzyFind.c │ ├── fuzzyFind.h │ ├── gfxPoly.c │ ├── gfxPoly.h │ ├── hash.c │ ├── hash.h │ ├── hex.c │ ├── hex.h │ ├── hmmstats.c │ ├── hmmstats.h │ ├── htmshell.h │ ├── https.c │ ├── https.h │ ├── intExp.c │ ├── internet.c │ ├── internet.h │ ├── kxTok.c │ ├── kxTok.h │ ├── linefile.c │ ├── linefile.h │ ├── localmem.c │ ├── localmem.h │ ├── makefile │ ├── makefile.1 │ ├── memalloc.c │ ├── memalloc.h │ ├── mime.c │ ├── mime.h │ ├── net.c │ ├── net.h │ ├── obscure.c │ ├── obscure.h │ ├── options.c │ ├── options.h │ ├── osunix.c │ ├── pipeline.c │ ├── pipeline.h │ ├── portable.h │ ├── portimpl.c │ ├── portimpl.h │ ├── psl.c │ ├── psl.h │ ├── pybw.c │ ├── pybw.h │ ├── rangeTree.c │ ├── rangeTree.h │ ├── rbTree.c │ ├── rbTree.h │ ├── servBrcMcw.c │ ├── servCrunx.c │ ├── servcis.c │ ├── servcl.c │ ├── servmsII.c │ ├── servpws.c │ ├── sig.h │ ├── sqlList.c │ ├── sqlList.h │ ├── sqlNum.c │ ├── sqlNum.h │ ├── tokenizer.c │ ├── tokenizer.h │ ├── udc.c │ ├── udc.h │ ├── vGfx.c │ ├── vGfx.h │ ├── vGfxPrivate.h │ ├── verbose.c │ ├── verbose.h │ ├── wildcmp.c │ ├── zlibFace.c │ └── zlibFace.h ├── rules │ ├── __init__.py │ ├── background_selection.rule │ ├── baseline.rule │ ├── combine_chipseq.rule │ ├── combine_motif.rule │ ├── entropy.rule │ ├── fastq.rule │ ├── hdf5.rule │ ├── knockout.rule │ ├── lisa_direct.rule │ └── model.rule ├── utils.py └── workflows │ ├── Snakefile │ ├── __init__.py │ ├── cluster.json │ ├── sbatch.sh │ ├── sbatch_dependency.py │ └── sbatch_dependency.sh ├── lisa_docs ├── Makefile └── source │ ├── FAQ.rst │ ├── Installation.rst │ ├── Tutorial.rst │ ├── _static │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── 4.png │ ├── 5.png │ ├── 6.png │ ├── 7.png │ ├── 8.png │ └── 9.png │ ├── conf.py │ └── index.rst ├── lisa_web ├── generate_heatmap_js.py ├── lisa_scatter.py ├── lisa_web.conf ├── lisa_web.wsgi ├── lisa_web │ ├── __init__.py │ ├── __init__.pyc │ ├── __init__.py~ │ ├── check_genename.py │ ├── 
combined_gallery_multiple_display.html │ ├── combined_gallery_multiple_display_mm.html │ ├── fonts │ │ ├── FontAwesome.otf │ │ ├── fontawesome-webfont.eot │ │ ├── fontawesome-webfont.svg │ │ ├── fontawesome-webfont.ttf │ │ ├── fontawesome-webfont.woff │ │ └── fontawesome-webfont.woff2 │ ├── form.py │ ├── form.pyc │ ├── gallery_multiple_display.html │ ├── gallery_template.html │ ├── generate_combined_gallery.py │ ├── generate_combined_gallery_mm.py │ ├── generate_gallery2.py │ ├── images │ │ ├── 1.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── 4.png │ │ ├── 5.png │ │ ├── 6.png │ │ ├── 7.png │ │ └── 8.png │ ├── mail.py │ ├── new_gallery.html │ ├── new_gallery_mm.html │ ├── run.sh │ ├── static │ │ ├── 1.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── 4.png │ │ ├── 5.png │ │ ├── 6.png │ │ ├── 7.png │ │ ├── 8.png │ │ ├── Enrichrgram.js │ │ ├── Figure1.png │ │ ├── Figure2.png │ │ ├── Figure3.png │ │ ├── Figure4.png │ │ ├── Figure5.png │ │ ├── Figure6.png │ │ ├── Figure7.png │ │ ├── Figure8.png │ │ ├── MACRO_ape_all_cistrome_pwm.sh │ │ ├── clustergrammer.js │ │ ├── clustergrammer.min.js │ │ ├── clustergrammer.node.js │ │ ├── clustergrammer.node.min.js │ │ ├── combined_lisa2_static.js │ │ ├── custom.css │ │ ├── d3.js │ │ ├── d3.v4.min.js │ │ ├── display.html │ │ ├── font-awesome.min.css │ │ ├── gallery.js │ │ ├── gallery.js~ │ │ ├── hzome_functions.js │ │ ├── jquery-1.12.4.js │ │ ├── jquery-3.2.1.min.js │ │ ├── lisa.css │ │ ├── lisa.css~ │ │ ├── lisa.jpg │ │ ├── lisa.js │ │ ├── lisa2.css │ │ ├── lisa2.js │ │ ├── lisa2_static.js │ │ ├── load_clustergram.js │ │ ├── multiple_display.html │ │ ├── plot.R │ │ ├── popper.min.js │ │ ├── popper.min.js.map │ │ ├── run.sh │ │ ├── send_to_Enrichr.js │ │ ├── seqpos.R │ │ └── underscore-min.js │ └── templates │ │ ├── #display.html# │ │ ├── 404.html │ │ ├── display.html │ │ ├── doc.html │ │ ├── gallery.html │ │ ├── gallery.html~ │ │ ├── gallery_mm.html │ │ ├── index.html │ │ ├── index.html~ │ │ ├── multiple_display.html │ │ ├── new_gallery.html │ │ ├── new_gallery_mm.html │ │ └── stat.html ├── lisa_web_requirement.txt ├── make_session.py ├── output_profile_regulatory_potential.py ├── plotly_scatter.py ├── run-redis.sh ├── run.py ├── run.sh ├── run_browser.sh ├── run_celery.sh ├── run_heatmap.sh ├── run_lisa.sh ├── run_lisa2.sh ├── run_plot.sh └── test.sh └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *so 2 | record.txt 3 | dist 4 | build 5 | lisa.egg-info 6 | __pycache__ 7 | flycheck* 8 | .nfs* 9 | *.pwm.jpg 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2018-2019 Qian Qin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.in 2 | #recursive-include lisa/data * 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### LISA 2 | The web version and documentation are hosted at http://lisa.cistrome.org. For large-scale gene set analysis, we recommend installing the local version. 3 | 4 | ### Preparation of Anaconda environment 5 | 6 | ``` sh 7 | wget -c https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 8 | bash Miniconda3-latest-Linux-x86_64.sh 9 | export PATH="${HOME}/miniconda3/bin:$PATH" 10 | 11 | conda create -n lisa python=3.6 && conda config --add channels conda-forge && conda config --add channels bioconda 12 | 13 | ``` 14 | 15 | ### Installation 16 | 17 | ``` sh 18 | conda activate lisa 19 | # or for old conda 20 | source activate lisa 21 | export MKL_THREADING_LAYER=GNU 22 | 23 | conda install -c qinqian lisa 24 | ``` 25 | 26 | To update, use `git clone https://github.com/qinqian/lisa && cd lisa && python setup.py develop`. 27 | 28 | 29 | ### Get pre-computed datasets from CistromeDB 30 | 31 | Users can download the hg38 or mm10 datasets, depending on whether their experiments are from human or mouse; the password can be obtained after LISA is published. 32 | 33 | ``` sh 34 | wget --user=lisa --password='xxx' http://lisa.cistrome.org/cistromedb_data/lisa_v1.0_hg38.tar.gz 35 | 36 | # or 37 | 38 | wget --user=lisa --password='xxx' http://lisa.cistrome.org/cistromedb_data/lisa_v1.1_mm10.tar.gz 39 | ``` 40 | 41 | Then, uncompress the datasets and update the LISA configuration. 42 | 43 | ``` sh 44 | tar xvfz lisa_v1.0_hg38.tar.gz 45 | lisa_update_conf --folder hg38/ --species hg38 46 | 47 | # or 48 | 49 | tar xvfz lisa_v1.1_mm10.tar.gz 50 | lisa_update_conf --folder mm10/ --species mm10 51 | ``` 52 | 53 | ### Usage 54 | 55 | Given multiple gene set files `gene_set1`, `gene_set2`, `gene_set3`, etc., each containing one gene (RefSeq ID or gene symbol) per row, users can predict the transcriptional regulator ranking with randomly selected background genes using the following command: 56 | 57 | ``` sh 58 | time lisa model --method="all" --web=True --new_rp_h5=None --new_count_h5=None --species hg38 --epigenome "['DNase', 'H3K27ac']" --cluster=False --covariates=False --random=True --prefix first_run --background=None --stat_background_number=1000 --threads 4 gene_set1 gene_set2 gene_set3 ... 59 | ```
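After the model finishes, the combined transcriptional regulator ranking can be inspected directly with pandas. This is a minimal sketch, assuming the run produced a Cauchy-combined, deduplicated rank table named like the `*_cauchy_combine_dedup.csv` files written by the bundled `lisa_combine_ranks` script (the exact file name for your prefix may differ):

``` python
import pandas as pd

# Hypothetical output name; substitute the combined-rank CSV your run actually wrote.
ranks = pd.read_csv("first_run_cauchy_combine_dedup.csv", index_col=0)

# Rows were sorted by combined p-value before writing, so the head of the table is the
# top of the ranking; row names follow the "sampleID|TF|..." convention used by the
# pipeline's rank scripts.
print(ranks.head(20))
```

The bundled `lisa_show_ranks --tf <TF> --genes <rank csv>` helper prints the rank position of a single factor in the same kind of table.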
60 | 61 | Alternatively, users can generate a fixed background gene set based on TAD and promoter activity and pass it to LISA: 62 | 63 | ``` sh 64 | lisa_premodel_background_selection --species hg38 --epigenomes="['DNase']" --gene_set=None --prefix=test --random=None --background=dynamic_auto_tad 65 | cut -f 5 -d: test.background_gene.3000 > test.fixed.background_gene 66 | 67 | time lisa model --method="all" --web=True --new_rp_h5=None --new_count_h5=None --species hg38 --epigenome "['DNase', 'H3K27ac']" --cluster=False --covariates=False --random=True --prefix first_run --background=test.fixed.background_gene --stat_background_number=1000 --threads 4 gene_set1 gene_set2 gene_set3 ... 68 | ``` 69 | 70 | Users can also supply a customized background gene set, which should include more than 30 unique RefSeq genes. All input background genes are used for modeling and computing statistics, so `--stat_background_number` is ignored. 71 | 72 | ``` sh 73 | time lisa model --method="all" --web=True --new_rp_h5=None --new_count_h5=None --species hg38 --epigenome "['DNase', 'H3K27ac']" --cluster=False --covariates=False --random=True --prefix first_run --background=test.fixed.background_gene --threads 4 gene_set1 gene_set2 gene_set3 ... 74 | ``` 75 | 76 | ### Update LISA 77 | 78 | ``` sh 79 | git clone http://github.com/qinqian/lisa/ 80 | source activate lisa 81 | cd lisa && python setup.py develop 82 | lisa_update_conf --folder hg38/ --species hg38 83 | lisa_update_conf --folder mm10/ --species mm10 84 | ``` 85 | 86 | ### Remove LISA 87 | 88 | ``` sh 89 | conda env remove -n lisa 90 | rm -r mm10/ hg38/ 91 | ``` 92 | 93 | ### Citation 94 | 95 | Qin Q, Fan J, Zheng R, Wan C, Mei S, Wu Q. Inferring transcriptional regulators through integrative modeling of public chromatin accessibility and ChIP-seq data. 2019. 96 | 97 | Please note that the reference is a preprint hosted at [bioRxiv](https://www.biorxiv.org/content/10.1101/846139v1). 98 | -------------------------------------------------------------------------------- /batch/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for i in `seq 1 18`; do 3 | echo $i 4 | sbatch --array=3-100:2 --constraint="amd" --open-mode=append run_combined.sh $i 5 | done 6 | -------------------------------------------------------------------------------- /batch/run_combined.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | import glob 5 | import sys  # needed by the KeyboardInterrupt handler below 6 | filenames=glob.glob('/n/home08/cliffmeyer/projects/lisa/gene_num_sample_size/output/*gene_symbol') 7 | file_path="/n/home08/cliffmeyer/projects/lisa/gene_num_sample_size" 8 | 9 | if __name__ == "__main__": 10 | start = time.time() 11 | try: 12 | parser = argparse.ArgumentParser(description="""lisa TCGA gene sets.""") 13 | #parser.add_argument( '-c', dest='chip', type=str, required=True, help='input bed file' ) 14 | parser.add_argument( '-n', dest='number', type=int, required=True) 15 | parser.add_argument( '-s', dest='sample', type=int, required=True) 16 | args = parser.parse_args() 17 | filename=filenames[args.number-1] 18 | os.chdir(file_path) 19 | os.system("mkdir -p %s_%s" % (filename, args.sample)) 20 | os.chdir("%s_%s" % (filename, args.sample)) 21 | os.system("cp %s ."
% filename) 22 | os.system("lisa model --method='all' --web=False --new_rp_h5=None --new_count_h5=None --species hg38 --epigenome \'[\'DNase\']\' --cluster=False --covariates=False --random=True --prefix %s --threads 8 --sample-number %s %s" % (os.path.basename(filename)+"_"+str(args.sample), args.sample, os.path.basename(filename))) 23 | except KeyboardInterrupt: 24 | sys.stderr.write("User interrunpt me! ;-) Bye!\n") 25 | -------------------------------------------------------------------------------- /batch/run_combined.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -J lisa_GEO # A single job name for the array 3 | #SBATCH -n 8 # Number of cores 4 | #SBATCH -N 1 # All cores on one machine 5 | #SBATCH -p serial_requeue # Partition 6 | #SBATCH --mem 10000 # Memory request (4Gb) 7 | #SBATCH -t 0-8:00 # Maximum execution time (D-HH:MM) 8 | #SBATCH -o lisa_%A_%a.out # Standard output 9 | #SBATCH -e lisa_%A_%a.err # Standard error 10 | 11 | # module load gcc/7.1.0-fasrc01 openmpi/2.1.0-fasrc01 hdf5/1.10.1-fasrc01 12 | 13 | export PATH=/n/home08/cliffmeyer/Jingyu/miniconda3/bin:$PATH 14 | source activate lisa_python3_env 15 | 16 | cd /n/home08/cliffmeyer/projects/lisa/gene_num_sample_size 17 | python run_combined.py -s "${SLURM_ARRAY_TASK_ID}" -n $1 18 | 19 | -------------------------------------------------------------------------------- /bin/lisa_bw2hdf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ input a bigwig, preprocess them to lisa 3 | regulatory potential and 1kb read count, generate hdf5 file 4 | """ 5 | import fire 6 | from lisa.data import EpigenomeData 7 | import h5py 8 | import numpy as np 9 | import os 10 | 11 | class HDF(object): 12 | """ interface for processing single bigwig to hdf5 """ 13 | def __init__(self, species, epigenome, prefix): 14 | """ `epigenome` can be epigenome type, e.g. H3K27ac or ATAC-seq or DNase 15 | `epigenome` can also be covariates, e.g., GC or mappability 16 | 17 | prefix is used to label output HDF5 files, for epigenome sample, use `project name` 18 | for covariates, use `covarates` 19 | """ 20 | self.species = species 21 | self.epigenome = epigenome 22 | self.prefix = prefix 23 | 24 | def get_regpotential_hdf(self, bigwig): 25 | """ input one bigwig file, generate temporary 26 | hdf5 file for RP and read count """ 27 | data = EpigenomeData(self.species, self.epigenome) 28 | data.create_RP_h5(bigwig, self.prefix) 29 | 30 | def merge_reg_potential_hdf(self, *hdf5): 31 | """ processing a list of reg potential hdf5 files into one merged hdf5, 32 | input should be from the same epigenome type, e.g. H3K4me3, 33 | or from a list of covariates, e.g. GC. 34 | """ 35 | with h5py.File(hdf5[0]) as inf: 36 | nrp = inf["RP"].shape[0] 37 | refseq = inf["RefSeq"][...] 38 | with h5py.File('%s.%s.reg.h5' % (self.prefix, self.epigenome), "a") as store: 39 | refseq_arr = store.create_dataset("RefSeq", 40 | shape=(len(refseq), ), 41 | dtype='S200', 42 | compression='gzip', 43 | shuffle=True, fletcher32=True) 44 | refseq_arr[...] 
= refseq 45 | 46 | RP = store.create_dataset("RP", dtype=np.float32, shape=(nrp, len(hdf5)), compression='gzip', shuffle=True, fletcher32=True) 47 | ids = store.create_dataset("IDs", 48 | shape=(len(hdf5), ), dtype='S50', 49 | compression='gzip', shuffle=True, fletcher32=True) 50 | 51 | iids = [] 52 | for i, d in enumerate(hdf5): 53 | with h5py.File(d) as inf: 54 | RP[:,i] = inf["RP"][:,0] 55 | store.flush() 56 | iids.append(str.encode(self.prefix + ".%s" % os.path.basename(d).split('.')[0], 'utf-8')) 57 | 58 | ids[...] = np.array(iids) 59 | store.flush() 60 | 61 | def get_readcount_hdf(self, bigwig): 62 | """ input one bigwig file, generate temporary 63 | hdf5 file for RP and read count """ 64 | data = EpigenomeData(self.species, self.epigenome) 65 | data.create_Count_h5(bigwig, self.prefix) 66 | 67 | def merge_readcount_hdf(self, *hdf5): 68 | """ merge multiple hdf5 generated from process_one_bigwig """ 69 | with h5py.File(hdf5[0]) as inf: 70 | nc = inf["OrderCount"].shape[0] 71 | 72 | with h5py.File('%s.%s.readcount.h5' % (self.prefix, self.epigenome), "a") as store: 73 | ct = store.create_dataset("OrderCount", dtype=np.float32, shape=(nc, len(hdf5)), compression='gzip', shuffle=True, fletcher32=True) 74 | ids = store.create_dataset("IDs", shape=(len(hdf5), ), 75 | dtype='S50', 76 | compression='gzip', shuffle=True, fletcher32=True) 77 | 78 | iids = [] 79 | for i, d in enumerate(hdf5): 80 | with h5py.File(d) as inf: 81 | ct[:,i] = inf["OrderCount"][:,0] 82 | store.flush() 83 | iids.append(str.encode(self.prefix + ".%s" % os.path.basename(d).split('.')[0], 'utf-8')) 84 | ids[...] = np.array(iids) 85 | store.flush() 86 | 87 | 88 | if __name__ == '__main__': 89 | fire.Fire(HDF) 90 | -------------------------------------------------------------------------------- /bin/lisa_combine_ranks: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import pandas as pd 3 | import numpy as np 4 | import os 5 | import scipy.stats as stats 6 | import argparse 7 | import copy 8 | 9 | def deduplicate(df): 10 | out = copy.deepcopy(df) 11 | #do not deduplicate, since row names is not consistent for cistromedb and imputed TFBS 12 | out.loc[:, 'TF'] = df.index.map(lambda x: x.split('|')[1]) 13 | ##out.loc[:, 'TF'] = df.index.map(lambda x: x.split('_')[0]) 14 | out = out.drop_duplicates('TF', inplace=False) 15 | print(out.head()) 16 | return out 17 | 18 | def cauchy_p_value(p_vals, wi=None): 19 | """https://arxiv.org/abs/1808.09011""" 20 | p_vals = np.array(p_vals, np.float64) 21 | if np.any(p_vals <= 1e-15): # np.finfo(np.float64) 1e-15 22 | from mpmath import mp 23 | mp.dps = 200 24 | mp.pretty = True 25 | p_vals = [mp.mpf(i) for i in p_vals] 26 | t0 = sum([mp.tan((mp.mpf(0.5)-i)*mp.pi())/mp.mpf(3) for i in p_vals]) 27 | p = mp.mpf(0.5)-mp.atan(t0)/mp.pi() 28 | return t0, p 29 | else: 30 | if wi is None: 31 | wi = 1.0 / len(p_vals) 32 | stat = np.sum(wi * np.tan((0.5-p_vals) * np.pi)) 33 | return stat, 0.5 - np.arctan(stat)/np.pi 34 | 35 | def main(): 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument('rankcsv', nargs='+', help='a list of TF rank csv files') 38 | parser.add_argument('-tf', required=False, default='', type=str, help='target tf name') 39 | parser.add_argument('-prefix', required=True, type=str, help='output prefix') 40 | args = parser.parse_args() 41 | 42 | individual_ranks = [] 43 | rank = pd.read_csv(args.rankcsv[0], index_col=0, header=None) 44 | rank_dedup = deduplicate(rank) 45 | 46 | if len(args.rankcsv) == 1: 47 | 
rank.to_csv('%s_cauchy_combine_raw.csv' % args.prefix) 48 | rank_dedup.to_csv('%s_cauchy_combine_dedup.csv' % args.prefix) 49 | 50 | rank.to_csv('%s_fisher_combine_raw.csv' % args.prefix) 51 | rank_dedup.to_csv('%s_fisher_combine_dedup.csv' % args.prefix) 52 | return True 53 | else: 54 | rank_dedup.to_csv('%s_dedup.csv' % args.rankcsv[0].replace('.csv', '')) 55 | rank_dedup.index = rank_dedup.TF 56 | rank_dedup.drop('TF', axis=1) 57 | 58 | for r in args.rankcsv[1:]: 59 | rank2 = pd.read_csv(r, index_col=0, header=None) 60 | rank_dedup2 = deduplicate(rank2) 61 | rank_dedup2.to_csv('%s_dedup.csv' % r.replace('.csv', '')) 62 | rank_dedup2.index = rank_dedup2.TF 63 | rank_dedup2.drop('TF', axis=1) 64 | rank = rank.merge(rank2, left_index=True, right_index=True) 65 | 66 | print(rank.shape) 67 | print(rank.head()) 68 | 69 | # cauchy combination test 70 | combine_p = rank.apply(lambda x: cauchy_p_value(x)[1], axis=1) 71 | combine_p.sort_values(inplace=True) 72 | combine_p = pd.DataFrame(combine_p) 73 | combine_p.to_csv('%s_cauchy_combine_raw.csv' % args.prefix) 74 | combine_p = deduplicate(combine_p) 75 | combine_p.to_csv('%s_cauchy_combine_dedup.csv' % args.prefix) 76 | 77 | combine_p2 = rank.apply(lambda x: stats.combine_pvalues(x, method='fisher')[1], axis=1) 78 | ## load meta data 79 | 80 | combine_p2.sort_values(inplace=True) 81 | combine_p2 = pd.DataFrame(combine_p2) 82 | combine_p2.to_csv('%s_fisher_combine_raw.csv' % args.prefix) 83 | combine_p2 = deduplicate(combine_p2) 84 | combine_p2.to_csv('%s_fisher_combine_dedup.csv' % args.prefix) 85 | 86 | if __name__ == '__main__': 87 | main() 88 | -------------------------------------------------------------------------------- /bin/lisa_list_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import configparser 5 | import sys 6 | 7 | if len(sys.argv) > 2: 8 | sys.stderr.write('too many parameters...') 9 | sys.exit(1) 10 | 11 | c = configparser.ConfigParser() 12 | c.read(sys.argv[1]) 13 | 14 | for s in c.sections(): 15 | for k in c[s].keys(): 16 | if k == 'bwa_index': 17 | continue 18 | if os.path.exists(c.get(s, k)): 19 | print(c.get(s, k)) 20 | else: 21 | print(c.get(s, k)) 22 | raise Exception('File not exists %s' % (c.get(s, k))) 23 | -------------------------------------------------------------------------------- /bin/lisa_postmodel_background_selection: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ for each differential gene, select a nearest background gene 3 | based on the lisa prediction of regulatory potential 4 | 5 | if two DE genes have the same background gene, one of the DE genes would 6 | choose the secondary nearest gene 7 | """ 8 | from glob import glob 9 | import fire 10 | import pandas as pd 11 | from lisa.data import EpigenomeData 12 | import numpy as np 13 | 14 | def select_background(species, covariates, prefix, *files): 15 | """ 16 | species: hg38 or mm10 17 | covariates: True or False, whether to consider GC or not 18 | prefix: the prefix of lisa_model output, e.g. 
gene symbols file name 19 | files: a list of lisa prediction output files from lisa_model 20 | """ 21 | epigenome = EpigenomeData(species, None) 22 | pred = [] 23 | if covariates: 24 | pred.append(epigenome.get_covariates_reg) 25 | 26 | fore_genes = glob("%s.*.foreground_gene" % prefix)[0] 27 | fore_genes = np.genfromtxt(fore_genes, dtype='str') 28 | 29 | all_back_genes = glob("%s.*.all_background_gene" % prefix)[0] 30 | all_back_genes = np.genfromtxt(all_back_genes, dtype='str') 31 | 32 | for lp in files: 33 | if not 'H3K27me3' in lp: 34 | df = pd.read_csv(lp, index_col=0) 35 | pred.append(df) 36 | 37 | pred = pred[0].join(pred[1:]) 38 | pred = pred.rank(axis=0, ascending=False) 39 | fore_genes_df = pred.loc[fore_genes] 40 | all_back_genes_df = pred.loc[all_back_genes] 41 | 42 | # |rank_j K4me3 - rank_i K4me3| + | rank_j K27ac - rank_i K27ac | + | rank_j GC - rank_i GC | 43 | background_genes = set() 44 | for fore_gene in fore_genes: 45 | dist = all_back_genes_df.sub(fore_genes_df.loc[fore_gene], axis=1) \ 46 | .abs() \ 47 | .sum(axis=1) 48 | dist.sort_values(axis=0, ascending=True, inplace=True) 49 | for candidate in dist.index: 50 | # if first nearest candidate background gene is already be taken 51 | # choose the next one, etc... 52 | if not candidate in background_genes: 53 | background_genes.add(candidate) 54 | break # match one background gene 55 | 56 | back_genes_df = pred.loc[list(background_genes)] 57 | fore_genes_df.to_csv("%s.fore_gene.rank.csv" % prefix) 58 | back_genes_df.to_csv("%s.back_gene.rank.csv" % prefix) 59 | 60 | if __name__ == '__main__': 61 | fire.Fire(select_background) 62 | -------------------------------------------------------------------------------- /bin/lisa_predict_tfbs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ validate lisa model on prediction of TF binding sites """ 3 | import fire 4 | from lisa.data import EpigenomeData 5 | from lisa.utils import binarize_gene_set 6 | from lisa.model import Logit 7 | from sklearn.preprocessing import StandardScaler 8 | from sklearn.metrics import make_scorer, roc_auc_score, average_precision_score, roc_curve, r2_score 9 | import json 10 | import pandas as pd 11 | import numpy as np 12 | import h5py 13 | 14 | def convert_name(name): 15 | try: 16 | name = name.decode('utf-8').replace("tf_", "") 17 | except: 18 | name = name.replace("tf_", "") 19 | return name 20 | 21 | def _get_hdf(epigenome, dtype): 22 | """ get corresponding TF binding data type for 100bp window hit 23 | """ 24 | tfbs_dict = dict( 25 | motif99=epigenome.config.get_motif_index(99), 26 | #motif98=epigenome.config.get_motif_index(98), 27 | #motif97=epigenome.config.get_motif_index(97), 28 | chipseq=epigenome.config.tf_chipseq 29 | ) 30 | return tfbs_dict[dtype] 31 | 32 | def predict_tfbs(species, epigenome, prefix, coefficients, chip_seq_id=None): 33 | """ 34 | species: species for epigenome and gene_set 35 | epigenome: one epigenome type, e.g. 
DNase 36 | gene_set: a gene set file, one gene per line 37 | """ 38 | epigenome = EpigenomeData(species, epigenome) 39 | bin_100_to_1kb = np.load(epigenome.config.genome_window_map) 40 | meta = pd.read_table(epigenome.config.get_meta, 41 | encoding="ISO-8859-1", 42 | index_col=0) 43 | selection = 'factor' 44 | 45 | coef = pd.read_csv(coefficients, encoding="ISO-8859-1", index_col=0) 46 | coef.index = coef.index.astype(str) 47 | print(coef) 48 | 49 | aucs = [] 50 | prs = [] 51 | dtype = 'chipseq' 52 | offset = -1 if dtype == 'chipseq' else 0 53 | with h5py.File(_get_hdf(epigenome, dtype), mode='r') as store: 54 | ids = store['IDs'][...] 55 | for tfbs_id in ids: 56 | try: 57 | tfbs_id_c = int(tfbs_id.decode('utf-8').split('_')[0]) 58 | except: 59 | tfbs_id_c = int(tfbs_id.split('_')[0]) 60 | if tfbs_id_c == int(chip_seq_id): 61 | tfbs_index = store[tfbs_id][...] + offset 62 | print(tfbs_index[:5]) 63 | # 1kb window 64 | print(bin_100_to_1kb[-1]) 65 | tfbs_bin = np.zeros(bin_100_to_1kb[-1] + 1, dtype=np.int32) 66 | # # 1kb 0-1 vector 67 | tfbs_bin[bin_100_to_1kb[tfbs_index]] = 1 68 | 69 | read_count = epigenome.get_count(list(coef.index), False, None) # no hdf5 and covariates 70 | annotation = meta.loc[tfbs_id_c, selection] 71 | print(annotation) 72 | feature_x = np.log2(read_count+1) 73 | 74 | scale = StandardScaler(with_std=False) 75 | ## scale = RobustScaler(quantile_range=(5, 95)) 76 | feature_x = scale.fit_transform(feature_x) 77 | print(feature_x[:5]) 78 | 79 | score = np.dot(feature_x, coef.iloc[:, 0].values) 80 | aucs.append(roc_auc_score(tfbs_bin, score)) 81 | print(aucs) 82 | prs.append(average_precision_score(tfbs_bin, score)) 83 | 84 | with open("%s_direct_tfbs.txt" % prefix, 'w') as outf: 85 | for i, j in zip(aucs, prs): 86 | outf.write("%s\t%s"%(i, j)) 87 | 88 | if __name__ == '__main__': 89 | fire.Fire(predict_tfbs) 90 | 91 | -------------------------------------------------------------------------------- /bin/lisa_show_ranks: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import pandas as pd 3 | import numpy as np 4 | import argparse 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--tf') 8 | parser.add_argument('--genes') 9 | args = parser.parse_args() 10 | 11 | df = pd.read_csv(args.genes, header=None) 12 | df.loc[:, 'TF' ] = df.iloc[:, 0].map(lambda x: x.split('|')[1]) 13 | df.drop_duplicates('TF', inplace=True) 14 | print(np.where(df.loc[:, 'TF'] == args.tf)[0][0]) 15 | -------------------------------------------------------------------------------- /bin/lisa_update_conf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """update local data directory for .ini configuration file 4 | """ 5 | from pkg_resources import resource_filename 6 | import fire 7 | 8 | def update(folder, species): 9 | """ update the config given a folder 10 | """ 11 | import os 12 | import configparser 13 | folder = os.path.abspath(folder) 14 | 15 | in_out = resource_filename("lisa", "lisa.ini") 16 | print(in_out) 17 | conf = configparser.ConfigParser() 18 | conf.read(in_out) 19 | assert species in ['hg38', 'mm10'], 'species not support' 20 | 21 | # common files 22 | conf.set('basics', 'motif', os.path.join(folder, os.path.basename(conf.get('basics', 'motif')))) 23 | conf.set('basics', 'meta', os.path.join(folder, os.path.basename(conf.get('basics', 'meta')))) 24 | # species specific files 25 | for i in conf[species].keys(): 26 | conf.set(species, i, 
os.path.join(folder, os.path.basename(conf.get(species, i)))) 27 | 28 | with open(in_out, 'w') as configfile: 29 | conf.write(configfile) 30 | 31 | if __name__ == '__main__': 32 | fire.Fire(update) 33 | -------------------------------------------------------------------------------- /conda.recipe/build.sh: -------------------------------------------------------------------------------- 1 | cd $RECIPE_DIR/.. 2 | 3 | $PYTHON setup.py install --single-version-externally-managed --record=record.txt 4 | -------------------------------------------------------------------------------- /conda.recipe/meta.yaml: -------------------------------------------------------------------------------- 1 | ## https://docs.anaconda.com/anaconda-cloud/user-guide/tutorials/ 2 | package: 3 | name: lisa 4 | version: 1.0 5 | 6 | build: 7 | number: 7 8 | #entry_points: 9 | # - lisa = ../bin/lisa 10 | 11 | requirements: 12 | build: 13 | - python 14 | - fire 15 | - setuptools 16 | - numpy ==1.15.1 17 | - patchelf 18 | - {{ compiler('c') }} 19 | host: 20 | - python 21 | - zlib 22 | - numpy ==1.15.1 23 | run: 24 | - python 25 | - zlib 26 | - mkl-service 27 | - curl 28 | - fire 29 | - scikit-learn 30 | - theano 31 | - pandas 32 | - h5py 33 | - numpy ==1.15.1 34 | - PyYAML >=3.12 35 | - snakemake ==5.7.1 36 | - yappi 37 | - bwa 38 | - samtools=0.1.19 39 | - bedtools=2.17.0 40 | - seqtk 41 | - ucsc-bedclip 42 | - ucsc-bedgraphtobigwig 43 | - ucsc-wigcorrelate 44 | - ucsc-wigtobigwig 45 | - deeptools 46 | - openssl=1.0 47 | - mpmath 48 | 49 | #- matplotlib 50 | #- seaborn 51 | 52 | about: 53 | home: http://lisa.cistrome.org 54 | license: EULA 55 | license_family: OTHER 56 | license_file: ../LICENSE 57 | summary: 'ChIP-seq/DNase-seq data-driven TF and CR prioritization tool' 58 | -------------------------------------------------------------------------------- /demo/AR.symbol: -------------------------------------------------------------------------------- 1 | NR_045762 2 | NM_001002231 3 | NM_001256080 4 | NM_005551 5 | NR_045763 6 | NM_001135099 7 | NM_005656 8 | NM_004917 9 | NM_014668 10 | NM_001030047 11 | NM_001030048 12 | NM_001648 13 | NM_001255976 14 | NM_020182 15 | NM_199169 16 | NM_199170 17 | NM_199171 18 | NM_024080 19 | NR_046072 20 | NM_001256339 21 | NM_006167 22 | NM_001161352 23 | NM_001161353 24 | NM_002247 25 | NM_020752 26 | NM_018414 27 | NM_001105515 28 | NM_005845 29 | NM_020338 30 | NM_007085 31 | NM_001130518 32 | NM_001127257 33 | NM_020342 34 | NM_033102 35 | NM_018371 36 | NR_024040 37 | NM_001083924 38 | NM_023938 39 | NM_001100624 40 | NM_006549 41 | NM_153499 42 | NM_153500 43 | NM_172216 44 | NM_172226 45 | NM_054027 46 | NM_015036 47 | NM_002867 48 | NM_001172 49 | NM_032323 50 | NR_026678 51 | NM_012081 52 | NM_001100625 53 | NM_001104558 54 | NM_024930 55 | NM_006633 56 | NM_138799 57 | NM_014762 58 | NM_007011 59 | NM_152924 60 | NM_000693 61 | NM_006810 62 | NR_028444 63 | NM_001018011 64 | NM_006006 65 | NM_003711 66 | NM_176895 67 | NM_001105539 68 | NM_023929 69 | NM_004457 70 | NM_203372 71 | NM_018455 72 | NM_000608 73 | NM_001256301 74 | NM_021205 75 | NR_037962 76 | NM_021614 77 | NM_170775 78 | XR_110583 79 | NM_014146 80 | NM_032463 81 | NM_032464 82 | NM_030806 83 | NM_012152 84 | XR_112606 85 | NM_022782 86 | NM_207446 87 | NM_018960 88 | NM_001077654 89 | NM_014350 90 | NM_000860 91 | NM_001145816 92 | NM_001256305 93 | NM_013233 94 | NM_173854 95 | NM_001256307 96 | NM_001256306 97 | NM_000875 98 | NM_207307 99 | NM_001145775 100 | NM_001145776 101 | NM_004117 102 | NM_024409 
103 | NM_000607 104 | NM_015261 105 | -------------------------------------------------------------------------------- /demo/run.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | time lisa model --method="all" --web=False --new_rp_h5=None --new_count_h5=None --species hg38 --epigenome "['DNase', 'H3K27ac']" --cluster=False --covariates=False --random=False --prefix AR.symbol --background=dynamic_auto_tad --stat_background_number=1000 --threads 8 AR.symbol 4 | 5 | -------------------------------------------------------------------------------- /lisa/__init__.py: -------------------------------------------------------------------------------- 1 | """ interface for loading lisa config file 2 | """ 3 | from configparser import ConfigParser 4 | 5 | class Config(ConfigParser): 6 | """ data input interface""" 7 | def __init__(self, f, s): 8 | """ 9 | f: configuration file 10 | s: species 11 | """ 12 | super().__init__() 13 | self.read(f) 14 | self.s = s 15 | 16 | @property 17 | def get_meta(self): 18 | if hasattr(self, "s"): 19 | return self.get('basics', 'meta') 20 | 21 | @property 22 | def get_annotation(self): 23 | if hasattr(self, "s"): 24 | return self.get(self.s, 'tssbin') 25 | 26 | @property 27 | def get_tss_refseq(self): 28 | if hasattr(self, "s"): 29 | return self.get(self.s, 'tss') 30 | 31 | def get_rp(self, factor): 32 | if hasattr(self, "s"): 33 | if factor == 'H3K4me3': 34 | return self.get(self.s, '%s_1kbRP' % factor) 35 | return self.get(self.s, '%s_RP' % factor) 36 | 37 | @property 38 | def get_dnase_bin(self): 39 | if hasattr(self, "s"): 40 | return self.get(self.s, 'DNase_bin') 41 | 42 | def genome_count(self, factor): 43 | if hasattr(self, "s"): 44 | return self.get(self.s, '%s_count' % factor) 45 | 46 | @property 47 | def genome_window_map(self): 48 | if hasattr(self, "s"): 49 | return self.get(self.s, 'genome_window_map') 50 | 51 | @property 52 | def genome_window(self): 53 | if hasattr(self, "s"): 54 | return self.get(self.s, 'genome_window') 55 | 56 | @property 57 | def genome_100bp_window(self): 58 | if hasattr(self, "s"): 59 | return self.get(self.s, 'genome_100bp_window') 60 | 61 | @property 62 | def tf_chipseq(self): 63 | if hasattr(self, "s"): 64 | return self.get(self.s, 'tf_chipseq') 65 | 66 | @property 67 | def chrom(self): 68 | if hasattr(self, "s"): 69 | return self.get(self.s, 'chrom_len') 70 | 71 | @property 72 | def get_motif_meta(self): 73 | if hasattr(self, "s"): 74 | return self.get('basics', 'motif') 75 | 76 | def get_motif_index(self, cutoff=99): 77 | """ 100bp for deletion """ 78 | if hasattr(self, "s"): 79 | return self.get(self.s, 'genome_100bp_motif_index%s' % cutoff) 80 | 81 | def get_motif_1kb(self, cutoff=99): 82 | """ 1kb for cluster """ 83 | if hasattr(self, "s"): 84 | return self.get(self.s, 'genome_motif%s' % cutoff) 85 | 86 | @property 87 | def get_motif_sim(self): 88 | if hasattr(self, "s"): 89 | return self.get("basics", "motif_similarity") 90 | 91 | @property 92 | def get_beta(self): 93 | if hasattr(self, "s"): 94 | return self.get(self.s, 'tf_chipseq_beta') 95 | 96 | @property 97 | def get_udhs(self): 98 | if hasattr(self, "s"): 99 | return self.get(self.s, 'udhs_100bp_index') 100 | 101 | @property 102 | def get_index(self): 103 | """genome index 104 | """ 105 | if hasattr(self, "s"): 106 | return self.get(self.s, 'bwa_index') 107 | 108 | @property 109 | def get_tad(self): 110 | """genome index 111 | """ 112 | if hasattr(self, "s"): 113 | return self.get(self.s, 'tad_info') 114 | 115 | 
@property 116 | def get_cluster(self): 117 | """only for hg38 now......""" 118 | if hasattr(self, "s"): 119 | return self.get(self.s, 'cluster') 120 | -------------------------------------------------------------------------------- /lisa/lisa.ini: -------------------------------------------------------------------------------- 1 | [basics] 2 | motif = ${prefix}/cistrome.txt 3 | meta = ${prefix}/lisa_meta.xls 4 | 5 | [mm10] 6 | bwa_index = ${prefix}/mm10.fa 7 | chrom_len = ${prefix}/mm10.genome 8 | tssbin = ${prefix}/mm10_window1kb_tss.bed 9 | tss = ${prefix}/mm10.tss 10 | H3K27ac_RP = ${prefix}/margeRP_H3K27ac_mm.h5 11 | DNase_RP = ${prefix}/margeRP_DNase_mm.h5 12 | genome_window = ${prefix}/mm10_window1kb.bed 13 | genome_window_map = ${prefix}/mm10_100to1000window.out.npy 14 | H3K27ac_count = ${prefix}/mm10_window1kb_H3K27ac.h5 15 | DNase_count = ${prefix}/mm10_window1kb_DNase.h5 16 | tf_chipseq = ${prefix}/mm10_lisa_tf_100bp_all_nonhm_nonca_peak5fold.h5 17 | tf_chipseq_beta = ${prefix}/mm10_beta_peak5fold.h5 18 | cluster = ${prefix}/cluster_mouse 19 | genome_100bp_motif_index99 = ${prefix}/mm10_marge2_motif_100bp_99.h5 20 | tad_info = ${prefix}/mm10_promoter_TADann_H3K4me3_enhance_k27me3_Using.xls 21 | 22 | [hg38] 23 | bwa_index = ${prefix}/hg38.fa 24 | chrom_len = ${prefix}/hg38.genome 25 | tssbin = ${prefix}/hg38_window1kb_tss.bed 26 | tss = ${prefix}/hg38.tss 27 | H3K27ac_RP = ${prefix}/margeRP_H3K27ac.h5 28 | DNase_RP = ${prefix}/margeRP_DNase.h5 29 | genome_window = ${prefix}/hg38_window1kb.bed 30 | genome_window_map = ${prefix}/hg38_100to1000window.out.npy 31 | H3K27ac_count = ${prefix}/hg38_window1kb_H3K27ac.h5 32 | DNase_count = ${prefix}/hg38_window1kb_DNase.h5 33 | tf_chipseq = ${prefix}/hs_tf_new_peak_loct.h5 34 | tf_chipseq_beta = ${prefix}/hs_tf_new_beta_rp.h5 35 | cluster = ${prefix}/cluster_human 36 | genome_100bp_motif_index99 = ${prefix}/marge2_motif_100bp_99.h5 37 | tad_info = ${prefix}/hg38_promoter_TADann_H3K4me3_enhance_k27me3_Using.xls 38 | -------------------------------------------------------------------------------- /lisa/lisa.ini.updated: -------------------------------------------------------------------------------- 1 | [basics] 2 | motif = /data/home/qqin/lisa_web/download/data/mm10/cistrome.txt 3 | meta = /data/home/qqin/lisa_web/download/data/mm10/dc_meta_lisa_20180102.xls 4 | 5 | [mm10] 6 | bwa_index = /data/home/qqin/lisa_web/download/data/mm10/mm10.fa 7 | meta = /data/home/qqin/lisa_web/download/data/mm10/margeFactor_mm.csv 8 | chrom_len = /data/home/qqin/lisa_web/download/data/mm10/mm10.genome 9 | tssbin = /data/home/qqin/lisa_web/download/data/mm10/mm10_window1kb_tss.bed 10 | tss = /data/home/qqin/lisa_web/download/data/mm10/mm10.tss 11 | h3k27ac_rp = /data/home/qqin/lisa_web/download/data/mm10/margeRP_H3K27ac_mm.h5 12 | dnase_rp = /data/home/qqin/lisa_web/download/data/mm10/margeRP_DNase_mm.h5 13 | genome_window_map = /data/home/qqin/lisa_web/download/data/mm10/mm10_100to1000window.out.npy 14 | h3k27ac_count = /data/home/qqin/lisa_web/download/data/mm10/mm10_window1kb_H3K27ac.h5 15 | dnase_count = /data/home/qqin/lisa_web/download/data/mm10/mm10_window1kb_DNase.h5 16 | tf_chipseq = /data/home/qqin/lisa_web/download/data/mm10/mm10_lisa_tf_100bp_all_nonhm_nonca_peak5fold.h5 17 | tf_chipseq_meta = /data/home/qqin/lisa_web/download/data/mm10/mm10_best_dc_tfcr_basedon_frip_peak_dhs_all_nonhm_nonca.xls 18 | tf_chipseq_beta = /data/home/qqin/lisa_web/download/data/mm10/mm10_beta_peak5fold.h5 19 | cluster = 
/data/home/qqin/lisa_web/download/data/mm10/cluster_mouse 20 | genome_100bp_motif_index99 = /data/home/qqin/lisa_web/download/data/mm10/mm10_marge2_motif_100bp_99.h5 21 | tad_info = /data/home/qqin/lisa_web/download/data/mm10/mm10_promoter_TADann_H3K4me3_enhance_k27me3_Using.xls 22 | 23 | [hg38] 24 | bwa_index = /data/home/qqin/lisa_web/download/data/mm10/hg38.fa 25 | meta = /data/home/qqin/lisa_web/download/data/mm10/margeFactor.csv 26 | chrom_len = /data/home/qqin/lisa_web/download/data/mm10/hg38.genome 27 | tssbin = /data/home/qqin/lisa_web/download/data/mm10/hg38_window1kb_tss.bed 28 | tss = /data/home/qqin/lisa_web/download/data/mm10/hg38.tss 29 | h3k27ac_rp = /data/home/qqin/lisa_web/download/data/mm10/margeRP_H3K27ac.h5 30 | dnase_rp = /data/home/qqin/lisa_web/download/data/mm10/margeRP_DNase.h5 31 | genome_window_map = /data/home/qqin/lisa_web/download/data/mm10/hg38_100to1000window.out.npy 32 | h3k27ac_count = /data/home/qqin/lisa_web/download/data/mm10/hg38_window1kb_H3K27ac.h5 33 | dnase_count = /data/home/qqin/lisa_web/download/data/mm10/hg38_window1kb_DNase.h5 34 | tf_chipseq = /data/home/qqin/lisa_web/download/data/mm10/hs_tf_new_peak_loct.h5 35 | tf_chipseq_meta = /data/home/qqin/lisa_web/download/data/mm10/hs_tf_meta_qc.xls 36 | tf_chipseq_beta = /data/home/qqin/lisa_web/download/data/mm10/hs_tf_new_beta_rp.h5 37 | cluster = /data/home/qqin/lisa_web/download/data/mm10/cluster_human 38 | genome_100bp_motif_index99 = /data/home/qqin/lisa_web/download/data/mm10/marge2_motif_100bp_99.h5 39 | tad_info = /data/home/qqin/lisa_web/download/data/mm10/hg38_promoter_TADann_H3K4me3_enhance_k27me3_Using.xls 40 | 41 | -------------------------------------------------------------------------------- /lisa/mouse.tfs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa/mouse.tfs -------------------------------------------------------------------------------- /lisa/rank.py: -------------------------------------------------------------------------------- 1 | """ two methods to rank TFs 2 | """ 3 | import theano 4 | import theano.tensor as T 5 | import pandas as pd 6 | import numpy as np 7 | import scipy.stats as stats 8 | from numpy.linalg import norm 9 | 10 | def get_insilico_knockout_tensor_op(lisa_prediction, precompute, coef, original_median=None): 11 | """ use theano tensor operation to speed up 12 | return a theano.function 13 | 14 | lisa_prediction: numpy array 15 | precompute: numpy array 16 | coef: pandas DataFrame 17 | """ 18 | x = T.imatrix('E') # each motif tensor 19 | precomp = theano.shared(precompute.astype(theano.config.floatX), name='precompute') 20 | r = theano.shared(lisa_prediction.astype(theano.config.floatX), name='Lisa RP') 21 | c = theano.shared(coef.iloc[:, 0].values.astype(theano.config.floatX), name='coefficients') 22 | m = theano.shared(original_median.astype(theano.config.floatX), name='original_rp_median') 23 | 24 | # sample x (gene1_bin1, gene1_bin2...gene2_bin1,gene2_bin2...) 
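    # (Added clarifying comments; the shapes are a sketch inferred from the reshape and dot below.)
    # x is a 0/1 selection matrix over the flattened (gene, bin) grid for one TF binding profile.
    # Repeating its rows `precompute.shape[0]` times and multiplying elementwise with `precompute`
    # keeps only the per-(gene, bin) regulatory-potential contributions at the selected bins; the
    # reshape and the sum over the last axis then collapse the 200 bins per gene. Adding 1, taking
    # log2, and subtracting the precomputed median `m` rebuild the log-RP features, and the dot
    # product with the per-sample coefficients `c` re-scores the model, so the function returns
    # `r` minus the re-scored prediction for every gene (the in-silico knockout effect).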
25 | y = T.extra_ops.repeat(x, precompute.shape[0], axis=0) 26 | tensor_del = y * precomp # sample x (gene,bin) 27 | tensor_del = T.reshape(tensor_del, (c.shape[0],r.shape[0],200)) # sample x gene x bin 28 | tensor_del = T.transpose(T.sum(tensor_del, axis=2), (1,0)) + T.constant(1) # one motif 29 | 30 | ##tensor_del_med = T.mean(tensor_del, axis=0) # one motif 31 | ##log_tensor_del = T.log2(tensor_del) - T.log2(tensor_del_med) 32 | 33 | log_tensor_del = T.log2(tensor_del) - m # original median already take log2 34 | tensor_delta = r - T.dot(log_tensor_del, c) 35 | 36 | mode = theano.Mode(linker='cvm', optimizer='fast_run') 37 | theano.config.exception_verbosity = 'high' 38 | # theano.config.openmp = True 39 | theano_delta_rp = theano.function([x], tensor_delta, mode=mode) 40 | return theano_delta_rp 41 | 42 | def rank_by_entropy(pq, kl=True): 43 | """ evaluate kl divergence, wasserstein distance 44 | wasserstein: http://pythonhosted.org/pyriemann/_modules/pyriemann/utils/distance.html 45 | """ 46 | # to avoid Inf cases 47 | pq = pq + 0.0000001 48 | pq = pq/pq.sum(axis=0) 49 | 50 | if kl: # entropy actually can calculate KL divergence 51 | final=pq.iloc[:, :-1].apply( 52 | lambda x: stats.entropy(x, pq.iloc[:, -1], base=2), axis=0) 53 | label = 'KL' 54 | else: # JS divergence 55 | final=pq.iloc[:, :-1].apply( 56 | lambda x: JSD(x, pq.iloc[:, -1]), axis=0) 57 | label = 'JSD' 58 | final.sort_values(ascending=False, inplace=True) 59 | rank = final.rank(ascending=False) 60 | final = pd.concat([final, rank], axis=1) 61 | final.columns = [label, 'rank'] 62 | return final 63 | 64 | def JSD(P, Q): 65 | """ compute JS divergence 66 | JSD: http://stackoverflow.com/questions/15880133/jensen-shannon-divergence 67 | """ 68 | P = P / norm(P, ord=1) 69 | Q = Q / norm(Q, ord=1) 70 | M = 0.5 * (P + Q) 71 | return 0.5 * (stats.entropy(P, M) + stats.entropy(Q, M)) 72 | -------------------------------------------------------------------------------- /lisa/regpotential/__init__.py: -------------------------------------------------------------------------------- 1 | import lisa._bw as regpotential 2 | -------------------------------------------------------------------------------- /lisa/regpotential/aliType.c: -------------------------------------------------------------------------------- 1 | /* aliType - some definitions for type of alignment. */ 2 | 3 | /* Copyright (C) 2011 The Regents of the University of California 4 | * See README in this or parent directory for licensing information. */ 5 | #include "common.h" 6 | #include "aliType.h" 7 | 8 | 9 | char *gfTypeName(enum gfType type) 10 | /* Return string representing type. */ 11 | { 12 | if (type == gftDna) return "DNA"; 13 | if (type == gftRna) return "RNA"; 14 | if (type == gftProt) return "protein"; 15 | if (type == gftDnaX) return "DNAX"; 16 | if (type == gftRnaX) return "RNAX"; 17 | internalErr(); 18 | return NULL; 19 | } 20 | 21 | enum gfType gfTypeFromName(char *name) 22 | /* Return type from string. 
*/ 23 | { 24 | if (sameWord(name, "DNA")) return gftDna; 25 | if (sameWord(name, "RNA")) return gftRna; 26 | if (sameWord(name, "protein")) return gftProt; 27 | if (sameWord(name, "prot")) return gftProt; 28 | if (sameWord(name, "DNAX")) return gftDnaX; 29 | if (sameWord(name, "RNAX")) return gftRnaX; 30 | errAbort("Unknown sequence type '%s'", name); 31 | return 0; 32 | } 33 | 34 | -------------------------------------------------------------------------------- /lisa/regpotential/aliType.h: -------------------------------------------------------------------------------- 1 | /* aliType - some definitions for type of alignment. */ 2 | 3 | #ifndef ALITYPE_H 4 | #define ALITYPE_H 5 | 6 | enum gfType 7 | /* Types of sequence genoFind deals with. */ 8 | { 9 | gftDna = 0, /* DNA (genomic) */ 10 | gftRna = 1, /* RNA */ 11 | gftProt = 2, /* Protein. */ 12 | gftDnaX = 3, /* Genomic DNA translated to protein */ 13 | gftRnaX = 4, /* RNA translated to protein */ 14 | }; 15 | 16 | char *gfTypeName(enum gfType type); 17 | /* Return string representing type. */ 18 | 19 | enum gfType gfTypeFromName(char *name); 20 | /* Return type from string. */ 21 | 22 | enum ffStringency 23 | /* How tight of a match is required. */ 24 | { 25 | ffExact = 0, /* Only an exact match will do. */ 26 | 27 | ffCdna = 1, /* Near exact. Tolerate long gaps in target (genomic) */ 28 | ffTight = 2, /* Near exact. Not so tolerant of long gaps in target. */ 29 | ffLoose = 3, /* Less exact. */ 30 | }; 31 | 32 | #endif /* ALITYPE_H */ 33 | -------------------------------------------------------------------------------- /lisa/regpotential/base64.c: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2011 The Regents of the University of California 2 | * See README in this or parent directory for licensing information. */ 3 | 4 | #include "common.h" 5 | #include "base64.h" 6 | 7 | 8 | char *base64Encode(char *input, size_t inplen) 9 | /* Use base64 to encode a string. Returns one long encoded 10 | * string which need to be freeMem'd. Note: big-endian algorithm. 11 | * For some applications you may need to break the base64 output 12 | * of this function into lines no longer than 76 chars. 13 | */ 14 | { 15 | char b64[] = B64CHARS; 16 | int words = (inplen+2)/3; 17 | int remains = inplen % 3; 18 | char *result = (char *)needMem(4*words+1); 19 | size_t i=0, j=0; 20 | int word = 0; 21 | unsigned char *p = (unsigned char*) input; 22 | /* p must be unsigned char*, because without "unsigned", 23 | sign extend messes up last group outputted 24 | when the value of the chars following last in input 25 | happens to be char 0x80 or higher */ 26 | for(i=1; i<=words; i++) 27 | { 28 | word = 0; 29 | word |= *p++; 30 | word <<= 8; 31 | word |= *p++; 32 | word <<= 8; 33 | word |= *p++; 34 | if (i==words && remains>0) 35 | { 36 | word &= 0x00FFFF00; 37 | if (remains==1) 38 | word &= 0x00FF0000; 39 | } 40 | result[j++]=b64[word >> 18 & 0x3F]; 41 | result[j++]=b64[word >> 12 & 0x3F]; 42 | result[j++]=b64[word >> 6 & 0x3F]; 43 | result[j++]=b64[word & 0x3F]; 44 | } 45 | result[j] = 0; 46 | if (remains >0) result[j-1] = '='; 47 | if (remains==1) result[j-2] = '='; 48 | return result; 49 | } 50 | 51 | 52 | boolean base64Validate(char *input) 53 | /* Return true if input is valid base64. 54 | * Note that the input string is changed by 55 | * eraseWhiteSpace(). 
*/ 56 | { 57 | size_t i = 0, l = 0; 58 | char *p = input; 59 | boolean validB64 = TRUE; 60 | 61 | /* remove whitespace which is unnecessary and */ 62 | eraseWhiteSpace(input); 63 | 64 | l = strlen(p); 65 | for(i=0;i> 16 & 0xFF; 123 | result[j++]=word >> 8 & 0xFF; 124 | result[j++]=word & 0xFF; 125 | } 126 | result[j] = 0; 127 | if (returnSize) 128 | *returnSize = j; 129 | 130 | return result; 131 | } 132 | 133 | -------------------------------------------------------------------------------- /lisa/regpotential/base64.h: -------------------------------------------------------------------------------- 1 | /* Base64 encoding and decoding. 2 | * by Galt Barber */ 3 | 4 | #ifndef BASE64_H 5 | #define BASE64_H 6 | 7 | #define B64CHARS "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" 8 | 9 | char *base64Encode(char *input, size_t inplen); 10 | /* Use base64 to encode a string. Returns one long encoded 11 | * string which need to be freeMem'd. Note: big-endian algorithm. 12 | * For some applications you may need to break the base64 output 13 | * of this function into lines no longer than 76 chars. 14 | */ 15 | 16 | boolean base64Validate(char *input); 17 | /* Return true if input is valid base64. 18 | * Note that the input string is changed by 19 | * eraseWhiteSpace(). */ 20 | 21 | char *base64Decode(char *input, size_t *returnSize); 22 | /* Use base64 to decode a string. Return decoded 23 | * string which will be freeMem'd. Note: big-endian algorithm. 24 | * Call eraseWhiteSpace() and check for invalid input 25 | * before passing in input if needed. 26 | * Optionally set retun size for use with binary data. 27 | */ 28 | 29 | #endif /* BASE64_H */ 30 | 31 | -------------------------------------------------------------------------------- /lisa/regpotential/bigBedSummary.c: -------------------------------------------------------------------------------- 1 | /* bigBedSummary - Extract summary information from a bigBed file.. */ 2 | 3 | /* Copyright (C) 2011 The Regents of the University of California 4 | * See README in this or parent directory for licensing information. */ 5 | #include "common.h" 6 | #include "linefile.h" 7 | #include "hash.h" 8 | #include "options.h" 9 | #include "sqlNum.h" 10 | #include "bigBed.h" 11 | #include "asParse.h" 12 | #include "udc.h" 13 | #include "obscure.h" 14 | 15 | 16 | char *summaryType = "coverage"; 17 | 18 | void usage() 19 | /* Explain usage and exit. */ 20 | { 21 | errAbort( 22 | "bigBedSummary - Extract summary information from a bigBed file.\n" 23 | "usage:\n" 24 | " bigBedSummary file.bb chrom start end dataPoints\n" 25 | "Get summary data from bigBed for indicated region, broken into\n" 26 | "dataPoints equal parts. 
(Use dataPoints=1 for simple summary.)\n" 27 | "options:\n" 28 | " -type=X where X is one of:\n" 29 | " coverage - %% of region that is covered (default)\n" 30 | " mean - average depth of covered regions\n" 31 | " min - minimum depth of covered regions\n" 32 | " max - maximum depth of covered regions\n" 33 | " -fields - print out information on fields in file.\n" 34 | " If fields option is used, the chrom, start, end, dataPoints\n" 35 | " parameters may be omitted\n" 36 | " -udcDir=/dir/to/cache - place to put cache for remote bigBed/bigWigs\n" 37 | ); 38 | } 39 | 40 | static struct optionSpec options[] = { 41 | {"type", OPTION_STRING}, 42 | {"fields", OPTION_BOOLEAN}, 43 | {"udcDir", OPTION_STRING}, 44 | {NULL, 0}, 45 | }; 46 | 47 | void bigBedSummary(char *fileName, char *chrom, int start, int end, int dataPoints) 48 | /* bigBedSummary - Extract summary information from a bigBed file.. */ 49 | { 50 | /* Make up values array initialized to not-a-number. */ 51 | double nan0 = strtod("NaN", NULL); 52 | double summaryValues[dataPoints]; 53 | int i; 54 | for (i=0; idefinedFieldCount, bbi->fieldCount); 86 | struct asObject *as = bigBedAs(bbi); 87 | if (as != NULL) 88 | { 89 | struct asColumn *col; 90 | for (col = as->columnList; col != NULL; col = col->next) 91 | { 92 | printf("\t%s\t%s\n", col->name, col->comment); 93 | } 94 | } 95 | else 96 | { 97 | printf("No additional field information included.\n"); 98 | } 99 | } 100 | 101 | int main(int argc, char *argv[]) 102 | /* Process command line. */ 103 | { 104 | optionInit(&argc, argv, options); 105 | udcSetDefaultDir(optionVal("udcDir", udcDefaultDir())); 106 | if (optionExists("fields")) 107 | { 108 | if (argc < 2) 109 | usage(); 110 | bigBedFields(argv[1]); 111 | } 112 | else 113 | { 114 | summaryType = optionVal("type", summaryType); 115 | if (argc != 6) 116 | usage(); 117 | bigBedSummary(argv[1], argv[2], sqlUnsigned(argv[3]), sqlUnsigned(argv[4]), sqlUnsigned(argv[5])); 118 | } 119 | if (verboseLevel() > 1) 120 | printVmPeak(); 121 | return 0; 122 | } 123 | -------------------------------------------------------------------------------- /lisa/regpotential/bigWigSummary.c: -------------------------------------------------------------------------------- 1 | /* bigWigSummary - Extract summary information from a bigWig file.. */ 2 | 3 | /* Copyright (C) 2011 The Regents of the University of California 4 | * See README in this or parent directory for licensing information. */ 5 | #include "common.h" 6 | #include "linefile.h" 7 | #include "hash.h" 8 | #include "options.h" 9 | #include "sqlNum.h" 10 | #include "udc.h" 11 | #include "bigWig.h" 12 | #include "obscure.h" 13 | 14 | 15 | char *summaryType = "mean"; 16 | 17 | 18 | void usage() 19 | /* Explain usage and exit. */ 20 | { 21 | errAbort( 22 | "bigWigSummary - Extract summary information from a bigWig file.\n" 23 | "usage:\n" 24 | " bigWigSummary file.bigWig chrom start end dataPoints\n" 25 | "Get summary data from bigWig for indicated region, broken into\n" 26 | "dataPoints equal parts. 
(Use dataPoints=1 for simple summary.)\n" 27 | "\nNOTE: start and end coordinates are in BED format (0-based)\n\n" 28 | "options:\n" 29 | " -type=X where X is one of:\n" 30 | " mean - average value in region (default)\n" 31 | " min - minimum value in region\n" 32 | " max - maximum value in region\n" 33 | " std - standard deviation in region\n" 34 | " coverage - %% of region that is covered\n" 35 | " -udcDir=/dir/to/cache - place to put cache for remote bigBed/bigWigs\n" 36 | ); 37 | } 38 | 39 | static struct optionSpec options[] = { 40 | {"type", OPTION_STRING}, 41 | {"udcDir", OPTION_STRING}, 42 | {NULL, 0}, 43 | }; 44 | 45 | void bigWigSummary(char *bigWigFile, char *chrom, int start, int end, int dataPoints) 46 | /* bigWigSummary - Extract summary information from a bigWig file.. */ 47 | { 48 | struct bbiFile *bwf = bigWigFileOpen(bigWigFile); 49 | 50 | /* Make up values array initialized to not-a-number. */ 51 | double nan0 = strtod("NaN", NULL); 52 | double summaryValues[dataPoints]; 53 | int i; 54 | for (i=0; i 1) 89 | printVmPeak(); 90 | return 0; 91 | } 92 | -------------------------------------------------------------------------------- /lisa/regpotential/bits.h: -------------------------------------------------------------------------------- 1 | /* bits - handle operations on arrays of bits. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #ifndef BITS_H 7 | #define BITS_H 8 | 9 | #include "localmem.h" 10 | 11 | typedef unsigned char Bits; 12 | 13 | #define bitToByteSize(bitSize) ((bitSize+7)/8) 14 | /* Convert number of bits to number of bytes needed to store bits. */ 15 | 16 | Bits *bitAlloc(int bitCount); 17 | /* Allocate bits. */ 18 | 19 | Bits *bitRealloc(Bits *b, int bitCount, int newBitCount); 20 | /* Resize a bit array. If b is null, allocate a new array */ 21 | 22 | Bits *bitClone(Bits* orig, int bitCount); 23 | /* Clone bits. */ 24 | 25 | void bitFree(Bits **pB); 26 | /* Free bits. */ 27 | 28 | Bits *lmBitAlloc(struct lm *lm,int bitCount); 29 | // Allocate bits. Must supply local memory. 30 | 31 | Bits *lmBitRealloc(struct lm *lm, Bits *b, int bitCount, int newBitCount); 32 | // Resize a bit array. If b is null, allocate a new array. Must supply local memory. 33 | 34 | Bits *lmBitClone(struct lm *lm, Bits* orig, int bitCount); 35 | // Clone bits. Must supply local memory. 36 | 37 | void bitSetOne(Bits *b, int bitIx); 38 | /* Set a single bit. */ 39 | 40 | void bitClearOne(Bits *b, int bitIx); 41 | /* Clear a single bit. */ 42 | 43 | void bitSetRange(Bits *b, int startIx, int bitCount); 44 | /* Set a range of bits. */ 45 | 46 | boolean bitReadOne(Bits *b, int bitIx); 47 | /* Read a single bit. */ 48 | 49 | int bitCountRange(Bits *b, int startIx, int bitCount); 50 | /* Count number of bits set in range. */ 51 | 52 | int bitFindSet(Bits *b, int startIx, int bitCount); 53 | /* Find the index of the the next set bit. */ 54 | 55 | int bitFindClear(Bits *b, int startIx, int bitCount); 56 | /* Find the index of the the next clear bit. */ 57 | 58 | void bitClear(Bits *b, int bitCount); 59 | /* Clear many bits (possibly up to 7 beyond bitCount). */ 60 | 61 | void bitClearRange(Bits *b, int startIx, int bitCount); 62 | /* Clear a range of bits. */ 63 | 64 | void bitAnd(Bits *a, Bits *b, int bitCount); 65 | /* And two bitmaps. Put result in a. */ 66 | 67 | int bitAndCount(Bits *a, Bits *b, int bitCount); 68 | // Without altering 2 bitmaps, count the AND bits. 
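/* Added illustrative sketch (not part of the original kent source); it shows the intended
 * use of the declarations in this header, under the assumption that the bitCount arguments
 * are lengths rather than end indexes:
 *
 *   Bits *b = bitAlloc(1000);            // 1000-bit map, all clear
 *   bitSetRange(b, 10, 20);              // set bits 10..29
 *   boolean hit = bitReadOne(b, 15);     // TRUE
 *   int n = bitCountRange(b, 0, 1000);   // 20
 *   bitFree(&b);                         // free the map
 */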
69 | 70 | void bitOr(Bits *a, Bits *b, int bitCount); 71 | /* Or two bitmaps. Put result in a. */ 72 | 73 | int bitOrCount(Bits *a, Bits *b, int bitCount); 74 | // Without altering 2 bitmaps, count the OR'd bits. 75 | 76 | void bitXor(Bits *a, Bits *b, int bitCount); 77 | /* Xor two bitmaps. Put result in a. */ 78 | 79 | int bitXorCount(Bits *a, Bits *b, int bitCount); 80 | // Without altering 2 bitmaps, count the XOR'd bits. 81 | 82 | void bitNot(Bits *a, int bitCount); 83 | /* Flip all bits in a. */ 84 | 85 | void bitReverseRange(Bits *bits, int startIx, int bitCount); 86 | // Reverses bits in range (e.g. 110010 becomes 010011) 87 | 88 | void bitPrint(Bits *a, int startIx, int bitCount, FILE* out); 89 | /* Print part or all of bit map as a string of 0s and 1s. Mostly useful for 90 | * debugging */ 91 | 92 | void bitsOut(FILE* out, Bits *bits, int startIx, int bitCount, boolean onlyOnes); 93 | // Print part or all of bit map as a string of 0s and 1s. 94 | // If onlyOnes, enclose result in [] and use ' ' instead of '0'. 95 | 96 | Bits *bitsIn(struct lm *lm,char *bitString, int len); 97 | // Returns a bitmap from a string of 1s and 0s. Any non-zero, non-blank char sets a bit. 98 | // Returned bitmap is the size of len even if that is longer than the string. 99 | // Optionally supply local memory. Note does NOT handle enclosing []s printed with bitsOut(). 100 | 101 | extern int bitsInByte[256]; 102 | /* Lookup table for how many bits are set in a byte. */ 103 | 104 | void bitsInByteInit(); 105 | /* Initialize bitsInByte array. */ 106 | 107 | #endif /* BITS_H */ 108 | 109 | -------------------------------------------------------------------------------- /lisa/regpotential/colHash.c: -------------------------------------------------------------------------------- 1 | /* colHash - stuff for fast lookup of index given an 2 | * rgb value. */ 3 | 4 | /* Copyright (C) 2011 The Regents of the University of California 5 | * See README in this or parent directory for licensing information. */ 6 | 7 | #include "common.h" 8 | #include "memgfx.h" 9 | #include "colHash.h" 10 | 11 | 12 | struct colHash *colHashNew() 13 | /* Get a new color hash. */ 14 | { 15 | struct colHash *cHash; 16 | AllocVar(cHash); 17 | cHash->freeEl = cHash->elBuf; 18 | return cHash; 19 | } 20 | 21 | void colHashFree(struct colHash **pEl) 22 | /* Free up color hash. */ 23 | { 24 | freez(pEl); 25 | } 26 | 27 | struct colHashEl *colHashAdd(struct colHash *cHash, 28 | unsigned r, unsigned g, unsigned b, int ix) 29 | /* Add new element to color hash. */ 30 | { 31 | struct colHashEl *che = cHash->freeEl++, **pCel; 32 | che->col.r = r; 33 | che->col.g = g; 34 | che->col.b = b; 35 | che->ix = ix; 36 | pCel = &cHash->lists[colHashFunc(r,g,b)]; 37 | slAddHead(pCel, che); 38 | return che; 39 | } 40 | 41 | struct colHashEl *colHashLookup(struct colHash *cHash, 42 | unsigned r, unsigned g, unsigned b) 43 | /* Lookup value in hash. */ 44 | { 45 | struct colHashEl *che; 46 | for (che = cHash->lists[colHashFunc(r,g,b)]; che != NULL; che = che->next) 47 | if (che->col.r == r && che->col.g == g && che->col.b == b) 48 | return che; 49 | return NULL; 50 | } 51 | 52 | -------------------------------------------------------------------------------- /lisa/regpotential/colHash.h: -------------------------------------------------------------------------------- 1 | /* colHash - stuff for fast lookup of index given an 2 | * rgb value. 
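 * Illustrative use (example values only): register each palette color with
 * its index once, then map colors back to indexes through the hash:
 *   struct colHash *ch = colHashNew();
 *   colHashAdd(ch, 255, 0, 0, 3);                    -- palette slot 3 is red
 *   struct colHashEl *el = colHashLookup(ch, 255, 0, 0);
 *   int ix = (el != NULL) ? el->ix : -1;             -- -1 when color unknown
 *   colHashFree(&ch);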
*/ 3 | 4 | /* Copyright (C) 2002 The Regents of the University of California 5 | * See README in this or parent directory for licensing information. */ 6 | #ifndef COLHASH_H 7 | #define COLHASH_H 8 | 9 | #define colHashFunc(r,g,b) (r+g+g+b) 10 | 11 | struct colHashEl 12 | /* An element in a color hash. */ 13 | { 14 | struct colHashEl *next; /* Next in list. */ 15 | struct rgbColor col; /* Color RGB. */ 16 | int ix; /* Color Index. */ 17 | }; 18 | 19 | struct colHash 20 | /* A hash on RGB colors. */ 21 | { 22 | struct colHashEl *lists[4*256]; /* Hash chains. */ 23 | struct colHashEl elBuf[256]; /* Buffer of elements. */ 24 | struct colHashEl *freeEl; /* Pointer to next free element. */ 25 | }; 26 | 27 | struct colHash *colHashNew(); 28 | /* Get a new color hash. */ 29 | 30 | void colHashFree(struct colHash **pEl); 31 | /* Free up color hash. */ 32 | 33 | struct colHashEl *colHashAdd(struct colHash *cHash, 34 | unsigned r, unsigned g, unsigned b, int ix); 35 | /* Add new element to color hash. */ 36 | 37 | struct colHashEl *colHashLookup(struct colHash *cHash, 38 | unsigned r, unsigned g, unsigned b); 39 | /* Lookup value in hash. */ 40 | 41 | #endif /* COLHASH_H */ 42 | -------------------------------------------------------------------------------- /lisa/regpotential/dnaseq.h: -------------------------------------------------------------------------------- 1 | /* dnaSeq - stuff to manage DNA sequences. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #ifndef DNASEQ_H 7 | #define DNASEQ_H 8 | 9 | #ifndef DNAUTIL_H 10 | #include "dnautil.h" 11 | #endif 12 | 13 | #ifndef BITS_H 14 | #include "bits.h" 15 | #endif 16 | 17 | struct dnaSeq 18 | /* A dna sequence in one-character per base format. */ 19 | { 20 | struct dnaSeq *next; /* Next in list. */ 21 | char *name; /* Name of sequence. */ 22 | DNA *dna; /* Sequence base by base. */ 23 | int size; /* Size of sequence. */ 24 | Bits* mask; /* Repeat mask (optional) */ 25 | }; 26 | 27 | typedef struct dnaSeq bioSeq; /* Preferred use if either DNA or protein. */ 28 | typedef struct dnaSeq aaSeq; /* Preferred use if protein. */ 29 | 30 | struct dnaSeq *newDnaSeq(DNA *dna, int size, char *name); 31 | /* Create a new DNA seq. */ 32 | 33 | struct dnaSeq *cloneDnaSeq(struct dnaSeq *seq); 34 | /* Duplicate dna sequence in RAM. */ 35 | 36 | void freeDnaSeq(struct dnaSeq **pSeq); 37 | /* Free up DNA seq. */ 38 | #define dnaSeqFree freeDnaSeq 39 | 40 | void freeDnaSeqList(struct dnaSeq **pSeqList); 41 | /* Free up list of DNA sequences. */ 42 | #define dnaSeqFreeList freeDnaSeqList 43 | 44 | aaSeq *translateSeqN(struct dnaSeq *inSeq, unsigned offset, unsigned size, boolean stop); 45 | /* Return a translated sequence. Offset is position of first base to 46 | * translate. If size is 0 then use length of inSeq. */ 47 | 48 | aaSeq *translateSeq(struct dnaSeq *inSeq, unsigned offset, boolean stop); 49 | /* Return a translated sequence. Offset is position of first base to 50 | * translate. If stop is TRUE then stop at first stop codon. (Otherwise 51 | * represent stop codons as 'Z'). */ 52 | 53 | boolean seqIsDna(bioSeq *seq); 54 | /* Make educated guess whether sequence is DNA or protein. */ 55 | 56 | boolean seqIsLower(bioSeq *seq); 57 | /* Return TRUE if sequence is all lower case. */ 58 | 59 | bioSeq *whichSeqIn(bioSeq **seqs, int seqCount, char *letters); 60 | /* Figure out which if any sequence letters is in. 
*/ 61 | 62 | Bits *maskFromUpperCaseSeq(bioSeq *seq); 63 | /* Allocate a mask for sequence and fill it in based on 64 | * sequence case. */ 65 | 66 | struct hash *dnaSeqHash(struct dnaSeq *seqList); 67 | /* Return hash of sequences keyed by name. */ 68 | 69 | int dnaSeqCmpName(const void *va, const void *vb); 70 | /* Compare to sort based on sequence name. */ 71 | 72 | #endif /* DNASEQ_H */ 73 | 74 | -------------------------------------------------------------------------------- /lisa/regpotential/dystring.h: -------------------------------------------------------------------------------- 1 | /* dystring - dynamically resizing string. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #ifndef DYSTRING_H /* Wrapper to avoid including this twice. */ 7 | #define DYSTRING_H 8 | 9 | #include "common.h" 10 | 11 | struct dyString 12 | /* Dynamically resizable string that you can do formatted 13 | * output to. */ 14 | { 15 | struct dyString *next; /* Next in list. */ 16 | char *string; /* Current buffer. */ 17 | int bufSize; /* Size of buffer. */ 18 | int stringSize; /* Size of string. */ 19 | }; 20 | 21 | struct dyString *newDyString(int initialBufSize); 22 | /* Allocate dynamic string with initial buffer size. (Pass zero for default) */ 23 | 24 | #define dyStringNew newDyString 25 | 26 | void freeDyString(struct dyString **pDs); 27 | /* Free up dynamic string. */ 28 | 29 | #define dyStringFree(a) freeDyString(a); 30 | 31 | void freeDyStringList(struct dyString **pDs); 32 | /* Free up a list of dynamic strings */ 33 | 34 | #define dyStringFreeList(a) freeDyStringList(a); 35 | 36 | void dyStringAppend(struct dyString *ds, char *string); 37 | /* Append zero terminated string to end of dyString. */ 38 | 39 | void dyStringAppendN(struct dyString *ds, char *string, int stringSize); 40 | /* Append string of given size to end of string. */ 41 | 42 | char dyStringAppendC(struct dyString *ds, char c); 43 | /* Append char to end of string. */ 44 | 45 | void dyStringAppendMultiC(struct dyString *ds, char c, int n); 46 | /* Append N copies of char to end of string. */ 47 | 48 | void dyStringAppendEscapeQuotes(struct dyString *dy, char *string, 49 | char quot, char esc); 50 | /* Append escaped-for-quotation version of string to dy. */ 51 | 52 | #define dyStringWriteOne(dy, var) dyStringAppendN(dy, (char *)(&var), sizeof(var)) 53 | /* Write one variable (binary!) to dyString - for cases when want to treat string like 54 | * a file stream. */ 55 | 56 | void dyStringVaPrintf(struct dyString *ds, char *format, va_list args); 57 | /* VarArgs Printf to end of dyString. */ 58 | 59 | void dyStringPrintf(struct dyString *ds, char *format, ...) 60 | /* Printf to end of dyString. */ 61 | #ifdef __GNUC__ 62 | __attribute__((format(printf, 2, 3))) 63 | #endif 64 | ; 65 | 66 | struct dyString *dyStringCreate(char *format, ...); 67 | /* Create a dyString with a printf style initial content */ 68 | 69 | #define dyStringClear(ds) (ds->string[0] = ds->stringSize = 0) 70 | /* Clear string. */ 71 | 72 | struct dyString * dyStringSub(char *orig, char *in, char *out); 73 | /* Make up a duplicate of orig with all occurences of in substituted 74 | * with out. */ 75 | 76 | void dyStringBumpBufSize(struct dyString *ds, int size); 77 | /* Force dyString buffer to be at least given size. */ 78 | 79 | char *dyStringCannibalize(struct dyString **pDy); 80 | /* Kill dyString, but return the string it is wrapping 81 | * (formerly dy->string). 
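 * A typical build-and-detach pattern (illustrative, using only the
 * routines declared above):
 *   struct dyString *dy = dyStringNew(0);
 *   dyStringPrintf(dy, "%s:%d-%d", "chr1", 100, 200);
 *   char *region = dyStringCannibalize(&dy);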
This should be free'd at your 82 | * convenience. */ 83 | 84 | #define dyStringContents(ds) (ds)->string 85 | /* return raw string. */ 86 | 87 | #define dyStringLen(ds) ds->stringSize 88 | /* return raw string length. */ 89 | 90 | void dyStringResize(struct dyString *ds, int newSize); 91 | /* resize a string, if the string expands, blanks are appended */ 92 | 93 | void dyStringQuoteString(struct dyString *dy, char quotChar, char *text); 94 | /* Append quotChar-quoted text (with any internal occurrences of quotChar 95 | * \-escaped) onto end of dy. */ 96 | 97 | #endif /* DYSTRING_H */ 98 | 99 | -------------------------------------------------------------------------------- /lisa/regpotential/errAbort.h: -------------------------------------------------------------------------------- 1 | /* ErrAbort.h - our error handler. 2 | * 3 | * This maintains two stacks - a warning message printer 4 | * stack, and a "abort handler" stack. 5 | * 6 | * By default the warnings will go to stderr, and 7 | * aborts will exit the program. You can push a 8 | * function on to the appropriate stack to change 9 | * this behavior. The top function on the stack 10 | * gets called. 11 | * 12 | * Most functions in this library will call errAbort() 13 | * if they run out of memory. 14 | * 15 | * This file is copyright 2002 Jim Kent, but license is hereby 16 | * granted for all use - public, private or commercial. */ 17 | 18 | #ifndef ERRABORT_H 19 | #define ERRABORT_H 20 | 21 | boolean isErrAbortInProgress(); 22 | /* Flag to indicate that an error abort is in progress. 23 | * Needed so that a warn handler can tell if it's really 24 | * being called because of a warning or an error. */ 25 | 26 | void errAbort(char *format, ...) 27 | /* Abort function, with optional (printf formatted) error message. */ 28 | #if defined(__GNUC__) 29 | __attribute__((format(printf, 1, 2))) 30 | #endif 31 | ; 32 | 33 | void vaErrAbort(char *format, va_list args); 34 | /* Abort function, with optional (vprintf formatted) error message. */ 35 | 36 | void errnoAbort(char *format, ...) 37 | /* Prints error message from UNIX errno first, then does errAbort. */ 38 | #if defined(__GNUC__) 39 | __attribute__((format(printf, 1, 2))) 40 | #endif 41 | ; 42 | 43 | typedef void (*AbortHandler)(); 44 | /* Function that can abort. */ 45 | 46 | void pushAbortHandler(AbortHandler handler); 47 | /* Set abort handler */ 48 | 49 | void popAbortHandler(); 50 | /* Revert to old abort handler. */ 51 | 52 | void noWarnAbort(); 53 | /* Abort without message. */ 54 | 55 | void pushDebugAbort(); 56 | /* Push abort handler that will invoke debugger. */ 57 | 58 | void vaWarn(char *format, va_list args); 59 | /* Call top of warning stack to issue warning. */ 60 | 61 | void warn(char *format, ...) 62 | /* Issue a warning message. */ 63 | #if defined(__GNUC__) 64 | __attribute__((format(printf, 1, 2))) 65 | #endif 66 | ; 67 | 68 | void errnoWarn(char *format, ...) 69 | /* Prints error message from UNIX errno first, then does rest of warning. */ 70 | #if defined(__GNUC__) 71 | __attribute__((format(printf, 1, 2))) 72 | #endif 73 | ; 74 | 75 | typedef void (*WarnHandler)(char *format, va_list args); 76 | /* Function that can warn. */ 77 | 78 | void pushWarnHandler(WarnHandler handler); 79 | /* Set warning handler */ 80 | 81 | void popWarnHandler(); 82 | /* Revert to old warn handler. */ 83 | 84 | void pushWarnAbort(); 85 | /* Push handler that will abort on warnings. */ 86 | 87 | void pushSilentWarnHandler(); 88 | /* Set warning handler to be quiet. 
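 * Illustrative use around a block whose warnings should be suppressed:
 *   pushSilentWarnHandler();
 *   someNoisyRoutine();      -- placeholder for any call that may warn()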
Do a popWarnHandler to restore. */ 89 | 90 | void errAbortDebugnPushPopErr(); 91 | /* generate stack dump if there is a error in the push/pop functions */ 92 | 93 | #endif /* ERRABORT_H */ 94 | -------------------------------------------------------------------------------- /lisa/regpotential/gfxPoly.c: -------------------------------------------------------------------------------- 1 | /* gfxPoly - two dimensional polygon. */ 2 | 3 | #include "common.h" 4 | #include "gfxPoly.h" 5 | 6 | 7 | struct gfxPoly *gfxPolyNew() 8 | /* Create new (empty) polygon */ 9 | { 10 | struct gfxPoly *poly; 11 | AllocVar(poly); 12 | return poly; 13 | } 14 | 15 | void gfxPolyFree(struct gfxPoly **pPoly) 16 | /* Free up resources associated with polygon */ 17 | { 18 | struct gfxPoly *poly = *pPoly; 19 | if (poly != NULL) 20 | { 21 | if (poly->lastPoint != NULL) 22 | { 23 | poly->lastPoint->next = NULL; 24 | slFreeList(&poly->ptList); 25 | } 26 | freez(pPoly); 27 | } 28 | } 29 | 30 | void gfxPolyAddPoint(struct gfxPoly *poly, int x, int y) 31 | /* Add point to polygon. */ 32 | { 33 | struct gfxPoint *pt; 34 | poly->ptCount += 1; 35 | AllocVar(pt); 36 | pt->x = x; 37 | pt->y = y; 38 | if (poly->ptList == NULL) 39 | { 40 | poly->ptList = poly->lastPoint = pt; 41 | pt->next = pt; 42 | } 43 | else 44 | { 45 | poly->lastPoint->next = pt; 46 | pt->next = poly->ptList; 47 | poly->lastPoint = pt; 48 | } 49 | } 50 | 51 | -------------------------------------------------------------------------------- /lisa/regpotential/gfxPoly.h: -------------------------------------------------------------------------------- 1 | /* gfxPoly - two dimensional polygon. */ 2 | 3 | #ifndef GFXPOLY_H 4 | #define GFXPOLY_H 5 | 6 | struct gfxPoint 7 | /* A two-dimensional point, typically in pixel coordinates. */ 8 | { 9 | struct gfxPoint *next; 10 | int x, y; /* Position */ 11 | }; 12 | 13 | struct gfxPoly 14 | /* A two-dimensional polygon */ 15 | { 16 | struct gfxPoly *next; 17 | int ptCount; /* Number of points. */ 18 | struct gfxPoint *ptList; /* First point in list, which is circular. */ 19 | struct gfxPoint *lastPoint; /* Last point in list. */ 20 | }; 21 | 22 | struct gfxPoly *gfxPolyNew(); 23 | /* Create new (empty) polygon */ 24 | 25 | void gfxPolyFree(struct gfxPoly **pPoly); 26 | /* Free up resources associated with polygon */ 27 | 28 | void gfxPolyAddPoint(struct gfxPoly *poly, int x, int y); 29 | /* Add point to polygon. */ 30 | 31 | #endif /* GFXPOLY_H */ 32 | -------------------------------------------------------------------------------- /lisa/regpotential/hex.c: -------------------------------------------------------------------------------- 1 | /* Handy hexidecimal functions 2 | * If you don't want to use printf 3 | */ 4 | 5 | /* Copyright (C) 2013 The Regents of the University of California 6 | * See README in this or parent directory for licensing information. */ 7 | 8 | #include "common.h" 9 | 10 | char hexTab[16] = {'0', '1', '2', '3', '4', '5', '6', '7', 11 | '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', }; 12 | /* Convert 0-15 to a hex char */ 13 | 14 | 15 | char nibbleToHex(unsigned char n) 16 | /* convert nibble to hexidecimal character. 0 <= n <= 15. */ 17 | { 18 | return hexTab[n]; 19 | } 20 | 21 | void byteToHex(unsigned char n, char *hex) 22 | /* convert byte to hexidecimal characters. 0 <= n <= 255. */ 23 | { 24 | *hex++ = hexTab[n >> 4]; 25 | *hex++ = hexTab[n & 0xf]; 26 | } 27 | 28 | char *byteToHexString(unsigned char n) 29 | /* convert byte to hexidecimal string. 0 <= n <= 255. 
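 * For example, byteToHexString(255) returns the two-character string "ff";
 * the result is a freshly cloneString'd copy owned by the caller.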
*/ 30 | { 31 | char hex[3]; 32 | byteToHex(n, hex); 33 | hex[2] = 0; 34 | return cloneString(hex); 35 | } 36 | 37 | /* And the reverse functions: */ 38 | 39 | char hexToNibble(char n) 40 | /* convert hexidecimal character to nibble. 0-9a-f. */ 41 | { 42 | return n - ( n <= '9' ? '0' : ('a'-10) ); 43 | } 44 | 45 | 46 | unsigned char hexToByte(char *hex) 47 | /* convert byte to hexidecimal characters. 0 <= n <= 255. */ 48 | { 49 | unsigned char n = hexToNibble(*hex++); 50 | n <<= 4; 51 | n += hexToNibble(*hex++); 52 | return n; 53 | } 54 | 55 | 56 | void hexBinaryString(unsigned char *in, int inSize, char *out, int outSize) 57 | /* Convert possibly long binary string to hex string. 58 | * Out size needs to be at least 2x inSize+1 */ 59 | { 60 | assert(inSize * 2 +1 <= outSize); 61 | while (--inSize >= 0) 62 | { 63 | unsigned char c = *in++; 64 | *out++ = hexTab[c>>4]; 65 | *out++ = hexTab[c&0xf]; 66 | } 67 | *out = 0; 68 | } 69 | 70 | -------------------------------------------------------------------------------- /lisa/regpotential/hex.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef HEX_H 3 | #define HEX_H 4 | 5 | char nibbleToHex(char n); 6 | /* convert nibble to hexidecimal character. 0 <= n <= 15. */ 7 | 8 | void byteToHex(unsigned char n, char *hex); 9 | /* convert byte to two hexidecimal characters. 0 <= n <= 255. */ 10 | 11 | char *byteToHexString(unsigned char n); 12 | /* convert byte to hexidecimal string. 0 <= n <= 255. */ 13 | 14 | void hexBinaryString(unsigned char *in, int inSize, char *out, int outSize); 15 | /* Convert possibly long binary string to hex string. 16 | * Out size needs to be at least 2x inSize+1 */ 17 | 18 | /* Reverse Functions */ 19 | 20 | char hexToNibble(char n); 21 | /* convert hexidecimal character to nibble. 0-9a-f. */ 22 | 23 | unsigned char hexToByte(char *hex); 24 | /* convert byte to hexidecimal characters. 0 <= n <= 255. */ 25 | 26 | #endif /* HEX_H */ 27 | 28 | -------------------------------------------------------------------------------- /lisa/regpotential/hmmstats.c: -------------------------------------------------------------------------------- 1 | /* hmmstats.c - Stuff for doing statistical analysis in general and 2 | * hidden Markov models in particular. 3 | * 4 | * This file is copyright 2002 Jim Kent, but license is hereby 5 | * granted for all use - public, private or commercial. */ 6 | 7 | #include "common.h" 8 | #include "hmmstats.h" 9 | 10 | 11 | int scaledLog(double val) 12 | /* Return scaled log of val. */ 13 | { 14 | return round(logScaleFactor * log(val)); 15 | } 16 | 17 | double oneOverSqrtTwoPi = 0.39894228; 18 | 19 | double simpleGaussean(double x) 20 | /* Gaussean distribution with standard deviation 1 and mean 0. */ 21 | { 22 | return oneOverSqrtTwoPi * exp(-0.5*x*x ); 23 | } 24 | 25 | double gaussean(double x, double mean, double sd) 26 | /* Gaussean distribution with mean and standard deviation at point x */ 27 | { 28 | x -= mean; 29 | x /= sd; 30 | return oneOverSqrtTwoPi * exp(-0.5*x*x) / sd; 31 | } 32 | 33 | double calcVarianceFromSums(double sum, double sumSquares, bits64 n) 34 | /* Calculate variance. */ 35 | { 36 | double var = sumSquares - sum*sum/n; 37 | if (n > 1) 38 | var /= n-1; 39 | return var; 40 | } 41 | 42 | double calcStdFromSums(double sum, double sumSquares, bits64 n) 43 | /* Calculate standard deviation. 
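 * Computed as the square root of calcVarianceFromSums(), i.e. the sample
 * (n-1) formula: var = (sumSquares - sum*sum/n) / (n-1) for n > 1.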
*/ 44 | { 45 | return sqrt(calcVarianceFromSums(sum, sumSquares, n)); 46 | } 47 | 48 | 49 | -------------------------------------------------------------------------------- /lisa/regpotential/hmmstats.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * Copyright (C) 2000 Jim Kent. This source code may be freely used * 3 | * for personal, academic, and non-profit purposes. Commercial use * 4 | * permitted only by explicit agreement with Jim Kent (jim_kent@pacbell.net) * 5 | *****************************************************************************/ 6 | /* hmmstats.h - Stuff for doing statistical analysis in general and 7 | * hidden Markov models in particular. */ 8 | #ifndef HMMSTATS_H 9 | #define HMMSTATS_H 10 | 11 | int scaledLog(double val); 12 | /* Return scaled log of val. */ 13 | 14 | #define logScaleFactor 1000 15 | /* Amount we scale logs by. */ 16 | 17 | double simpleGaussean(double x); 18 | /* Gaussean distribution with standard deviation 1 and mean 0. */ 19 | 20 | double gaussean(double x, double mean, double sd); 21 | /* Gaussean distribution with mean and standard deviation at point x */ 22 | 23 | double calcVarianceFromSums(double sum, double sumSquares, bits64 n); 24 | /* Calculate variance. */ 25 | 26 | double calcStdFromSums(double sum, double sumSquares, bits64 n); 27 | /* Calculate standard deviation. */ 28 | 29 | #endif /* HMMSTATS_H */ 30 | 31 | -------------------------------------------------------------------------------- /lisa/regpotential/https.h: -------------------------------------------------------------------------------- 1 | /* Connect via https. */ 2 | 3 | #ifndef NET_HTTPS_H 4 | #define NET_HTTPS_H 5 | 6 | int netConnectHttps(char *hostName, int port); 7 | /* Return socket for https connection with server or -1 if error. */ 8 | 9 | #endif//ndef NET_HTTPS_H 10 | -------------------------------------------------------------------------------- /lisa/regpotential/intExp.c: -------------------------------------------------------------------------------- 1 | /* Below is the worlds sleaziest little numerical expression 2 | * evaluator. Used to do only ints, now does doubles as well. 3 | * 4 | * This file is copyright 2002 Jim Kent, but license is hereby 5 | * granted for all use - public, private or commercial. */ 6 | 7 | #include "common.h" 8 | #include "kxTok.h" 9 | 10 | 11 | static struct kxTok *tok; 12 | 13 | #define nextTok() (tok = tok->next) 14 | 15 | #ifdef DEBUG 16 | static void nextTok() 17 | /* Advance to next token. */ 18 | { 19 | if (tok == NULL) 20 | printf("(null)"); 21 | else 22 | { 23 | printf("'%s' -> ", tok->string); 24 | if (tok->next == NULL) 25 | printf("(null)\n"); 26 | else 27 | printf("'%s'\n", tok->next->string); 28 | } 29 | tok = tok->next; 30 | } 31 | #endif /* DEBUG */ 32 | 33 | 34 | static double expression(); 35 | /* Forward declaration of main expression handler. */ 36 | 37 | static double number() 38 | /* Return number. */ 39 | { 40 | double val; 41 | if (tok == NULL) 42 | errAbort("Parse error in numerical expression"); 43 | if (!isdigit(tok->string[0])) 44 | errAbort("Expecting number, got %s", tok->string); 45 | val = atof(tok->string); 46 | nextTok(); 47 | return val; 48 | } 49 | 50 | static double atom() 51 | /* Return parenthetical expression or number. 
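 * The evaluator is a small recursive-descent parser; precedence from
 * highest to lowest is atom (numbers, parenthesized expressions), unary
 * minus, mulDiv, then addSub, so for example doubleExp("2*(3+4)")
 * evaluates to 14.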
*/ 52 | { 53 | double val; 54 | if (tok->type == kxtOpenParen) 55 | { 56 | nextTok(); 57 | val = expression(); 58 | if (tok->type == kxtCloseParen) 59 | { 60 | nextTok(); 61 | return val; 62 | } 63 | else 64 | { 65 | errAbort("Unmatched parenthesis"); 66 | return 0; 67 | } 68 | } 69 | else 70 | return number(); 71 | } 72 | 73 | 74 | static double uMinus() 75 | /* Unary minus. */ 76 | { 77 | double val; 78 | if (tok->type == kxtSub) 79 | { 80 | nextTok(); 81 | val = -atom(); 82 | return val; 83 | } 84 | else 85 | return atom(); 86 | } 87 | 88 | static double mulDiv() 89 | /* Multiplication or division. */ 90 | { 91 | double val = uMinus(); 92 | for (;;) 93 | { 94 | if (tok->type == kxtMul) 95 | { 96 | nextTok(); 97 | val *= uMinus(); 98 | } 99 | else if (tok->type == kxtDiv) 100 | { 101 | nextTok(); 102 | val /= uMinus(); 103 | } 104 | else 105 | break; 106 | } 107 | return val; 108 | } 109 | 110 | static double addSub() 111 | /* Addition or subtraction. */ 112 | { 113 | double val; 114 | val = mulDiv(); 115 | for (;;) 116 | { 117 | if (tok->type == kxtAdd) 118 | { 119 | nextTok(); 120 | val += mulDiv(); 121 | } 122 | else if (tok->type == kxtSub) 123 | { 124 | nextTok(); 125 | val -= mulDiv(); 126 | } 127 | else 128 | break; 129 | } 130 | return val; 131 | } 132 | 133 | static double expression() 134 | /* Wraps around lowest level of expression. */ 135 | { 136 | return addSub(); 137 | } 138 | 139 | double doubleExp(char *text) 140 | /* Convert text to double expression and evaluate. */ 141 | { 142 | double val; 143 | struct kxTok *tokList = tok = kxTokenize(text, FALSE); 144 | val = expression(); 145 | slFreeList(&tokList); 146 | return val; 147 | } 148 | 149 | int intExp(char *text) 150 | /* Convert text to int expression and evaluate. */ 151 | { 152 | return round(doubleExp(text)); 153 | } 154 | -------------------------------------------------------------------------------- /lisa/regpotential/internet.h: -------------------------------------------------------------------------------- 1 | /* internet - some stuff for routines that use the internet 2 | * and aren't afraid to include some internet specific structures 3 | * and the like. See also net for stuff that is higher level. */ 4 | 5 | #ifndef INTERNET_H 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | bits32 internetHostIp(char *hostName); 12 | /* Get IP v4 address (in host byte order) for hostName. 13 | * Warn and return 0 if there's a problem. */ 14 | 15 | boolean internetFillInAddress(char *hostName, int port, 16 | struct sockaddr_in *address); 17 | /* Fill in address. Warn and return FALSE if can't. */ 18 | 19 | boolean internetIpToDottedQuad(bits32 ip, char dottedQuad[17]); 20 | /* Convert IP4 address in host byte order to dotted quad 21 | * notation. Warn and return FALSE if there's a 22 | * problem. */ 23 | 24 | boolean internetDottedQuadToIp(char *dottedQuad, bits32 *retIp); 25 | /* Convert dotted quad format address to IP4 address in 26 | * host byte order. Warn and return FALSE if there's a 27 | * problem. */ 28 | 29 | boolean internetIsDottedQuad(char *s); 30 | /* Returns TRUE if it looks like s is a dotted quad. */ 31 | 32 | void internetParseDottedQuad(char *dottedQuad, unsigned char quad[4]); 33 | /* Parse dotted quads into quad */ 34 | 35 | void internetUnpackIp(bits32 packed, unsigned char unpacked[4]); 36 | /* Convert from 32 bit to 4-byte format with most significant 37 | * byte first. 
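 * For example, the packed value 0x01020304 unpacks to {1, 2, 3, 4}
 * (most significant byte in unpacked[0]).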
*/ 38 | 39 | boolean internetIpInSubnet(unsigned char unpackedIp[4], 40 | unsigned char subnet[4]); 41 | /* Return true if unpacked IP address is in subnet. */ 42 | 43 | #endif /* INTERNET_H */ 44 | -------------------------------------------------------------------------------- /lisa/regpotential/kxTok.h: -------------------------------------------------------------------------------- 1 | /* kxTok - quick little tokenizer for stuff first 2 | * loaded into memory. Originally developed for 3 | * "Key eXpression" evaluator. 4 | * 5 | * This file is copyright 2002 Jim Kent, but license is hereby 6 | * granted for all use - public, private or commercial. */ 7 | 8 | #ifndef KXTOK_H 9 | #define KXTOK_H 10 | 11 | enum kxTokType 12 | { 13 | kxtEnd, 14 | kxtString, 15 | kxtWildString, 16 | kxtEquals, 17 | kxtGT, /* Greater Than */ 18 | kxtGE, /* Greater Than or Equal */ 19 | kxtLT, /* Less Than */ 20 | kxtLE, /* Less Than or Equal */ 21 | kxtAnd, 22 | kxtOr, 23 | kxtXor, 24 | kxtNot, 25 | kxtOpenParen, 26 | kxtCloseParen, 27 | kxtAdd, 28 | kxtSub, 29 | kxtDiv, 30 | kxtMul, 31 | kxtDot, 32 | kxtMod, 33 | kxtPunct, 34 | }; 35 | 36 | struct kxTok 37 | /* A key expression token. Input text is tokenized 38 | * into a list of these. */ 39 | { 40 | struct kxTok *next; 41 | enum kxTokType type; 42 | bool spaceBefore; /* True if there is a space before */ 43 | char string[1]; /* Allocated at run time */ 44 | }; 45 | 46 | struct kxTok *kxTokenize(char *text, boolean wildAst); 47 | /* Convert text to stream of tokens. If 'wildAst' is 48 | * TRUE then '*' character will be treated as wildcard 49 | * rather than multiplication sign. */ 50 | 51 | struct kxTok *kxTokenizeFancy(char *text, boolean wildAst, 52 | boolean wildPercent, boolean includeHyphen); 53 | /* Convert text to stream of tokens. If 'wildAst' is 54 | * TRUE then '*' character will be treated as wildcard 55 | * rather than multiplication sign. 56 | * If wildPercent is TRUE then the '%' character will be treated as a 57 | * wildcard (as in SQL) rather than a modulo (kxtMod) or percent sign. 58 | * If includeHyphen is TRUE then a '-' character in the middle of a String 59 | * token will be treated as a hyphen (part of the String token) instead of 60 | * a new kxtSub token. */ 61 | 62 | void kxTokIncludeQuotes(boolean val); 63 | /* Pass in TRUE if kxTok should include quote characters in string tokens. */ 64 | 65 | #endif /* KXTOK_K */ 66 | -------------------------------------------------------------------------------- /lisa/regpotential/localmem.h: -------------------------------------------------------------------------------- 1 | /* LocalMem.h - local memory routines. 2 | * 3 | * These routines are meant for the sort of scenario where 4 | * a lot of little to medium size pieces of memory are 5 | * allocated, and then disposed of all at once. 6 | * 7 | * This file is copyright 2002 Jim Kent, but license is hereby 8 | * granted for all use - public, private or commercial. */ 9 | 10 | #ifndef LOCALMEM_H 11 | #define LOCALMEM_H 12 | 13 | struct lm *lmInit(int blockSize); 14 | /* Create a local memory pool. Parameters are: 15 | * blockSize - how much system memory to allocate at a time. Can 16 | * pass in zero and a reasonable default will be used. 17 | */ 18 | 19 | void lmCleanup(struct lm **pLm); 20 | /* Clean up a local memory pool. 
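 * Frees everything allocated from the pool in one call (the usage model
 * described at the top of this file); pointers obtained from lmAlloc()
 * must not be freed individually.  Typical lifecycle (illustrative):
 *   struct lm *lm = lmInit(0);
 *   char *word = lmCloneString(lm, "chr1");
 *   ...                       -- allocate freely from the pool
 *   lmCleanup(&lm);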
*/ 21 | 22 | size_t lmAvailable(struct lm *lm); 23 | // Returns currently available memory in pool 24 | 25 | size_t lmSize(struct lm *lm); 26 | // Returns current size of pool, even for memory already allocated 27 | 28 | void *lmAlloc(struct lm *lm, size_t size); 29 | /* Allocate memory from local pool. */ 30 | 31 | void *lmAllocMoreMem(struct lm *lm, void *pt, size_t oldSize, size_t newSize); 32 | /* Adjust memory size on a block, possibly relocating it. If block is grown, 33 | * new memory is zeroed. NOTE: in RARE cases, same pointer may be returned. */ 34 | 35 | void *lmCloneMem(struct lm *lm, void *pt, size_t size); 36 | /* Return a local mem copy of memory block. */ 37 | 38 | 39 | char *lmCloneStringZ(struct lm *lm, char *string, int size); 40 | /* Return local mem copy of string of given size, adding null terminator. */ 41 | 42 | char *lmCloneString(struct lm *lm, char *string); 43 | /* Return local mem copy of string. */ 44 | 45 | char *lmCloneFirstWord(struct lm *lm, char *line); 46 | /* Clone first word in line */ 47 | 48 | char *lmCloneSomeWord(struct lm *lm, char *line, int wordIx); 49 | /* Return a clone of the given space-delimited word within line. Returns NULL if 50 | * not that many words in line. */ 51 | 52 | struct slName *lmSlName(struct lm *lm, char *name); 53 | /* Return slName in memory. */ 54 | 55 | #define lmAllocVar(lm, pt) (pt = lmAlloc(lm, sizeof(*pt))); 56 | /* Shortcut to allocating a single variable in local mem and 57 | * assigning pointer to it. */ 58 | 59 | #define lmCloneVar(lm, pt) lmCloneMem(lm, pt, sizeof((pt)[0])) 60 | /* Allocate copy of a structure. */ 61 | 62 | #define lmAllocArray(lm, pt, size) (pt = lmAlloc(lm, sizeof(*pt) * (size))) 63 | /* Shortcut to allocating an array in local mem and 64 | * assigning pointer to it. 
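 * For example, given double *vals, lmAllocArray(lm, vals, 100) reserves
 * room for 100 doubles from the pool and points vals at it.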
*/ 65 | 66 | #endif//ndef LOCALMEM_H 67 | -------------------------------------------------------------------------------- /lisa/regpotential/makefile: -------------------------------------------------------------------------------- 1 | O=bPlusTree.o bbiRead.o bbiWrite.o bits.o cirTree.o common.o dystring.o hash.o localmem.o udc.o portimpl.o memalloc.o dlist.o linefile.o pipeline.o verbose.o options.o net.o internet.o https.o base64.o errAbort.o osunix.o hmmstats.o cheapcgi.o mime.o obscure.o hex.o wildcmp.o intExp.o kxTok.o servBrcMcw.o servcl.o servCrunx.o servcis.o servmsII.o servpws.o zlibFace.o sqlNum.o bwgQuery.o 2 | 3 | 4 | CC=gcc 5 | ifeq (${MACHTYPE},) 6 | MACHTYPE:=$(shell uname -m) 7 | #MACHTYPE:=$(shell arch) 8 | # $(info MACHTYPE was empty, set to: ${MACHTYPE}) 9 | endif 10 | ifneq (,$(findstring -,$(MACHTYPE))) 11 | # $(info MACHTYPE has - sign ${MACHTYPE}) 12 | MACHTYPE:=$(shell uname -m) 13 | # $(info MACHTYPE has - sign set to: ${MACHTYPE}) 14 | endif 15 | 16 | HG_DEFS=-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_GNU_SOURCE -DMACHTYPE_${MACHTYPE} 17 | #HG_DEFS=-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_GNU_SOURCE 18 | HG_INC=-I../inc -I../../inc -I../../../inc -I../../../../inc -I../../../../../inc 19 | 20 | # to check for Mac OSX Darwin specifics: 21 | UNAME_S := $(shell uname -s) 22 | # to check for builds on hgwdev 23 | FULLWARN = $(shell uname -n) 24 | 25 | # autodetect if openssl is installed 26 | ifeq (${SSLDIR},) 27 | SSLDIR = /usr/include/openssl 28 | endif 29 | ifeq (${USE_SSL},) 30 | ifneq ($(wildcard ${SSLDIR}),) 31 | USE_SSL=1 32 | endif 33 | endif 34 | 35 | 36 | # libssl: disabled by default 37 | ifeq (${USE_SSL},1) 38 | ifneq (${SSL_DIR}, "/usr/include/openssl") 39 | ifneq ($(UNAME_S),Darwin) 40 | L+=-L${SSL_DIR}/lib 41 | endif 42 | HG_INC+=-I${SSL_DIR}/include 43 | endif 44 | # on hgwdev, already using the static library with mysqllient. 
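# (on hgwdev the static libssl.a/libcrypto.a archives plus -lkrb5 are linked;
# elsewhere the shared -lssl -lcrypto libraries are used)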
45 | ifeq (${FULLWARN},hgwdev) 46 | L+=/usr/lib64/libssl.a /usr/lib64/libcrypto.a -lkrb5 47 | else 48 | L+=-lssl -lcrypto 49 | endif 50 | HG_DEFS+=-DUSE_SSL 51 | endif 52 | 53 | # pass through COREDUMP 54 | ifneq (${COREDUMP},) 55 | HG_DEFS+=-DCOREDUMP 56 | endif 57 | 58 | 59 | SYS = $(shell uname -s) 60 | #SYS = $(shell arch) 61 | 62 | ifeq (${HG_WARN},) 63 | ifeq (${SYS},Darwin) 64 | HG_WARN = -Wall -Wno-unused-variable -Wno-deprecated-declarations 65 | HG_WARN_UNINIT= 66 | else 67 | ifeq (${SYS},SunOS) 68 | HG_WARN = -Wall -Wformat -Wimplicit -Wreturn-type 69 | HG_WARN_UNINIT=-Wuninitialized 70 | else 71 | ifeq (${FULLWARN},hgwdev) 72 | HG_WARN = -Wall -Werror -Wformat -Wformat-security -Wimplicit -Wreturn-type -Wempty-body 73 | HG_WARN_UNINIT=-Wuninitialized 74 | else 75 | HG_WARN = -Wall -Wformat -Wimplicit -Wreturn-type 76 | HG_WARN_UNINIT=-Wuninitialized 77 | endif 78 | endif 79 | endif 80 | # -Wuninitialized generates a warning without optimization 81 | ifeq ($(findstring -O,${COPT}),-O) 82 | HG_WARN += ${HG_WARN_UNINIT} 83 | endif 84 | endif 85 | 86 | # this is to hack around many make files not including HG_WARN in 87 | # the link line 88 | CFLAGS += ${HG_WARN} 89 | 90 | ifeq (${SCRIPTS},) 91 | SCRIPTS=${HOME}/bin/scripts 92 | endif 93 | 94 | # avoid an extra leading slash when DESTDIR is empty 95 | ifeq (${DESTDIR},) 96 | DESTBINDIR=${BINDIR} 97 | else 98 | DESTBINDIR=${DESTDIR}/${BINDIR} 99 | endif 100 | 101 | 102 | MKDIR=mkdir -p 103 | ifeq (${STRIP},) 104 | STRIP=true 105 | endif 106 | CVS=cvs 107 | GIT=git 108 | 109 | # portable naming of compiled executables: add ".exe" if compiled on 110 | # Windows (with cygwin). 111 | ifeq (${OS}, Windows_NT) 112 | AOUT=a 113 | EXE=.exe 114 | else 115 | AOUT=a.out 116 | EXE= 117 | endif 118 | 119 | 120 | %.o: %.c 121 | ${CC} ${COPT} ${CFLAGS} ${HG_DEFS} ${LOWELAB_DEFS} ${HG_WARN} ${HG_INC} ${XINC} -o $@ -c $< 122 | 123 | #$(MACHTYPE)/libjkweb.so: $(O) 124 | libjkweb.so: $(O) 125 | $(CC) $(O) -dynamiclib -o libjkweb.so -lm -lssl -lz -lcrypto 126 | 127 | # jshint: off unless JSHINT is already in environment 128 | ifeq (${JSHINT},) 129 | JSHINT=true 130 | endif 131 | 132 | -------------------------------------------------------------------------------- /lisa/regpotential/memalloc.h: -------------------------------------------------------------------------------- 1 | /* Let the user redirect where memory allocation/deallocation 2 | * happens. 'careful' routines help debug scrambled heaps. 3 | * 4 | * This file is copyright 2002 Jim Kent, but license is hereby 5 | * granted for all use - public, private or commercial. */ 6 | 7 | #ifndef MEMALLOC_H 8 | #define MEMALLOC_H 9 | 10 | struct memHandler 11 | { 12 | struct memHandler *next; 13 | void * (*alloc)(size_t size); 14 | void (*free)(void *vpt); 15 | void * (*realloc)(void* vpt, size_t size); 16 | }; 17 | 18 | struct memHandler *pushMemHandler(struct memHandler *newHandler); 19 | /* Use newHandler for memory requests until matching popMemHandler. 20 | * Returns previous top of memory handler stack. */ 21 | 22 | struct memHandler *popMemHandler(); 23 | /* Removes top element from memHandler stack and returns it. */ 24 | 25 | void setDefaultMemHandler(); 26 | /* Sets memHandler to the default. */ 27 | 28 | void pushCarefulMemHandler(size_t maxAlloc); 29 | /* Push the careful (paranoid, conservative, checks everything) 30 | * memory handler top of the memHandler stack and use it. 
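 * Intended for tracking down scrambled-heap bugs: while it is active the
 * careful* routines below can audit allocations, and maxAlloc sets the
 * allocation limit it enforces (see setMaxAlloc below).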
*/ 31 | 32 | void carefulCheckHeap(); 33 | /* Walk through allocated memory and make sure that all cookies are 34 | * in place. Only walks through what's been done since 35 | * pushCarefulMemHandler(). */ 36 | 37 | int carefulCountBlocksAllocated(); 38 | /* How many memory items are allocated? (Since called 39 | * pushCarefulMemHandler(). */ 40 | 41 | size_t carefulTotalAllocated(); 42 | /* Return total bases allocated */ 43 | 44 | void setMaxAlloc(size_t s); 45 | /* Set large allocation limit. */ 46 | 47 | void memTrackerStart(); 48 | /* Push memory handler that will track blocks allocated so that 49 | * they can be automatically released with memTrackerEnd(). */ 50 | 51 | void memTrackerEnd(); 52 | /* Free any remaining blocks and pop tracker memory handler. */ 53 | 54 | #endif /* MEMALLOC_H */ 55 | 56 | -------------------------------------------------------------------------------- /lisa/regpotential/mime.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * This file is copyright 2005 Jim Kent, but license is hereby 3 | * granted for all use - public, private or commercial. 4 | *****************************************************************************/ 5 | /* mime.h - parses MIME messages, especially from a cgi from a multipart web form */ 6 | 7 | #ifndef HASH_H 8 | #include "hash.h" 9 | #endif 10 | 11 | #define MIMEBUFSIZE 32*1024 /* size of buffer for mime input */ 12 | 13 | struct mimePart 14 | /* structure for an element of a MIME (multipart) message */ 15 | { 16 | struct mimePart *next; /* next (sibling) if is part of multipart */ 17 | struct hash *hdr; /* hash of part headers */ 18 | off_t size; /* determines if local mem or saved to tempfile */ 19 | /* only one of the next 3 pointers will be non-null, and that is the type */ 20 | char* data; /* if size< MAXPARTSIZE and does not contain null */ 21 | char* fileName; /* if size>=MAXPARTSIZE or data contains null */ 22 | boolean binary; /* if contains 0 chars, cannot store as a c-string */ 23 | struct mimePart *multi;/* points to head of child list if itself contains multiparts */ 24 | }; 25 | 26 | struct mimeBuf 27 | /* structure for buffering a MIME message during parsing */ 28 | { 29 | int d; /* descriptor (file,socket,etc) */ 30 | char buf[MIMEBUFSIZE]; /* actual buffer */ 31 | char *i; /* index into buffer, current location */ 32 | char *eop; /* end of part or -1 */ 33 | char *boundary; /* boundary pattern for marking end of mime part */ 34 | int blen; /* boundary pattern length (strlen) */ 35 | char *eod; /* end of data = eoi-(blen-1) */ 36 | char *eoi; /* end of input or -1 */ 37 | char *eom; /* end of memory just buf+MIMEBUFSIZE */ 38 | }; 39 | 40 | char *getMimeHeaderMainVal(char *header); 41 | /* Parse a typical mime header line returning the first 42 | * main value up to whitespace, punctuation, or end. 43 | * freeMem the returned string when done */ 44 | 45 | char *getMimeHeaderFieldVal(char *header, char *field); 46 | /* Parse a typical mime header line looking for field= 47 | * and return the value which may be quoted. 48 | * freeMem the returned string when done */ 49 | 50 | struct mimeBuf * initMimeBuf(int d); 51 | /* d is a descriptor for a file or socket or some other descriptor 52 | that the MIME input can be read from. 53 | Initializes the mimeBuf structure. */ 54 | 55 | struct mimePart *parseMultiParts(struct mimeBuf *b, char *altHeader); 56 | /* This is a recursive function. 
It parses multipart MIME messages. 57 | Data that are binary or too large will be saved in mimePart->filename 58 | otherwise saved as a c-string in mimePart->data. If multipart, 59 | then first child is mimePart->child, subsequent sibs are in child->next. 60 | altHeader is a string of headers that can be fed in if the headers have 61 | already been read off the stream by an earlier process, i.e. apache. 62 | */ 63 | -------------------------------------------------------------------------------- /lisa/regpotential/portimpl.c: -------------------------------------------------------------------------------- 1 | /* Implementation file for some portability stuff mostly aimed 2 | * at making the same code run under different web servers. 3 | * 4 | * This file is copyright 2002 Jim Kent, but license is hereby 5 | * granted for all use - public, private or commercial. */ 6 | 7 | #include "common.h" 8 | #include "htmshell.h" 9 | #include "portable.h" 10 | #include "obscure.h" 11 | #include "portimpl.h" 12 | #include 13 | 14 | 15 | static struct webServerSpecific *wss = NULL; 16 | 17 | static void setupWss() 18 | { 19 | if (wss == NULL) 20 | { 21 | char *s = getenv("SERVER_SOFTWARE"); 22 | wss = &wssDefault; 23 | if (s == NULL) 24 | { 25 | wss = &wssCommandLine; 26 | } 27 | else 28 | { 29 | if (strncmp(wssMicrosoftII.name, s, strlen(wssMicrosoftII.name)) == 0) 30 | wss = &wssMicrosoftII; 31 | else if (strncmp(wssMicrosoftPWS.name, s, strlen(wssMicrosoftPWS.name)) == 0) 32 | wss = &wssMicrosoftPWS; 33 | else 34 | { 35 | char *t = getenv("HTTP_HOST"); 36 | if (t != NULL) 37 | { 38 | if (sameWord(t, "Crunx")) 39 | wss = &wssLinux; 40 | else if (endsWith(t, "brc.mcw.edu")) 41 | wss = &wssBrcMcw; 42 | } 43 | } 44 | } 45 | } 46 | } 47 | 48 | void makeTempName(struct tempName *tn, char *base, char *suffix) 49 | /* Figure out a temp name, and how CGI and HTML will access it. */ 50 | { 51 | setupWss(); 52 | wss->makeTempName(tn,base,suffix); 53 | } 54 | 55 | char *cgiDir() 56 | { 57 | setupWss(); 58 | return wss->cgiDir(); 59 | } 60 | 61 | char *trashDir() 62 | /* Return the relative path to trash directory for CGI binaries */ 63 | { 64 | setupWss(); 65 | return wss->trashDir(); 66 | } 67 | 68 | double machineSpeed() 69 | /* Return relative speed of machine. UCSC CSE dept. 1999 web server is 1.0 */ 70 | { 71 | setupWss(); 72 | return wss->speed(); 73 | } 74 | 75 | void envUpdate(char *name, char *value) 76 | /* Update an environment string */ 77 | { 78 | int size = strlen(name) + strlen(value) + 2; 79 | char *s = needMem(size); 80 | safef(s, size, "%s=%s", name, value); 81 | putenv(s); 82 | } 83 | 84 | void mkdirTrashDirectory(char *prefix) 85 | /* create the specified trash directory if it doesn't exist */ 86 | { 87 | struct stat buf; 88 | char trashDirName[128]; 89 | safef(trashDirName, sizeof(trashDirName), "%s/%s", trashDir(), prefix); 90 | if (stat(trashDirName,&buf)) 91 | { 92 | int result = mkdir (trashDirName, S_IRWXU | S_IRWXG | S_IRWXO); 93 | if (0 != result) 94 | errnoAbort("failed to create directory %s", trashDirName); 95 | } 96 | } 97 | 98 | 99 | void makeDirsOnPath(char *pathName) 100 | /* Create directory specified by pathName. If pathName contains 101 | * slashes, create directory at each level of path if it doesn't 102 | * already exist. Abort with error message if there's a problem. 103 | * (It's not considered a problem for the directory to already 104 | * exist. 
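 * In effect this gives the behavior of a shell 'mkdir -p' for pathName.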
) */ 105 | { 106 | 107 | /* shortcut for paths that already exist */ 108 | if (fileExists(pathName)) 109 | return; 110 | 111 | /* Make local copy of pathName. */ 112 | int len = strlen(pathName); 113 | char pathCopy[len+1]; 114 | strcpy(pathCopy, pathName); 115 | 116 | /* Tolerate double-slashes in path, everyone else does it. */ 117 | 118 | /* Start at root if it's an absolute path name. */ 119 | char *s = pathCopy, *e; 120 | while (*s++ == '/') 121 | /* do nothing */; 122 | 123 | /* Step through it one slash at a time 124 | * making directory if possible, else dying. */ 125 | for (; !isEmpty(s); s = e) 126 | { 127 | /* Find end of this section and terminate string there. */ 128 | e = strchr(s, '/'); 129 | if (e != NULL) 130 | *e = 0; 131 | makeDir(pathCopy); 132 | if (e != NULL) 133 | *e++ = '/'; 134 | } 135 | } 136 | 137 | -------------------------------------------------------------------------------- /lisa/regpotential/portimpl.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * Copyright (C) 2000 Jim Kent. This source code may be freely used * 3 | * for personal, academic, and non-profit purposes. Commercial use * 4 | * permitted only by explicit agreement with Jim Kent (jim_kent@pacbell.net) * 5 | *****************************************************************************/ 6 | /* Implement portable stuff.... */ 7 | 8 | /* There is one of the following structures for each web server 9 | * we support. During run time looking at the environment variable 10 | * SERVER_SOFTWARE we decide which of these to use. */ 11 | struct webServerSpecific 12 | { 13 | char *name; 14 | 15 | /* Make a good name for a temp file. */ 16 | void (*makeTempName)(struct tempName *tn, char *base, char *suffix); 17 | 18 | /* Return directory to look for cgi in. */ 19 | char * (*cgiDir)(); 20 | 21 | #ifdef NEVER 22 | /* Return cgi suffix. */ 23 | char * (*cgiSuffix)(); 24 | #endif /* NEVER */ 25 | 26 | /* Return relative speed of CPU. (UCSC CSE 1999 FTP machine is 1.0) */ 27 | double (*speed)(); 28 | 29 | /* The relative path to trash directory for CGI binaries */ 30 | char * (*trashDir)(); 31 | 32 | }; 33 | 34 | 35 | extern struct webServerSpecific wssMicrosoftII, wssMicrosoftPWS, wssDefault, 36 | wssLinux, wssCommandLine, wssBrcMcw; 37 | 38 | char *rTempName(char *dir, char *base, char *suffix); 39 | /* Make a temp name that's almost certainly unique. */ 40 | -------------------------------------------------------------------------------- /lisa/regpotential/pybw.c: -------------------------------------------------------------------------------- 1 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 2 | #include 3 | #include "pybw.h" 4 | #define RP_DOC "Summarize data from bigwig file as regulatory potential returns numpy array of scores. 
bwfile_name, chrom_list, start_list, end_list, strand_list, weights, option (mean,max,min)" 5 | 6 | static PyObject *RPError; 7 | 8 | static PyObject * 9 | getrp(PyObject *self, PyObject *args) // self is the module object 10 | { 11 | PyObject *bigwigfileObj; /* bigwig file name strings */ 12 | PyObject *bedfile; /* bed file name strings */ 13 | PyObject *outfile; 14 | PyObject *decay; 15 | PyObject *left; 16 | PyObject *right; 17 | char *bigWigFile; 18 | char *bed; 19 | char *out; 20 | double d; 21 | int l; 22 | int r; 23 | #if PY_MAJOR_VERSION >= 3 24 | #define PyInt_Type PyLong_Type 25 | #define PyString_Type PyBytes_Type 26 | #define PyInt_AsLong PyLong_AsLong 27 | #define PyString_AsString PyBytes_AsString 28 | #endif 29 | //if (! PyArg_ParseTuple( args, "O!O!O!O!O!O!", &PyString_Type, &bigwigfileObj, &PyString_Type, &bedfile, &PyString_Type, &outfile, &PyFloat_Type, &decay, &PyInt_Type, &left, &PyInt_Type, &right)) { 30 | if (! PyArg_ParseTuple( args, "sssO!O!O!", &bigWigFile, &bed, &out, &PyFloat_Type, &decay, &PyInt_Type, &left, &PyInt_Type, &right)) { 31 | printf("%s %s %s %f %d %d \n", bigWigFile, bed, out, d, l, r); 32 | PyErr_SetString(RPError, "something bad happened!!!"); 33 | return NULL; 34 | } 35 | //bigWigFile = PyString_AsString(bigwigfileObj); 36 | //bed = PyString_AsString(bedfile); 37 | //out = PyString_AsString(outfile); 38 | d = PyFloat_AsDouble(decay); 39 | l = PyInt_AsLong(left); 40 | r = PyInt_AsLong(right); 41 | printf("%s %s %s %f %d %d \n", bigWigFile, bed, out, d, l, r); 42 | 43 | bigWigAverageOverBed(bigWigFile, bed, out, d, l, r); 44 | 45 | Py_INCREF(Py_None); 46 | return Py_None; 47 | } 48 | 49 | static PyMethodDef myMethods[] = { 50 | { "getrp", getrp, METH_VARARGS, RP_DOC }, 51 | { NULL, NULL, 0, NULL } 52 | }; 53 | 54 | #if PY_MAJOR_VERSION >= 3 55 | static struct PyModuleDef moduledef = { 56 | PyModuleDef_HEAD_INIT, 57 | "_bw", 58 | "epigenomics data RP module", 59 | -1, 60 | myMethods, 61 | NULL, 62 | NULL, 63 | NULL, 64 | NULL 65 | }; 66 | 67 | #define INITERROR return NULL 68 | //PyObject * 69 | PyMODINIT_FUNC 70 | PyInit__bw(void) 71 | #else 72 | #define INITERROR return 73 | //PyMODINIT_FUNC init_bw(void) 74 | void init_bw(void) 75 | #endif 76 | { 77 | #if PY_MAJOR_VERSION >= 3 78 | PyObject *m = PyModule_Create(&moduledef); 79 | #else 80 | PyObject *m = Py_InitModule("_bw", myMethods); 81 | #endif 82 | if (m == NULL) 83 | INITERROR; 84 | 85 | RPError = PyErr_NewException("_bw.Error", NULL, NULL); 86 | Py_INCREF(RPError); 87 | PyModule_AddObject(m, "rperror", RPError); 88 | /* import_array(); */ 89 | #if PY_MAJOR_VERSION >= 3 90 | return m; 91 | #endif 92 | } 93 | -------------------------------------------------------------------------------- /lisa/regpotential/pybw.h: -------------------------------------------------------------------------------- 1 | 2 | int bigWigSummary(char *bigWigFile, char *chrom, int start, int end, int dataPoints, double *summaryValues, char *summaryType); 3 | 4 | //void bigWigAverageOverBed(char *inBw, char *inBed, char *outTab, float d); 5 | 6 | void bigWigAverageOverBed(char *inBw, char *inBed, char *outTab, float alpha, int left, int right); 7 | -------------------------------------------------------------------------------- /lisa/regpotential/servBrcMcw.c: -------------------------------------------------------------------------------- 1 | /* Stuff that's specific for .brc.mcw.edu server goes here. 
2 | * 3 | * This file is copyright 2004 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | #include "hash.h" 11 | 12 | 13 | static char *__trashDir = "/trash"; 14 | 15 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 16 | /* Figure out a temp name, and how CGI and HTML will access it. */ 17 | { 18 | char *tname; 19 | 20 | tname = rTempName(__trashDir, base, suffix); 21 | strcpy(tn->forCgi, tname); 22 | strcpy(tn->forHtml, tname); 23 | } 24 | 25 | static char *_cgiDir() 26 | { 27 | return "/cgi-bin/"; 28 | } 29 | 30 | static char *_trashDir() 31 | { 32 | return __trashDir; 33 | } 34 | 35 | static double _speed() 36 | { 37 | return 3.0; 38 | } 39 | 40 | struct webServerSpecific wssBrcMcw = 41 | { 42 | "default", 43 | _makeTempName, 44 | _cgiDir, 45 | _speed, 46 | _trashDir, 47 | }; 48 | -------------------------------------------------------------------------------- /lisa/regpotential/servCrunx.c: -------------------------------------------------------------------------------- 1 | /* Stuff that's specific for local linux server goes here. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | 11 | 12 | static char *__trashDir = "/home/httpd/html/trash"; 13 | 14 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 15 | /* Figure out a temp name, and how CGI and HTML will access it. */ 16 | { 17 | char *tname; 18 | char *tempDirCgi = __trashDir; 19 | char *tempDirHtml = "/trash"; 20 | int tlcLen = strlen(tempDirCgi); 21 | int tlhLen = strlen(tempDirHtml); 22 | 23 | tname = rTempName(tempDirCgi, base, suffix); 24 | strcpy(tn->forCgi, tname); 25 | memcpy(tn->forHtml, tempDirHtml, tlhLen); 26 | strcpy(tn->forHtml+tlhLen, tn->forCgi+tlcLen); 27 | } 28 | 29 | static char *_cgiDir() 30 | { 31 | return "../cgi-bin/"; 32 | } 33 | 34 | static char *_trashDir() 35 | { 36 | return __trashDir; 37 | } 38 | 39 | static double _speed() 40 | { 41 | return 3.0; 42 | } 43 | 44 | struct webServerSpecific wssLinux = 45 | { 46 | "linux", 47 | _makeTempName, 48 | _cgiDir, 49 | _speed, 50 | _trashDir, 51 | }; 52 | -------------------------------------------------------------------------------- /lisa/regpotential/servcis.c: -------------------------------------------------------------------------------- 1 | /* Stuff that's specific for Comp Science dept. web server goes here. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | #include "hash.h" 11 | 12 | 13 | static char *__trashDir = "../trash"; 14 | 15 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 16 | /* Figure out a temp name, and how CGI and HTML will access it. 
*/ 17 | { 18 | char *tname; 19 | 20 | tname = rTempName(__trashDir, base, suffix); 21 | strcpy(tn->forCgi, tname); 22 | strcpy(tn->forHtml, tname); 23 | } 24 | 25 | static char *_cgiDir() 26 | { 27 | return "../cgi-bin/"; 28 | } 29 | 30 | static char *_trashDir() 31 | { 32 | return __trashDir; 33 | } 34 | 35 | static double _speed() 36 | { 37 | return 3.0; 38 | } 39 | 40 | 41 | struct webServerSpecific wssDefault = 42 | { 43 | "default", 44 | _makeTempName, 45 | _cgiDir, 46 | _speed, 47 | _trashDir, 48 | }; 49 | -------------------------------------------------------------------------------- /lisa/regpotential/servcl.c: -------------------------------------------------------------------------------- 1 | /* "Web Server" for command line execution. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | 11 | 12 | static char *__trashDir = "."; 13 | 14 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 15 | /* Figure out a temp name, and how CGI and HTML will access it. */ 16 | { 17 | char *tname = rTempName(__trashDir, base, suffix); 18 | strcpy(tn->forCgi, tname); 19 | strcpy(tn->forHtml, tn->forCgi); 20 | } 21 | 22 | static char *_cgiDir() 23 | { 24 | char *jkwebDir; 25 | if ((jkwebDir = getenv("JKWEB")) == NULL) 26 | return ""; 27 | else 28 | return jkwebDir; 29 | } 30 | 31 | static char *_trashDir() 32 | { 33 | return __trashDir; 34 | } 35 | 36 | static double _speed() 37 | { 38 | return 1.0; 39 | } 40 | 41 | 42 | struct webServerSpecific wssCommandLine = 43 | { 44 | "commandLine", 45 | _makeTempName, 46 | _cgiDir, 47 | _speed, 48 | _trashDir, 49 | }; 50 | -------------------------------------------------------------------------------- /lisa/regpotential/servmsII.c: -------------------------------------------------------------------------------- 1 | /* Stuff that's specific for the MS II Web Server goes here. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | 11 | 12 | static char *__trashDir = "..\\trash"; 13 | 14 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 15 | /* Figure out a temp name, and how CGI and HTML will access it. */ 16 | { 17 | long tempIx = incCounterFile("tcounter"); 18 | sprintf(tn->forCgi, "%s\\%s%ld%s", __trashDir, base, tempIx, suffix); 19 | sprintf(tn->forHtml, "%s\\%s%ld%s", __trashDir, base, tempIx, suffix); 20 | } 21 | 22 | static char *_cgiDir() 23 | { 24 | return ""; 25 | } 26 | 27 | static char *_trashDir() 28 | { 29 | return __trashDir; 30 | } 31 | 32 | static double _speed() 33 | { 34 | return 2.5; 35 | } 36 | 37 | 38 | struct webServerSpecific wssMicrosoftII = 39 | { 40 | "Microsoft-IIS", 41 | _makeTempName, 42 | _cgiDir, 43 | _speed, 44 | _trashDir, 45 | }; 46 | -------------------------------------------------------------------------------- /lisa/regpotential/servpws.c: -------------------------------------------------------------------------------- 1 | /* Stuff that's specific for the Personal Web Server goes here. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. 
*/ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | 11 | 12 | static char *__trashDir = "..\\trash"; 13 | 14 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 15 | /* Figure out a temp name, and how CGI and HTML will access it. */ 16 | { 17 | long tempIx = incCounterFile("tcounter"); 18 | sprintf(tn->forCgi, "%s\\%s%ld%s", __trashDir, base, tempIx, suffix); 19 | sprintf(tn->forHtml, "%s\\%s%ld%s", __trashDir, base, tempIx, suffix); 20 | } 21 | 22 | static char *_cgiDir() 23 | { 24 | return "../cgi-bin/"; 25 | } 26 | 27 | static char *_trashDir() 28 | { 29 | return __trashDir; 30 | } 31 | 32 | static double _speed() 33 | { 34 | return 1.25; 35 | } 36 | 37 | struct webServerSpecific wssMicrosoftPWS = 38 | { 39 | "Microsoft-PWS", 40 | _makeTempName, 41 | _cgiDir, 42 | _speed, 43 | _trashDir, 44 | }; 45 | -------------------------------------------------------------------------------- /lisa/regpotential/sig.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * Copyright (C) 2000 Jim Kent. This source code may be freely used * 3 | * for personal, academic, and non-profit purposes. Commercial use * 4 | * permitted only by explicit agreement with Jim Kent (jim_kent@pacbell.net) * 5 | *****************************************************************************/ 6 | /* Sig.h - signatures that start various binary files. */ 7 | #ifndef SIG_H 8 | #define SIG_H 9 | 10 | #define aliSig 0xCDAB8245 11 | /* Binary alignment file. */ 12 | 13 | #define alxSig 0xA1B1C1D3 14 | /* Index into binary alignment file, sorted by start base offset. */ 15 | 16 | #define pgoSig 0x690 17 | /* Index into GDF file, sorted by start base offset. Signature is 32 bit. */ 18 | 19 | #define cdoSig 0xCD01 20 | /* Index into c2g text file, sorted by start base offset. 32 bit signature. */ 21 | 22 | #define xaoSig 0xA0B0C0D0 23 | /* Index into xeno alignment, sorted by start base offset. 32 bit signature. */ 24 | 25 | #define glSig 0xF1E2D3C4 26 | /* Binary gene file, sorted by chromosome and then starting offset. */ 27 | 28 | /* IX sig is int ixSig[4] = {0x693F8ED1, 0x7EDA1C32, 0x4BA58983, 0x277CB89C,}; 29 | * These are made by snofMake, and are indexes sorted by name. */ 30 | 31 | /* XI - same as IX but on big-endian (or is it little-endian) archetectures. */ 32 | 33 | #define nt4Signature 0x12345678 34 | /* Signature at the beginning of an nt4 file - 2 bit a nucleotide binary file. */ 35 | 36 | #define lm2Signature 0x12131416 37 | /* Signature at the beginning of a lm2 file - a 2nd order markov model for nucleotides. */ 38 | 39 | #define oocSig 0x584155f2 40 | /* Signature of file that contains over-represented oligomers for patSpace 41 | * algorithm. */ 42 | 43 | #define oocSigSwapped 0xf2554158 44 | /* Signature of file that contains over-represented oligomers for patSpace 45 | * algorithm. */ 46 | 47 | #define fofSig 0x13410da8 48 | /* Signature into fof type index file (that can index multiple external files). */ 49 | 50 | #define nibSig 0x6BE93D3A 51 | /* Signature into nib file (4 bits per nucleotide DNA file) */ 52 | 53 | #define qacSig 0x32b67998 54 | /* Signature of qac file (compressed quality file) */ 55 | 56 | #define caqSig 0x9879b632 57 | /* Signature of byte-swapped qac file. 
*/ 58 | 59 | #define twoBitSig 0x1A412743 60 | /* Signature into 2bit file (2 bits per nucleotide DNA file) plus 61 | * information on N and masked bases. */ 62 | 63 | #define twoBitSwapSig 0x4327411A 64 | /* Signature of byte-swapped two-bit file. */ 65 | 66 | #define chromGraphSig 0x4528421C 67 | /* Signature of chromGraph binary data file */ 68 | 69 | #define chromGraphSwapSig 0x1C422845 70 | /* Signature of byte-swapped chromGraph binary data file */ 71 | 72 | #define genomeRangeTreeSig 0xf7fb8104 73 | /* Signature of genomeRangeTree binary data file */ 74 | 75 | #define genomeRangeTreeSwapSig 0x0481fbf7 76 | /* Signature of genomeRangeTree binary data file */ 77 | 78 | #define bptSig 0x78CA8C91 79 | /* Signature of generic b+ tree index file. */ 80 | 81 | #define bptSwapped 0x918CCA78 82 | /* Signature of generic b+ tree index file. */ 83 | 84 | #define cirTreeSig 0x2468ACE0 85 | /* Signature of a chromosome id r-tree index file. */ 86 | 87 | #define crTreeSig 0x2369ADE1 88 | /* Signature of a chromosome r-tree index file. */ 89 | 90 | #define bigWigSig 0x888FFC26 91 | /* Signature for a big wig file. */ 92 | 93 | #define bigBedSig 0x8789F2EB 94 | /* Signature for a big bed file. */ 95 | 96 | #define udcBitmapSig 0x4187E2F6 97 | /* Signature for a url data cache bitmap file. */ 98 | 99 | #endif /* SIG_H */ 100 | 101 | 102 | -------------------------------------------------------------------------------- /lisa/regpotential/sqlNum.h: -------------------------------------------------------------------------------- 1 | /* sqlNum.h - routines to convert from ascii to 2 | * unsigned/integer a bit more quickly than atoi. 3 | * Called sqlNum because it was first developed for use with 4 | * SQL databases, which tend to require a lot of conversion from 5 | * string to binary representation of numbers. In particular the 6 | * code generator AutoSQL puts in lots of calls to these routines 7 | * into it's parsers. Other parser in the source tree have come 8 | * to use these too though since they are fast and have good error 9 | * checking. 10 | * 11 | * This file is copyright 2002 Jim Kent, but license is hereby 12 | * granted for all use - public, private or commercial. */ 13 | 14 | #ifndef SQLNUM_H 15 | #define SQLNUM_H 16 | 17 | /* get off_t */ 18 | #include 19 | 20 | unsigned sqlUnsigned(char *s); 21 | /* Convert series of digits to unsigned integer about 22 | * twice as fast as atoi (by not having to skip white 23 | * space or stop except at the null byte.) */ 24 | 25 | unsigned sqlUnsignedInList(char **pS); 26 | /* Convert series of digits to unsigned integer about 27 | * twice as fast as atoi (by not having to skip white 28 | * space or stop except at the null byte.) 29 | * All of string is number. Number may be delimited by a comma. 30 | * Returns the position of the delimiter or the terminating 0. */ 31 | 32 | unsigned long sqlUnsignedLong(char *s); 33 | /* Convert series of digits to unsigned long about 34 | * twice as fast as atol (by not having to skip white 35 | * space or stop except at the null byte.) */ 36 | 37 | unsigned long sqlUnsignedLongInList(char **pS); 38 | /* Convert series of digits to unsigned long about 39 | * twice as fast as atol (by not having to skip white 40 | * space or stop except at the null byte.) 41 | * All of string is number. Number may be delimited by a comma. 42 | * Returns the position of the delimiter or the terminating 0. */ 43 | 44 | int sqlSigned(char *s); 45 | /* Convert string to signed integer. Unlike atol assumes 46 | * all of string is number. 
*/ 47 | 48 | int sqlSignedInList(char **pS); 49 | /* Convert string to signed integer. Unlike atol assumes 50 | * all of string is number. Number may be delimited by a comma. 51 | * Returns the position of the delimiter or the terminating 0. */ 52 | 53 | long long sqlLongLong(char *s); 54 | /* Convert string to a long long. Unlike atol assumes all of string is 55 | * number. */ 56 | 57 | long long sqlLongLongInList(char **pS); 58 | /* Convert string to a long long. Unlike atol, assumes 59 | * all of string is number. Number may be delimited by a comma. 60 | * Returns the position of the delimiter or the terminating 0. */ 61 | 62 | float sqlFloat(char *s); 63 | /* Convert string to a float. Assumes all of string is number 64 | * and aborts on an error. */ 65 | 66 | float sqlFloatInList(char **pS); 67 | /* Convert string to a float. Assumes all of string is number 68 | * and aborts on an error. 69 | * Number may be delimited by a comma. 70 | * Returns the position of the delimiter or the terminating 0. */ 71 | 72 | double sqlDouble(char *s); 73 | /* Convert string to a double. Assumes all of string is number 74 | * and aborts on an error. */ 75 | 76 | double sqlDoubleInList(char **pS); 77 | /* Convert string to a double. Assumes all of string is number 78 | * and aborts on an error. 79 | * Number may be delimited by a comma. 80 | * Returns the position of the delimiter or the terminating 0. */ 81 | 82 | #endif /* SQLNUM_H */ 83 | 84 | -------------------------------------------------------------------------------- /lisa/regpotential/tokenizer.h: -------------------------------------------------------------------------------- 1 | /* tokenizer - A tokenizer structure that will chop up file into 2 | * tokens. It is aware of quoted strings and otherwise tends to return 3 | * white-space or punctuated-separated words, with punctuation in 4 | * a separate token. This is used by autoSql. */ 5 | 6 | #ifndef TOKENIZER_H 7 | #define TOKENIZER_H 8 | 9 | struct tokenizer 10 | /* This handles reading in tokens. */ 11 | { 12 | bool reuse; /* True if want to reuse this token. */ 13 | bool eof; /* True at end of file. */ 14 | int leadingSpaces; /* Number of leading spaces before token. */ 15 | struct lineFile *lf; /* Underlying file. */ 16 | char *curLine; /* Current line of text. */ 17 | char *linePt; /* Start position within current line. */ 18 | char *string; /* String value of token */ 19 | int sSize; /* Size of string. */ 20 | int sAlloc; /* Allocated string size. */ 21 | /* Some variables set after tokenizerNew to control details of 22 | * parsing. */ 23 | bool leaveQuotes; /* Leave quotes in string. */ 24 | bool uncommentC; /* Take out C (and C++) style comments. */ 25 | bool uncommentShell; /* Take out # style comments. */ 26 | }; 27 | 28 | struct tokenizer *tokenizerNew(char *fileName); 29 | /* Return a new tokenizer. */ 30 | 31 | struct tokenizer *tokenizerOnLineFile(struct lineFile *lf); 32 | /* Create a new tokenizer on open lineFile. */ 33 | 34 | void tokenizerFree(struct tokenizer **pTkz); 35 | /* Tear down a tokenizer. */ 36 | 37 | void tokenizerReuse(struct tokenizer *tkz); 38 | /* Reuse token. */ 39 | 40 | int tokenizerLineCount(struct tokenizer *tkz); 41 | /* Return line of current token. */ 42 | 43 | char *tokenizerFileName(struct tokenizer *tkz); 44 | /* Return name of file. */ 45 | 46 | char *tokenizerNext(struct tokenizer *tkz); 47 | /* Return token's next string (also available as tkz->string) or 48 | * NULL at EOF. 
This string will be overwritten with the next call 49 | * to tokenizerNext, so cloneString if you need to save it. */ 50 | 51 | void tokenizerErrAbort(struct tokenizer *tkz, char *format, ...); 52 | /* Print error message followed by file and line number and 53 | * abort. */ 54 | 55 | void tokenizerNotEnd(struct tokenizer *tkz); 56 | /* Squawk if at end. */ 57 | 58 | char *tokenizerMustHaveNext(struct tokenizer *tkz); 59 | /* Get next token, which must be there. */ 60 | 61 | void tokenizerMustMatch(struct tokenizer *tkz, char *string); 62 | /* Require next token to match string. Return next token 63 | * if it does, otherwise abort. */ 64 | 65 | #endif /* TOKENIZER_H */ 66 | 67 | -------------------------------------------------------------------------------- /lisa/regpotential/vGfx.c: -------------------------------------------------------------------------------- 1 | /* vGfx - interface to polymorphic graphic object 2 | * that currently can either be a memory buffer or 3 | * a postScript file. */ 4 | 5 | /* Copyright (C) 2011 The Regents of the University of California 6 | * See README in this or parent directory for licensing information. */ 7 | 8 | #include "common.h" 9 | #include "vGfx.h" 10 | 11 | 12 | 13 | /* Most of the implementation of this is in macros in vGfx.h. */ 14 | 15 | void vgClose(struct vGfx **pVg) 16 | /* Close down virtual graphics object, and finish writing it to file. */ 17 | { 18 | struct vGfx *vg = *pVg; 19 | if (vg != NULL) 20 | { 21 | vg->close(&vg->data); 22 | freez(pVg); 23 | } 24 | } 25 | 26 | struct vGfx *vgHalfInit(int width, int height) 27 | /* Close down virtual graphics object, and finish writing it to file. */ 28 | { 29 | struct vGfx *vg; 30 | AllocVar(vg); 31 | vg->width = width; 32 | vg->height = height; 33 | return vg; 34 | } 35 | 36 | int vgFindRgb(struct vGfx *vg, struct rgbColor *rgb) 37 | /* Find color index corresponding to rgb color. */ 38 | { 39 | return vgFindColorIx(vg, rgb->r, rgb->g, rgb->b); 40 | } 41 | 42 | Color vgContrastingColor(struct vGfx *vg, int backgroundIx) 43 | /* Return black or white whichever would be more visible over 44 | * background. */ 45 | { 46 | struct rgbColor c = vgColorIxToRgb(vg, backgroundIx); 47 | int val = (int)c.r + c.g + c.g + c.b; 48 | if (val > 512) 49 | return MG_BLACK; 50 | else 51 | return MG_WHITE; 52 | } 53 | 54 | -------------------------------------------------------------------------------- /lisa/regpotential/vGfxPrivate.h: -------------------------------------------------------------------------------- 1 | /* vGfx private - stuff that the implementers of 2 | * a vGfx need to know about, but not the clients. */ 3 | 4 | /* Copyright (C) 2010 The Regents of the University of California 5 | * See README in this or parent directory for licensing information. */ 6 | 7 | 8 | struct vGfx *vgHalfInit(int width, int height); 9 | /* Return a partially initialized vGfx structure. 10 | * Generally not called by clients.*/ 11 | 12 | void vgMgMethods(struct vGfx *vg); 13 | /* Fill in virtual graphics methods for memory based drawing. */ 14 | 15 | /* A bunch of things to make the type-casting easier. 16 | * This is a price you pay for object oriented 17 | * polymorphism in C... 
*/ 18 | 19 | typedef void (*vg_close)(void **pV); 20 | typedef void (*vg_dot)(void *v, int x, int y, int colorIx); 21 | typedef int (*vg_getDot)(void *v, int x, int y); 22 | typedef void (*vg_box)(void *v, int x, int y, 23 | int width, int height, int colorIx); 24 | typedef void (*vg_line)(void *v, 25 | int x1, int y1, int x2, int y2, int colorIx); 26 | typedef void (*vg_text)(void *v, int x, int y, int colorIx, void *font, 27 | char *text); 28 | typedef void (*vg_textRight)(void *v, int x, int y, int width, int height, 29 | int colorIx, void *font, char *text); 30 | typedef void (*vg_textCentered)(void *v, int x, int y, int width, int height, 31 | int colorIx, void *font, char *text); 32 | typedef int (*vg_findColorIx)(void *v, int r, int g, int b); 33 | typedef struct rgbColor (*vg_colorIxToRgb)(void *v, int colorIx); 34 | typedef void (*vg_setClip)(void *v, int x, int y, int width, int height); 35 | typedef void (*vg_setWriteMode)(void *v, unsigned int writeMode); 36 | typedef void (*vg_unclip)(void *v); 37 | typedef void (*vg_verticalSmear)(void *v, 38 | int xOff, int yOff, int width, int height, 39 | Color *dots, boolean zeroClear); 40 | typedef void (*vg_fillUnder)(void *v, int x1, int y1, 41 | int x2, int y2, int bottom, Color color); 42 | typedef void (*vg_drawPoly)(void *v, struct gfxPoly *poly, Color color, boolean filled); 43 | typedef void (*vg_setHint)(void *v, char *hint, char *value); 44 | typedef char * (*vg_getHint)(void *v, char *hint); 45 | typedef int (*vg_getFontPixelHeight)(void *v, void *font); 46 | typedef int (*vg_getFontStringWidth)(void *v, void *font, char *string); 47 | 48 | -------------------------------------------------------------------------------- /lisa/regpotential/verbose.c: -------------------------------------------------------------------------------- 1 | /* verbose.c - write out status messages according to the 2 | * current verbosity level. These messages go to stderr. */ 3 | 4 | /* Copyright (C) 2011 The Regents of the University of California 5 | * See README in this or parent directory for licensing information. */ 6 | 7 | #include "common.h" 8 | #include "portable.h" 9 | #include "verbose.h" 10 | 11 | 12 | static int logVerbosity = 1; /* The level of log verbosity. 0 is silent. */ 13 | static FILE *logFile; /* File to log to. */ 14 | 15 | static boolean checkedDotsEnabled = FALSE; /* have we check for dot output 16 | * being enabled? */ 17 | static boolean dotsEnabled = FALSE; /* is dot output enabled? */ 18 | 19 | void verboseVa(int verbosity, char *format, va_list args) 20 | /* Log with at given verbosity vprintf formatted args. */ 21 | { 22 | if (verbosity <= logVerbosity) 23 | { 24 | if (logFile == NULL) 25 | logFile = stderr; 26 | vfprintf(logFile, format, args); 27 | fflush(logFile); 28 | } 29 | } 30 | 31 | void verbose(int verbosity, char *format, ...) 32 | /* Write printf formatted message to log (which by 33 | * default is stderr) if global verbose variable 34 | * is set to verbosity or higher. */ 35 | { 36 | va_list args; 37 | va_start(args, format); 38 | verboseVa(verbosity, format, args); 39 | va_end(args); 40 | } 41 | 42 | static long lastTime = -1; // previous call time. 43 | 44 | void verboseTimeInit(void) 45 | /* Initialize or reinitialize the previous time for use by verboseTime. */ 46 | { 47 | lastTime = clock1000(); 48 | } 49 | 50 | void verboseTime(int verbosity, char *label, ...) 51 | /* Print label and how long it's been since last call. 
Start time can be 52 | * initialized with verboseTimeInit, otherwise the elapsed time will be 53 | * zero. */ 54 | { 55 | assert(label != NULL); // original version allowed this, but breaks some GCCs 56 | if (lastTime < 0) 57 | verboseTimeInit(); 58 | long time = clock1000(); 59 | va_list args; 60 | va_start(args, label); 61 | verboseVa(verbosity, label, args); 62 | verbose(verbosity, ": %ld millis\n", time - lastTime); 63 | lastTime = time; 64 | va_end(args); 65 | } 66 | 67 | 68 | boolean verboseDotsEnabled() 69 | /* check if outputting of happy dots are enabled. They will be enabled if the 70 | * verbosity is > 0, stderr is a tty and we don't appear to be running an 71 | * emacs shell. */ 72 | { 73 | if (!checkedDotsEnabled) 74 | { 75 | if (logFile == NULL) 76 | logFile = stderr; 77 | dotsEnabled = (logVerbosity > 0) && isatty(fileno(logFile)); 78 | if (dotsEnabled) 79 | { 80 | /* check for an possible emacs shell */ 81 | char *emacs = getenv("emacs"); 82 | char *term = getenv("TERM"); 83 | if ((emacs != NULL) && (emacs[0] == 't')) 84 | dotsEnabled = FALSE; 85 | else if ((term != NULL) && sameString(term, "dumb")) 86 | dotsEnabled = FALSE; 87 | } 88 | checkedDotsEnabled = TRUE; 89 | } 90 | return dotsEnabled; 91 | } 92 | 93 | void verboseDot() 94 | /* Write I'm alive dot (at verbosity level 1) */ 95 | { 96 | if (verboseDotsEnabled()) 97 | verbose(1, "."); 98 | } 99 | 100 | void verboseSetLevel(int verbosity) 101 | /* Set verbosity level in log. 0 for no logging, 102 | * higher number for increasing verbosity. */ 103 | { 104 | logVerbosity = verbosity; 105 | checkedDotsEnabled = FALSE; /* force rechecking of dots enabled */ 106 | } 107 | 108 | int verboseLevel(void) 109 | /* Get verbosity level. */ 110 | { 111 | return logVerbosity; 112 | } 113 | 114 | void verboseSetLogFile(char *name) 115 | /* Set logFile for verbose messages overrides stderr. */ 116 | { 117 | if (sameString(name, "stdout")) 118 | logFile = stdout; 119 | else if (sameString(name, "stderr")) 120 | logFile = stderr; 121 | else 122 | logFile = mustOpen(name, "w"); 123 | } 124 | 125 | FILE *verboseLogFile() 126 | /* Get the verbose log file. */ 127 | { 128 | if (logFile == NULL) 129 | logFile = stderr; 130 | return logFile; 131 | } 132 | -------------------------------------------------------------------------------- /lisa/regpotential/verbose.h: -------------------------------------------------------------------------------- 1 | /* verbose.h - write out status messages according to the 2 | * current verbosity level. These messages go to stderr. */ 3 | 4 | #ifndef VERBOSE_H 5 | #define VERBOSE_H 6 | 7 | void verbose(int verbosity, char *format, ...) 8 | /* Write printf formatted message to log (which by 9 | * default is stderr) if global verbose variable 10 | * is set to verbosity or higher. */ 11 | #if defined(__GNUC__) 12 | __attribute__((format(printf, 2, 3))) 13 | #endif 14 | ; 15 | 16 | void verboseVa(int verbosity, char *format, va_list args); 17 | /* Log with at given verbosity vprintf formatted args. */ 18 | 19 | void verboseTimeInit(void); 20 | /* Initialize or reinitialize the previous time for use by verboseTime. */ 21 | 22 | void verboseTime(int verbosity, char *label, ...) 23 | /* Print label and how long it's been since last call. Start time can be 24 | * initialized with verboseTimeInit, otherwise the elapsed time will be 25 | * zero. 
*/ 26 | #if defined(__GNUC__) 27 | __attribute__((format(printf, 2, 3))) 28 | #endif 29 | ; 30 | 31 | void verboseDot(); 32 | /* Write I'm alive dot (at verbosity level 1) */ 33 | 34 | boolean verboseDotsEnabled(); 35 | /* check if outputting of happy dots are enabled. They will be enabled if the 36 | * verbosity is > 0, stderr is a tty and we don't appear to be running an 37 | * emacs shell. */ 38 | 39 | int verboseLevel(void); 40 | /* Get verbosity level. */ 41 | 42 | void verboseSetLevel(int verbosity); 43 | /* Set verbosity level in log. 0 for no logging, 44 | * higher number for increasing verbosity. */ 45 | 46 | void verboseSetLogFile(char *name); 47 | /* Set logFile for verbose messages overrides stderr. */ 48 | 49 | FILE *verboseLogFile(); 50 | /* Get the verbose log file. */ 51 | 52 | #endif /* VERBOSE_H */ 53 | 54 | -------------------------------------------------------------------------------- /lisa/regpotential/zlibFace.c: -------------------------------------------------------------------------------- 1 | /* Wrappers around zlib to make interfacing to it a bit easier. */ 2 | 3 | /* Copyright (C) 2009 The Regents of the University of California 4 | * See README in this or parent directory for licensing information. */ 5 | 6 | #include "common.h" 7 | #include 8 | 9 | static char *zlibErrorMessage(int err) 10 | /* Convert error code to errorMessage */ 11 | { 12 | switch (err) 13 | { 14 | case Z_STREAM_END: 15 | return "zlib stream end"; 16 | case Z_NEED_DICT: 17 | return "zlib need dictionary"; 18 | case Z_ERRNO: 19 | return "zlib errno"; 20 | case Z_STREAM_ERROR: 21 | return "zlib data error"; 22 | case Z_DATA_ERROR: 23 | return "zlib data error"; 24 | case Z_MEM_ERROR: 25 | return "zlib mem error"; 26 | case Z_BUF_ERROR: 27 | return "zlib buf error"; 28 | case Z_VERSION_ERROR: 29 | return "zlib version error"; 30 | case Z_OK: 31 | return NULL; 32 | default: 33 | { 34 | static char msg[128]; 35 | safef(msg, sizeof(msg), "zlib error code %d", err); 36 | return msg; 37 | } 38 | } 39 | } 40 | 41 | size_t zCompress( 42 | void *uncompressed, /* Start of area to compress. */ 43 | size_t uncompressedSize, /* Size of area to compress. */ 44 | void *compBuf, /* Where to put compressed bits */ 45 | size_t compBufSize) /* Size of compressed bits - calculate using zCompBufSize */ 46 | /* Compress data from memory to memory. Returns size after compression. */ 47 | { 48 | uLongf compSize = compBufSize; 49 | int err = compress((Bytef*)compBuf, &compSize, (Bytef*)uncompressed, (uLong)uncompressedSize); 50 | if (err != 0) 51 | errAbort("Couldn't zCompress %lld bytes: %s", 52 | (long long)uncompressedSize, zlibErrorMessage(err)); 53 | return compSize; 54 | } 55 | 56 | size_t zCompBufSize(size_t uncompressedSize) 57 | /* Return size of buffer needed to compress something of given size uncompressed. */ 58 | { 59 | return 1.001*uncompressedSize + 13; 60 | } 61 | 62 | size_t zUncompress( 63 | void *compressed, /* Compressed area */ 64 | size_t compressedSize, /* Size after compression */ 65 | void *uncompBuf, /* Where to put uncompressed bits */ 66 | size_t uncompBufSize) /* Max size of uncompressed bits. */ 67 | /* Uncompress data from memory to memory. Returns size after decompression. 
*/ 68 | { 69 | uLongf uncSize = uncompBufSize; 70 | int err = uncompress(uncompBuf, &uncSize, compressed, compressedSize); 71 | if (err != 0) 72 | errAbort("Couldn't zUncompress %lld bytes: %s", 73 | (long long)compressedSize, zlibErrorMessage(err)); 74 | return uncSize; 75 | } 76 | 77 | void zSelfTest(int count) 78 | /* Run an internal diagnostic. */ 79 | { 80 | bits32 testData[count]; 81 | int uncSize = count*sizeof(bits32); 82 | int i; 83 | for (i=0; i&1>>{log}" 24 | -------------------------------------------------------------------------------- /lisa/rules/baseline.rule: -------------------------------------------------------------------------------- 1 | rule lisa_baseline_motif_99: 2 | input: 3 | background = "{sample}.background_gene.1000", 4 | foreground="{sample}.foreground_gene" 5 | output: 6 | #../PhaseQ_Figures/AR_motif99_baseline.csv 7 | "{sample}_motif99_baseline.csv" 8 | message: "lisa baseline for motif hit number" 9 | benchmark: 10 | "{sample}.motif99.baseline.benchmark.txt" 11 | log: "{sample}.log" 12 | params: species=config["species"], prefix="{sample}", 13 | cov=config['covariates'] 14 | shell: 15 | "lisa_baseline run --species {params.species} --prefix {params.prefix} --background {input.background} --foreground {input.foreground} --dtype motif99 2>&1>>{log}" 16 | 17 | rule lisa_baseline_chipseqpeak: 18 | input: 19 | background = "{sample}.background_gene.1000", 20 | foreground="{sample}.foreground_gene" 21 | output: 22 | "{sample}_chipseq_baseline.csv" 23 | message: "lisa baseline for chip-seq peak number" 24 | benchmark: 25 | "{sample}.chipseq.baseline.benchmark.txt" 26 | log: "{sample}.log" 27 | params: species=config["species"], prefix="{sample}", 28 | cov=config['covariates'] 29 | shell: 30 | "lisa_baseline run --species {params.species} --prefix {params.prefix} --background {input.background} --foreground {input.foreground} --dtype chipseq 2>&1>>{log}" 31 | -------------------------------------------------------------------------------- /lisa/rules/combine_chipseq.rule: -------------------------------------------------------------------------------- 1 | """ https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html Input Functions and unpack 2 | """ 3 | import os 4 | 5 | def get_combine_command(label): 6 | return "lisa_combine_ranks -prefix {wildcards.sample}_%s {input}" % label 7 | 8 | def get_inputs(method): 9 | if method == 'beta': 10 | #return "{sample}.lisa_direct.csv" 11 | return "{sample}.3000.lisa_direct.csv" 12 | elif method == 'knockout': 13 | return expand("{{sample}}.{epigenome}.chipseq.p_value.csv", epigenome=config['epigenome']) 14 | elif method == 'all': 15 | return "{sample}.3000.lisa_direct.csv", expand("{{sample}}.{epigenome}.chipseq.p_value.csv", epigenome=config['epigenome']) 16 | 17 | rule lisa_combine_chipseq_ranks: 18 | input: 19 | get_inputs(config['method']) 20 | output: 21 | "{sample}_chipseq_cauchy_combine_dedup.csv", 22 | "{sample}_chipseq_fisher_combine_dedup.csv" 23 | message: "combine p values of TF from ChIP-seq" 24 | shell: 25 | get_combine_command('chipseq') 26 | -------------------------------------------------------------------------------- /lisa/rules/combine_motif.rule: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | def get_combine_command(label): 4 | return "lisa_combine_ranks -prefix {wildcards.sample}_%s {input}" % label 5 | 6 | def get_inputs(method): 7 | if method == 'knockout' or method == 'all': 8 | return 
expand("{{sample}}.{epigenome}.motif99.p_value.csv", epigenome=config['epigenome']) 9 | 10 | rule lisa_combine_motif_ranks: 11 | input: 12 | get_inputs(config['method']) 13 | output: 14 | "{sample}_motif_cauchy_combine_dedup.csv", 15 | "{sample}_motif_fisher_combine_dedup.csv" 16 | message: "combine p values of TF from motif" 17 | shell: 18 | get_combine_command('motif') 19 | -------------------------------------------------------------------------------- /lisa/rules/entropy.rule: -------------------------------------------------------------------------------- 1 | def get_entropy_command(cov, dtype, new_h5_count): 2 | frame="lisa_rank_tfs entropy --species {params.species} --epigenome {wildcards.epigenome} --coefficient {input.coef} --background {input.background} --foreground {input.foreground} --prefix {params.prefix} --dtype %s " % dtype 3 | if cov: 4 | frame+=" --covariates=True " 5 | else: 6 | frame+=" --covariates=False " 7 | 8 | if new_h5_count != None: 9 | frame+=" --new_h5 %s " % new_h5_count 10 | if new_h5_count == None: 11 | frame+=" --new_h5=None " 12 | return frame 13 | 14 | 15 | rule lisa_entropy_motif_99: 16 | input: 17 | coef="{sample}.{epigenome}.coefs.csv", 18 | background = "{sample}.background_gene.300", 19 | foreground="{sample}.foreground_gene" 20 | output: 21 | "{sample}.{epigenome}.motif99.entropy_rank.csv" 22 | message: "lisa cluster motif KL divergene ..." 23 | benchmark: 24 | "{sample}.{epigenome}.motif99.entropy.benchmark.txt" 25 | log: "{sample}.log" 26 | params: species=config["species"], prefix="{sample}" 27 | shell: 28 | get_entropy_command(config['covariates'], 'motif99', config['new_count_h5']) 29 | 30 | rule lisa_entropy_chipseqpeak: 31 | input: 32 | coef="{sample}.{epigenome}.coefs.csv", 33 | background = "{sample}.background_gene.300", 34 | foreground="{sample}.foreground_gene" 35 | output: 36 | "{sample}.{epigenome}.chipseq.entropy_rank.csv" 37 | message: "lisa cluster chip-seq KL divergene ..." 
38 | benchmark: 39 | "{sample}.{epigenome}.chipseq.entropy.benchmark.txt" 40 | log: "{sample}.log" 41 | params: species=config["species"], prefix="{sample}", 42 | shell: 43 | get_entropy_command(config['covariates'], 'chipseq', config['new_count_h5']) 44 | -------------------------------------------------------------------------------- /lisa/rules/fastq.rule: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | rule lisa_bwa_aln: 4 | input: "%s/{sample}" % (config['input_root']) 5 | output: 6 | '%s/%s/{sample}.sai' % (config['prefix'], config['epigenome']) 7 | message: "lisa mapping fastq file" 8 | params: species=config["species"], 9 | epigenome=config['epigenome'], 10 | prefix=config['prefix'], 11 | index=config['index'] 12 | threads: 4 13 | shell: 14 | "bwa aln -q 5 -l 32 -k 2 -t {threads} {params.index} {input} > {output}" 15 | 16 | rule lisa_get_bam: 17 | input: 18 | sai='%s/%s/{sample}.sai' % (config['prefix'], config['epigenome']), 19 | fastq='%s/{sample}'%(config['input_root']) 20 | output: 21 | bam='%s/%s/{sample}.bam' % (config['prefix'], config['epigenome']), 22 | bam_u='%s/%s/{sample}_se.bam' % (config['prefix'], config['epigenome']) 23 | message: "lisa output BAM file" 24 | params: species=config["species"], 25 | epigenome=config['epigenome'], 26 | prefix=config['prefix'], 27 | index=config['index'], 28 | output_uprefix='%s/%s/{sample}_se' % (config['prefix'], config['epigenome']), 29 | output_prefix='%s/%s/{sample}' % (config['prefix'], config['epigenome']) 30 | shell: 31 | """ 32 | bwa samse {params.index} {input.sai} {input.fastq} > {input.sai}.sam 33 | samtools view -q 1 -Sb {input.sai}.sam > {input.sai}.bam 34 | samtools sort -m 5000000000 {input.sai}.bam {params.output_prefix} 35 | samtools rmdup -s {output.bam} {output.bam_u} 36 | samtools index {output.bam_u} 37 | rm {input.sai}.sam 38 | """ 39 | 40 | rule lisa_get_bigwig: 41 | input: 42 | '%s/%s/{sample}_se.bam' % (config['prefix'], config['epigenome']) 43 | output: 44 | '%s/%s/{sample}.bigwig' % (config['prefix'], config['epigenome']) 45 | threads: 4 46 | shell: 47 | "bamCoverage -b {input} -p {threads} -e 146 --binSize 8 --scaleFactor 1 --normalizeUsingRPKM -o {output}" 48 | -------------------------------------------------------------------------------- /lisa/rules/hdf5.rule: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | rule lisa_reg_potential: 4 | input: "{sample}" 5 | output: 6 | '{sample}.%s.%s.reg_potential.h5' % (config['prefix'], config['epigenome']) 7 | message: "lisa compute regulatory potential from bigwig" 8 | benchmark: 9 | "{sample}.benchmark.txt" 10 | log: "{sample}.log" 11 | params: species=config["species"], 12 | epigenome=config['epigenome'], 13 | prefix=config['prefix'] 14 | shell: "lisa_bw2hdf get_regpotential_hdf --species {params.species} --epigenome {params.epigenome} --prefix {params.prefix} {input} 2>&1>>{log}" 15 | 16 | rule lisa_read_count: 17 | input: "{sample}" 18 | output: 19 | '{sample}.%s.%s.1kb_read_count.h5' % (config['prefix'], config['epigenome']) 20 | message: "lisa compute read count from bigwig" 21 | log: "{sample}.log" 22 | params: species=config["species"], 23 | epigenome=config['epigenome'], 24 | prefix=config['prefix'] 25 | shell: "lisa_bw2hdf get_readcount_hdf --species {params.species} --epigenome {params.epigenome} --prefix {params.prefix} {input} 2>&1>>{log}" 26 | 27 | rule lisa_merge_reg_potential: 28 | input: 29 | expand('{sample}.%s.%s.reg_potential.h5' % 
(config['prefix'], config['epigenome']), 30 | sample=config['bigwigs']) 31 | output: 32 | '%s.%s.reg.h5' % (config['prefix'], config['epigenome']) 33 | message: "lisa compute reg potential from bigwig" 34 | params: species=config["species"], 35 | epigenome=config['epigenome'], 36 | prefix=config['prefix'] 37 | shell: 38 | "lisa_bw2hdf merge_reg_potential_hdf --species {params.species} --epigenome {params.epigenome} --prefix {params.prefix} {input}" 39 | 40 | rule lisa_merge_read_count: 41 | input: 42 | expand('{sample}.%s.%s.1kb_read_count.h5' % (config['prefix'], config['epigenome']), 43 | sample=config['bigwigs']) 44 | output: 45 | "%s.%s.readcount.h5" % (config['prefix'], config['epigenome']) 46 | message: "lisa compute read count from bigwig" 47 | params: species=config["species"], 48 | epigenome=config['epigenome'], 49 | prefix=config['prefix'] 50 | shell: 51 | "lisa_bw2hdf merge_readcount_hdf --species {params.species} --epigenome {params.epigenome} --prefix {params.prefix} {input}" 52 | 53 | -------------------------------------------------------------------------------- /lisa/rules/knockout.rule: -------------------------------------------------------------------------------- 1 | def get_knockout_command(cov, dtype, new_h5_rp, new_h5_count): 2 | frame="lisa_rank_tfs knockout --species {params.species} --epigenome {wildcards.epigenome} --coefficient {input.coef} --background {input.background} --foreground {input.foreground} --prefix {params.prefix} --dtype %s " % dtype 3 | if cov: 4 | frame+=" --covariates=True " 5 | else: 6 | frame+=" --covariates=False " 7 | 8 | if new_h5_rp != None: 9 | frame+=" --new_h5_rp %s --new_h5_count %s " % (new_h5_rp, new_h5_count) 10 | if new_h5_rp == None: 11 | frame+=" --new_h5_rp=None --new_h5_count=None " 12 | 13 | frame += " --only_newhdf5=%s" % (str(config['only_newhdf5'])) 14 | frame += ' >> {log} ' 15 | return frame 16 | 17 | rule lisa_in_silico_knockout_motif_99: 18 | input: 19 | coef="{sample}.{epigenome}.coefs.csv", 20 | #background = "{sample}.background_gene.300", 21 | background = "{sample}.background_gene.1000", 22 | foreground="{sample}.foreground_gene" 23 | output: 24 | "{sample}.{epigenome}.motif99.csv", 25 | "{sample}.{epigenome}.motif99.p_value.csv" 26 | message: "lisa In silico Knockout motif..." 27 | benchmark: 28 | "{sample}.{epigenome}.motif99.knockout.benchmark.txt" 29 | log: "{sample}.{epigenome}.log" 30 | params: species=config["species"], prefix="{sample}" 31 | shell: 32 | get_knockout_command(config['covariates'], 'motif99', 33 | config['new_rp_h5'], config['new_count_h5']) 34 | 35 | rule lisa_in_silico_knockout_tf_chipseqpeak: 36 | input: 37 | coef="{sample}.{epigenome}.coefs.csv", 38 | #background = "{sample}.background_gene.300", 39 | background = "{sample}.background_gene.1000", 40 | foreground="{sample}.foreground_gene" 41 | output: 42 | "{sample}.{epigenome}.chipseq.csv", 43 | "{sample}.{epigenome}.chipseq.p_value.csv" 44 | message: "lisa In silico Knockout chip-seq ..." 
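    # Sketch of the generated in-silico knockout command (assuming covariates=False,
    # new_rp_h5=None and only_newhdf5=False in the config): get_knockout_command() turns
    # the shell directive below into roughly:
    #   lisa_rank_tfs knockout --species {species} --epigenome {epigenome} \
    #     --coefficient {sample}.{epigenome}.coefs.csv \
    #     --background {sample}.background_gene.1000 --foreground {sample}.foreground_gene \
    #     --prefix {sample} --dtype chipseq --covariates=False \
    #     --new_h5_rp=None --new_h5_count=None --only_newhdf5=False >> {log}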
45 | benchmark: 46 | "{sample}.{epigenome}.chipseq.knockout.benchmark.txt" 47 | log: "{sample}.{epigenome}.log" 48 | params: species=config["species"], prefix="{sample}", 49 | shell: 50 | get_knockout_command(config['covariates'], 'chipseq', 51 | config['new_rp_h5'], config['new_count_h5']) 52 | -------------------------------------------------------------------------------- /lisa/rules/lisa_direct.rule: -------------------------------------------------------------------------------- 1 | rule lisa_direct_beta: 2 | input: 3 | #background = "{sample}.background_gene.300", 4 | background = "{sample}.background_gene.3000", 5 | foreground="{sample}.foreground_gene" 6 | output: 7 | "{sample}.3000.lisa_direct.csv" 8 | message: "lisa rank TF from peak directly..." 9 | benchmark: 10 | "{sample}.lisa_direct.benchmark.txt" 11 | log: "{sample}.log" 12 | params: species=config["species"], prefix="{sample}.3000" 13 | shell: 14 | "lisa_rank_tfs direct --species {params.species} --background {input.background} --foreground {input.foreground} --prefix {params.prefix} 2>&1>>{log}" 15 | 16 | 17 | rule lisa_direct_beta2: 18 | input: 19 | background = "{sample}.background_gene.1000", 20 | foreground="{sample}.foreground_gene" 21 | output: 22 | "{sample}.1000.lisa_direct.csv" 23 | message: "lisa rank TF from peak directly..." 24 | benchmark: 25 | "{sample}.lisa_direct.benchmark.txt" 26 | log: "{sample}.log" 27 | params: species=config["species"], prefix="{sample}.1000" 28 | shell: 29 | "lisa_rank_tfs direct --species {params.species} --background {input.background} --foreground {input.foreground} --prefix {params.prefix} 2>&1>>{log}" 30 | -------------------------------------------------------------------------------- /lisa/rules/model.rule: -------------------------------------------------------------------------------- 1 | def get_command(cov, new_h5): 2 | frame="lisa_model --sample_number {params.sample} --species {params.species} --epigenome {params.epigenome} --gene_set {input.gene_set} --prefix {params.prefix} --foreground {input.foreground} --background {input.background} " 3 | if cov: 4 | frame+=" --covariates=True " 5 | else: 6 | frame+=" --covariates=False " 7 | 8 | if new_h5 != None: 9 | frame+=" --new_h5 %s " % new_h5 10 | if new_h5 == None: 11 | frame+=" --new_h5=None " 12 | 13 | frame+=" --only_newhdf5=%s " % (str(config['only_newhdf5'])) 14 | return frame 15 | 16 | checkpoint lisa_regress: 17 | input: 18 | gene_set = "{sample}", 19 | background = "{sample}.background_gene.3000", 20 | foreground = "{sample}.foreground_gene" 21 | output: 22 | "{sample}.{epigenome}.coefs.csv", 23 | "{sample}.{epigenome}.lisa_predicted_rp.csv" 24 | message: "lisa regression steps..." 
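    # Sketch of the regression command (assuming covariates=False, new_rp_h5=None and
    # only_newhdf5=False): get_command() expands the shell directive below to roughly:
    #   lisa_model --sample_number {sample_number} --species {species} \
    #     --epigenome {epigenome} --gene_set {sample} --prefix {sample}.{epigenome} \
    #     --foreground {sample}.foreground_gene --background {sample}.background_gene.3000 \
    #     --covariates=False --new_h5=None --only_newhdf5=False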
25 | benchmark: 26 | "{sample}.{epigenome}.benchmark.txt" 27 | log: "{sample}.{epigenome}.log" 28 | params: species=config["species"], 29 | epigenome="{epigenome}", 30 | prefix="{sample}.{epigenome}", 31 | sample=config["sample_number"] 32 | shell: 33 | get_command(config['covariates'], config['new_rp_h5']) 34 | -------------------------------------------------------------------------------- /lisa/utils.py: -------------------------------------------------------------------------------- 1 | """ lisa utlity functions """ 2 | from multiprocessing import Pool, cpu_count 3 | import math 4 | import numpy as np 5 | 6 | from scipy.stats import wilcoxon, ks_2samp 7 | import scipy 8 | import pandas as pd 9 | 10 | def multiple_apply(func, df, x, y, num_processes=None): 11 | ''' Apply a function separately to each column in a dataframe, in parallel.''' 12 | # If num_processes is not specified, default to minimum(#columns, #machine-cores) 13 | if num_processes==None: 14 | #num_processes = min(df.shape[1], cpu_count()) 15 | num_processes = 5 16 | 17 | # 'with' context manager takes care of pool.close() and pool.join() for us 18 | with Pool(num_processes) as pool: 19 | # we need a sequence of columns to pass pool.map 20 | seq = [[df[col_name][x].values, df[col_name][y].values] for col_name in df.columns] 21 | # pool.map returns results as a list 22 | results_list = pool.map(func, seq) 23 | # return list of processed columns, concatenated together as a new dataframe 24 | return pd.DataFrame(results_list, index=df.columns) 25 | 26 | 27 | def convert_name(name): 28 | try: 29 | name = name.decode('utf-8').replace("tf_", "") 30 | except: 31 | name = name.replace("tf_", "") 32 | return name 33 | 34 | def one_side_ks_test(x, y): 35 | """ http://stackoverflow.com/questions/16296225/one-sided-wilcoxon-signed-rank-test-using-scipy 36 | So, to get one-side p value, you just need prob/2. or 1-prob/2. 
37 | 38 | here: one-side significant less x < y 39 | """ 40 | test = ks_2samp(x, y) 41 | d = test[0] 42 | p = test[1]/2 43 | return p 44 | 45 | def mannwhitneyu_test(x,y,how="two-sided"): 46 | try: 47 | return scipy.stats.mannwhitneyu(x,y,alternative=how)[1] 48 | except: 49 | return 1 50 | 51 | def binarize_gene_set(gene_set, *args): 52 | """ gene_set: one gene per line 53 | """ 54 | #print(gene_set) 55 | refseq, symbol = args 56 | with open(gene_set) as fin: 57 | gene_set = list(set([line.strip().upper() for line in fin])) 58 | gene_vec = np.zeros(len(refseq)) 59 | if len(np.intersect1d(refseq, gene_set)) > 5: 60 | #print('input refseq ...') 61 | gene_vec[np.in1d(refseq, gene_set)] = 1 62 | elif len(np.intersect1d(symbol, gene_set)) > 5: 63 | #print('input symbol ...') 64 | gene_vec[np.in1d(symbol, gene_set)] = 1 65 | else: 66 | raise Exception("no genes found in referenence...") 67 | return gene_vec 68 | 69 | class Weight: 70 | """ Exponential decay function """ 71 | def __init__(self, bin_length=1000): 72 | padding = int(1e5) # TSS +/- 100kb 73 | assert bin_length > 0 74 | assert (2*padding+bin_length)%bin_length == 0 75 | 76 | self.bin_length = bin_length 77 | self.bin_num = (2*padding+bin_length)/bin_length # bin number 78 | 79 | distances = np.array([z + bin_length/2 for z in 80 | range(int(-padding-bin_length/2), 81 | int(padding+bin_length/2), bin_length)], 82 | dtype=np.float32) 83 | self.alpha = -math.log(1.0/3.0)*10 # 1e5/1e4, 1e4: half decay 84 | self.balance_weight(distances) # weight 85 | 86 | def get_weight(self): 87 | """ get the weight """ 88 | return self.weight 89 | 90 | def get_binnum(self): 91 | """ get the bin number around TSS """ 92 | return self.bin_num 93 | 94 | def balance_weight(self, distances): 95 | """ function to balance weight according the TSS and bin center offset 96 | """ 97 | weight = np.exp(-np.fabs(distances) * self.alpha/1e5) 98 | self.weight = 2*weight/ (1+weight) 99 | -------------------------------------------------------------------------------- /lisa/workflows/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa/workflows/__init__.py -------------------------------------------------------------------------------- /lisa/workflows/cluster.json: -------------------------------------------------------------------------------- 1 | { 2 | "__default__" : 3 | { 4 | "queue" : "general", 5 | "nCPUs" : "3", 6 | "memory" : "16g", 7 | "time" : "24:00:00", 8 | "name" : "{rule}.{wildcards.sample}", 9 | "output" : "logs/cluster/{rule}.{wildcards.sample}.%j.out", 10 | "error" : "logs/cluster/{rule}.{wildcards.sample}.%j.err" 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /lisa/workflows/sbatch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export PATH=/n/home04/xiaoleliu/ChiLin/alvin/xiaoleliu_lab/marge2/phaseI_init/miniconda3/bin:$PATH 4 | source activate lisa 5 | 6 | mkdir -p logs/cluster 7 | snakemake --unlock 8 | snakemake -j 3 --cluster-config cluster.json --immediate-submit --cluster "sbatch --time={cluster.time} --mem={cluster.memory} --partition={cluster.queue} --cpus-per-task={cluster.nCPUs} -J {cluster.name} -o {cluster.output} -e {cluster.error}" 9 | 10 | -------------------------------------------------------------------------------- /lisa/workflows/sbatch_dependency.py: 
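SLURM wrapper used with Snakemake's --immediate-submit mode (see the docstring below for the invocation): it pulls the per-rule time/memory/queue/CPU settings from the cluster.json-backed job properties via read_job_properties(), submits the generated job script with sbatch, adds "-d afterok:<job ids>" when upstream job ids were passed in through the {dependencies} placeholder, and prints the new SLURM job id back to Snakemake so downstream jobs can depend on it.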
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Submit this clustering script for sbatch to snakemake with: 4 | snakemake -j 99 --debug --immediate-submit --cluster-config cluster.json --cluster 'sbatch_script.py {dependencies}' 5 | """ 6 | ## In order to submit all the jobs to the moab queuing system, one needs to write a wrapper. 7 | import sys 8 | import subprocess 9 | import re 10 | import os 11 | from snakemake.utils import read_job_properties 12 | import argparse 13 | 14 | parser = argparse.ArgumentParser(description='Snakemake script') 15 | parser.add_argument("dependencies", nargs="*", help="{{dependencies}} string given by snakemake\n") 16 | parser.add_argument("snakescript", help="Snakemake generated shell script with commands to execute snakemake rule\n") 17 | 18 | args = parser.parse_args() 19 | 20 | dependencies = args.dependencies 21 | jobscript = args.snakescript 22 | print(dependencies, file=sys.stderr) 23 | print(jobscript, file=sys.stderr) 24 | 25 | job_properties = read_job_properties(jobscript) 26 | # access property defined in the cluster configuration file (Snakemake >=3.6.0), cluster.json 27 | time = job_properties["cluster"]["time"] 28 | cpu = job_properties["cluster"]["nCPUs"] 29 | mem = job_properties["cluster"]["memory"] 30 | queue = job_properties["cluster"]["queue"] 31 | name = job_properties["cluster"]["name"] 32 | output = job_properties["cluster"]["output"] 33 | error = job_properties["cluster"]["error"] 34 | 35 | # all figure out job dependencies, the last argument is the jobscript which is baked in snakemake 36 | if dependencies == None or len(dependencies) < 1: 37 | deps = " " 38 | else: 39 | deps = " -d " + ','.join(["afterok:%s" % d for d in dependencies]) 40 | 41 | print(job_properties['rule'], file=sys.stderr) 42 | if job_properties['rule'].startswith('merge'): 43 | cmdline = 'sbatch --time={time} {deps} --mem=50 --partition={queue} --cpus-per-task=1 -J {name} -o {output} -e {error} --open-mode=append {job}'.format(name=name, time = time, queue=queue, output=output, error=error, deps=deps, job=jobscript) 44 | else: 45 | cmdline = 'sbatch --time={time} {deps} --mem={mem} --partition={queue} --cpus-per-task={cpu} -J {name} -o {output} -e {error} --open-mode=append {job}'.format(mem=mem, cpu=cpu, name=name, time = time, queue=queue, output=output, error=error, deps=deps, job=jobscript) 46 | 47 | popenrv = subprocess.Popen(cmdline, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True).communicate() 48 | 49 | print(cmdline, file=sys.stderr) 50 | #(b'Submitted batch job 86634327\n', None) 51 | print(popenrv, file=sys.stderr) 52 | print("%i" % int(popenrv[0].strip().split()[-1]), file=sys.stderr) 53 | print("%i" % int(popenrv[0].strip().split()[-1])) 54 | -------------------------------------------------------------------------------- /lisa/workflows/sbatch_dependency.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export PATH=/n/home04/xiaoleliu/ChiLin/alvin/xiaoleliu_lab/marge2/phaseI_init/miniconda3/bin:$PATH 4 | source activate lisa 5 | 6 | mkdir -p logs/cluster 7 | #snakemake --unlock 8 | #parallel simple job 9 | #snakemake -j 150 --cluster-config ../cluster.json --immediate-submit --cluster "sbatch --time={cluster.time} --mem={cluster.memory} --partition={cluster.queue} --cpus-per-task={cluster.nCPUs} -J {cluster.name} -o {cluster.output} -e {cluster.error} --open-mode=append" 10 | 11 | #with 
dependencies/multi-dependencies on 12 | split -d -l 100 ../creeds_tf.txt ../creeds_tf.txt. 13 | for i in ../creeds_tf.txt.*;do 14 | echo "------" 15 | echo $i 16 | echo "------" 17 | snakemake --config gene_list=${i} -j 50 --immediate-submit --cluster-config ../cluster.json --cluster "export PATH=/n/home04/xiaoleliu/ChiLin/alvin/xiaoleliu_lab/marge2/phaseI_init/miniconda3/bin:$PATH;source activate lisa; ../sbatch_script.py {dependencies}" 18 | break 19 | done 20 | 21 | -------------------------------------------------------------------------------- /lisa_docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = lisa 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /lisa_docs/source/FAQ.rst: -------------------------------------------------------------------------------- 1 | 2 | Issues 3 | --------- 4 | Genes in the gene set should not be less than 20. 5 | -------------------------------------------------------------------------------- /lisa_docs/source/Installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | =============== 3 | 4 | Mac 5 | --------- 6 | 7 | .. code-block:: bash 8 | :linenos: 9 | 10 | brew install openssl 11 | export C_INCLUDE_PATH=${C_INCLUDE_PATH}:/usr/local/Cellar/openssl/your_version/include 12 | export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/Cellar/openssl/your_version/lib/" 13 | 14 | Linux 15 | --------- 16 | 17 | .. code-block:: bash 18 | :linenos: 19 | 20 | sudo apt-get install openssl 21 | 22 | Install conda python 3.6 23 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 24 | 25 | Follow the instruction: https://conda.io/miniconda.html to install python 3.6. 26 | 27 | other dependency 28 | ~~~~~~~~~~~~~~~~~~~ 29 | 30 | .. code-block:: bash 31 | :linenos: 32 | 33 | wget -c https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 34 | bash Miniconda3-latest-Linux-x86_64.sh 35 | 36 | .. code-block:: bash 37 | :linenos: 38 | 39 | # install miniconda3 40 | export PATH="${HOME}/miniconda3/bin:$PATH" 41 | conda install anaconda-client 42 | conda create -n lisa anaconda python=3 43 | source activate lisa 44 | conda install -c anaconda openssl 45 | conda install -c anaconda curl 46 | 47 | conda config --add channels defaults 48 | conda config --add channels conda-forge 49 | conda config --add channels bioconda 50 | conda install blas mkl-service 51 | 52 | # this is for curl and openssl header files 53 | export C_INCLUDE_PATH=${C_INCLUDE_PATH}:/usr/include/:${HOME}/.local/include:${HOME}/miniconda3/envs/lisa/include 54 | 55 | pip install deeptools 56 | pip install theano 57 | pip install fire 58 | pip install psutil 59 | pip install numpy 60 | pip install scipy 61 | pip install sklearn 62 | 63 | 64 | Install the module using: 65 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 66 | 67 | .. 
code-block:: bash 68 | :linenos: 69 | 70 | git clone https://github.com/qinqian/lisa 71 | cd lisa 72 | python setup.py install --user 73 | 74 | 75 | Get dependent data 76 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 77 | The related chromatin profile dataset will be released later, use LISA_ now. 78 | 79 | .. _LISA: http://lisa.cistrome.org 80 | -------------------------------------------------------------------------------- /lisa_docs/source/_static/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/1.png -------------------------------------------------------------------------------- /lisa_docs/source/_static/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/2.png -------------------------------------------------------------------------------- /lisa_docs/source/_static/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/3.png -------------------------------------------------------------------------------- /lisa_docs/source/_static/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/4.png -------------------------------------------------------------------------------- /lisa_docs/source/_static/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/5.png -------------------------------------------------------------------------------- /lisa_docs/source/_static/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/6.png -------------------------------------------------------------------------------- /lisa_docs/source/_static/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/7.png -------------------------------------------------------------------------------- /lisa_docs/source/_static/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/8.png -------------------------------------------------------------------------------- /lisa_docs/source/_static/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/9.png -------------------------------------------------------------------------------- /lisa_docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. lisa documentation master file, created by 2 | sphinx-quickstart on Fri Jul 28 19:59:12 2017. 
3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to lisa's documentation! 7 | ================================ 8 | 9 | LISA involve four methods to discover potential enhancer and rank TFs. If get stuck, try to create issues at https://github.com/qinqian/lisa. 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | :caption: Contents: 14 | 15 | Installation 16 | Tutorial 17 | 18 | Indices and tables 19 | ================== 20 | 21 | * :ref:`genindex` 22 | * :ref:`modindex` 23 | * :ref:`search` 24 | -------------------------------------------------------------------------------- /lisa_web/generate_heatmap_js.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from clustergrammer import Network 3 | import numpy as np 4 | import collections 5 | import argparse 6 | 7 | p = argparse.ArgumentParser() 8 | p.add_argument('-d') 9 | args = p.parse_args() 10 | 11 | net = Network() 12 | delta = pd.read_csv(args.d, index_col=0) 13 | status = delta.iloc[:-1, -1].values.reshape(-1) 14 | delta_f = delta.iloc[:-1, :-1] 15 | 16 | tf = delta_f.columns.map(lambda x: x.split('|')[1]) 17 | tf_dict = collections.OrderedDict() 18 | for i, t in enumerate(tf): 19 | tf_dict[t] = tf_dict.get(t, []) + [i] 20 | ids = [] 21 | for t in tf_dict: 22 | ids.append(tf_dict[t][:3]) 23 | ids = np.concatenate(ids) 24 | 25 | delta_f = delta_f.iloc[:, ids] 26 | 27 | target, = np.where(status == 1) 28 | cont, = np.where(status == 0) 29 | 30 | print(target.shape) 31 | if len(target) < 100: 32 | target_n = len(target) 33 | else: 34 | target_n = 100 35 | 36 | index = np.concatenate([np.random.choice(target, target_n), np.random.choice(cont, 100)]) 37 | status = status[index] 38 | delta_f = delta_f.iloc[index, :50] 39 | 40 | ann = pd.read_table('/data/home/qqin/01_Projects/Programming/dc2/scripts/hg38_best_dc_tfcr_basedon_frip_peak_dhs_all_nonhm_nonca.xls') 41 | 42 | ann = ann.iloc[:, [0, 6, 8]] 43 | 44 | ann_dict = {} 45 | for i in range(ann.shape[0]): 46 | ann_dict[str(ann.iloc[i, 0])]= ann.iloc[i, 1:] 47 | 48 | tf = delta_f.columns.map(lambda x: "TF: %s" % x.split('|')[1]) 49 | genes = delta_f.index.map(lambda x:x.split(':')[-1]) 50 | 51 | genes, index = np.unique(genes, return_index=True) 52 | status = status[index] 53 | 54 | tfs = [] 55 | for i,j in enumerate(tf): 56 | tfs.append("%s.%s" % (j, i)) 57 | 58 | ids = delta_f.columns.map(lambda x:x.split('|')[0]) 59 | fout = open("%s_heatmap_matrix.txt" % args.d, 'w') 60 | fout.write("\t\t%s\n" % ('\t'.join(tfs))) 61 | 62 | cls = [] 63 | for i in ids: 64 | if ann_dict.get(i, ['NA'])[0] == 'NA': 65 | cls.append("Cell Line: %s" % ('NA')) 66 | else: 67 | cls.append("Cell Line: %s" % (ann_dict[i][0])) 68 | fout.write("\t\t%s\n" % ('\t'.join(cls))) 69 | 70 | ts = [] 71 | for i in ids: 72 | if ann_dict.get(i, ['NA', 'NA'])[1] == 'NA': 73 | ts.append("Tissue: %s" % ('NA')) 74 | else: 75 | ts.append("Tissue: %s" % (ann_dict[i][1])) 76 | fout.write("\t\t%s\n" % ('\t'.join(ts))) 77 | 78 | for i in range(status.shape[0]): 79 | fout.write('%s\t%s\t%s\n' % ("Gene: %s"% genes[i], "Input Gene: %s" % status[i], '\t'.join(delta_f.iloc[i, :].map(str)))) 80 | fout.close() 81 | 82 | net.load_file("%s_heatmap_matrix.txt" % args.d) 83 | net.cluster() 84 | net.write_json_to_file('viz', '%s_mult_view.json' % args.d) 85 | 86 | -------------------------------------------------------------------------------- /lisa_web/lisa_scatter.py: 
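Small plotting helper with hard-coded input file names: it merges the lisa_direct p-value tables of an up- and a down-regulated gene set on their first column, then uses plotly to draw an interactive scatter of -log10(p) for one gene set against the other, with the merged identifier column shown as hover text.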
-------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot 4 | from plotly import tools 5 | 6 | from plotly.graph_objs import Scatter, Heatmap 7 | 8 | up=pd.read_csv('3_down.gene_symbol.lisa_direct.csv', header=None) 9 | dn=pd.read_csv('3_up.gene_symbol.lisa_direct.csv', header=None) 10 | 11 | print(up.head()) 12 | final = up.merge(dn, on=0) 13 | print(final.head()) 14 | trace0 = Scatter(x=-np.log10(final.iloc[:, 1]), 15 | y=-np.log10(final.iloc[:, 2]), mode= 'markers', 16 | marker= dict(size= 9, 17 | opacity= 0.9, 18 | line = dict(width = 0.8) 19 | ), 20 | text=final.iloc[:, 0], 21 | xaxis="Up-regulated gene set results", yaxis="Down-regulated gene set results") 22 | 23 | plot([trace0], filename='test.html') 24 | -------------------------------------------------------------------------------- /lisa_web/lisa_web.conf: -------------------------------------------------------------------------------- 1 | 2 | WSGIDaemonProcess lisa user=qqin group=lab threads=8 3 | WSGIScriptAlias / /project/Cistrome/LISA/lisa_web/lisa_web.wsgi 4 | WSGIScriptReloading On 5 | 6 | ServerName lisa.cistrome.org 7 | DocumentRoot /project/Cistrome/LISA/lisa_web/ 8 | LogLevel Debug 9 | CustomLog /project/Cistrome/LISA/access.log combined 10 | ErrorLog /project/Cistrome/LISA/error.log 11 | ServerSignature On 12 | 13 | 14 | require all granted 15 | 16 | 17 | 18 | AllowOverride AuthConfig Limit Indexes Options 19 | Options +ExecCGI -MultiViews +SymLinksIfOwnerMatch 20 | Require ip 155.52.47.121 21 | Require ip 127 22 | 23 | Require all granted 24 | 25 | 26 | Alias /data5/lisa_browser /project/Cistrome/LISA/lisa_bw 27 | 28 | 29 | AuthType Basic 30 | AuthName "Restricted Content" 31 | AuthUserFile /project/Cistrome/LISA/lisa_web/cistromedb_data/.htpasswd 32 | Require user lisa 33 | AllowOverride AuthConfig Limit Indexes Options 34 | Options +ExecCGI -MultiViews +SymLinksIfOwnerMatch 35 | # 36 | # Require all granted 37 | # 38 | 39 | Alias /cistromedb_data /project/Cistrome/LISA/lisa_web/cistromedb_data 40 | 41 | 42 | -------------------------------------------------------------------------------- /lisa_web/lisa_web.wsgi: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | sys.stdout = sys.stderr 4 | p = '/data/home/qqin/lisa_web' 5 | 6 | activate_this = os.path.join('/data/home/qqin/rabit/rabitqqin/', 'bin', 'activate_this.py') 7 | execfile(activate_this, dict(__file__=activate_this)) 8 | 9 | sys.path.append(p) 10 | 11 | from lisa_web import app as application 12 | 13 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/__init__.pyc -------------------------------------------------------------------------------- /lisa_web/lisa_web/__init__.py~: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from logging.handlers import RotatingFileHandler 4 | import time 5 | import numpy as np 6 | 7 | from flask import Flask, render_template, redirect, url_for, send_from_directory 8 | from flask import request 9 | from flask_bootstrap import Bootstrap 10 | 11 | 12 | from flask_wtf import FlaskForm 13 | from 
flask_wtf.file import FileField, FileRequired 14 | from werkzeug.utils import secure_filename 15 | 16 | from wtforms import StringField 17 | from wtforms.validators import DataRequired 18 | 19 | class RabitForm(FlaskForm): 20 | name = StringField('Job Name', validators=[DataRequired()]) 21 | gene = FileField('Select Rabit input file', validators=[FileRequired()], render_kw={'multiple': True, 'data-preview-file-type':"text"}) 22 | 23 | # initialize an application 24 | app = Flask(__name__, instance_relative_config = True) 25 | app.config['UPLOADED_PATH'] = 'upload' 26 | app.secret_key = 's3cr3t' # crsf 27 | 28 | # debug mode on 29 | app.debug = True 30 | if not app.debug: 31 | app.logger.setLevel(logging.INFO) 32 | handler = RotatingFileHandler('log', maxBytes=10000000, backupCount=20) 33 | formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s: %(message)s") 34 | handler.setLevel(logging.INFO) 35 | handler.setFormatter(formatter) 36 | app.logger.addHandler(handler) 37 | 38 | @app.errorhandler(500) 39 | def internal_error(exception): 40 | app.logger.exception(exception) 41 | return "Sorry internal program error", 500 42 | 43 | @app.errorhandler(404) 44 | def page_not_found(e): 45 | return render_template('404.html'), 404 46 | 47 | @app.route('/', methods=['GET', 'POST']) 48 | def upload_file(): 49 | form = RabitForm() 50 | if form.validate_on_submit(): 51 | f = form.gene.data 52 | filename = secure_filename(f.filename) 53 | data = os.path.join(app.config['UPLOADED_PATH'], "%s.%s" % (f.filename, time.time())) 54 | f.save(data) 55 | app.logger.info("%s uploaded at %s" % (data, time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))) 56 | os.system('cp %s download' % data) 57 | 58 | app.logger.info("lisa modeling finished %s" % (data, time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))) 59 | output = "%s.o.t" % data 60 | with open("download/%s.txt" % os.path.basename(output), 'w') as out: 61 | with open(output) as inf: 62 | n = 0 63 | lines = inf.readlines() 64 | sample = np.array(lines[0].split()) 65 | t_vals = np.array(map(float, lines[1].split()[1:])) 66 | index = np.argsort(t_vals)[::-1] 67 | sample = sample[index] 68 | t_vals = t_vals[index] 69 | for i,j in zip(sample, t_vals): 70 | print >>out, "%s\t%s" % (i, j) 71 | return redirect(url_for('custom_download', filename="%s.txt" % os.path.basename(output))) 72 | return render_template('index.html', form = form) 73 | 74 | @app.route('/success', methods=['GET', 'POST']) 75 | def sucess(): 76 | return '

Succeed
' 77 | 78 | # add new static folder 79 | @app.route('/img/') 80 | def custom_static(filename): 81 | return send_from_directory('img', filename) 82 | 83 | # add new static folder 84 | @app.route('/download/') 85 | def custom_download(filename): 86 | return send_from_directory('download', filename) 87 | 88 | # interface 89 | Bootstrap(app) 90 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/check_genename.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def check_available_genes(genes, species='hg38'): 4 | if genes[0].startswith('ENSG'): 5 | genes = list(map(lambda x: x.split('.')[0], genes)) 6 | if species == 'hg38': 7 | ensemble = pd.read_csv('/project/Cistrome/LISA/lisa_web/download/Homo97_Ensembl.txt', sep='\t') 8 | else: 9 | ensemble = pd.read_csv('/project/Cistrome/LISA/lisa_web/download/Mus97_Ensembl.txt', sep='\t') 10 | ensemble.iloc[:, 0] = ensemble.iloc[:, 0].map(lambda x: x.split('.')[0]) 11 | symbols = ensemble.loc[ensemble.iloc[:, 0].isin(genes), 'gene_name'] 12 | return list(set(symbols)) 13 | else: 14 | return list(set(genes)) 15 | 16 | def clean_empty_lins(genes): 17 | filtered_genes = filter(lambda x:x!='', genes) 18 | return list(set(filtered_genes)) 19 | 20 | 21 | if __name__ == '__main__': 22 | # print(clean_empty_lins(['a', 'b', 'c', ''])) 23 | # print(check_available_genes(['ENSG00000174837', 24 | # 'ENSG00000232702', 25 | # 'ENSG00000172738'], 'human')) 26 | print(check_available_genes(['AR', 27 | 'FOXA1', 28 | 'TP53'], 'mouse')) 29 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/fonts/FontAwesome.otf -------------------------------------------------------------------------------- /lisa_web/lisa_web/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /lisa_web/lisa_web/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /lisa_web/lisa_web/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /lisa_web/lisa_web/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /lisa_web/lisa_web/form.py: -------------------------------------------------------------------------------- 1 | from 
flask_wtf import FlaskForm 2 | from flask_wtf.file import FileField, FileRequired 3 | from wtforms import TextAreaField, BooleanField, SubmitField, SelectMultipleField, SelectField, StringField 4 | from wtforms.validators import DataRequired, Required, length, optional, Email 5 | from wtforms.fields.html5 import EmailField 6 | 7 | class LISAForm(FlaskForm): 8 | genes = TextAreaField('Genes', validators=[Required()]) 9 | labels = StringField('labels', validators=[optional()]) 10 | 11 | genes2 = TextAreaField('Genes2', validators=[optional()]) 12 | labels2 = StringField('labels 2', validators=[optional()]) 13 | 14 | background = TextAreaField('Background', validators=[optional()]) 15 | 16 | name = StringField('Job Name', validators=[optional()]) ## change to optional and give out a warning information 17 | mail = EmailField('Optional email', validators=[optional(), Email()]) 18 | method = SelectField("Methods", 19 | choices=[('knockout', 'ISD-RP for both motif and ChIP-seq'), 20 | ('beta', 'TF ChIP-seq Peak-RP'), 21 | ('all', 'All')], 22 | default='all') 23 | mark = SelectField("Chromatin profile", 24 | choices=[('H3K27ac', 'H3K27ac'), 25 | ('DNase', 'DNase-seq'), 26 | ('All', 'All'), 27 | #('H3K4me3', 'H3K4me3'), 28 | #('H3K27me3', 'H3K27me3'), 29 | #('H3K4me1', 'H3K4me1') 30 | #('ATAC-seq', 'ATAC-seq'), 31 | ], validators=[Required()], default='All') 32 | 33 | species = SelectField("Species", choices=[('hg38', 'Human'), ('mm10', 'Mouse')], default='hg38') 34 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/form.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/form.pyc -------------------------------------------------------------------------------- /lisa_web/lisa_web/images/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/images/1.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/images/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/images/2.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/images/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/images/3.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/images/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/images/4.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/images/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/images/5.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/images/6.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/images/6.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/images/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/images/7.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/images/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/images/8.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/mail.py: -------------------------------------------------------------------------------- 1 | import smtplib 2 | from email.mime.base import MIMEBase 3 | from email.mime.multipart import MIMEMultipart # 3.0 4 | from email.mime.text import MIMEText 5 | 6 | #from email.Utils import COMMASPACE, formatdate 7 | import datetime 8 | from email import encoders 9 | 10 | COMMASPACE = ', ' 11 | 12 | 13 | def send_localhost_mail(resultOpt, subject, to, html, attachment, server="localhost"): 14 | msg = MIMEMultipart('alternative') 15 | fro = 'lisa@cistrome.org' 16 | msg['From'] = fro 17 | msg['To'] = COMMASPACE.join(to) 18 | #msg['Date'] = formatdate(localtime=True) 19 | msg['Subject'] = subject 20 | 21 | if resultOpt == 'html': 22 | msg.attach( MIMEText(html, 'html') ) 23 | else: 24 | import tempfile 25 | try: 26 | temp = tempfile.TemporaryFile() 27 | temp.write(attachment) 28 | temp.seek(0) 29 | part = MIMEBase('application', "octet-stream") 30 | part.set_payload( temp.read() ) 31 | encoders.encode_base64(part) 32 | part.add_header('Content-Disposition', 'attachment; filename="%s"' 33 | % subject + ".xls") 34 | msg.attach(part) 35 | finally: 36 | temp.close() 37 | 38 | smtp = smtplib.SMTP(server) 39 | smtp.sendmail(fro, to, msg.as_string()) 40 | smtp.close() 41 | 42 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | #python generate_gallery.py 4 | 5 | #python generate_gallery2.py 6 | 7 | while read line; do 8 | fs=($line) 9 | echo ${fs[2]} | tr ',' '\n' > ${fs[0]}_${fs[1]}.txt 10 | done < <(cut -f 1,9,12 lisa_results_meta_table_mouse_with_gene_sets.xls | sed 1d | sort -k 1 | uniq) 11 | 12 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/1.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/2.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/3.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/4.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/5.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/6.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/7.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/8.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/Figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/Figure1.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/Figure2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/Figure2.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/Figure3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/Figure3.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/Figure4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/Figure4.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/Figure5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/Figure5.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/Figure6.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/Figure6.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/Figure7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/Figure7.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/Figure8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/Figure8.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/MACRO_ape_all_cistrome_pwm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | 4 | mkdir -p macro_ape_cistrome 5 | for i in cistrome/*pwm;do 6 | pi=$(basename ${i/.pwm/}) 7 | for j in cistrome/*pwm;do 8 | pj=$(basename ${j/.pwm/}) 9 | java -cp ape-2.0.1.jar ru.autosome.macroape.EvalSimilarity $i $j 1>macro_ape_cistrome/${pi}_${pj}_macro_ape.txt 10 | done 11 | done 12 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/gallery.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/gallery.js -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/gallery.js~: -------------------------------------------------------------------------------- 1 | // The table generation function 2 | function tabulate(cl, data, columns, interact) { 3 | var table = d3.select("." 
+ cl).append("table") 4 | .attr("class", "table compact hover row-border tab" + cl), 5 | thead = table.append("thead"), 6 | tbody = table.append("tbody").attr("class", "tbody"); 7 | 8 | // append the header row 9 | thead.append("tr") 10 | .selectAll("th") 11 | .data(columns) 12 | .enter() 13 | .append("th") 14 | .text(function(column) { return column; }); 15 | // create a row for each object in the data 16 | var rows = tbody.selectAll("tr") 17 | .data(data) 18 | .enter() 19 | .append("tr"); 20 | 21 | // create a cell in each row for each column 22 | var cells = rows.selectAll("td") 23 | .data(function(row) { 24 | return columns.map(function(column) { 25 | return {column: column, value: row[column]}; 26 | }); 27 | }) 28 | .enter() 29 | .append("td") 30 | .style({ 31 | "vertical-align": "middle" 32 | }) 33 | .attr({ 34 | data_id: function(d) { return d.value.split(';')[0].split('|')[0]; } 35 | }) 36 | .html(function(d) { 37 | if (d.column != 'Transcription Factor') { 38 | if (interact){ 39 | a = d.value; 40 | a = a.split(';'); 41 | if (a.length == 2) { 42 | return a[1]; 43 | } else { 44 | return d.value; 45 | } 46 | } else { 47 | if (cl == "tf2") { 48 | if (d.value.split(';').length==2) { 49 | return d.value.split(';')[1] + ""; 50 | } else { 51 | return ""; 52 | } 53 | } // for motifs 54 | else { 55 | a = d.value.split('|'); 56 | if (a.length == 2) { 57 | return a[1]; 58 | } else { 59 | return d.value; 60 | } 61 | } 62 | } 63 | } 64 | return d.value; 65 | }); 66 | 67 | if (cl != "tfl") { 68 | 69 | if (cl != "tf2") { 70 | $('.tab' + cl).ready(function() { 71 | $('.tab'+ cl).DataTable({ 72 | "order": [], 73 | }); 74 | }); 75 | } else { 76 | $('.tab' + cl).ready(function() { 77 | $('.tab'+ cl).DataTable({ 78 | "order": [], 79 | "columnDefs": [ 80 | { "width": "20%", "targets": 0 } 81 | ] 82 | }); 83 | }); 84 | } 85 | } 86 | return table; 87 | } 88 | 89 | function update_progress(status_url, status_div, div_heatmap_data) { 90 | // /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random 91 | $(".gallery").ready({ 92 | d3.csv('/gallery/lisa_results_meta_table_human_with_gene_sets.csv', function(error, d) { 93 | tabulate('gallery', d, ["Transcription Factor", "1st Sample p-value", "2nd Sample p-value", "3rd Sample p-value", "4th Sample p-value", "5th Sample p-value"], false, 'gallery'); 94 | }); 95 | }); 96 | }; 97 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/hzome_functions.js: -------------------------------------------------------------------------------- 1 | function ini_hzome(root_id){ 2 | 3 | // save gene data to global variable 4 | gene_data = {}; 5 | 6 | function get_mouseover(root_tip, gene_symbol){ 7 | 8 | // not sure if this is necessary 9 | if ( d3.select(root_tip + '_row_tip').classed(gene_symbol) ){ 10 | get_request(root_tip, gene_symbol); 11 | } 12 | 13 | } 14 | 15 | function get_request(root_tip, ini_gene_symbol){ 16 | 17 | var gene_symbol; 18 | if (ini_gene_symbol.indexOf(' ') > 0){ 19 | gene_symbol = ini_gene_symbol.split(' ')[0]; 20 | } else if (ini_gene_symbol.indexOf('_') > 0){ 21 | gene_symbol = ini_gene_symbol.split('_')[0]; 22 | } 23 | else { 24 | gene_symbol = ini_gene_symbol; 25 | } 26 | 27 | var base_url = 'https://amp.pharm.mssm.edu/Harmonizome/api/1.0/gene/'; 28 | var url = base_url + gene_symbol; 29 | 30 | $.get(url, function(data) { 31 | 32 | data = JSON.parse(data); 33 | 34 | // save data for repeated use 35 | gene_data[gene_symbol] = {} 36 | gene_data[gene_symbol].name = 
data.name; 37 | gene_data[gene_symbol].description = data.description; 38 | 39 | set_tooltip(data, root_tip, ini_gene_symbol); 40 | 41 | return data; 42 | 43 | }); 44 | } 45 | 46 | function set_tooltip(data, root_tip, gene_symbol){ 47 | 48 | if (data.name != undefined){ 49 | 50 | d3.selectAll(root_tip + '_row_tip') 51 | .html(function(){ 52 | var sym_name = gene_symbol + ': ' + data.name; 53 | var full_html = '

' + sym_name + ' ' + ' ' + 54 | data.description + '
'; 55 | return full_html; 56 | }); 57 | } 58 | } 59 | 60 | 61 | function gene_info(root_tip, gene_info){ 62 | 63 | var gene_symbol = gene_info.name; 64 | 65 | if (_.has(gene_data, gene_symbol)){ 66 | var inst_data = gene_data[gene_symbol]; 67 | set_tooltip(inst_data, root_tip, gene_symbol); 68 | } else{ 69 | setTimeout(get_mouseover, 250, root_tip, gene_symbol); 70 | } 71 | 72 | } 73 | 74 | hzome = {} 75 | 76 | hzome.gene_info = gene_info; 77 | hzome.gene_data = gene_data; 78 | hzome.get_mouseover = get_mouseover; 79 | hzome.get_request = get_request; 80 | 81 | return hzome; 82 | 83 | } 84 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/lisa.css~: -------------------------------------------------------------------------------- 1 | body { padding-top: 70px; } 2 | 3 | 4 | textarea { 5 | resize: none; 6 | max-width: 280px; 7 | max-height: 800px; 8 | height: 300px; 9 | overflow-y:hidden; 10 | } 11 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/lisa.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/lisa.jpg -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/lisa2.css: -------------------------------------------------------------------------------- 1 | .fixed-top { 2 | background-color: #2B3B61; 3 | color: #FFFFFF; 4 | padding-top: 15px; 5 | padding-bottom: 19px; 6 | height: 50px; 7 | } 8 | 9 | .fixed-bottom { 10 | background-color: #2B3B61; 11 | color: #FFFFFF; 12 | height: 52px; 13 | padding-top: 5px; 14 | padding-bottom: 10px; 15 | } 16 | 17 | .nav-a { 18 | color: #FFFFFF; 19 | text-decoration: blink; 20 | background-color: transparent; 21 | } 22 | 23 | * { 24 | margin: 0; 25 | } 26 | 27 | .body3 { 28 | padding-top: 32px; 29 | padding-bottom: 30px; 30 | font-family: lato, sans-serif; 31 | font-size: 14px; 32 | font-weight: normal; 33 | background-color: #fff; 34 | } 35 | 36 | .body2 { 37 | padding-top: 35px; 38 | padding-bottom: 30px; 39 | font-family: lato, sans-serif; 40 | font-size: 14px; 41 | font-weight: normal; 42 | background-color: #fff; 43 | } 44 | 45 | .body { 46 | padding-top: 0px; 47 | padding-bottom: 120px; 48 | margin-bottom: 120px; 49 | font-family: lato, sans-serif; 50 | font-size: 14px; 51 | font-weight: normal; 52 | height: 690px; 53 | background-color: #fff; 54 | } 55 | 56 | #navbar-example { 57 | // position: relative; 58 | z-index: 998; 59 | position: fixed; 60 | top: 248px; 61 | margin-left: 35px; 62 | width: 220px; 63 | left: 101px; 64 | } 65 | 66 | .progress { 67 | width: 100%; 68 | text-align: center; 69 | } 70 | 71 | .tf1, 72 | .tf2, 73 | .tf0, 74 | .tf, 75 | { 76 | height: 480px; 77 | overflow: auto; 78 | } 79 | 80 | #spyOnThis { 81 | height: 100%; 82 | width: 100%; 83 | position: relative; 84 | overflow-y: scroll; 85 | } 86 | 87 | .tfl 88 | { 89 | height: 600px; 90 | width: 100%; 91 | } 92 | 93 | .footer, 94 | .title 95 | { 96 | font-weight: bold; 97 | color: #444; 98 | text-align:center; 99 | background-color: #eee; 100 | border-top: 2px solid #444; 101 | border-bottom: 2px solid #444; 102 | } 103 | 104 | .genes2 { 105 | resize: none; 106 | height: 113px; 107 | } 108 | .genes { 109 | resize: none; 110 | height: 113px; 111 | } 112 | 113 | /* /\* .sp { *\/ */ 114 | /* /\* margin: 12px 12px 12px 12px; *\/ */ 115 | /* /\* text-align: center; 
*\/ */ 116 | /* /\* } *\/ */ 117 | 118 | 119 | /* /\* .main_content { *\/ */ 120 | /* /\* border-bottom: 2px solid #444; *\/ */ 121 | /* /\* text-align: center; *\/ */ 122 | /* /\* } *\/ */ 123 | 124 | .run { 125 | margin: 0px 24px 5px 2px; 126 | } 127 | 128 | /* .bd-example-modal-lg { */ 129 | /* width: 1000px; */ 130 | /* } */ 131 | 132 | td { vertical-align:middle; } 133 | td:hover { 134 | cursor: pointer; 135 | } 136 | 137 | img { 138 | opacity: 0.9; 139 | filter: alpha(opacity=90); /* For IE8 and earlier */ 140 | } 141 | 142 | .inspector_attrib_row { 143 | border-bottom: 1px #d6d6d6 solid; 144 | border-right: 1px #d6d6d6 solid 145 | } 146 | 147 | .circle-col { 148 | width: 12.5%; 149 | position: relative; 150 | float: left; 151 | } 152 | 153 | .circle { 154 | background: #A9A9A9; 155 | height: 23px; 156 | width: 23px; 157 | border-radius: 50%; 158 | margin: 0 auto; 159 | } 160 | 161 | div { 162 | display: block; 163 | } 164 | 165 | .green { 166 | background-color: #27ae60; 167 | } 168 | 169 | .red { 170 | background-color: #c0392b; 171 | } 172 | 173 | .btn-align { 174 | padding: 6px 12px; 175 | line-height: 1.42857143; 176 | vertical-align: middle; 177 | 178 | } 179 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/load_clustergram.js: -------------------------------------------------------------------------------- 1 | /* 2 | Example files 3 | */ 4 | 5 | var hzome = ini_hzome(); 6 | 7 | make_clust('mult_view.json'); 8 | 9 | var about_string = 'Zoom, scroll, and click buttons to interact with the clustergram. '; 10 | 11 | function make_clust(inst_network){ 12 | 13 | d3.json(inst_network, function(network_data){ 14 | 15 | // define arguments object 16 | var args = { 17 | root: '#container-id-1', 18 | 'network_data': network_data, 19 | 'about':about_string, 20 | 'row_tip_callback':hzome.gene_info, 21 | 'col_tip_callback':test_col_callback, 22 | 'tile_tip_callback':test_tile_callback, 23 | 'dendro_callback':dendro_callback, 24 | 'matrix_update_callback':matrix_update_callback, 25 | 'cat_update_callback': cat_update_callback, 26 | 'sidebar_width':150, 27 | // 'ini_view':{'N_row_var':20} 28 | // 'ini_expand':true 29 | }; 30 | 31 | resize_container(args); 32 | 33 | d3.select(window).on('resize',function(){ 34 | resize_container(args); 35 | cgm.resize_viz(); 36 | }); 37 | 38 | cgm = Clustergrammer(args); 39 | 40 | check_setup_enrichr(cgm); 41 | 42 | d3.select(cgm.params.root + ' .wait_message').remove(); 43 | 44 | }); 45 | 46 | } 47 | 48 | function matrix_update_callback(){ 49 | 50 | if (genes_were_found[this.root]){ 51 | enr_obj[this.root].clear_enrichr_results(false); 52 | } 53 | } 54 | 55 | function cat_update_callback(){ 56 | console.log('callback to run after cats are updated'); 57 | } 58 | 59 | function test_tile_callback(tile_data){ 60 | var row_name = tile_data.row_name; 61 | var col_name = tile_data.col_name; 62 | 63 | } 64 | 65 | function test_col_callback(col_data){ 66 | var col_name = col_data.name; 67 | } 68 | 69 | function dendro_callback(inst_selection){ 70 | 71 | var inst_rc; 72 | var inst_data = inst_selection.__data__; 73 | 74 | // toggle enrichr export section 75 | if (inst_data.inst_rc === 'row'){ 76 | d3.select('.enrichr_export_section') 77 | .style('display', 'block'); 78 | } else { 79 | d3.select('.enrichr_export_section') 80 | .style('display', 'none'); 81 | } 82 | 83 | } 84 | 85 | function resize_container(args){ 86 | 87 | var screen_width = window.innerWidth; 88 | var screen_height = window.innerHeight - 20; 
89 | 90 | d3.select(args.root) 91 | .style('width', screen_width+'px') 92 | .style('height', screen_height+'px'); 93 | } 94 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/plot.R: -------------------------------------------------------------------------------- 1 | library(data.table) 2 | AR <- fread('MC00468.pwm.1kb') 3 | E2F2 <- fread('MS00712.pwm.1kb') 4 | GR <- fread('MC00170.pwm.1kb') 5 | 6 | AE <- cbind(AR$V2, E2F2$V2) 7 | AE <- AE[apply(AE, 1, function(x) all(x<10000)),] 8 | 9 | AG <- cbind(AR$V2, GR$V2) 10 | AG <- AG[apply(AG, 1, function(x) all(x<10000)),] 11 | 12 | print(head(AE)) 13 | print(head(AG)) 14 | 15 | png('motif_scatterplot.png', width=1500, height=800) 16 | par(mfrow=c(1,2), font=2, cex=1) 17 | plot(AE[,1], AE[,2], pch=19, col='blue', xlab='AR', ylab='E2F2') 18 | plot(AG[,1], AG[,2], pch=19, col='blue', xlab='AR', ylab='GR') 19 | dev.off() 20 | 21 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in cistrome/*pwm 4 | do 5 | #if [ ! -s ${i}.100bp.bin.npy ] 6 | if [ ! -s ${i}.100bp ] 7 | then 8 | #python bin/seqpos2 -f hg38_window100bp_both10bp.fa -p $i -o ${i}.100bp 9 | python bin/seqpos2 -f mm10_window100bp_both10bp.fa -p $i -o ${i}.100bp 10 | fi 11 | done 12 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/send_to_Enrichr.js: -------------------------------------------------------------------------------- 1 | function send_to_Enrichr(options) { // http://amp.pharm.mssm.edu/Enrichr/#help 2 | var defaultOptions = { 3 | description: "", 4 | popup: false 5 | }; 6 | 7 | if (typeof options.description == 'undefined') 8 | options.description = defaultOptions.description; 9 | if (typeof options.popup == 'undefined') 10 | options.popup = defaultOptions.popup; 11 | if (typeof options.list == 'undefined') 12 | alert('No genes defined.'); 13 | 14 | var form = document.createElement('form'); 15 | form.setAttribute('method', 'post'); 16 | form.setAttribute('action', 'https://amp.pharm.mssm.edu/Enrichr/enrich'); 17 | if (options.popup) 18 | form.setAttribute('target', '_blank'); 19 | form.setAttribute('enctype', 'multipart/form-data'); 20 | 21 | var listField = document.createElement('input'); 22 | listField.setAttribute('type', 'hidden'); 23 | listField.setAttribute('name', 'list'); 24 | listField.setAttribute('value', options.list); 25 | form.appendChild(listField); 26 | 27 | var descField = document.createElement('input'); 28 | descField.setAttribute('type', 'hidden'); 29 | descField.setAttribute('name', 'description'); 30 | descField.setAttribute('value', options.description); 31 | form.appendChild(descField); 32 | 33 | document.body.appendChild(form); 34 | form.submit(); 35 | document.body.removeChild(form); 36 | } -------------------------------------------------------------------------------- /lisa_web/lisa_web/templates/404.html: -------------------------------------------------------------------------------- 1 |

not found...
2 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/templates/index.html~: -------------------------------------------------------------------------------- 1 | {% extends "bootstrap/base.html" %} 2 | {% import "bootstrap/fixes.html" as fixes %} 3 | {% import "bootstrap/wtf.html" as wtf %} 4 | 5 | {% block head %} 6 | {{super()}} 7 | {{fixes.ie8()}} 8 | {% endblock %} 9 | 10 | {% block metas %} 11 | 12 | {% endblock %} 13 | {% block title %}LISA{% endblock %} 14 | {% block html_attribs %} lang="en"{% endblock %} 15 | 16 | {% block styles %} 17 | {{ super() }} 18 | 19 | {% endblock %} 20 | 21 | 22 | {% block scripts %} 23 | {{ super() }} 24 | 25 | {% endblock %} 26 | 27 | 28 | {% block navbar %} 29 | {% endblock %} 30 | 31 | {% block content %} 32 |
33 | LISA online: a web server for ranking TF from large-scale epigenome data 34 | 35 | 36 | 37 | 38 | {{ form.hidden_tag() }} 39 | {{ wtf.form_errors(form, hiddens="only") }} 40 | {{ wtf.form_field(form.genes) }} 41 | {{ wtf.form_field(form.mark) }} 42 | 43 | 44 | 45 | 46 | 47 | 48 |
49 | {% endblock %} 50 | -------------------------------------------------------------------------------- /lisa_web/lisa_web_requirement.txt: -------------------------------------------------------------------------------- 1 | Flask==0.12.1 2 | Flask-Bootstrap==3.3.7.1 3 | Flask-HTTPAuth==3.2.4 4 | Flask-Mail==0.9.1 5 | Flask-Script==2.0.5 6 | Flask-WTF==0.14.2 7 | Jinja2==2.9.6 8 | MarkupSafe==1.0 9 | WTForms==2.1 10 | Werkzeug==0.12.1 11 | amqp==2.2.1 12 | argparse==1.2.1 13 | billiard==3.5.0.3 14 | blinker==1.4 15 | celery==4.1.0 16 | click==6.7 17 | dominate==2.3.1 18 | itsdangerous==0.24 19 | kombu==4.1.0 20 | numpy==1.12.1 21 | pandas==0.20.3 22 | python-dateutil==2.6.1 23 | pytz==2017.2 24 | redis==2.10.5 25 | six==1.10.0 26 | vine==1.1.4 27 | visitor==0.1.3 28 | wsgiref==0.1.2 29 | -------------------------------------------------------------------------------- /lisa_web/output_profile_regulatory_potential.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Extract a regulatory potential (RP) matrix for selected samples from a LISA HDF5 store; the function wrapper and its name are inferred from the original fragment.""" 3 | 4 | import os 5 | import sys 6 | import h5py 7 | import numpy as np 8 | import pandas as pd 9 | 10 | def load_rp_profile(h5, high_quality_ids): 11 | # return a genes x samples RP DataFrame restricted to the requested sample ids 12 | with h5py.File(h5) as store: 13 | gene_annotation = np.array(list(map(lambda x: x.decode('utf-8'), 14 | store['RefSeq'][...]))) 15 | ids = list(map(lambda x: x.decode('utf-8').split('_')[0], 16 | store['IDs'][...])) 17 | 18 | high_quality_ids = list(set(high_quality_ids) & set(ids)) 19 | map_id = {} 20 | for i, c in enumerate(ids): 21 | map_id[c] = i 22 | idx = np.array([map_id[str(i)] for i in high_quality_ids]) 23 | sort_index = np.argsort(idx) 24 | index = idx[sort_index] 25 | iid = np.array(high_quality_ids)[sort_index] 26 | return pd.DataFrame(store['RP'][:, index], columns=iid, 27 | index=gene_annotation) 28 | 29 | if __name__ == '__main__': 30 | # placeholder from the original script; supply the HDF5 path and sample ids before running 31 | # with h5py.File("") as store: 32 | # store['RP'] 33 | pass 34 | -------------------------------------------------------------------------------- /lisa_web/plotly_scatter.py: -------------------------------------------------------------------------------- 1 | #!/project/dev/qqin/miniconda3/bin/python 2 | import sys 3 | import pandas as pd 4 | import numpy as np 5 | from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot 6 | from plotly import tools 7 | from plotly.graph_objs import Scatter, Heatmap, Layout, Figure 8 | 9 | up_r = sys.argv[1] 10 | dn_r = sys.argv[2] 11 | prefix = sys.argv[3] 12 | title = sys.argv[4] 13 | labels1 = sys.argv[5] 14 | labels2 = sys.argv[6] 15 | 16 | up=pd.read_csv(up_r, header=0) 17 | if '1' in up.columns: 18 | up=up.sort_values(by='1') 19 | if '0.1' in up.columns: 20 | up=up.sort_values(by='0.1') 21 | up.loc[:, 'name'] = up.iloc[:, 0].map(lambda x:x.split('|')[1]) 22 | up.drop_duplicates('name', inplace=True, keep='first') 23 | print(up.head()) 24 | 25 | dn=pd.read_csv(dn_r, header=0) 26 | if '1' in dn.columns: 27 | dn=dn.sort_values(by='1') 28 | if '0.1' in dn.columns: 29 | dn=dn.sort_values(by='0.1') 30 | dn.loc[:, 'name'] = dn.iloc[:, 0].map(lambda x:x.split('|')[1]) 31 | dn.drop_duplicates('name', inplace=True, keep='first') 32 | print(dn.head()) 33 | 34 | final = up.merge(dn, on='name', how='outer') 35 | final = final.loc[(final.iloc[:, 1]<=0.05) | (final.iloc[:, 4]<=0.05), :] 36 | xlim = -np.log10(np.min(final.iloc[:, 1]))*1.2 37 | ylim = -np.log10(np.min(final.iloc[:, 4]))*1.2 38 | print(xlim) 39 | print(ylim) 40 | 41 | #final.iloc[np.where(pd.isnull(final.iloc[:, 4]))[0], 3] = 1 42 | #final.iloc[np.where(pd.isnull(final.iloc[:, 1]))[0], 1] = 1 43 | top_index = np.union1d(np.argsort(final.iloc[:, 1])[:10], np.argsort(final.iloc[:, 4])[:10]) 44 | final_top = 
final.iloc[top_index, :] 45 | 46 | final = final.drop(final.index[top_index]) 47 | x = -np.log10(final.iloc[:, 1]) 48 | y = -np.log10(final.iloc[:, 4]) 49 | 50 | top_trace0 = Scatter(x=x, 51 | y=y, 52 | name='other TF with p-value < 0.01', 53 | mode='markers', 54 | text=final.iloc[:, 0], 55 | marker= dict(size= 8, 56 | opacity= 0.7, 57 | )) 58 | 59 | x = -np.log10(final_top.iloc[:, 1]) 60 | y = -np.log10(final_top.iloc[:, 4]) 61 | trace1 = Scatter(x=x, 62 | y=y, 63 | name='top TFs', 64 | # mode='markers+text', 65 | mode='markers', 66 | marker=dict(size= 6, 67 | opacity= 0.8), 68 | textfont=dict( 69 | family='sans serif', 70 | size=18, 71 | color='black' 72 | ), 73 | text = list(map(lambda x: "%s\n%s" % ('Cistrome ID|TF', x), final_top.iloc[:, 0])), 74 | hoverinfo = 'text', 75 | textposition='top right') 76 | 77 | layout = Layout( 78 | title=title, 79 | xaxis=dict( 80 | title='-log10(p-value) of Gene Set 1' if labels1.strip() == '' else '-log10(p-value) of %s' % labels1, 81 | showgrid=False, 82 | titlefont=dict( 83 | family='Arial', 84 | size=18), 85 | rangemode='tozero', 86 | range=[0, xlim] 87 | ), 88 | yaxis=dict( 89 | title='-log10(p-value) of Gene Set 2' if labels2.strip() == '' else '-log10(p-value) of %s' % labels2, 90 | showgrid=False, 91 | titlefont=dict( 92 | family='Arial', 93 | size=18 94 | ), 95 | rangemode='tozero', 96 | range=[0, ylim] 97 | ), 98 | hovermode = 'closest', 99 | width=850, 100 | height=650 101 | ) 102 | 103 | fig = Figure(data=[top_trace0, trace1], layout=layout) 104 | plot(fig, filename='%s.html' % prefix, show_link=False, auto_open=False) 105 | -------------------------------------------------------------------------------- /lisa_web/run-redis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ ! 
-d redis-stable/src ]; then 3 | curl -O http://download.redis.io/redis-stable.tar.gz 4 | tar xvzf redis-stable.tar.gz 5 | rm redis-stable.tar.gz 6 | fi 7 | cd redis-stable 8 | make 9 | src/redis-server 10 | -------------------------------------------------------------------------------- /lisa_web/run.py: -------------------------------------------------------------------------------- 1 | from flask_script import Manager 2 | from lisa_web import app 3 | 4 | manager = Manager(app) 5 | 6 | if __name__ == '__main__': 7 | manager.run() 8 | -------------------------------------------------------------------------------- /lisa_web/run.sh: -------------------------------------------------------------------------------- 1 | #AR 2 | python make_session.py /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/265_up.gene_symbol.H3K27ac.coefs.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/265_up.gene_symbol.H3K27ac.chipseq.p_value.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/265_up.gene_symbol.foreground_gene > AR_session.txt 3 | 4 | 5 | #BCL6 6 | python make_session.py /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/27_up.gene_symbol.H3K27ac.coefs.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/27_up.gene_symbol.H3K27ac.chipseq.p_value.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/27_up.gene_symbol.foreground_gene > BCL6_session.txt 7 | 8 | # ESR1 9 | python make_session.py /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/85_down.gene_symbol.H3K27ac.coefs.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/85_down.gene_symbol.H3K27ac.chipseq.p_value.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/85_down.gene_symbol.foreground_gene > ESR1_session.txt 10 | 11 | 12 | python make_session.py /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/139_up.gene_symbol.H3K27ac.coefs.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/139_up.gene_symbol.H3K27ac.chipseq.p_value.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/139_up.gene_symbol.foreground_gene > 139_session.txt 13 | 14 | # REST 15 | python make_session.py /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/272_up.gene_symbol.H3K27ac.coefs.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/272_up.gene_symbol.H3K27ac.chipseq.p_value.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/272_up.gene_symbol.foreground_gene > REST_session.txt 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /lisa_web/run_browser.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | #python make_session.py upload/2017-08-22_0830220.34508.H3K27ac.coefs.csv upload/2017-08-18_1107470.10356.txt.H3K27ac.chipseq.p_value.csv upload/2017-08-24_0952320.67641.txt.foreground_gene 3 | links=$1 4 | links=${links/.coefs.csv/} 5 | 6 | ## http://lisa.cistrome.org//data5/lisa_browser/test.bed 7 | 8 | shuf /project/Cistrome/LISA/lisa_web/upload/$3 | head -20 > /project/Cistrome/LISA/lisa_web/upload/${3}.20 9 | python make_session.py 
/project/Cistrome/LISA/lisa_web/upload/$1 /project/Cistrome/LISA/lisa_web/upload/$2 /project/Cistrome/LISA/lisa_web/upload/${3}.20 > /project/Cistrome/LISA/lisa_web/upload/${links}.url 10 | 11 | -------------------------------------------------------------------------------- /lisa_web/run_celery.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | celery worker -A lisa_web.celery --loglevel=info -E -c 8 4 | 5 | -------------------------------------------------------------------------------- /lisa_web/run_heatmap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python generate_heatmap_js.py -d $1 3 | -------------------------------------------------------------------------------- /lisa_web/run_lisa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | source /project/dev/qqin/miniconda3/bin/activate base 4 | 5 | outdir=$3 6 | mkdir -p /project/Cistrome/LISA/lisa_web/upload/${outdir} 7 | cd /project/Cistrome/LISA/lisa_web/upload/${outdir} 8 | 9 | ###nice -n 15 lisa model --web=True --new_rp_h5=None --new_count_h5=None --species hg38 --epigenome '['H3K27ac']' --cluster=False --covariates=False --random=True --prefix test --threads 3 AR.symbol 10 | 11 | nice -n 15 lisa model --method="$5" --web=True --new_rp_h5=None --new_count_h5=None --species $1 --epigenome "$2" --cluster=False --covariates=False --random=True --stat_background_number=300 --background=$6 --prefix ${outdir} --threads 4 $4 12 | 13 | echo "accomplished!!.." >> /project/Cistrome/LISA/lisa_web/upload/${outdir}_snakemake_output.txt 14 | sleep 1 15 | 16 | -------------------------------------------------------------------------------- /lisa_web/run_lisa2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | source /project/dev/qqin/miniconda3/bin/activate base 4 | 5 | outdir=$3 6 | mkdir -p /project/Cistrome/LISA/lisa_web/upload/${outdir} 7 | cd /project/Cistrome/LISA/lisa_web/upload/${outdir} 8 | 9 | nice -n 15 lisa model --method="$4" --web=True --new_rp_h5=None --new_count_h5=None --species $1 --epigenome "$2" --cluster=False --covariates=False --random=True --prefix ${outdir} --background=$7 --stat_background_number=300 --threads 4 $5 $6 10 | 11 | echo "accomplished!!.." 
>> /project/Cistrome/LISA/lisa_web/upload/${outdir}_snakemake_output.txt 12 | sleep 1 13 | 14 | -------------------------------------------------------------------------------- /lisa_web/run_plot.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | source /project/dev/qqin/miniconda3/bin/activate base 3 | /project/dev/qqin/miniconda3/bin/python /project/Cistrome/LISA/lisa_web/plotly_scatter.py $1 $2 $3 $4 $5 $6 4 | -------------------------------------------------------------------------------- /lisa_web/test.sh: -------------------------------------------------------------------------------- 1 | nice -n 15 lisa model --web=True --new_rp_h5=None --new_count_h5=None --species hg38 --epigenome '['H3K27ac']' --cluster=False --covariates=False --random=True --prefix test --threads 3 AR.symbol 2 | 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """lisa: a bioinformatics software 2 | epigenome analysis to rank TFs from gene set 3 | """ 4 | import os 5 | from glob import glob 6 | from setuptools import setup, find_packages, Extension 7 | from numpy.distutils.misc_util import get_numpy_include_dirs 8 | 9 | def get_extension(): 10 | """ get extension for computing regulatory potential from bigwig """ 11 | bigwig_src = ['bigWigRegPotential.c', 'pybw.c', 12 | 'ffScore.c', 'rbTree.c', 'rangeTree.c', 'ffAli.c', 'ffAliHelp.c', 'fuzzyFind.c', 'bwgValsOnChrom.c', 'tokenizer.c', 'asParse.c', 'aliType.c', 'dnaseq.c', 'dnautil.c', 'gfxPoly.c', 'psl.c', 'binRange.c', 'sqlList.c', 'basicBed.c', 'bPlusTree.c','base64.c','bbiRead.c','bbiWrite.c', 'bits.c','bwgQuery.c','cheapcgi.c','cirTree.c','common.c','dlist.c','dystring.c','errAbort.c','hash.c','hex.c','hmmstats.c','https.c','intExp.c','internet.c','kxTok.c','linefile.c','localmem.c','memalloc.c','mime.c','net.c','obscure.c','options.c','osunix.c','pipeline.c','portimpl.c','servBrcMcw.c','servCrunx.c','servcis.c','servcl.c','servmsII.c','servpws.c','sqlNum.c','udc.c','verbose.c','wildcmp.c','zlibFace.c'] 13 | 14 | bigwig_src = list(map(lambda x: os.path.join('lisa', 'regpotential', x), bigwig_src)) 15 | ext = Extension('lisa._bw', 16 | sources=bigwig_src, 17 | extra_compile_args=['-O3', '-std=c99'], #, '-Wall'], 18 | libraries=['ssl', 'z', 'crypto']) 19 | return ext 20 | 21 | def main(): 22 | """setup entry 23 | """ 24 | setup( 25 | name='lisa', 26 | version='1.0', 27 | url='http://lisa.cistrome.org', 28 | author='Qian Qin', 29 | description=__doc__, 30 | packages=find_packages(), 31 | ext_modules=[get_extension(), ], 32 | include_dirs=['lisa/regpotential'] + get_numpy_include_dirs(), 33 | install_requires=['numpy==1.15.1', #'matplotlib', 'seaborn', 34 | 'scikit-learn', 'theano', 'fire', 35 | 'h5py', 'pandas', 36 | 'scipy', 37 | 'snakemake', 'PyYAML', 'yappi', 'mpmath' 38 | ], 39 | include_package_data=True, 40 | package_data={'lisa': ['rules/*', 'workflows/*', 'lisa.ini']}, 41 | scripts=glob('bin/*'), 42 | classifiers=[ 43 | 'Environment :: Console', 44 | 'Operating System :: POSIX', 45 | "Programming Language :: Python :: 3", 46 | "Topic :: Scientific/Engineering :: Bio-Informatics"], 47 | keywords='ChIP-seq', 48 | license='OTHER', 49 | zip_safe=False) 50 | 51 | if __name__ == '__main__': 52 | main() 53 | --------------------------------------------------------------------------------
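Build note: a minimal sketch of a local source build to accompany setup.py above, not an official procedure. It assumes a Linux host with a C toolchain plus zlib and OpenSSL development headers, since the lisa._bw extension is compiled with -std=c99 and linked against -lssl, -lz and -lcrypto, and it installs numpy first because setup.py imports numpy.distutils at build time (the pinned numpy==1.15.1 still ships it). The apt package names below are assumptions for a Debian/Ubuntu host; adjust for other distributions.

    # assumed Debian/Ubuntu prerequisites for the C extension (-lssl -lz -lcrypto)
    sudo apt-get install build-essential zlib1g-dev libssl-dev
    # numpy must be present before the build: setup.py imports numpy.distutils
    pip install --user numpy==1.15.1
    # compile the lisa._bw extension, then install as in lisa_docs/source/Installation.rst
    python setup.py build_ext --inplace
    python setup.py install --user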