├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── batch ├── run.sh ├── run_combined.py └── run_combined.sh ├── bin ├── lisa ├── lisa_baseline ├── lisa_bw2hdf ├── lisa_combine_ranks ├── lisa_list_data.py ├── lisa_model ├── lisa_postmodel_background_selection ├── lisa_predict_tfbs ├── lisa_premodel_background_selection ├── lisa_rank_tfs ├── lisa_show_ranks └── lisa_update_conf ├── conda.recipe ├── build.sh └── meta.yaml ├── demo ├── AR.symbol ├── lisa_results_meta_table_human_with_gene_sets.xls ├── lisa_results_meta_table_mouse_with_gene_sets.xls └── run.sh ├── environment.yml ├── lisa ├── __init__.py ├── data.py ├── dependent_data.txt ├── lisa.ini ├── lisa.ini.bak ├── lisa.ini.latest ├── lisa.ini.latest.bak ├── lisa.ini.old ├── lisa.ini.updated ├── model.py ├── mouse.tfs ├── rank.py ├── regpotential │ ├── __init__.py │ ├── aliType.c │ ├── aliType.h │ ├── asParse.c │ ├── asParse.h │ ├── bPlusTree.c │ ├── bPlusTree.h │ ├── base64.c │ ├── base64.h │ ├── basicBed.c │ ├── basicBed.h │ ├── bbiFile.h │ ├── bbiRead.c │ ├── bbiWrite.c │ ├── bigBed.h │ ├── bigBedSummary.c │ ├── bigWig.h │ ├── bigWigRegPotential.c │ ├── bigWigSummary.c │ ├── binRange.c │ ├── binRange.h │ ├── bits.c │ ├── bits.h │ ├── bwgInternal.h │ ├── bwgQuery.c │ ├── bwgValsOnChrom.c │ ├── cheapcgi.c │ ├── cheapcgi.h │ ├── cirTree.c │ ├── cirTree.h │ ├── colHash.c │ ├── colHash.h │ ├── common.c │ ├── common.h │ ├── dlist.c │ ├── dlist.h │ ├── dnaseq.c │ ├── dnaseq.h │ ├── dnautil.c │ ├── dnautil.h │ ├── dystring.c │ ├── dystring.h │ ├── errAbort.c │ ├── errAbort.h │ ├── ffAli.c │ ├── ffAliHelp.c │ ├── ffScore.c │ ├── fuzzyFind.c │ ├── fuzzyFind.h │ ├── gfxPoly.c │ ├── gfxPoly.h │ ├── hash.c │ ├── hash.h │ ├── hex.c │ ├── hex.h │ ├── hmmstats.c │ ├── hmmstats.h │ ├── htmshell.h │ ├── https.c │ ├── https.h │ ├── intExp.c │ ├── internet.c │ ├── internet.h │ ├── kxTok.c │ ├── kxTok.h │ ├── linefile.c │ ├── linefile.h │ ├── localmem.c │ ├── localmem.h │ ├── makefile │ ├── makefile.1 │ ├── memalloc.c │ ├── memalloc.h │ ├── mime.c │ ├── mime.h │ ├── net.c │ ├── net.h │ ├── obscure.c │ ├── obscure.h │ ├── options.c │ ├── options.h │ ├── osunix.c │ ├── pipeline.c │ ├── pipeline.h │ ├── portable.h │ ├── portimpl.c │ ├── portimpl.h │ ├── psl.c │ ├── psl.h │ ├── pybw.c │ ├── pybw.h │ ├── rangeTree.c │ ├── rangeTree.h │ ├── rbTree.c │ ├── rbTree.h │ ├── servBrcMcw.c │ ├── servCrunx.c │ ├── servcis.c │ ├── servcl.c │ ├── servmsII.c │ ├── servpws.c │ ├── sig.h │ ├── sqlList.c │ ├── sqlList.h │ ├── sqlNum.c │ ├── sqlNum.h │ ├── tokenizer.c │ ├── tokenizer.h │ ├── udc.c │ ├── udc.h │ ├── vGfx.c │ ├── vGfx.h │ ├── vGfxPrivate.h │ ├── verbose.c │ ├── verbose.h │ ├── wildcmp.c │ ├── zlibFace.c │ └── zlibFace.h ├── rules │ ├── __init__.py │ ├── background_selection.rule │ ├── baseline.rule │ ├── combine_chipseq.rule │ ├── combine_motif.rule │ ├── entropy.rule │ ├── fastq.rule │ ├── hdf5.rule │ ├── knockout.rule │ ├── lisa_direct.rule │ └── model.rule ├── utils.py └── workflows │ ├── Snakefile │ ├── __init__.py │ ├── cluster.json │ ├── sbatch.sh │ ├── sbatch_dependency.py │ └── sbatch_dependency.sh ├── lisa_docs ├── Makefile └── source │ ├── FAQ.rst │ ├── Installation.rst │ ├── Tutorial.rst │ ├── _static │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── 4.png │ ├── 5.png │ ├── 6.png │ ├── 7.png │ ├── 8.png │ └── 9.png │ ├── conf.py │ └── index.rst ├── lisa_web ├── generate_heatmap_js.py ├── lisa_scatter.py ├── lisa_web.conf ├── lisa_web.wsgi ├── lisa_web │ ├── __init__.py │ ├── __init__.pyc │ ├── __init__.py~ │ ├── check_genename.py │ ├── 
combined_gallery_multiple_display.html │ ├── combined_gallery_multiple_display_mm.html │ ├── fonts │ │ ├── FontAwesome.otf │ │ ├── fontawesome-webfont.eot │ │ ├── fontawesome-webfont.svg │ │ ├── fontawesome-webfont.ttf │ │ ├── fontawesome-webfont.woff │ │ └── fontawesome-webfont.woff2 │ ├── form.py │ ├── form.pyc │ ├── gallery_multiple_display.html │ ├── gallery_template.html │ ├── generate_combined_gallery.py │ ├── generate_combined_gallery_mm.py │ ├── generate_gallery2.py │ ├── images │ │ ├── 1.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── 4.png │ │ ├── 5.png │ │ ├── 6.png │ │ ├── 7.png │ │ └── 8.png │ ├── mail.py │ ├── new_gallery.html │ ├── new_gallery_mm.html │ ├── run.sh │ ├── static │ │ ├── 1.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── 4.png │ │ ├── 5.png │ │ ├── 6.png │ │ ├── 7.png │ │ ├── 8.png │ │ ├── Enrichrgram.js │ │ ├── Figure1.png │ │ ├── Figure2.png │ │ ├── Figure3.png │ │ ├── Figure4.png │ │ ├── Figure5.png │ │ ├── Figure6.png │ │ ├── Figure7.png │ │ ├── Figure8.png │ │ ├── MACRO_ape_all_cistrome_pwm.sh │ │ ├── clustergrammer.js │ │ ├── clustergrammer.min.js │ │ ├── clustergrammer.node.js │ │ ├── clustergrammer.node.min.js │ │ ├── combined_lisa2_static.js │ │ ├── custom.css │ │ ├── d3.js │ │ ├── d3.v4.min.js │ │ ├── display.html │ │ ├── font-awesome.min.css │ │ ├── gallery.js │ │ ├── gallery.js~ │ │ ├── hzome_functions.js │ │ ├── jquery-1.12.4.js │ │ ├── jquery-3.2.1.min.js │ │ ├── lisa.css │ │ ├── lisa.css~ │ │ ├── lisa.jpg │ │ ├── lisa.js │ │ ├── lisa2.css │ │ ├── lisa2.js │ │ ├── lisa2_static.js │ │ ├── load_clustergram.js │ │ ├── multiple_display.html │ │ ├── plot.R │ │ ├── popper.min.js │ │ ├── popper.min.js.map │ │ ├── run.sh │ │ ├── send_to_Enrichr.js │ │ ├── seqpos.R │ │ └── underscore-min.js │ └── templates │ │ ├── #display.html# │ │ ├── 404.html │ │ ├── display.html │ │ ├── doc.html │ │ ├── gallery.html │ │ ├── gallery.html~ │ │ ├── gallery_mm.html │ │ ├── index.html │ │ ├── index.html~ │ │ ├── multiple_display.html │ │ ├── new_gallery.html │ │ ├── new_gallery_mm.html │ │ └── stat.html ├── lisa_web_requirement.txt ├── make_session.py ├── output_profile_regulatory_potential.py ├── plotly_scatter.py ├── run-redis.sh ├── run.py ├── run.sh ├── run_browser.sh ├── run_celery.sh ├── run_heatmap.sh ├── run_lisa.sh ├── run_lisa2.sh ├── run_plot.sh └── test.sh └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *so 2 | record.txt 3 | dist 4 | build 5 | lisa.egg-info 6 | __pycache__ 7 | flycheck* 8 | .nfs* 9 | *.pwm.jpg 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2018-2019 Qian Qin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.in 2 | #recursive-include lisa/data * 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### LISA 2 | The web version and documentation are hosted at http://lisa.cistrome.org. For large-scale gene set analysis, we recommend installing the local version. 3 | 4 | ### Preparation of Anaconda environment 5 | 6 | ``` sh 7 | wget -c https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 8 | bash Miniconda3-latest-Linux-x86_64.sh 9 | export PATH="${HOME}/miniconda3/bin:$PATH" 10 | 11 | conda create -n lisa python=3.6 && conda config --add channels conda-forge && conda config --add channels bioconda 12 | 13 | ``` 14 | 15 | ### Installation 16 | 17 | ``` sh 18 | conda activate lisa 19 | # or for old conda 20 | source activate lisa 21 | export MKL_THREADING_LAYER=GNU 22 | 23 | conda install -c qinqian lisa 24 | ``` 25 | 26 | To update, use `git clone https://github.com/qinqian/lisa && cd lisa && python setup.py develop`. 27 | 28 | 29 | ### Get pre-computed datasets from CistromeDB 30 | 31 | Users can download the hg38 or mm10 datasets, depending on whether their experiments are from human or mouse; the password can be obtained after LISA is published. 32 | 33 | ``` sh 34 | wget --user=lisa --password='xxx' http://lisa.cistrome.org/cistromedb_data/lisa_v1.0_hg38.tar.gz 35 | 36 | # or 37 | 38 | wget --user=lisa --password='xxx' http://lisa.cistrome.org/cistromedb_data/lisa_v1.1_mm10.tar.gz 39 | ``` 40 | 41 | Then, uncompress the datasets and update the LISA configuration. 42 | 43 | ``` sh 44 | tar xvfz lisa_v1.0_hg38.tar.gz 45 | lisa_update_conf --folder hg38/ --species hg38 46 | 47 | # or 48 | 49 | tar xvfz lisa_v1.1_mm10.tar.gz 50 | lisa_update_conf --folder mm10/ --species mm10 51 | ``` 52 | 53 | ### Usage 54 | 55 | Given multiple gene set files `gene_set1`, `gene_set2`, `gene_set3`, etc., each containing one gene (RefSeq ID or gene symbol) per row, users can predict the transcriptional regulator ranking with randomly selected background genes using the following command: 56 | 57 | ``` sh 58 | time lisa model --method="all" --web=True --new_rp_h5=None --new_count_h5=None --species hg38 --epigenome "['DNase', 'H3K27ac']" --cluster=False --covariates=False --random=True --prefix first_run --background=None --stat_background_number=1000 --threads 4 gene_set1 gene_set2 gene_set3 ... 59 | ```
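After the model finishes, the combined transcriptional regulator ranking can be inspected directly with pandas. This is a minimal sketch, assuming the run produced a Cauchy-combined, deduplicated rank table named like the `*_cauchy_combine_dedup.csv` files written by the bundled `lisa_combine_ranks` script (the exact file name for your prefix may differ):

``` python
import pandas as pd

# Hypothetical output name; substitute the combined-rank CSV your run actually wrote.
ranks = pd.read_csv("first_run_cauchy_combine_dedup.csv", index_col=0)

# Rows were sorted by combined p-value before writing, so the head of the table is the
# top of the ranking; row names follow the "sampleID|TF|..." convention used by the
# pipeline's rank scripts.
print(ranks.head(20))
```

The bundled `lisa_show_ranks --tf <TF> --genes <rank csv>` helper prints the rank position of a single factor in the same kind of table.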
60 | 61 | Alternatively, users can generate a fixed background gene set based on TAD and promoter activity and pass it to LISA: 62 | 63 | ``` sh 64 | lisa_premodel_background_selection --species hg38 --epigenomes="['DNase']" --gene_set=None --prefix=test --random=None --background=dynamic_auto_tad 65 | cut -f 5 -d: test.background_gene.3000 > test.fixed.background_gene 66 | 67 | time lisa model --method="all" --web=True --new_rp_h5=None --new_count_h5=None --species hg38 --epigenome "['DNase', 'H3K27ac']" --cluster=False --covariates=False --random=True --prefix first_run --background=test.fixed.background_gene --stat_background_number=1000 --threads 4 gene_set1 gene_set2 gene_set3 ... 68 | ``` 69 | 70 | Users can also supply a customized background gene set, which should include more than 30 unique RefSeq genes. All input background genes are used for modeling and computing statistics, so `--stat_background_number` is ignored. 71 | 72 | ``` sh 73 | time lisa model --method="all" --web=True --new_rp_h5=None --new_count_h5=None --species hg38 --epigenome "['DNase', 'H3K27ac']" --cluster=False --covariates=False --random=True --prefix first_run --background=test.fixed.background_gene --threads 4 gene_set1 gene_set2 gene_set3 ... 74 | ``` 75 | 76 | ### Update LISA 77 | 78 | ``` sh 79 | git clone http://github.com/qinqian/lisa/ 80 | source activate lisa 81 | cd lisa && python setup.py develop 82 | lisa_update_conf --folder hg38/ --species hg38 83 | lisa_update_conf --folder mm10/ --species mm10 84 | ``` 85 | 86 | ### Remove LISA 87 | 88 | ``` sh 89 | conda env remove -n lisa 90 | rm -r mm10/ hg38/ 91 | ``` 92 | 93 | ### Citation 94 | 95 | Qin Q, Fan J, Zheng R, Wan C, Mei S, Wu Q. Inferring transcriptional regulators through integrative modeling of public chromatin accessibility and ChIP-seq data. 2019. 96 | 97 | Please note that the reference is a preprint hosted at [bioRxiv](https://www.biorxiv.org/content/10.1101/846139v1). 98 | -------------------------------------------------------------------------------- /batch/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for i in `seq 1 18`; do 3 | echo $i 4 | sbatch --array=3-100:2 --constraint="amd" --open-mode=append run_combined.sh $i 5 | done 6 | -------------------------------------------------------------------------------- /batch/run_combined.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | import glob 5 | import sys  # needed by the KeyboardInterrupt handler below 6 | filenames=glob.glob('/n/home08/cliffmeyer/projects/lisa/gene_num_sample_size/output/*gene_symbol') 7 | file_path="/n/home08/cliffmeyer/projects/lisa/gene_num_sample_size" 8 | 9 | if __name__ == "__main__": 10 | start = time.time() 11 | try: 12 | parser = argparse.ArgumentParser(description="""lisa TCGA gene sets.""") 13 | #parser.add_argument( '-c', dest='chip', type=str, required=True, help='input bed file' ) 14 | parser.add_argument( '-n', dest='number', type=int, required=True) 15 | parser.add_argument( '-s', dest='sample', type=int, required=True) 16 | args = parser.parse_args() 17 | filename=filenames[args.number-1] 18 | os.chdir(file_path) 19 | os.system("mkdir -p %s_%s" % (filename, args.sample)) 20 | os.chdir("%s_%s" % (filename, args.sample)) 21 | os.system("cp %s ."
% filename) 22 | os.system("lisa model --method='all' --web=False --new_rp_h5=None --new_count_h5=None --species hg38 --epigenome \'[\'DNase\']\' --cluster=False --covariates=False --random=True --prefix %s --threads 8 --sample-number %s %s" % (os.path.basename(filename)+"_"+str(args.sample), args.sample, os.path.basename(filename))) 23 | except KeyboardInterrupt: 24 | sys.stderr.write("User interrunpt me! ;-) Bye!\n") 25 | -------------------------------------------------------------------------------- /batch/run_combined.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -J lisa_GEO # A single job name for the array 3 | #SBATCH -n 8 # Number of cores 4 | #SBATCH -N 1 # All cores on one machine 5 | #SBATCH -p serial_requeue # Partition 6 | #SBATCH --mem 10000 # Memory request (4Gb) 7 | #SBATCH -t 0-8:00 # Maximum execution time (D-HH:MM) 8 | #SBATCH -o lisa_%A_%a.out # Standard output 9 | #SBATCH -e lisa_%A_%a.err # Standard error 10 | 11 | # module load gcc/7.1.0-fasrc01 openmpi/2.1.0-fasrc01 hdf5/1.10.1-fasrc01 12 | 13 | export PATH=/n/home08/cliffmeyer/Jingyu/miniconda3/bin:$PATH 14 | source activate lisa_python3_env 15 | 16 | cd /n/home08/cliffmeyer/projects/lisa/gene_num_sample_size 17 | python run_combined.py -s "${SLURM_ARRAY_TASK_ID}" -n $1 18 | 19 | -------------------------------------------------------------------------------- /bin/lisa_bw2hdf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ input a bigwig, preprocess them to lisa 3 | regulatory potential and 1kb read count, generate hdf5 file 4 | """ 5 | import fire 6 | from lisa.data import EpigenomeData 7 | import h5py 8 | import numpy as np 9 | import os 10 | 11 | class HDF(object): 12 | """ interface for processing single bigwig to hdf5 """ 13 | def __init__(self, species, epigenome, prefix): 14 | """ `epigenome` can be epigenome type, e.g. H3K27ac or ATAC-seq or DNase 15 | `epigenome` can also be covariates, e.g., GC or mappability 16 | 17 | prefix is used to label output HDF5 files, for epigenome sample, use `project name` 18 | for covariates, use `covarates` 19 | """ 20 | self.species = species 21 | self.epigenome = epigenome 22 | self.prefix = prefix 23 | 24 | def get_regpotential_hdf(self, bigwig): 25 | """ input one bigwig file, generate temporary 26 | hdf5 file for RP and read count """ 27 | data = EpigenomeData(self.species, self.epigenome) 28 | data.create_RP_h5(bigwig, self.prefix) 29 | 30 | def merge_reg_potential_hdf(self, *hdf5): 31 | """ processing a list of reg potential hdf5 files into one merged hdf5, 32 | input should be from the same epigenome type, e.g. H3K4me3, 33 | or from a list of covariates, e.g. GC. 34 | """ 35 | with h5py.File(hdf5[0]) as inf: 36 | nrp = inf["RP"].shape[0] 37 | refseq = inf["RefSeq"][...] 38 | with h5py.File('%s.%s.reg.h5' % (self.prefix, self.epigenome), "a") as store: 39 | refseq_arr = store.create_dataset("RefSeq", 40 | shape=(len(refseq), ), 41 | dtype='S200', 42 | compression='gzip', 43 | shuffle=True, fletcher32=True) 44 | refseq_arr[...] 
= refseq 45 | 46 | RP = store.create_dataset("RP", dtype=np.float32, shape=(nrp, len(hdf5)), compression='gzip', shuffle=True, fletcher32=True) 47 | ids = store.create_dataset("IDs", 48 | shape=(len(hdf5), ), dtype='S50', 49 | compression='gzip', shuffle=True, fletcher32=True) 50 | 51 | iids = [] 52 | for i, d in enumerate(hdf5): 53 | with h5py.File(d) as inf: 54 | RP[:,i] = inf["RP"][:,0] 55 | store.flush() 56 | iids.append(str.encode(self.prefix + ".%s" % os.path.basename(d).split('.')[0], 'utf-8')) 57 | 58 | ids[...] = np.array(iids) 59 | store.flush() 60 | 61 | def get_readcount_hdf(self, bigwig): 62 | """ input one bigwig file, generate temporary 63 | hdf5 file for RP and read count """ 64 | data = EpigenomeData(self.species, self.epigenome) 65 | data.create_Count_h5(bigwig, self.prefix) 66 | 67 | def merge_readcount_hdf(self, *hdf5): 68 | """ merge multiple hdf5 generated from process_one_bigwig """ 69 | with h5py.File(hdf5[0]) as inf: 70 | nc = inf["OrderCount"].shape[0] 71 | 72 | with h5py.File('%s.%s.readcount.h5' % (self.prefix, self.epigenome), "a") as store: 73 | ct = store.create_dataset("OrderCount", dtype=np.float32, shape=(nc, len(hdf5)), compression='gzip', shuffle=True, fletcher32=True) 74 | ids = store.create_dataset("IDs", shape=(len(hdf5), ), 75 | dtype='S50', 76 | compression='gzip', shuffle=True, fletcher32=True) 77 | 78 | iids = [] 79 | for i, d in enumerate(hdf5): 80 | with h5py.File(d) as inf: 81 | ct[:,i] = inf["OrderCount"][:,0] 82 | store.flush() 83 | iids.append(str.encode(self.prefix + ".%s" % os.path.basename(d).split('.')[0], 'utf-8')) 84 | ids[...] = np.array(iids) 85 | store.flush() 86 | 87 | 88 | if __name__ == '__main__': 89 | fire.Fire(HDF) 90 | -------------------------------------------------------------------------------- /bin/lisa_combine_ranks: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import pandas as pd 3 | import numpy as np 4 | import os 5 | import scipy.stats as stats 6 | import argparse 7 | import copy 8 | 9 | def deduplicate(df): 10 | out = copy.deepcopy(df) 11 | #do not deduplicate, since row names is not consistent for cistromedb and imputed TFBS 12 | out.loc[:, 'TF'] = df.index.map(lambda x: x.split('|')[1]) 13 | ##out.loc[:, 'TF'] = df.index.map(lambda x: x.split('_')[0]) 14 | out = out.drop_duplicates('TF', inplace=False) 15 | print(out.head()) 16 | return out 17 | 18 | def cauchy_p_value(p_vals, wi=None): 19 | """https://arxiv.org/abs/1808.09011""" 20 | p_vals = np.array(p_vals, np.float64) 21 | if np.any(p_vals <= 1e-15): # np.finfo(np.float64) 1e-15 22 | from mpmath import mp 23 | mp.dps = 200 24 | mp.pretty = True 25 | p_vals = [mp.mpf(i) for i in p_vals] 26 | t0 = sum([mp.tan((mp.mpf(0.5)-i)*mp.pi())/mp.mpf(3) for i in p_vals]) 27 | p = mp.mpf(0.5)-mp.atan(t0)/mp.pi() 28 | return t0, p 29 | else: 30 | if wi is None: 31 | wi = 1.0 / len(p_vals) 32 | stat = np.sum(wi * np.tan((0.5-p_vals) * np.pi)) 33 | return stat, 0.5 - np.arctan(stat)/np.pi 34 | 35 | def main(): 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument('rankcsv', nargs='+', help='a list of TF rank csv files') 38 | parser.add_argument('-tf', required=False, default='', type=str, help='target tf name') 39 | parser.add_argument('-prefix', required=True, type=str, help='output prefix') 40 | args = parser.parse_args() 41 | 42 | individual_ranks = [] 43 | rank = pd.read_csv(args.rankcsv[0], index_col=0, header=None) 44 | rank_dedup = deduplicate(rank) 45 | 46 | if len(args.rankcsv) == 1: 47 | 
rank.to_csv('%s_cauchy_combine_raw.csv' % args.prefix) 48 | rank_dedup.to_csv('%s_cauchy_combine_dedup.csv' % args.prefix) 49 | 50 | rank.to_csv('%s_fisher_combine_raw.csv' % args.prefix) 51 | rank_dedup.to_csv('%s_fisher_combine_dedup.csv' % args.prefix) 52 | return True 53 | else: 54 | rank_dedup.to_csv('%s_dedup.csv' % args.rankcsv[0].replace('.csv', '')) 55 | rank_dedup.index = rank_dedup.TF 56 | rank_dedup.drop('TF', axis=1) 57 | 58 | for r in args.rankcsv[1:]: 59 | rank2 = pd.read_csv(r, index_col=0, header=None) 60 | rank_dedup2 = deduplicate(rank2) 61 | rank_dedup2.to_csv('%s_dedup.csv' % r.replace('.csv', '')) 62 | rank_dedup2.index = rank_dedup2.TF 63 | rank_dedup2.drop('TF', axis=1) 64 | rank = rank.merge(rank2, left_index=True, right_index=True) 65 | 66 | print(rank.shape) 67 | print(rank.head()) 68 | 69 | # cauchy combination test 70 | combine_p = rank.apply(lambda x: cauchy_p_value(x)[1], axis=1) 71 | combine_p.sort_values(inplace=True) 72 | combine_p = pd.DataFrame(combine_p) 73 | combine_p.to_csv('%s_cauchy_combine_raw.csv' % args.prefix) 74 | combine_p = deduplicate(combine_p) 75 | combine_p.to_csv('%s_cauchy_combine_dedup.csv' % args.prefix) 76 | 77 | combine_p2 = rank.apply(lambda x: stats.combine_pvalues(x, method='fisher')[1], axis=1) 78 | ## load meta data 79 | 80 | combine_p2.sort_values(inplace=True) 81 | combine_p2 = pd.DataFrame(combine_p2) 82 | combine_p2.to_csv('%s_fisher_combine_raw.csv' % args.prefix) 83 | combine_p2 = deduplicate(combine_p2) 84 | combine_p2.to_csv('%s_fisher_combine_dedup.csv' % args.prefix) 85 | 86 | if __name__ == '__main__': 87 | main() 88 | -------------------------------------------------------------------------------- /bin/lisa_list_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import configparser 5 | import sys 6 | 7 | if len(sys.argv) > 2: 8 | sys.stderr.write('too many parameters...') 9 | sys.exit(1) 10 | 11 | c = configparser.ConfigParser() 12 | c.read(sys.argv[1]) 13 | 14 | for s in c.sections(): 15 | for k in c[s].keys(): 16 | if k == 'bwa_index': 17 | continue 18 | if os.path.exists(c.get(s, k)): 19 | print(c.get(s, k)) 20 | else: 21 | print(c.get(s, k)) 22 | raise Exception('File not exists %s' % (c.get(s, k))) 23 | -------------------------------------------------------------------------------- /bin/lisa_postmodel_background_selection: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ for each differential gene, select a nearest background gene 3 | based on the lisa prediction of regulatory potential 4 | 5 | if two DE genes have the same background gene, one of the DE genes would 6 | choose the secondary nearest gene 7 | """ 8 | from glob import glob 9 | import fire 10 | import pandas as pd 11 | from lisa.data import EpigenomeData 12 | import numpy as np 13 | 14 | def select_background(species, covariates, prefix, *files): 15 | """ 16 | species: hg38 or mm10 17 | covariates: True or False, whether to consider GC or not 18 | prefix: the prefix of lisa_model output, e.g. 
gene symbols file name 19 | files: a list of lisa prediction output files from lisa_model 20 | """ 21 | epigenome = EpigenomeData(species, None) 22 | pred = [] 23 | if covariates: 24 | pred.append(epigenome.get_covariates_reg) 25 | 26 | fore_genes = glob("%s.*.foreground_gene" % prefix)[0] 27 | fore_genes = np.genfromtxt(fore_genes, dtype='str') 28 | 29 | all_back_genes = glob("%s.*.all_background_gene" % prefix)[0] 30 | all_back_genes = np.genfromtxt(all_back_genes, dtype='str') 31 | 32 | for lp in files: 33 | if not 'H3K27me3' in lp: 34 | df = pd.read_csv(lp, index_col=0) 35 | pred.append(df) 36 | 37 | pred = pred[0].join(pred[1:]) 38 | pred = pred.rank(axis=0, ascending=False) 39 | fore_genes_df = pred.loc[fore_genes] 40 | all_back_genes_df = pred.loc[all_back_genes] 41 | 42 | # |rank_j K4me3 - rank_i K4me3| + | rank_j K27ac - rank_i K27ac | + | rank_j GC - rank_i GC | 43 | background_genes = set() 44 | for fore_gene in fore_genes: 45 | dist = all_back_genes_df.sub(fore_genes_df.loc[fore_gene], axis=1) \ 46 | .abs() \ 47 | .sum(axis=1) 48 | dist.sort_values(axis=0, ascending=True, inplace=True) 49 | for candidate in dist.index: 50 | # if first nearest candidate background gene is already be taken 51 | # choose the next one, etc... 52 | if not candidate in background_genes: 53 | background_genes.add(candidate) 54 | break # match one background gene 55 | 56 | back_genes_df = pred.loc[list(background_genes)] 57 | fore_genes_df.to_csv("%s.fore_gene.rank.csv" % prefix) 58 | back_genes_df.to_csv("%s.back_gene.rank.csv" % prefix) 59 | 60 | if __name__ == '__main__': 61 | fire.Fire(select_background) 62 | -------------------------------------------------------------------------------- /bin/lisa_predict_tfbs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ validate lisa model on prediction of TF binding sites """ 3 | import fire 4 | from lisa.data import EpigenomeData 5 | from lisa.utils import binarize_gene_set 6 | from lisa.model import Logit 7 | from sklearn.preprocessing import StandardScaler 8 | from sklearn.metrics import make_scorer, roc_auc_score, average_precision_score, roc_curve, r2_score 9 | import json 10 | import pandas as pd 11 | import numpy as np 12 | import h5py 13 | 14 | def convert_name(name): 15 | try: 16 | name = name.decode('utf-8').replace("tf_", "") 17 | except: 18 | name = name.replace("tf_", "") 19 | return name 20 | 21 | def _get_hdf(epigenome, dtype): 22 | """ get corresponding TF binding data type for 100bp window hit 23 | """ 24 | tfbs_dict = dict( 25 | motif99=epigenome.config.get_motif_index(99), 26 | #motif98=epigenome.config.get_motif_index(98), 27 | #motif97=epigenome.config.get_motif_index(97), 28 | chipseq=epigenome.config.tf_chipseq 29 | ) 30 | return tfbs_dict[dtype] 31 | 32 | def predict_tfbs(species, epigenome, prefix, coefficients, chip_seq_id=None): 33 | """ 34 | species: species for epigenome and gene_set 35 | epigenome: one epigenome type, e.g. 
DNase 36 | gene_set: a gene set file, one gene per line 37 | """ 38 | epigenome = EpigenomeData(species, epigenome) 39 | bin_100_to_1kb = np.load(epigenome.config.genome_window_map) 40 | meta = pd.read_table(epigenome.config.get_meta, 41 | encoding="ISO-8859-1", 42 | index_col=0) 43 | selection = 'factor' 44 | 45 | coef = pd.read_csv(coefficients, encoding="ISO-8859-1", index_col=0) 46 | coef.index = coef.index.astype(str) 47 | print(coef) 48 | 49 | aucs = [] 50 | prs = [] 51 | dtype = 'chipseq' 52 | offset = -1 if dtype == 'chipseq' else 0 53 | with h5py.File(_get_hdf(epigenome, dtype), mode='r') as store: 54 | ids = store['IDs'][...] 55 | for tfbs_id in ids: 56 | try: 57 | tfbs_id_c = int(tfbs_id.decode('utf-8').split('_')[0]) 58 | except: 59 | tfbs_id_c = int(tfbs_id.split('_')[0]) 60 | if tfbs_id_c == int(chip_seq_id): 61 | tfbs_index = store[tfbs_id][...] + offset 62 | print(tfbs_index[:5]) 63 | # 1kb window 64 | print(bin_100_to_1kb[-1]) 65 | tfbs_bin = np.zeros(bin_100_to_1kb[-1] + 1, dtype=np.int32) 66 | # # 1kb 0-1 vector 67 | tfbs_bin[bin_100_to_1kb[tfbs_index]] = 1 68 | 69 | read_count = epigenome.get_count(list(coef.index), False, None) # no hdf5 and covariates 70 | annotation = meta.loc[tfbs_id_c, selection] 71 | print(annotation) 72 | feature_x = np.log2(read_count+1) 73 | 74 | scale = StandardScaler(with_std=False) 75 | ## scale = RobustScaler(quantile_range=(5, 95)) 76 | feature_x = scale.fit_transform(feature_x) 77 | print(feature_x[:5]) 78 | 79 | score = np.dot(feature_x, coef.iloc[:, 0].values) 80 | aucs.append(roc_auc_score(tfbs_bin, score)) 81 | print(aucs) 82 | prs.append(average_precision_score(tfbs_bin, score)) 83 | 84 | with open("%s_direct_tfbs.txt" % prefix, 'w') as outf: 85 | for i, j in zip(aucs, prs): 86 | outf.write("%s\t%s"%(i, j)) 87 | 88 | if __name__ == '__main__': 89 | fire.Fire(predict_tfbs) 90 | 91 | -------------------------------------------------------------------------------- /bin/lisa_show_ranks: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import pandas as pd 3 | import numpy as np 4 | import argparse 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--tf') 8 | parser.add_argument('--genes') 9 | args = parser.parse_args() 10 | 11 | df = pd.read_csv(args.genes, header=None) 12 | df.loc[:, 'TF' ] = df.iloc[:, 0].map(lambda x: x.split('|')[1]) 13 | df.drop_duplicates('TF', inplace=True) 14 | print(np.where(df.loc[:, 'TF'] == args.tf)[0][0]) 15 | -------------------------------------------------------------------------------- /bin/lisa_update_conf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """update local data directory for .ini configuration file 4 | """ 5 | from pkg_resources import resource_filename 6 | import fire 7 | 8 | def update(folder, species): 9 | """ update the config given a folder 10 | """ 11 | import os 12 | import configparser 13 | folder = os.path.abspath(folder) 14 | 15 | in_out = resource_filename("lisa", "lisa.ini") 16 | print(in_out) 17 | conf = configparser.ConfigParser() 18 | conf.read(in_out) 19 | assert species in ['hg38', 'mm10'], 'species not support' 20 | 21 | # common files 22 | conf.set('basics', 'motif', os.path.join(folder, os.path.basename(conf.get('basics', 'motif')))) 23 | conf.set('basics', 'meta', os.path.join(folder, os.path.basename(conf.get('basics', 'meta')))) 24 | # species specific files 25 | for i in conf[species].keys(): 26 | conf.set(species, i, 
os.path.join(folder, os.path.basename(conf.get(species, i)))) 27 | 28 | with open(in_out, 'w') as configfile: 29 | conf.write(configfile) 30 | 31 | if __name__ == '__main__': 32 | fire.Fire(update) 33 | -------------------------------------------------------------------------------- /conda.recipe/build.sh: -------------------------------------------------------------------------------- 1 | cd $RECIPE_DIR/.. 2 | 3 | $PYTHON setup.py install --single-version-externally-managed --record=record.txt 4 | -------------------------------------------------------------------------------- /conda.recipe/meta.yaml: -------------------------------------------------------------------------------- 1 | ## https://docs.anaconda.com/anaconda-cloud/user-guide/tutorials/ 2 | package: 3 | name: lisa 4 | version: 1.0 5 | 6 | build: 7 | number: 7 8 | #entry_points: 9 | # - lisa = ../bin/lisa 10 | 11 | requirements: 12 | build: 13 | - python 14 | - fire 15 | - setuptools 16 | - numpy ==1.15.1 17 | - patchelf 18 | - {{ compiler('c') }} 19 | host: 20 | - python 21 | - zlib 22 | - numpy ==1.15.1 23 | run: 24 | - python 25 | - zlib 26 | - mkl-service 27 | - curl 28 | - fire 29 | - scikit-learn 30 | - theano 31 | - pandas 32 | - h5py 33 | - numpy ==1.15.1 34 | - PyYAML >=3.12 35 | - snakemake ==5.7.1 36 | - yappi 37 | - bwa 38 | - samtools=0.1.19 39 | - bedtools=2.17.0 40 | - seqtk 41 | - ucsc-bedclip 42 | - ucsc-bedgraphtobigwig 43 | - ucsc-wigcorrelate 44 | - ucsc-wigtobigwig 45 | - deeptools 46 | - openssl=1.0 47 | - mpmath 48 | 49 | #- matplotlib 50 | #- seaborn 51 | 52 | about: 53 | home: http://lisa.cistrome.org 54 | license: EULA 55 | license_family: OTHER 56 | license_file: ../LICENSE 57 | summary: 'ChIP-seq/DNase-seq data-driven TF and CR prioritization tool' 58 | -------------------------------------------------------------------------------- /demo/AR.symbol: -------------------------------------------------------------------------------- 1 | NR_045762 2 | NM_001002231 3 | NM_001256080 4 | NM_005551 5 | NR_045763 6 | NM_001135099 7 | NM_005656 8 | NM_004917 9 | NM_014668 10 | NM_001030047 11 | NM_001030048 12 | NM_001648 13 | NM_001255976 14 | NM_020182 15 | NM_199169 16 | NM_199170 17 | NM_199171 18 | NM_024080 19 | NR_046072 20 | NM_001256339 21 | NM_006167 22 | NM_001161352 23 | NM_001161353 24 | NM_002247 25 | NM_020752 26 | NM_018414 27 | NM_001105515 28 | NM_005845 29 | NM_020338 30 | NM_007085 31 | NM_001130518 32 | NM_001127257 33 | NM_020342 34 | NM_033102 35 | NM_018371 36 | NR_024040 37 | NM_001083924 38 | NM_023938 39 | NM_001100624 40 | NM_006549 41 | NM_153499 42 | NM_153500 43 | NM_172216 44 | NM_172226 45 | NM_054027 46 | NM_015036 47 | NM_002867 48 | NM_001172 49 | NM_032323 50 | NR_026678 51 | NM_012081 52 | NM_001100625 53 | NM_001104558 54 | NM_024930 55 | NM_006633 56 | NM_138799 57 | NM_014762 58 | NM_007011 59 | NM_152924 60 | NM_000693 61 | NM_006810 62 | NR_028444 63 | NM_001018011 64 | NM_006006 65 | NM_003711 66 | NM_176895 67 | NM_001105539 68 | NM_023929 69 | NM_004457 70 | NM_203372 71 | NM_018455 72 | NM_000608 73 | NM_001256301 74 | NM_021205 75 | NR_037962 76 | NM_021614 77 | NM_170775 78 | XR_110583 79 | NM_014146 80 | NM_032463 81 | NM_032464 82 | NM_030806 83 | NM_012152 84 | XR_112606 85 | NM_022782 86 | NM_207446 87 | NM_018960 88 | NM_001077654 89 | NM_014350 90 | NM_000860 91 | NM_001145816 92 | NM_001256305 93 | NM_013233 94 | NM_173854 95 | NM_001256307 96 | NM_001256306 97 | NM_000875 98 | NM_207307 99 | NM_001145775 100 | NM_001145776 101 | NM_004117 102 | NM_024409 
103 | NM_000607 104 | NM_015261 105 | -------------------------------------------------------------------------------- /demo/run.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | time lisa model --method="all" --web=False --new_rp_h5=None --new_count_h5=None --species hg38 --epigenome "['DNase', 'H3K27ac']" --cluster=False --covariates=False --random=False --prefix AR.symbol --background=dynamic_auto_tad --stat_background_number=1000 --threads 8 AR.symbol 4 | 5 | -------------------------------------------------------------------------------- /lisa/__init__.py: -------------------------------------------------------------------------------- 1 | """ interface for loading lisa config file 2 | """ 3 | from configparser import ConfigParser 4 | 5 | class Config(ConfigParser): 6 | """ data input interface""" 7 | def __init__(self, f, s): 8 | """ 9 | f: configuration file 10 | s: species 11 | """ 12 | super().__init__() 13 | self.read(f) 14 | self.s = s 15 | 16 | @property 17 | def get_meta(self): 18 | if hasattr(self, "s"): 19 | return self.get('basics', 'meta') 20 | 21 | @property 22 | def get_annotation(self): 23 | if hasattr(self, "s"): 24 | return self.get(self.s, 'tssbin') 25 | 26 | @property 27 | def get_tss_refseq(self): 28 | if hasattr(self, "s"): 29 | return self.get(self.s, 'tss') 30 | 31 | def get_rp(self, factor): 32 | if hasattr(self, "s"): 33 | if factor == 'H3K4me3': 34 | return self.get(self.s, '%s_1kbRP' % factor) 35 | return self.get(self.s, '%s_RP' % factor) 36 | 37 | @property 38 | def get_dnase_bin(self): 39 | if hasattr(self, "s"): 40 | return self.get(self.s, 'DNase_bin') 41 | 42 | def genome_count(self, factor): 43 | if hasattr(self, "s"): 44 | return self.get(self.s, '%s_count' % factor) 45 | 46 | @property 47 | def genome_window_map(self): 48 | if hasattr(self, "s"): 49 | return self.get(self.s, 'genome_window_map') 50 | 51 | @property 52 | def genome_window(self): 53 | if hasattr(self, "s"): 54 | return self.get(self.s, 'genome_window') 55 | 56 | @property 57 | def genome_100bp_window(self): 58 | if hasattr(self, "s"): 59 | return self.get(self.s, 'genome_100bp_window') 60 | 61 | @property 62 | def tf_chipseq(self): 63 | if hasattr(self, "s"): 64 | return self.get(self.s, 'tf_chipseq') 65 | 66 | @property 67 | def chrom(self): 68 | if hasattr(self, "s"): 69 | return self.get(self.s, 'chrom_len') 70 | 71 | @property 72 | def get_motif_meta(self): 73 | if hasattr(self, "s"): 74 | return self.get('basics', 'motif') 75 | 76 | def get_motif_index(self, cutoff=99): 77 | """ 100bp for deletion """ 78 | if hasattr(self, "s"): 79 | return self.get(self.s, 'genome_100bp_motif_index%s' % cutoff) 80 | 81 | def get_motif_1kb(self, cutoff=99): 82 | """ 1kb for cluster """ 83 | if hasattr(self, "s"): 84 | return self.get(self.s, 'genome_motif%s' % cutoff) 85 | 86 | @property 87 | def get_motif_sim(self): 88 | if hasattr(self, "s"): 89 | return self.get("basics", "motif_similarity") 90 | 91 | @property 92 | def get_beta(self): 93 | if hasattr(self, "s"): 94 | return self.get(self.s, 'tf_chipseq_beta') 95 | 96 | @property 97 | def get_udhs(self): 98 | if hasattr(self, "s"): 99 | return self.get(self.s, 'udhs_100bp_index') 100 | 101 | @property 102 | def get_index(self): 103 | """genome index 104 | """ 105 | if hasattr(self, "s"): 106 | return self.get(self.s, 'bwa_index') 107 | 108 | @property 109 | def get_tad(self): 110 | """genome index 111 | """ 112 | if hasattr(self, "s"): 113 | return self.get(self.s, 'tad_info') 114 | 115 | 
@property 116 | def get_cluster(self): 117 | """only for hg38 now......""" 118 | if hasattr(self, "s"): 119 | return self.get(self.s, 'cluster') 120 | -------------------------------------------------------------------------------- /lisa/lisa.ini: -------------------------------------------------------------------------------- 1 | [basics] 2 | motif = ${prefix}/cistrome.txt 3 | meta = ${prefix}/lisa_meta.xls 4 | 5 | [mm10] 6 | bwa_index = ${prefix}/mm10.fa 7 | chrom_len = ${prefix}/mm10.genome 8 | tssbin = ${prefix}/mm10_window1kb_tss.bed 9 | tss = ${prefix}/mm10.tss 10 | H3K27ac_RP = ${prefix}/margeRP_H3K27ac_mm.h5 11 | DNase_RP = ${prefix}/margeRP_DNase_mm.h5 12 | genome_window = ${prefix}/mm10_window1kb.bed 13 | genome_window_map = ${prefix}/mm10_100to1000window.out.npy 14 | H3K27ac_count = ${prefix}/mm10_window1kb_H3K27ac.h5 15 | DNase_count = ${prefix}/mm10_window1kb_DNase.h5 16 | tf_chipseq = ${prefix}/mm10_lisa_tf_100bp_all_nonhm_nonca_peak5fold.h5 17 | tf_chipseq_beta = ${prefix}/mm10_beta_peak5fold.h5 18 | cluster = ${prefix}/cluster_mouse 19 | genome_100bp_motif_index99 = ${prefix}/mm10_marge2_motif_100bp_99.h5 20 | tad_info = ${prefix}/mm10_promoter_TADann_H3K4me3_enhance_k27me3_Using.xls 21 | 22 | [hg38] 23 | bwa_index = ${prefix}/hg38.fa 24 | chrom_len = ${prefix}/hg38.genome 25 | tssbin = ${prefix}/hg38_window1kb_tss.bed 26 | tss = ${prefix}/hg38.tss 27 | H3K27ac_RP = ${prefix}/margeRP_H3K27ac.h5 28 | DNase_RP = ${prefix}/margeRP_DNase.h5 29 | genome_window = ${prefix}/hg38_window1kb.bed 30 | genome_window_map = ${prefix}/hg38_100to1000window.out.npy 31 | H3K27ac_count = ${prefix}/hg38_window1kb_H3K27ac.h5 32 | DNase_count = ${prefix}/hg38_window1kb_DNase.h5 33 | tf_chipseq = ${prefix}/hs_tf_new_peak_loct.h5 34 | tf_chipseq_beta = ${prefix}/hs_tf_new_beta_rp.h5 35 | cluster = ${prefix}/cluster_human 36 | genome_100bp_motif_index99 = ${prefix}/marge2_motif_100bp_99.h5 37 | tad_info = ${prefix}/hg38_promoter_TADann_H3K4me3_enhance_k27me3_Using.xls 38 | -------------------------------------------------------------------------------- /lisa/lisa.ini.updated: -------------------------------------------------------------------------------- 1 | [basics] 2 | motif = /data/home/qqin/lisa_web/download/data/mm10/cistrome.txt 3 | meta = /data/home/qqin/lisa_web/download/data/mm10/dc_meta_lisa_20180102.xls 4 | 5 | [mm10] 6 | bwa_index = /data/home/qqin/lisa_web/download/data/mm10/mm10.fa 7 | meta = /data/home/qqin/lisa_web/download/data/mm10/margeFactor_mm.csv 8 | chrom_len = /data/home/qqin/lisa_web/download/data/mm10/mm10.genome 9 | tssbin = /data/home/qqin/lisa_web/download/data/mm10/mm10_window1kb_tss.bed 10 | tss = /data/home/qqin/lisa_web/download/data/mm10/mm10.tss 11 | h3k27ac_rp = /data/home/qqin/lisa_web/download/data/mm10/margeRP_H3K27ac_mm.h5 12 | dnase_rp = /data/home/qqin/lisa_web/download/data/mm10/margeRP_DNase_mm.h5 13 | genome_window_map = /data/home/qqin/lisa_web/download/data/mm10/mm10_100to1000window.out.npy 14 | h3k27ac_count = /data/home/qqin/lisa_web/download/data/mm10/mm10_window1kb_H3K27ac.h5 15 | dnase_count = /data/home/qqin/lisa_web/download/data/mm10/mm10_window1kb_DNase.h5 16 | tf_chipseq = /data/home/qqin/lisa_web/download/data/mm10/mm10_lisa_tf_100bp_all_nonhm_nonca_peak5fold.h5 17 | tf_chipseq_meta = /data/home/qqin/lisa_web/download/data/mm10/mm10_best_dc_tfcr_basedon_frip_peak_dhs_all_nonhm_nonca.xls 18 | tf_chipseq_beta = /data/home/qqin/lisa_web/download/data/mm10/mm10_beta_peak5fold.h5 19 | cluster = 
/data/home/qqin/lisa_web/download/data/mm10/cluster_mouse 20 | genome_100bp_motif_index99 = /data/home/qqin/lisa_web/download/data/mm10/mm10_marge2_motif_100bp_99.h5 21 | tad_info = /data/home/qqin/lisa_web/download/data/mm10/mm10_promoter_TADann_H3K4me3_enhance_k27me3_Using.xls 22 | 23 | [hg38] 24 | bwa_index = /data/home/qqin/lisa_web/download/data/mm10/hg38.fa 25 | meta = /data/home/qqin/lisa_web/download/data/mm10/margeFactor.csv 26 | chrom_len = /data/home/qqin/lisa_web/download/data/mm10/hg38.genome 27 | tssbin = /data/home/qqin/lisa_web/download/data/mm10/hg38_window1kb_tss.bed 28 | tss = /data/home/qqin/lisa_web/download/data/mm10/hg38.tss 29 | h3k27ac_rp = /data/home/qqin/lisa_web/download/data/mm10/margeRP_H3K27ac.h5 30 | dnase_rp = /data/home/qqin/lisa_web/download/data/mm10/margeRP_DNase.h5 31 | genome_window_map = /data/home/qqin/lisa_web/download/data/mm10/hg38_100to1000window.out.npy 32 | h3k27ac_count = /data/home/qqin/lisa_web/download/data/mm10/hg38_window1kb_H3K27ac.h5 33 | dnase_count = /data/home/qqin/lisa_web/download/data/mm10/hg38_window1kb_DNase.h5 34 | tf_chipseq = /data/home/qqin/lisa_web/download/data/mm10/hs_tf_new_peak_loct.h5 35 | tf_chipseq_meta = /data/home/qqin/lisa_web/download/data/mm10/hs_tf_meta_qc.xls 36 | tf_chipseq_beta = /data/home/qqin/lisa_web/download/data/mm10/hs_tf_new_beta_rp.h5 37 | cluster = /data/home/qqin/lisa_web/download/data/mm10/cluster_human 38 | genome_100bp_motif_index99 = /data/home/qqin/lisa_web/download/data/mm10/marge2_motif_100bp_99.h5 39 | tad_info = /data/home/qqin/lisa_web/download/data/mm10/hg38_promoter_TADann_H3K4me3_enhance_k27me3_Using.xls 40 | 41 | -------------------------------------------------------------------------------- /lisa/mouse.tfs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa/mouse.tfs -------------------------------------------------------------------------------- /lisa/rank.py: -------------------------------------------------------------------------------- 1 | """ two methods to rank TFs 2 | """ 3 | import theano 4 | import theano.tensor as T 5 | import pandas as pd 6 | import numpy as np 7 | import scipy.stats as stats 8 | from numpy.linalg import norm 9 | 10 | def get_insilico_knockout_tensor_op(lisa_prediction, precompute, coef, original_median=None): 11 | """ use theano tensor operation to speed up 12 | return a theano.function 13 | 14 | lisa_prediction: numpy array 15 | precompute: numpy array 16 | coef: pandas DataFrame 17 | """ 18 | x = T.imatrix('E') # each motif tensor 19 | precomp = theano.shared(precompute.astype(theano.config.floatX), name='precompute') 20 | r = theano.shared(lisa_prediction.astype(theano.config.floatX), name='Lisa RP') 21 | c = theano.shared(coef.iloc[:, 0].values.astype(theano.config.floatX), name='coefficients') 22 | m = theano.shared(original_median.astype(theano.config.floatX), name='original_rp_median') 23 | 24 | # sample x (gene1_bin1, gene1_bin2...gene2_bin1,gene2_bin2...) 
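    # (Added clarifying comments; the shapes are a sketch inferred from the reshape and dot below.)
    # x is a 0/1 selection matrix over the flattened (gene, bin) grid for one TF binding profile.
    # Repeating its rows `precompute.shape[0]` times and multiplying elementwise with `precompute`
    # keeps only the per-(gene, bin) regulatory-potential contributions at the selected bins; the
    # reshape and the sum over the last axis then collapse the 200 bins per gene. Adding 1, taking
    # log2, and subtracting the precomputed median `m` rebuild the log-RP features, and the dot
    # product with the per-sample coefficients `c` re-scores the model, so the function returns
    # `r` minus the re-scored prediction for every gene (the in-silico knockout effect).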
25 | y = T.extra_ops.repeat(x, precompute.shape[0], axis=0) 26 | tensor_del = y * precomp # sample x (gene,bin) 27 | tensor_del = T.reshape(tensor_del, (c.shape[0],r.shape[0],200)) # sample x gene x bin 28 | tensor_del = T.transpose(T.sum(tensor_del, axis=2), (1,0)) + T.constant(1) # one motif 29 | 30 | ##tensor_del_med = T.mean(tensor_del, axis=0) # one motif 31 | ##log_tensor_del = T.log2(tensor_del) - T.log2(tensor_del_med) 32 | 33 | log_tensor_del = T.log2(tensor_del) - m # original median already take log2 34 | tensor_delta = r - T.dot(log_tensor_del, c) 35 | 36 | mode = theano.Mode(linker='cvm', optimizer='fast_run') 37 | theano.config.exception_verbosity = 'high' 38 | # theano.config.openmp = True 39 | theano_delta_rp = theano.function([x], tensor_delta, mode=mode) 40 | return theano_delta_rp 41 | 42 | def rank_by_entropy(pq, kl=True): 43 | """ evaluate kl divergence, wasserstein distance 44 | wasserstein: http://pythonhosted.org/pyriemann/_modules/pyriemann/utils/distance.html 45 | """ 46 | # to avoid Inf cases 47 | pq = pq + 0.0000001 48 | pq = pq/pq.sum(axis=0) 49 | 50 | if kl: # entropy actually can calculate KL divergence 51 | final=pq.iloc[:, :-1].apply( 52 | lambda x: stats.entropy(x, pq.iloc[:, -1], base=2), axis=0) 53 | label = 'KL' 54 | else: # JS divergence 55 | final=pq.iloc[:, :-1].apply( 56 | lambda x: JSD(x, pq.iloc[:, -1]), axis=0) 57 | label = 'JSD' 58 | final.sort_values(ascending=False, inplace=True) 59 | rank = final.rank(ascending=False) 60 | final = pd.concat([final, rank], axis=1) 61 | final.columns = [label, 'rank'] 62 | return final 63 | 64 | def JSD(P, Q): 65 | """ compute JS divergence 66 | JSD: http://stackoverflow.com/questions/15880133/jensen-shannon-divergence 67 | """ 68 | P = P / norm(P, ord=1) 69 | Q = Q / norm(Q, ord=1) 70 | M = 0.5 * (P + Q) 71 | return 0.5 * (stats.entropy(P, M) + stats.entropy(Q, M)) 72 | -------------------------------------------------------------------------------- /lisa/regpotential/__init__.py: -------------------------------------------------------------------------------- 1 | import lisa._bw as regpotential 2 | -------------------------------------------------------------------------------- /lisa/regpotential/aliType.c: -------------------------------------------------------------------------------- 1 | /* aliType - some definitions for type of alignment. */ 2 | 3 | /* Copyright (C) 2011 The Regents of the University of California 4 | * See README in this or parent directory for licensing information. */ 5 | #include "common.h" 6 | #include "aliType.h" 7 | 8 | 9 | char *gfTypeName(enum gfType type) 10 | /* Return string representing type. */ 11 | { 12 | if (type == gftDna) return "DNA"; 13 | if (type == gftRna) return "RNA"; 14 | if (type == gftProt) return "protein"; 15 | if (type == gftDnaX) return "DNAX"; 16 | if (type == gftRnaX) return "RNAX"; 17 | internalErr(); 18 | return NULL; 19 | } 20 | 21 | enum gfType gfTypeFromName(char *name) 22 | /* Return type from string. 
*/ 23 | { 24 | if (sameWord(name, "DNA")) return gftDna; 25 | if (sameWord(name, "RNA")) return gftRna; 26 | if (sameWord(name, "protein")) return gftProt; 27 | if (sameWord(name, "prot")) return gftProt; 28 | if (sameWord(name, "DNAX")) return gftDnaX; 29 | if (sameWord(name, "RNAX")) return gftRnaX; 30 | errAbort("Unknown sequence type '%s'", name); 31 | return 0; 32 | } 33 | 34 | -------------------------------------------------------------------------------- /lisa/regpotential/aliType.h: -------------------------------------------------------------------------------- 1 | /* aliType - some definitions for type of alignment. */ 2 | 3 | #ifndef ALITYPE_H 4 | #define ALITYPE_H 5 | 6 | enum gfType 7 | /* Types of sequence genoFind deals with. */ 8 | { 9 | gftDna = 0, /* DNA (genomic) */ 10 | gftRna = 1, /* RNA */ 11 | gftProt = 2, /* Protein. */ 12 | gftDnaX = 3, /* Genomic DNA translated to protein */ 13 | gftRnaX = 4, /* RNA translated to protein */ 14 | }; 15 | 16 | char *gfTypeName(enum gfType type); 17 | /* Return string representing type. */ 18 | 19 | enum gfType gfTypeFromName(char *name); 20 | /* Return type from string. */ 21 | 22 | enum ffStringency 23 | /* How tight of a match is required. */ 24 | { 25 | ffExact = 0, /* Only an exact match will do. */ 26 | 27 | ffCdna = 1, /* Near exact. Tolerate long gaps in target (genomic) */ 28 | ffTight = 2, /* Near exact. Not so tolerant of long gaps in target. */ 29 | ffLoose = 3, /* Less exact. */ 30 | }; 31 | 32 | #endif /* ALITYPE_H */ 33 | -------------------------------------------------------------------------------- /lisa/regpotential/base64.c: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2011 The Regents of the University of California 2 | * See README in this or parent directory for licensing information. */ 3 | 4 | #include "common.h" 5 | #include "base64.h" 6 | 7 | 8 | char *base64Encode(char *input, size_t inplen) 9 | /* Use base64 to encode a string. Returns one long encoded 10 | * string which need to be freeMem'd. Note: big-endian algorithm. 11 | * For some applications you may need to break the base64 output 12 | * of this function into lines no longer than 76 chars. 13 | */ 14 | { 15 | char b64[] = B64CHARS; 16 | int words = (inplen+2)/3; 17 | int remains = inplen % 3; 18 | char *result = (char *)needMem(4*words+1); 19 | size_t i=0, j=0; 20 | int word = 0; 21 | unsigned char *p = (unsigned char*) input; 22 | /* p must be unsigned char*, because without "unsigned", 23 | sign extend messes up last group outputted 24 | when the value of the chars following last in input 25 | happens to be char 0x80 or higher */ 26 | for(i=1; i<=words; i++) 27 | { 28 | word = 0; 29 | word |= *p++; 30 | word <<= 8; 31 | word |= *p++; 32 | word <<= 8; 33 | word |= *p++; 34 | if (i==words && remains>0) 35 | { 36 | word &= 0x00FFFF00; 37 | if (remains==1) 38 | word &= 0x00FF0000; 39 | } 40 | result[j++]=b64[word >> 18 & 0x3F]; 41 | result[j++]=b64[word >> 12 & 0x3F]; 42 | result[j++]=b64[word >> 6 & 0x3F]; 43 | result[j++]=b64[word & 0x3F]; 44 | } 45 | result[j] = 0; 46 | if (remains >0) result[j-1] = '='; 47 | if (remains==1) result[j-2] = '='; 48 | return result; 49 | } 50 | 51 | 52 | boolean base64Validate(char *input) 53 | /* Return true if input is valid base64. 54 | * Note that the input string is changed by 55 | * eraseWhiteSpace(). 
*/ 56 | { 57 | size_t i = 0, l = 0; 58 | char *p = input; 59 | boolean validB64 = TRUE; 60 | 61 | /* remove whitespace which is unnecessary and */ 62 | eraseWhiteSpace(input); 63 | 64 | l = strlen(p); 65 | for(i=0;i> 16 & 0xFF; 123 | result[j++]=word >> 8 & 0xFF; 124 | result[j++]=word & 0xFF; 125 | } 126 | result[j] = 0; 127 | if (returnSize) 128 | *returnSize = j; 129 | 130 | return result; 131 | } 132 | 133 | -------------------------------------------------------------------------------- /lisa/regpotential/base64.h: -------------------------------------------------------------------------------- 1 | /* Base64 encoding and decoding. 2 | * by Galt Barber */ 3 | 4 | #ifndef BASE64_H 5 | #define BASE64_H 6 | 7 | #define B64CHARS "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" 8 | 9 | char *base64Encode(char *input, size_t inplen); 10 | /* Use base64 to encode a string. Returns one long encoded 11 | * string which need to be freeMem'd. Note: big-endian algorithm. 12 | * For some applications you may need to break the base64 output 13 | * of this function into lines no longer than 76 chars. 14 | */ 15 | 16 | boolean base64Validate(char *input); 17 | /* Return true if input is valid base64. 18 | * Note that the input string is changed by 19 | * eraseWhiteSpace(). */ 20 | 21 | char *base64Decode(char *input, size_t *returnSize); 22 | /* Use base64 to decode a string. Return decoded 23 | * string which will be freeMem'd. Note: big-endian algorithm. 24 | * Call eraseWhiteSpace() and check for invalid input 25 | * before passing in input if needed. 26 | * Optionally set retun size for use with binary data. 27 | */ 28 | 29 | #endif /* BASE64_H */ 30 | 31 | -------------------------------------------------------------------------------- /lisa/regpotential/bigBedSummary.c: -------------------------------------------------------------------------------- 1 | /* bigBedSummary - Extract summary information from a bigBed file.. */ 2 | 3 | /* Copyright (C) 2011 The Regents of the University of California 4 | * See README in this or parent directory for licensing information. */ 5 | #include "common.h" 6 | #include "linefile.h" 7 | #include "hash.h" 8 | #include "options.h" 9 | #include "sqlNum.h" 10 | #include "bigBed.h" 11 | #include "asParse.h" 12 | #include "udc.h" 13 | #include "obscure.h" 14 | 15 | 16 | char *summaryType = "coverage"; 17 | 18 | void usage() 19 | /* Explain usage and exit. */ 20 | { 21 | errAbort( 22 | "bigBedSummary - Extract summary information from a bigBed file.\n" 23 | "usage:\n" 24 | " bigBedSummary file.bb chrom start end dataPoints\n" 25 | "Get summary data from bigBed for indicated region, broken into\n" 26 | "dataPoints equal parts. 
(Use dataPoints=1 for simple summary.)\n" 27 | "options:\n" 28 | " -type=X where X is one of:\n" 29 | " coverage - %% of region that is covered (default)\n" 30 | " mean - average depth of covered regions\n" 31 | " min - minimum depth of covered regions\n" 32 | " max - maximum depth of covered regions\n" 33 | " -fields - print out information on fields in file.\n" 34 | " If fields option is used, the chrom, start, end, dataPoints\n" 35 | " parameters may be omitted\n" 36 | " -udcDir=/dir/to/cache - place to put cache for remote bigBed/bigWigs\n" 37 | ); 38 | } 39 | 40 | static struct optionSpec options[] = { 41 | {"type", OPTION_STRING}, 42 | {"fields", OPTION_BOOLEAN}, 43 | {"udcDir", OPTION_STRING}, 44 | {NULL, 0}, 45 | }; 46 | 47 | void bigBedSummary(char *fileName, char *chrom, int start, int end, int dataPoints) 48 | /* bigBedSummary - Extract summary information from a bigBed file.. */ 49 | { 50 | /* Make up values array initialized to not-a-number. */ 51 | double nan0 = strtod("NaN", NULL); 52 | double summaryValues[dataPoints]; 53 | int i; 54 | for (i=0; idefinedFieldCount, bbi->fieldCount); 86 | struct asObject *as = bigBedAs(bbi); 87 | if (as != NULL) 88 | { 89 | struct asColumn *col; 90 | for (col = as->columnList; col != NULL; col = col->next) 91 | { 92 | printf("\t%s\t%s\n", col->name, col->comment); 93 | } 94 | } 95 | else 96 | { 97 | printf("No additional field information included.\n"); 98 | } 99 | } 100 | 101 | int main(int argc, char *argv[]) 102 | /* Process command line. */ 103 | { 104 | optionInit(&argc, argv, options); 105 | udcSetDefaultDir(optionVal("udcDir", udcDefaultDir())); 106 | if (optionExists("fields")) 107 | { 108 | if (argc < 2) 109 | usage(); 110 | bigBedFields(argv[1]); 111 | } 112 | else 113 | { 114 | summaryType = optionVal("type", summaryType); 115 | if (argc != 6) 116 | usage(); 117 | bigBedSummary(argv[1], argv[2], sqlUnsigned(argv[3]), sqlUnsigned(argv[4]), sqlUnsigned(argv[5])); 118 | } 119 | if (verboseLevel() > 1) 120 | printVmPeak(); 121 | return 0; 122 | } 123 | -------------------------------------------------------------------------------- /lisa/regpotential/bigWigSummary.c: -------------------------------------------------------------------------------- 1 | /* bigWigSummary - Extract summary information from a bigWig file.. */ 2 | 3 | /* Copyright (C) 2011 The Regents of the University of California 4 | * See README in this or parent directory for licensing information. */ 5 | #include "common.h" 6 | #include "linefile.h" 7 | #include "hash.h" 8 | #include "options.h" 9 | #include "sqlNum.h" 10 | #include "udc.h" 11 | #include "bigWig.h" 12 | #include "obscure.h" 13 | 14 | 15 | char *summaryType = "mean"; 16 | 17 | 18 | void usage() 19 | /* Explain usage and exit. */ 20 | { 21 | errAbort( 22 | "bigWigSummary - Extract summary information from a bigWig file.\n" 23 | "usage:\n" 24 | " bigWigSummary file.bigWig chrom start end dataPoints\n" 25 | "Get summary data from bigWig for indicated region, broken into\n" 26 | "dataPoints equal parts. 
(Use dataPoints=1 for simple summary.)\n" 27 | "\nNOTE: start and end coordinates are in BED format (0-based)\n\n" 28 | "options:\n" 29 | " -type=X where X is one of:\n" 30 | " mean - average value in region (default)\n" 31 | " min - minimum value in region\n" 32 | " max - maximum value in region\n" 33 | " std - standard deviation in region\n" 34 | " coverage - %% of region that is covered\n" 35 | " -udcDir=/dir/to/cache - place to put cache for remote bigBed/bigWigs\n" 36 | ); 37 | } 38 | 39 | static struct optionSpec options[] = { 40 | {"type", OPTION_STRING}, 41 | {"udcDir", OPTION_STRING}, 42 | {NULL, 0}, 43 | }; 44 | 45 | void bigWigSummary(char *bigWigFile, char *chrom, int start, int end, int dataPoints) 46 | /* bigWigSummary - Extract summary information from a bigWig file.. */ 47 | { 48 | struct bbiFile *bwf = bigWigFileOpen(bigWigFile); 49 | 50 | /* Make up values array initialized to not-a-number. */ 51 | double nan0 = strtod("NaN", NULL); 52 | double summaryValues[dataPoints]; 53 | int i; 54 | for (i=0; i 1) 89 | printVmPeak(); 90 | return 0; 91 | } 92 | -------------------------------------------------------------------------------- /lisa/regpotential/bits.h: -------------------------------------------------------------------------------- 1 | /* bits - handle operations on arrays of bits. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #ifndef BITS_H 7 | #define BITS_H 8 | 9 | #include "localmem.h" 10 | 11 | typedef unsigned char Bits; 12 | 13 | #define bitToByteSize(bitSize) ((bitSize+7)/8) 14 | /* Convert number of bits to number of bytes needed to store bits. */ 15 | 16 | Bits *bitAlloc(int bitCount); 17 | /* Allocate bits. */ 18 | 19 | Bits *bitRealloc(Bits *b, int bitCount, int newBitCount); 20 | /* Resize a bit array. If b is null, allocate a new array */ 21 | 22 | Bits *bitClone(Bits* orig, int bitCount); 23 | /* Clone bits. */ 24 | 25 | void bitFree(Bits **pB); 26 | /* Free bits. */ 27 | 28 | Bits *lmBitAlloc(struct lm *lm,int bitCount); 29 | // Allocate bits. Must supply local memory. 30 | 31 | Bits *lmBitRealloc(struct lm *lm, Bits *b, int bitCount, int newBitCount); 32 | // Resize a bit array. If b is null, allocate a new array. Must supply local memory. 33 | 34 | Bits *lmBitClone(struct lm *lm, Bits* orig, int bitCount); 35 | // Clone bits. Must supply local memory. 36 | 37 | void bitSetOne(Bits *b, int bitIx); 38 | /* Set a single bit. */ 39 | 40 | void bitClearOne(Bits *b, int bitIx); 41 | /* Clear a single bit. */ 42 | 43 | void bitSetRange(Bits *b, int startIx, int bitCount); 44 | /* Set a range of bits. */ 45 | 46 | boolean bitReadOne(Bits *b, int bitIx); 47 | /* Read a single bit. */ 48 | 49 | int bitCountRange(Bits *b, int startIx, int bitCount); 50 | /* Count number of bits set in range. */ 51 | 52 | int bitFindSet(Bits *b, int startIx, int bitCount); 53 | /* Find the index of the the next set bit. */ 54 | 55 | int bitFindClear(Bits *b, int startIx, int bitCount); 56 | /* Find the index of the the next clear bit. */ 57 | 58 | void bitClear(Bits *b, int bitCount); 59 | /* Clear many bits (possibly up to 7 beyond bitCount). */ 60 | 61 | void bitClearRange(Bits *b, int startIx, int bitCount); 62 | /* Clear a range of bits. */ 63 | 64 | void bitAnd(Bits *a, Bits *b, int bitCount); 65 | /* And two bitmaps. Put result in a. */ 66 | 67 | int bitAndCount(Bits *a, Bits *b, int bitCount); 68 | // Without altering 2 bitmaps, count the AND bits. 
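/* Added illustrative sketch (not part of the original kent source); it shows the intended
 * use of the declarations in this header, under the assumption that the bitCount arguments
 * are lengths rather than end indexes:
 *
 *   Bits *b = bitAlloc(1000);            // 1000-bit map, all clear
 *   bitSetRange(b, 10, 20);              // set bits 10..29
 *   boolean hit = bitReadOne(b, 15);     // TRUE
 *   int n = bitCountRange(b, 0, 1000);   // 20
 *   bitFree(&b);                         // free the map
 */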
69 | 70 | void bitOr(Bits *a, Bits *b, int bitCount); 71 | /* Or two bitmaps. Put result in a. */ 72 | 73 | int bitOrCount(Bits *a, Bits *b, int bitCount); 74 | // Without altering 2 bitmaps, count the OR'd bits. 75 | 76 | void bitXor(Bits *a, Bits *b, int bitCount); 77 | /* Xor two bitmaps. Put result in a. */ 78 | 79 | int bitXorCount(Bits *a, Bits *b, int bitCount); 80 | // Without altering 2 bitmaps, count the XOR'd bits. 81 | 82 | void bitNot(Bits *a, int bitCount); 83 | /* Flip all bits in a. */ 84 | 85 | void bitReverseRange(Bits *bits, int startIx, int bitCount); 86 | // Reverses bits in range (e.g. 110010 becomes 010011) 87 | 88 | void bitPrint(Bits *a, int startIx, int bitCount, FILE* out); 89 | /* Print part or all of bit map as a string of 0s and 1s. Mostly useful for 90 | * debugging */ 91 | 92 | void bitsOut(FILE* out, Bits *bits, int startIx, int bitCount, boolean onlyOnes); 93 | // Print part or all of bit map as a string of 0s and 1s. 94 | // If onlyOnes, enclose result in [] and use ' ' instead of '0'. 95 | 96 | Bits *bitsIn(struct lm *lm,char *bitString, int len); 97 | // Returns a bitmap from a string of 1s and 0s. Any non-zero, non-blank char sets a bit. 98 | // Returned bitmap is the size of len even if that is longer than the string. 99 | // Optionally supply local memory. Note does NOT handle enclosing []s printed with bitsOut(). 100 | 101 | extern int bitsInByte[256]; 102 | /* Lookup table for how many bits are set in a byte. */ 103 | 104 | void bitsInByteInit(); 105 | /* Initialize bitsInByte array. */ 106 | 107 | #endif /* BITS_H */ 108 | 109 | -------------------------------------------------------------------------------- /lisa/regpotential/colHash.c: -------------------------------------------------------------------------------- 1 | /* colHash - stuff for fast lookup of index given an 2 | * rgb value. */ 3 | 4 | /* Copyright (C) 2011 The Regents of the University of California 5 | * See README in this or parent directory for licensing information. */ 6 | 7 | #include "common.h" 8 | #include "memgfx.h" 9 | #include "colHash.h" 10 | 11 | 12 | struct colHash *colHashNew() 13 | /* Get a new color hash. */ 14 | { 15 | struct colHash *cHash; 16 | AllocVar(cHash); 17 | cHash->freeEl = cHash->elBuf; 18 | return cHash; 19 | } 20 | 21 | void colHashFree(struct colHash **pEl) 22 | /* Free up color hash. */ 23 | { 24 | freez(pEl); 25 | } 26 | 27 | struct colHashEl *colHashAdd(struct colHash *cHash, 28 | unsigned r, unsigned g, unsigned b, int ix) 29 | /* Add new element to color hash. */ 30 | { 31 | struct colHashEl *che = cHash->freeEl++, **pCel; 32 | che->col.r = r; 33 | che->col.g = g; 34 | che->col.b = b; 35 | che->ix = ix; 36 | pCel = &cHash->lists[colHashFunc(r,g,b)]; 37 | slAddHead(pCel, che); 38 | return che; 39 | } 40 | 41 | struct colHashEl *colHashLookup(struct colHash *cHash, 42 | unsigned r, unsigned g, unsigned b) 43 | /* Lookup value in hash. */ 44 | { 45 | struct colHashEl *che; 46 | for (che = cHash->lists[colHashFunc(r,g,b)]; che != NULL; che = che->next) 47 | if (che->col.r == r && che->col.g == g && che->col.b == b) 48 | return che; 49 | return NULL; 50 | } 51 | 52 | -------------------------------------------------------------------------------- /lisa/regpotential/colHash.h: -------------------------------------------------------------------------------- 1 | /* colHash - stuff for fast lookup of index given an 2 | * rgb value. 
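 * Illustrative use (example values only): register each palette color with
 * its index once, then map colors back to indexes through the hash:
 *   struct colHash *ch = colHashNew();
 *   colHashAdd(ch, 255, 0, 0, 3);                    -- palette slot 3 is red
 *   struct colHashEl *el = colHashLookup(ch, 255, 0, 0);
 *   int ix = (el != NULL) ? el->ix : -1;             -- -1 when color unknown
 *   colHashFree(&ch);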
*/ 3 | 4 | /* Copyright (C) 2002 The Regents of the University of California 5 | * See README in this or parent directory for licensing information. */ 6 | #ifndef COLHASH_H 7 | #define COLHASH_H 8 | 9 | #define colHashFunc(r,g,b) (r+g+g+b) 10 | 11 | struct colHashEl 12 | /* An element in a color hash. */ 13 | { 14 | struct colHashEl *next; /* Next in list. */ 15 | struct rgbColor col; /* Color RGB. */ 16 | int ix; /* Color Index. */ 17 | }; 18 | 19 | struct colHash 20 | /* A hash on RGB colors. */ 21 | { 22 | struct colHashEl *lists[4*256]; /* Hash chains. */ 23 | struct colHashEl elBuf[256]; /* Buffer of elements. */ 24 | struct colHashEl *freeEl; /* Pointer to next free element. */ 25 | }; 26 | 27 | struct colHash *colHashNew(); 28 | /* Get a new color hash. */ 29 | 30 | void colHashFree(struct colHash **pEl); 31 | /* Free up color hash. */ 32 | 33 | struct colHashEl *colHashAdd(struct colHash *cHash, 34 | unsigned r, unsigned g, unsigned b, int ix); 35 | /* Add new element to color hash. */ 36 | 37 | struct colHashEl *colHashLookup(struct colHash *cHash, 38 | unsigned r, unsigned g, unsigned b); 39 | /* Lookup value in hash. */ 40 | 41 | #endif /* COLHASH_H */ 42 | -------------------------------------------------------------------------------- /lisa/regpotential/dnaseq.h: -------------------------------------------------------------------------------- 1 | /* dnaSeq - stuff to manage DNA sequences. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #ifndef DNASEQ_H 7 | #define DNASEQ_H 8 | 9 | #ifndef DNAUTIL_H 10 | #include "dnautil.h" 11 | #endif 12 | 13 | #ifndef BITS_H 14 | #include "bits.h" 15 | #endif 16 | 17 | struct dnaSeq 18 | /* A dna sequence in one-character per base format. */ 19 | { 20 | struct dnaSeq *next; /* Next in list. */ 21 | char *name; /* Name of sequence. */ 22 | DNA *dna; /* Sequence base by base. */ 23 | int size; /* Size of sequence. */ 24 | Bits* mask; /* Repeat mask (optional) */ 25 | }; 26 | 27 | typedef struct dnaSeq bioSeq; /* Preferred use if either DNA or protein. */ 28 | typedef struct dnaSeq aaSeq; /* Preferred use if protein. */ 29 | 30 | struct dnaSeq *newDnaSeq(DNA *dna, int size, char *name); 31 | /* Create a new DNA seq. */ 32 | 33 | struct dnaSeq *cloneDnaSeq(struct dnaSeq *seq); 34 | /* Duplicate dna sequence in RAM. */ 35 | 36 | void freeDnaSeq(struct dnaSeq **pSeq); 37 | /* Free up DNA seq. */ 38 | #define dnaSeqFree freeDnaSeq 39 | 40 | void freeDnaSeqList(struct dnaSeq **pSeqList); 41 | /* Free up list of DNA sequences. */ 42 | #define dnaSeqFreeList freeDnaSeqList 43 | 44 | aaSeq *translateSeqN(struct dnaSeq *inSeq, unsigned offset, unsigned size, boolean stop); 45 | /* Return a translated sequence. Offset is position of first base to 46 | * translate. If size is 0 then use length of inSeq. */ 47 | 48 | aaSeq *translateSeq(struct dnaSeq *inSeq, unsigned offset, boolean stop); 49 | /* Return a translated sequence. Offset is position of first base to 50 | * translate. If stop is TRUE then stop at first stop codon. (Otherwise 51 | * represent stop codons as 'Z'). */ 52 | 53 | boolean seqIsDna(bioSeq *seq); 54 | /* Make educated guess whether sequence is DNA or protein. */ 55 | 56 | boolean seqIsLower(bioSeq *seq); 57 | /* Return TRUE if sequence is all lower case. */ 58 | 59 | bioSeq *whichSeqIn(bioSeq **seqs, int seqCount, char *letters); 60 | /* Figure out which if any sequence letters is in. 
*/ 61 | 62 | Bits *maskFromUpperCaseSeq(bioSeq *seq); 63 | /* Allocate a mask for sequence and fill it in based on 64 | * sequence case. */ 65 | 66 | struct hash *dnaSeqHash(struct dnaSeq *seqList); 67 | /* Return hash of sequences keyed by name. */ 68 | 69 | int dnaSeqCmpName(const void *va, const void *vb); 70 | /* Compare to sort based on sequence name. */ 71 | 72 | #endif /* DNASEQ_H */ 73 | 74 | -------------------------------------------------------------------------------- /lisa/regpotential/dystring.h: -------------------------------------------------------------------------------- 1 | /* dystring - dynamically resizing string. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #ifndef DYSTRING_H /* Wrapper to avoid including this twice. */ 7 | #define DYSTRING_H 8 | 9 | #include "common.h" 10 | 11 | struct dyString 12 | /* Dynamically resizable string that you can do formatted 13 | * output to. */ 14 | { 15 | struct dyString *next; /* Next in list. */ 16 | char *string; /* Current buffer. */ 17 | int bufSize; /* Size of buffer. */ 18 | int stringSize; /* Size of string. */ 19 | }; 20 | 21 | struct dyString *newDyString(int initialBufSize); 22 | /* Allocate dynamic string with initial buffer size. (Pass zero for default) */ 23 | 24 | #define dyStringNew newDyString 25 | 26 | void freeDyString(struct dyString **pDs); 27 | /* Free up dynamic string. */ 28 | 29 | #define dyStringFree(a) freeDyString(a); 30 | 31 | void freeDyStringList(struct dyString **pDs); 32 | /* Free up a list of dynamic strings */ 33 | 34 | #define dyStringFreeList(a) freeDyStringList(a); 35 | 36 | void dyStringAppend(struct dyString *ds, char *string); 37 | /* Append zero terminated string to end of dyString. */ 38 | 39 | void dyStringAppendN(struct dyString *ds, char *string, int stringSize); 40 | /* Append string of given size to end of string. */ 41 | 42 | char dyStringAppendC(struct dyString *ds, char c); 43 | /* Append char to end of string. */ 44 | 45 | void dyStringAppendMultiC(struct dyString *ds, char c, int n); 46 | /* Append N copies of char to end of string. */ 47 | 48 | void dyStringAppendEscapeQuotes(struct dyString *dy, char *string, 49 | char quot, char esc); 50 | /* Append escaped-for-quotation version of string to dy. */ 51 | 52 | #define dyStringWriteOne(dy, var) dyStringAppendN(dy, (char *)(&var), sizeof(var)) 53 | /* Write one variable (binary!) to dyString - for cases when want to treat string like 54 | * a file stream. */ 55 | 56 | void dyStringVaPrintf(struct dyString *ds, char *format, va_list args); 57 | /* VarArgs Printf to end of dyString. */ 58 | 59 | void dyStringPrintf(struct dyString *ds, char *format, ...) 60 | /* Printf to end of dyString. */ 61 | #ifdef __GNUC__ 62 | __attribute__((format(printf, 2, 3))) 63 | #endif 64 | ; 65 | 66 | struct dyString *dyStringCreate(char *format, ...); 67 | /* Create a dyString with a printf style initial content */ 68 | 69 | #define dyStringClear(ds) (ds->string[0] = ds->stringSize = 0) 70 | /* Clear string. */ 71 | 72 | struct dyString * dyStringSub(char *orig, char *in, char *out); 73 | /* Make up a duplicate of orig with all occurences of in substituted 74 | * with out. */ 75 | 76 | void dyStringBumpBufSize(struct dyString *ds, int size); 77 | /* Force dyString buffer to be at least given size. */ 78 | 79 | char *dyStringCannibalize(struct dyString **pDy); 80 | /* Kill dyString, but return the string it is wrapping 81 | * (formerly dy->string). 
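 * A typical build-and-detach pattern (illustrative, using only the
 * routines declared above):
 *   struct dyString *dy = dyStringNew(0);
 *   dyStringPrintf(dy, "%s:%d-%d", "chr1", 100, 200);
 *   char *region = dyStringCannibalize(&dy);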
This should be free'd at your 82 | * convenience. */ 83 | 84 | #define dyStringContents(ds) (ds)->string 85 | /* return raw string. */ 86 | 87 | #define dyStringLen(ds) ds->stringSize 88 | /* return raw string length. */ 89 | 90 | void dyStringResize(struct dyString *ds, int newSize); 91 | /* resize a string, if the string expands, blanks are appended */ 92 | 93 | void dyStringQuoteString(struct dyString *dy, char quotChar, char *text); 94 | /* Append quotChar-quoted text (with any internal occurrences of quotChar 95 | * \-escaped) onto end of dy. */ 96 | 97 | #endif /* DYSTRING_H */ 98 | 99 | -------------------------------------------------------------------------------- /lisa/regpotential/errAbort.h: -------------------------------------------------------------------------------- 1 | /* ErrAbort.h - our error handler. 2 | * 3 | * This maintains two stacks - a warning message printer 4 | * stack, and a "abort handler" stack. 5 | * 6 | * By default the warnings will go to stderr, and 7 | * aborts will exit the program. You can push a 8 | * function on to the appropriate stack to change 9 | * this behavior. The top function on the stack 10 | * gets called. 11 | * 12 | * Most functions in this library will call errAbort() 13 | * if they run out of memory. 14 | * 15 | * This file is copyright 2002 Jim Kent, but license is hereby 16 | * granted for all use - public, private or commercial. */ 17 | 18 | #ifndef ERRABORT_H 19 | #define ERRABORT_H 20 | 21 | boolean isErrAbortInProgress(); 22 | /* Flag to indicate that an error abort is in progress. 23 | * Needed so that a warn handler can tell if it's really 24 | * being called because of a warning or an error. */ 25 | 26 | void errAbort(char *format, ...) 27 | /* Abort function, with optional (printf formatted) error message. */ 28 | #if defined(__GNUC__) 29 | __attribute__((format(printf, 1, 2))) 30 | #endif 31 | ; 32 | 33 | void vaErrAbort(char *format, va_list args); 34 | /* Abort function, with optional (vprintf formatted) error message. */ 35 | 36 | void errnoAbort(char *format, ...) 37 | /* Prints error message from UNIX errno first, then does errAbort. */ 38 | #if defined(__GNUC__) 39 | __attribute__((format(printf, 1, 2))) 40 | #endif 41 | ; 42 | 43 | typedef void (*AbortHandler)(); 44 | /* Function that can abort. */ 45 | 46 | void pushAbortHandler(AbortHandler handler); 47 | /* Set abort handler */ 48 | 49 | void popAbortHandler(); 50 | /* Revert to old abort handler. */ 51 | 52 | void noWarnAbort(); 53 | /* Abort without message. */ 54 | 55 | void pushDebugAbort(); 56 | /* Push abort handler that will invoke debugger. */ 57 | 58 | void vaWarn(char *format, va_list args); 59 | /* Call top of warning stack to issue warning. */ 60 | 61 | void warn(char *format, ...) 62 | /* Issue a warning message. */ 63 | #if defined(__GNUC__) 64 | __attribute__((format(printf, 1, 2))) 65 | #endif 66 | ; 67 | 68 | void errnoWarn(char *format, ...) 69 | /* Prints error message from UNIX errno first, then does rest of warning. */ 70 | #if defined(__GNUC__) 71 | __attribute__((format(printf, 1, 2))) 72 | #endif 73 | ; 74 | 75 | typedef void (*WarnHandler)(char *format, va_list args); 76 | /* Function that can warn. */ 77 | 78 | void pushWarnHandler(WarnHandler handler); 79 | /* Set warning handler */ 80 | 81 | void popWarnHandler(); 82 | /* Revert to old warn handler. */ 83 | 84 | void pushWarnAbort(); 85 | /* Push handler that will abort on warnings. */ 86 | 87 | void pushSilentWarnHandler(); 88 | /* Set warning handler to be quiet. 
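 * Illustrative use around a block whose warnings should be suppressed:
 *   pushSilentWarnHandler();
 *   someNoisyRoutine();      -- placeholder for any call that may warn()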
Do a popWarnHandler to restore. */ 89 | 90 | void errAbortDebugnPushPopErr(); 91 | /* generate stack dump if there is a error in the push/pop functions */ 92 | 93 | #endif /* ERRABORT_H */ 94 | -------------------------------------------------------------------------------- /lisa/regpotential/gfxPoly.c: -------------------------------------------------------------------------------- 1 | /* gfxPoly - two dimensional polygon. */ 2 | 3 | #include "common.h" 4 | #include "gfxPoly.h" 5 | 6 | 7 | struct gfxPoly *gfxPolyNew() 8 | /* Create new (empty) polygon */ 9 | { 10 | struct gfxPoly *poly; 11 | AllocVar(poly); 12 | return poly; 13 | } 14 | 15 | void gfxPolyFree(struct gfxPoly **pPoly) 16 | /* Free up resources associated with polygon */ 17 | { 18 | struct gfxPoly *poly = *pPoly; 19 | if (poly != NULL) 20 | { 21 | if (poly->lastPoint != NULL) 22 | { 23 | poly->lastPoint->next = NULL; 24 | slFreeList(&poly->ptList); 25 | } 26 | freez(pPoly); 27 | } 28 | } 29 | 30 | void gfxPolyAddPoint(struct gfxPoly *poly, int x, int y) 31 | /* Add point to polygon. */ 32 | { 33 | struct gfxPoint *pt; 34 | poly->ptCount += 1; 35 | AllocVar(pt); 36 | pt->x = x; 37 | pt->y = y; 38 | if (poly->ptList == NULL) 39 | { 40 | poly->ptList = poly->lastPoint = pt; 41 | pt->next = pt; 42 | } 43 | else 44 | { 45 | poly->lastPoint->next = pt; 46 | pt->next = poly->ptList; 47 | poly->lastPoint = pt; 48 | } 49 | } 50 | 51 | -------------------------------------------------------------------------------- /lisa/regpotential/gfxPoly.h: -------------------------------------------------------------------------------- 1 | /* gfxPoly - two dimensional polygon. */ 2 | 3 | #ifndef GFXPOLY_H 4 | #define GFXPOLY_H 5 | 6 | struct gfxPoint 7 | /* A two-dimensional point, typically in pixel coordinates. */ 8 | { 9 | struct gfxPoint *next; 10 | int x, y; /* Position */ 11 | }; 12 | 13 | struct gfxPoly 14 | /* A two-dimensional polygon */ 15 | { 16 | struct gfxPoly *next; 17 | int ptCount; /* Number of points. */ 18 | struct gfxPoint *ptList; /* First point in list, which is circular. */ 19 | struct gfxPoint *lastPoint; /* Last point in list. */ 20 | }; 21 | 22 | struct gfxPoly *gfxPolyNew(); 23 | /* Create new (empty) polygon */ 24 | 25 | void gfxPolyFree(struct gfxPoly **pPoly); 26 | /* Free up resources associated with polygon */ 27 | 28 | void gfxPolyAddPoint(struct gfxPoly *poly, int x, int y); 29 | /* Add point to polygon. */ 30 | 31 | #endif /* GFXPOLY_H */ 32 | -------------------------------------------------------------------------------- /lisa/regpotential/hex.c: -------------------------------------------------------------------------------- 1 | /* Handy hexidecimal functions 2 | * If you don't want to use printf 3 | */ 4 | 5 | /* Copyright (C) 2013 The Regents of the University of California 6 | * See README in this or parent directory for licensing information. */ 7 | 8 | #include "common.h" 9 | 10 | char hexTab[16] = {'0', '1', '2', '3', '4', '5', '6', '7', 11 | '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', }; 12 | /* Convert 0-15 to a hex char */ 13 | 14 | 15 | char nibbleToHex(unsigned char n) 16 | /* convert nibble to hexidecimal character. 0 <= n <= 15. */ 17 | { 18 | return hexTab[n]; 19 | } 20 | 21 | void byteToHex(unsigned char n, char *hex) 22 | /* convert byte to hexidecimal characters. 0 <= n <= 255. */ 23 | { 24 | *hex++ = hexTab[n >> 4]; 25 | *hex++ = hexTab[n & 0xf]; 26 | } 27 | 28 | char *byteToHexString(unsigned char n) 29 | /* convert byte to hexidecimal string. 0 <= n <= 255. 
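 * For example, byteToHexString(255) returns the two-character string "ff";
 * the result is a freshly cloneString'd copy owned by the caller.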
*/ 30 | { 31 | char hex[3]; 32 | byteToHex(n, hex); 33 | hex[2] = 0; 34 | return cloneString(hex); 35 | } 36 | 37 | /* And the reverse functions: */ 38 | 39 | char hexToNibble(char n) 40 | /* convert hexidecimal character to nibble. 0-9a-f. */ 41 | { 42 | return n - ( n <= '9' ? '0' : ('a'-10) ); 43 | } 44 | 45 | 46 | unsigned char hexToByte(char *hex) 47 | /* convert byte to hexidecimal characters. 0 <= n <= 255. */ 48 | { 49 | unsigned char n = hexToNibble(*hex++); 50 | n <<= 4; 51 | n += hexToNibble(*hex++); 52 | return n; 53 | } 54 | 55 | 56 | void hexBinaryString(unsigned char *in, int inSize, char *out, int outSize) 57 | /* Convert possibly long binary string to hex string. 58 | * Out size needs to be at least 2x inSize+1 */ 59 | { 60 | assert(inSize * 2 +1 <= outSize); 61 | while (--inSize >= 0) 62 | { 63 | unsigned char c = *in++; 64 | *out++ = hexTab[c>>4]; 65 | *out++ = hexTab[c&0xf]; 66 | } 67 | *out = 0; 68 | } 69 | 70 | -------------------------------------------------------------------------------- /lisa/regpotential/hex.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef HEX_H 3 | #define HEX_H 4 | 5 | char nibbleToHex(char n); 6 | /* convert nibble to hexidecimal character. 0 <= n <= 15. */ 7 | 8 | void byteToHex(unsigned char n, char *hex); 9 | /* convert byte to two hexidecimal characters. 0 <= n <= 255. */ 10 | 11 | char *byteToHexString(unsigned char n); 12 | /* convert byte to hexidecimal string. 0 <= n <= 255. */ 13 | 14 | void hexBinaryString(unsigned char *in, int inSize, char *out, int outSize); 15 | /* Convert possibly long binary string to hex string. 16 | * Out size needs to be at least 2x inSize+1 */ 17 | 18 | /* Reverse Functions */ 19 | 20 | char hexToNibble(char n); 21 | /* convert hexidecimal character to nibble. 0-9a-f. */ 22 | 23 | unsigned char hexToByte(char *hex); 24 | /* convert byte to hexidecimal characters. 0 <= n <= 255. */ 25 | 26 | #endif /* HEX_H */ 27 | 28 | -------------------------------------------------------------------------------- /lisa/regpotential/hmmstats.c: -------------------------------------------------------------------------------- 1 | /* hmmstats.c - Stuff for doing statistical analysis in general and 2 | * hidden Markov models in particular. 3 | * 4 | * This file is copyright 2002 Jim Kent, but license is hereby 5 | * granted for all use - public, private or commercial. */ 6 | 7 | #include "common.h" 8 | #include "hmmstats.h" 9 | 10 | 11 | int scaledLog(double val) 12 | /* Return scaled log of val. */ 13 | { 14 | return round(logScaleFactor * log(val)); 15 | } 16 | 17 | double oneOverSqrtTwoPi = 0.39894228; 18 | 19 | double simpleGaussean(double x) 20 | /* Gaussean distribution with standard deviation 1 and mean 0. */ 21 | { 22 | return oneOverSqrtTwoPi * exp(-0.5*x*x ); 23 | } 24 | 25 | double gaussean(double x, double mean, double sd) 26 | /* Gaussean distribution with mean and standard deviation at point x */ 27 | { 28 | x -= mean; 29 | x /= sd; 30 | return oneOverSqrtTwoPi * exp(-0.5*x*x) / sd; 31 | } 32 | 33 | double calcVarianceFromSums(double sum, double sumSquares, bits64 n) 34 | /* Calculate variance. */ 35 | { 36 | double var = sumSquares - sum*sum/n; 37 | if (n > 1) 38 | var /= n-1; 39 | return var; 40 | } 41 | 42 | double calcStdFromSums(double sum, double sumSquares, bits64 n) 43 | /* Calculate standard deviation. 
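 * Computed as the square root of calcVarianceFromSums(), i.e. the sample
 * (n-1) formula: var = (sumSquares - sum*sum/n) / (n-1) for n > 1.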
*/ 44 | { 45 | return sqrt(calcVarianceFromSums(sum, sumSquares, n)); 46 | } 47 | 48 | 49 | -------------------------------------------------------------------------------- /lisa/regpotential/hmmstats.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * Copyright (C) 2000 Jim Kent. This source code may be freely used * 3 | * for personal, academic, and non-profit purposes. Commercial use * 4 | * permitted only by explicit agreement with Jim Kent (jim_kent@pacbell.net) * 5 | *****************************************************************************/ 6 | /* hmmstats.h - Stuff for doing statistical analysis in general and 7 | * hidden Markov models in particular. */ 8 | #ifndef HMMSTATS_H 9 | #define HMMSTATS_H 10 | 11 | int scaledLog(double val); 12 | /* Return scaled log of val. */ 13 | 14 | #define logScaleFactor 1000 15 | /* Amount we scale logs by. */ 16 | 17 | double simpleGaussean(double x); 18 | /* Gaussean distribution with standard deviation 1 and mean 0. */ 19 | 20 | double gaussean(double x, double mean, double sd); 21 | /* Gaussean distribution with mean and standard deviation at point x */ 22 | 23 | double calcVarianceFromSums(double sum, double sumSquares, bits64 n); 24 | /* Calculate variance. */ 25 | 26 | double calcStdFromSums(double sum, double sumSquares, bits64 n); 27 | /* Calculate standard deviation. */ 28 | 29 | #endif /* HMMSTATS_H */ 30 | 31 | -------------------------------------------------------------------------------- /lisa/regpotential/https.h: -------------------------------------------------------------------------------- 1 | /* Connect via https. */ 2 | 3 | #ifndef NET_HTTPS_H 4 | #define NET_HTTPS_H 5 | 6 | int netConnectHttps(char *hostName, int port); 7 | /* Return socket for https connection with server or -1 if error. */ 8 | 9 | #endif//ndef NET_HTTPS_H 10 | -------------------------------------------------------------------------------- /lisa/regpotential/intExp.c: -------------------------------------------------------------------------------- 1 | /* Below is the worlds sleaziest little numerical expression 2 | * evaluator. Used to do only ints, now does doubles as well. 3 | * 4 | * This file is copyright 2002 Jim Kent, but license is hereby 5 | * granted for all use - public, private or commercial. */ 6 | 7 | #include "common.h" 8 | #include "kxTok.h" 9 | 10 | 11 | static struct kxTok *tok; 12 | 13 | #define nextTok() (tok = tok->next) 14 | 15 | #ifdef DEBUG 16 | static void nextTok() 17 | /* Advance to next token. */ 18 | { 19 | if (tok == NULL) 20 | printf("(null)"); 21 | else 22 | { 23 | printf("'%s' -> ", tok->string); 24 | if (tok->next == NULL) 25 | printf("(null)\n"); 26 | else 27 | printf("'%s'\n", tok->next->string); 28 | } 29 | tok = tok->next; 30 | } 31 | #endif /* DEBUG */ 32 | 33 | 34 | static double expression(); 35 | /* Forward declaration of main expression handler. */ 36 | 37 | static double number() 38 | /* Return number. */ 39 | { 40 | double val; 41 | if (tok == NULL) 42 | errAbort("Parse error in numerical expression"); 43 | if (!isdigit(tok->string[0])) 44 | errAbort("Expecting number, got %s", tok->string); 45 | val = atof(tok->string); 46 | nextTok(); 47 | return val; 48 | } 49 | 50 | static double atom() 51 | /* Return parenthetical expression or number. 
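 * The evaluator is a small recursive-descent parser; precedence from
 * highest to lowest is atom (numbers, parenthesized expressions), unary
 * minus, mulDiv, then addSub, so for example doubleExp("2*(3+4)")
 * evaluates to 14.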
*/ 52 | { 53 | double val; 54 | if (tok->type == kxtOpenParen) 55 | { 56 | nextTok(); 57 | val = expression(); 58 | if (tok->type == kxtCloseParen) 59 | { 60 | nextTok(); 61 | return val; 62 | } 63 | else 64 | { 65 | errAbort("Unmatched parenthesis"); 66 | return 0; 67 | } 68 | } 69 | else 70 | return number(); 71 | } 72 | 73 | 74 | static double uMinus() 75 | /* Unary minus. */ 76 | { 77 | double val; 78 | if (tok->type == kxtSub) 79 | { 80 | nextTok(); 81 | val = -atom(); 82 | return val; 83 | } 84 | else 85 | return atom(); 86 | } 87 | 88 | static double mulDiv() 89 | /* Multiplication or division. */ 90 | { 91 | double val = uMinus(); 92 | for (;;) 93 | { 94 | if (tok->type == kxtMul) 95 | { 96 | nextTok(); 97 | val *= uMinus(); 98 | } 99 | else if (tok->type == kxtDiv) 100 | { 101 | nextTok(); 102 | val /= uMinus(); 103 | } 104 | else 105 | break; 106 | } 107 | return val; 108 | } 109 | 110 | static double addSub() 111 | /* Addition or subtraction. */ 112 | { 113 | double val; 114 | val = mulDiv(); 115 | for (;;) 116 | { 117 | if (tok->type == kxtAdd) 118 | { 119 | nextTok(); 120 | val += mulDiv(); 121 | } 122 | else if (tok->type == kxtSub) 123 | { 124 | nextTok(); 125 | val -= mulDiv(); 126 | } 127 | else 128 | break; 129 | } 130 | return val; 131 | } 132 | 133 | static double expression() 134 | /* Wraps around lowest level of expression. */ 135 | { 136 | return addSub(); 137 | } 138 | 139 | double doubleExp(char *text) 140 | /* Convert text to double expression and evaluate. */ 141 | { 142 | double val; 143 | struct kxTok *tokList = tok = kxTokenize(text, FALSE); 144 | val = expression(); 145 | slFreeList(&tokList); 146 | return val; 147 | } 148 | 149 | int intExp(char *text) 150 | /* Convert text to int expression and evaluate. */ 151 | { 152 | return round(doubleExp(text)); 153 | } 154 | -------------------------------------------------------------------------------- /lisa/regpotential/internet.h: -------------------------------------------------------------------------------- 1 | /* internet - some stuff for routines that use the internet 2 | * and aren't afraid to include some internet specific structures 3 | * and the like. See also net for stuff that is higher level. */ 4 | 5 | #ifndef INTERNET_H 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | bits32 internetHostIp(char *hostName); 12 | /* Get IP v4 address (in host byte order) for hostName. 13 | * Warn and return 0 if there's a problem. */ 14 | 15 | boolean internetFillInAddress(char *hostName, int port, 16 | struct sockaddr_in *address); 17 | /* Fill in address. Warn and return FALSE if can't. */ 18 | 19 | boolean internetIpToDottedQuad(bits32 ip, char dottedQuad[17]); 20 | /* Convert IP4 address in host byte order to dotted quad 21 | * notation. Warn and return FALSE if there's a 22 | * problem. */ 23 | 24 | boolean internetDottedQuadToIp(char *dottedQuad, bits32 *retIp); 25 | /* Convert dotted quad format address to IP4 address in 26 | * host byte order. Warn and return FALSE if there's a 27 | * problem. */ 28 | 29 | boolean internetIsDottedQuad(char *s); 30 | /* Returns TRUE if it looks like s is a dotted quad. */ 31 | 32 | void internetParseDottedQuad(char *dottedQuad, unsigned char quad[4]); 33 | /* Parse dotted quads into quad */ 34 | 35 | void internetUnpackIp(bits32 packed, unsigned char unpacked[4]); 36 | /* Convert from 32 bit to 4-byte format with most significant 37 | * byte first. 
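 * For example, the packed value 0x01020304 unpacks to {1, 2, 3, 4}
 * (most significant byte in unpacked[0]).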
*/ 38 | 39 | boolean internetIpInSubnet(unsigned char unpackedIp[4], 40 | unsigned char subnet[4]); 41 | /* Return true if unpacked IP address is in subnet. */ 42 | 43 | #endif /* INTERNET_H */ 44 | -------------------------------------------------------------------------------- /lisa/regpotential/kxTok.h: -------------------------------------------------------------------------------- 1 | /* kxTok - quick little tokenizer for stuff first 2 | * loaded into memory. Originally developed for 3 | * "Key eXpression" evaluator. 4 | * 5 | * This file is copyright 2002 Jim Kent, but license is hereby 6 | * granted for all use - public, private or commercial. */ 7 | 8 | #ifndef KXTOK_H 9 | #define KXTOK_H 10 | 11 | enum kxTokType 12 | { 13 | kxtEnd, 14 | kxtString, 15 | kxtWildString, 16 | kxtEquals, 17 | kxtGT, /* Greater Than */ 18 | kxtGE, /* Greater Than or Equal */ 19 | kxtLT, /* Less Than */ 20 | kxtLE, /* Less Than or Equal */ 21 | kxtAnd, 22 | kxtOr, 23 | kxtXor, 24 | kxtNot, 25 | kxtOpenParen, 26 | kxtCloseParen, 27 | kxtAdd, 28 | kxtSub, 29 | kxtDiv, 30 | kxtMul, 31 | kxtDot, 32 | kxtMod, 33 | kxtPunct, 34 | }; 35 | 36 | struct kxTok 37 | /* A key expression token. Input text is tokenized 38 | * into a list of these. */ 39 | { 40 | struct kxTok *next; 41 | enum kxTokType type; 42 | bool spaceBefore; /* True if there is a space before */ 43 | char string[1]; /* Allocated at run time */ 44 | }; 45 | 46 | struct kxTok *kxTokenize(char *text, boolean wildAst); 47 | /* Convert text to stream of tokens. If 'wildAst' is 48 | * TRUE then '*' character will be treated as wildcard 49 | * rather than multiplication sign. */ 50 | 51 | struct kxTok *kxTokenizeFancy(char *text, boolean wildAst, 52 | boolean wildPercent, boolean includeHyphen); 53 | /* Convert text to stream of tokens. If 'wildAst' is 54 | * TRUE then '*' character will be treated as wildcard 55 | * rather than multiplication sign. 56 | * If wildPercent is TRUE then the '%' character will be treated as a 57 | * wildcard (as in SQL) rather than a modulo (kxtMod) or percent sign. 58 | * If includeHyphen is TRUE then a '-' character in the middle of a String 59 | * token will be treated as a hyphen (part of the String token) instead of 60 | * a new kxtSub token. */ 61 | 62 | void kxTokIncludeQuotes(boolean val); 63 | /* Pass in TRUE if kxTok should include quote characters in string tokens. */ 64 | 65 | #endif /* KXTOK_K */ 66 | -------------------------------------------------------------------------------- /lisa/regpotential/localmem.h: -------------------------------------------------------------------------------- 1 | /* LocalMem.h - local memory routines. 2 | * 3 | * These routines are meant for the sort of scenario where 4 | * a lot of little to medium size pieces of memory are 5 | * allocated, and then disposed of all at once. 6 | * 7 | * This file is copyright 2002 Jim Kent, but license is hereby 8 | * granted for all use - public, private or commercial. */ 9 | 10 | #ifndef LOCALMEM_H 11 | #define LOCALMEM_H 12 | 13 | struct lm *lmInit(int blockSize); 14 | /* Create a local memory pool. Parameters are: 15 | * blockSize - how much system memory to allocate at a time. Can 16 | * pass in zero and a reasonable default will be used. 17 | */ 18 | 19 | void lmCleanup(struct lm **pLm); 20 | /* Clean up a local memory pool. 
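 * Frees everything allocated from the pool in one call (the usage model
 * described at the top of this file); pointers obtained from lmAlloc()
 * must not be freed individually.  Typical lifecycle (illustrative):
 *   struct lm *lm = lmInit(0);
 *   char *word = lmCloneString(lm, "chr1");
 *   ...                       -- allocate freely from the pool
 *   lmCleanup(&lm);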
*/ 21 | 22 | size_t lmAvailable(struct lm *lm); 23 | // Returns currently available memory in pool 24 | 25 | size_t lmSize(struct lm *lm); 26 | // Returns current size of pool, even for memory already allocated 27 | 28 | void *lmAlloc(struct lm *lm, size_t size); 29 | /* Allocate memory from local pool. */ 30 | 31 | void *lmAllocMoreMem(struct lm *lm, void *pt, size_t oldSize, size_t newSize); 32 | /* Adjust memory size on a block, possibly relocating it. If block is grown, 33 | * new memory is zeroed. NOTE: in RARE cases, same pointer may be returned. */ 34 | 35 | void *lmCloneMem(struct lm *lm, void *pt, size_t size); 36 | /* Return a local mem copy of memory block. */ 37 | 38 | 39 | char *lmCloneStringZ(struct lm *lm, char *string, int size); 40 | /* Return local mem copy of string of given size, adding null terminator. */ 41 | 42 | char *lmCloneString(struct lm *lm, char *string); 43 | /* Return local mem copy of string. */ 44 | 45 | char *lmCloneFirstWord(struct lm *lm, char *line); 46 | /* Clone first word in line */ 47 | 48 | char *lmCloneSomeWord(struct lm *lm, char *line, int wordIx); 49 | /* Return a clone of the given space-delimited word within line. Returns NULL if 50 | * not that many words in line. */ 51 | 52 | struct slName *lmSlName(struct lm *lm, char *name); 53 | /* Return slName in memory. */ 54 | 55 | #define lmAllocVar(lm, pt) (pt = lmAlloc(lm, sizeof(*pt))); 56 | /* Shortcut to allocating a single variable in local mem and 57 | * assigning pointer to it. */ 58 | 59 | #define lmCloneVar(lm, pt) lmCloneMem(lm, pt, sizeof((pt)[0])) 60 | /* Allocate copy of a structure. */ 61 | 62 | #define lmAllocArray(lm, pt, size) (pt = lmAlloc(lm, sizeof(*pt) * (size))) 63 | /* Shortcut to allocating an array in local mem and 64 | * assigning pointer to it. 
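 * For example, given double *vals, lmAllocArray(lm, vals, 100) reserves
 * room for 100 doubles from the pool and points vals at it.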
*/ 65 | 66 | #endif//ndef LOCALMEM_H 67 | -------------------------------------------------------------------------------- /lisa/regpotential/makefile: -------------------------------------------------------------------------------- 1 | O=bPlusTree.o bbiRead.o bbiWrite.o bits.o cirTree.o common.o dystring.o hash.o localmem.o udc.o portimpl.o memalloc.o dlist.o linefile.o pipeline.o verbose.o options.o net.o internet.o https.o base64.o errAbort.o osunix.o hmmstats.o cheapcgi.o mime.o obscure.o hex.o wildcmp.o intExp.o kxTok.o servBrcMcw.o servcl.o servCrunx.o servcis.o servmsII.o servpws.o zlibFace.o sqlNum.o bwgQuery.o 2 | 3 | 4 | CC=gcc 5 | ifeq (${MACHTYPE},) 6 | MACHTYPE:=$(shell uname -m) 7 | #MACHTYPE:=$(shell arch) 8 | # $(info MACHTYPE was empty, set to: ${MACHTYPE}) 9 | endif 10 | ifneq (,$(findstring -,$(MACHTYPE))) 11 | # $(info MACHTYPE has - sign ${MACHTYPE}) 12 | MACHTYPE:=$(shell uname -m) 13 | # $(info MACHTYPE has - sign set to: ${MACHTYPE}) 14 | endif 15 | 16 | HG_DEFS=-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_GNU_SOURCE -DMACHTYPE_${MACHTYPE} 17 | #HG_DEFS=-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_GNU_SOURCE 18 | HG_INC=-I../inc -I../../inc -I../../../inc -I../../../../inc -I../../../../../inc 19 | 20 | # to check for Mac OSX Darwin specifics: 21 | UNAME_S := $(shell uname -s) 22 | # to check for builds on hgwdev 23 | FULLWARN = $(shell uname -n) 24 | 25 | # autodetect if openssl is installed 26 | ifeq (${SSLDIR},) 27 | SSLDIR = /usr/include/openssl 28 | endif 29 | ifeq (${USE_SSL},) 30 | ifneq ($(wildcard ${SSLDIR}),) 31 | USE_SSL=1 32 | endif 33 | endif 34 | 35 | 36 | # libssl: disabled by default 37 | ifeq (${USE_SSL},1) 38 | ifneq (${SSL_DIR}, "/usr/include/openssl") 39 | ifneq ($(UNAME_S),Darwin) 40 | L+=-L${SSL_DIR}/lib 41 | endif 42 | HG_INC+=-I${SSL_DIR}/include 43 | endif 44 | # on hgwdev, already using the static library with mysqllient. 
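# (on hgwdev the static libssl.a/libcrypto.a archives plus -lkrb5 are linked;
# elsewhere the shared -lssl -lcrypto libraries are used)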
45 | ifeq (${FULLWARN},hgwdev) 46 | L+=/usr/lib64/libssl.a /usr/lib64/libcrypto.a -lkrb5 47 | else 48 | L+=-lssl -lcrypto 49 | endif 50 | HG_DEFS+=-DUSE_SSL 51 | endif 52 | 53 | # pass through COREDUMP 54 | ifneq (${COREDUMP},) 55 | HG_DEFS+=-DCOREDUMP 56 | endif 57 | 58 | 59 | SYS = $(shell uname -s) 60 | #SYS = $(shell arch) 61 | 62 | ifeq (${HG_WARN},) 63 | ifeq (${SYS},Darwin) 64 | HG_WARN = -Wall -Wno-unused-variable -Wno-deprecated-declarations 65 | HG_WARN_UNINIT= 66 | else 67 | ifeq (${SYS},SunOS) 68 | HG_WARN = -Wall -Wformat -Wimplicit -Wreturn-type 69 | HG_WARN_UNINIT=-Wuninitialized 70 | else 71 | ifeq (${FULLWARN},hgwdev) 72 | HG_WARN = -Wall -Werror -Wformat -Wformat-security -Wimplicit -Wreturn-type -Wempty-body 73 | HG_WARN_UNINIT=-Wuninitialized 74 | else 75 | HG_WARN = -Wall -Wformat -Wimplicit -Wreturn-type 76 | HG_WARN_UNINIT=-Wuninitialized 77 | endif 78 | endif 79 | endif 80 | # -Wuninitialized generates a warning without optimization 81 | ifeq ($(findstring -O,${COPT}),-O) 82 | HG_WARN += ${HG_WARN_UNINIT} 83 | endif 84 | endif 85 | 86 | # this is to hack around many make files not including HG_WARN in 87 | # the link line 88 | CFLAGS += ${HG_WARN} 89 | 90 | ifeq (${SCRIPTS},) 91 | SCRIPTS=${HOME}/bin/scripts 92 | endif 93 | 94 | # avoid an extra leading slash when DESTDIR is empty 95 | ifeq (${DESTDIR},) 96 | DESTBINDIR=${BINDIR} 97 | else 98 | DESTBINDIR=${DESTDIR}/${BINDIR} 99 | endif 100 | 101 | 102 | MKDIR=mkdir -p 103 | ifeq (${STRIP},) 104 | STRIP=true 105 | endif 106 | CVS=cvs 107 | GIT=git 108 | 109 | # portable naming of compiled executables: add ".exe" if compiled on 110 | # Windows (with cygwin). 111 | ifeq (${OS}, Windows_NT) 112 | AOUT=a 113 | EXE=.exe 114 | else 115 | AOUT=a.out 116 | EXE= 117 | endif 118 | 119 | 120 | %.o: %.c 121 | ${CC} ${COPT} ${CFLAGS} ${HG_DEFS} ${LOWELAB_DEFS} ${HG_WARN} ${HG_INC} ${XINC} -o $@ -c $< 122 | 123 | #$(MACHTYPE)/libjkweb.so: $(O) 124 | libjkweb.so: $(O) 125 | $(CC) $(O) -dynamiclib -o libjkweb.so -lm -lssl -lz -lcrypto 126 | 127 | # jshint: off unless JSHINT is already in environment 128 | ifeq (${JSHINT},) 129 | JSHINT=true 130 | endif 131 | 132 | -------------------------------------------------------------------------------- /lisa/regpotential/memalloc.h: -------------------------------------------------------------------------------- 1 | /* Let the user redirect where memory allocation/deallocation 2 | * happens. 'careful' routines help debug scrambled heaps. 3 | * 4 | * This file is copyright 2002 Jim Kent, but license is hereby 5 | * granted for all use - public, private or commercial. */ 6 | 7 | #ifndef MEMALLOC_H 8 | #define MEMALLOC_H 9 | 10 | struct memHandler 11 | { 12 | struct memHandler *next; 13 | void * (*alloc)(size_t size); 14 | void (*free)(void *vpt); 15 | void * (*realloc)(void* vpt, size_t size); 16 | }; 17 | 18 | struct memHandler *pushMemHandler(struct memHandler *newHandler); 19 | /* Use newHandler for memory requests until matching popMemHandler. 20 | * Returns previous top of memory handler stack. */ 21 | 22 | struct memHandler *popMemHandler(); 23 | /* Removes top element from memHandler stack and returns it. */ 24 | 25 | void setDefaultMemHandler(); 26 | /* Sets memHandler to the default. */ 27 | 28 | void pushCarefulMemHandler(size_t maxAlloc); 29 | /* Push the careful (paranoid, conservative, checks everything) 30 | * memory handler top of the memHandler stack and use it. 
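 * Intended for tracking down scrambled-heap bugs: while it is active the
 * careful* routines below can audit allocations, and maxAlloc sets the
 * allocation limit it enforces (see setMaxAlloc below).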
*/ 31 | 32 | void carefulCheckHeap(); 33 | /* Walk through allocated memory and make sure that all cookies are 34 | * in place. Only walks through what's been done since 35 | * pushCarefulMemHandler(). */ 36 | 37 | int carefulCountBlocksAllocated(); 38 | /* How many memory items are allocated? (Since called 39 | * pushCarefulMemHandler(). */ 40 | 41 | size_t carefulTotalAllocated(); 42 | /* Return total bases allocated */ 43 | 44 | void setMaxAlloc(size_t s); 45 | /* Set large allocation limit. */ 46 | 47 | void memTrackerStart(); 48 | /* Push memory handler that will track blocks allocated so that 49 | * they can be automatically released with memTrackerEnd(). */ 50 | 51 | void memTrackerEnd(); 52 | /* Free any remaining blocks and pop tracker memory handler. */ 53 | 54 | #endif /* MEMALLOC_H */ 55 | 56 | -------------------------------------------------------------------------------- /lisa/regpotential/mime.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * This file is copyright 2005 Jim Kent, but license is hereby 3 | * granted for all use - public, private or commercial. 4 | *****************************************************************************/ 5 | /* mime.h - parses MIME messages, especially from a cgi from a multipart web form */ 6 | 7 | #ifndef HASH_H 8 | #include "hash.h" 9 | #endif 10 | 11 | #define MIMEBUFSIZE 32*1024 /* size of buffer for mime input */ 12 | 13 | struct mimePart 14 | /* structure for an element of a MIME (multipart) message */ 15 | { 16 | struct mimePart *next; /* next (sibling) if is part of multipart */ 17 | struct hash *hdr; /* hash of part headers */ 18 | off_t size; /* determines if local mem or saved to tempfile */ 19 | /* only one of the next 3 pointers will be non-null, and that is the type */ 20 | char* data; /* if size< MAXPARTSIZE and does not contain null */ 21 | char* fileName; /* if size>=MAXPARTSIZE or data contains null */ 22 | boolean binary; /* if contains 0 chars, cannot store as a c-string */ 23 | struct mimePart *multi;/* points to head of child list if itself contains multiparts */ 24 | }; 25 | 26 | struct mimeBuf 27 | /* structure for buffering a MIME message during parsing */ 28 | { 29 | int d; /* descriptor (file,socket,etc) */ 30 | char buf[MIMEBUFSIZE]; /* actual buffer */ 31 | char *i; /* index into buffer, current location */ 32 | char *eop; /* end of part or -1 */ 33 | char *boundary; /* boundary pattern for marking end of mime part */ 34 | int blen; /* boundary pattern length (strlen) */ 35 | char *eod; /* end of data = eoi-(blen-1) */ 36 | char *eoi; /* end of input or -1 */ 37 | char *eom; /* end of memory just buf+MIMEBUFSIZE */ 38 | }; 39 | 40 | char *getMimeHeaderMainVal(char *header); 41 | /* Parse a typical mime header line returning the first 42 | * main value up to whitespace, punctuation, or end. 43 | * freeMem the returned string when done */ 44 | 45 | char *getMimeHeaderFieldVal(char *header, char *field); 46 | /* Parse a typical mime header line looking for field= 47 | * and return the value which may be quoted. 48 | * freeMem the returned string when done */ 49 | 50 | struct mimeBuf * initMimeBuf(int d); 51 | /* d is a descriptor for a file or socket or some other descriptor 52 | that the MIME input can be read from. 53 | Initializes the mimeBuf structure. */ 54 | 55 | struct mimePart *parseMultiParts(struct mimeBuf *b, char *altHeader); 56 | /* This is a recursive function. 
It parses multipart MIME messages. 57 | Data that are binary or too large will be saved in mimePart->filename 58 | otherwise saved as a c-string in mimePart->data. If multipart, 59 | then first child is mimePart->child, subsequent sibs are in child->next. 60 | altHeader is a string of headers that can be fed in if the headers have 61 | already been read off the stream by an earlier process, i.e. apache. 62 | */ 63 | -------------------------------------------------------------------------------- /lisa/regpotential/portimpl.c: -------------------------------------------------------------------------------- 1 | /* Implementation file for some portability stuff mostly aimed 2 | * at making the same code run under different web servers. 3 | * 4 | * This file is copyright 2002 Jim Kent, but license is hereby 5 | * granted for all use - public, private or commercial. */ 6 | 7 | #include "common.h" 8 | #include "htmshell.h" 9 | #include "portable.h" 10 | #include "obscure.h" 11 | #include "portimpl.h" 12 | #include 13 | 14 | 15 | static struct webServerSpecific *wss = NULL; 16 | 17 | static void setupWss() 18 | { 19 | if (wss == NULL) 20 | { 21 | char *s = getenv("SERVER_SOFTWARE"); 22 | wss = &wssDefault; 23 | if (s == NULL) 24 | { 25 | wss = &wssCommandLine; 26 | } 27 | else 28 | { 29 | if (strncmp(wssMicrosoftII.name, s, strlen(wssMicrosoftII.name)) == 0) 30 | wss = &wssMicrosoftII; 31 | else if (strncmp(wssMicrosoftPWS.name, s, strlen(wssMicrosoftPWS.name)) == 0) 32 | wss = &wssMicrosoftPWS; 33 | else 34 | { 35 | char *t = getenv("HTTP_HOST"); 36 | if (t != NULL) 37 | { 38 | if (sameWord(t, "Crunx")) 39 | wss = &wssLinux; 40 | else if (endsWith(t, "brc.mcw.edu")) 41 | wss = &wssBrcMcw; 42 | } 43 | } 44 | } 45 | } 46 | } 47 | 48 | void makeTempName(struct tempName *tn, char *base, char *suffix) 49 | /* Figure out a temp name, and how CGI and HTML will access it. */ 50 | { 51 | setupWss(); 52 | wss->makeTempName(tn,base,suffix); 53 | } 54 | 55 | char *cgiDir() 56 | { 57 | setupWss(); 58 | return wss->cgiDir(); 59 | } 60 | 61 | char *trashDir() 62 | /* Return the relative path to trash directory for CGI binaries */ 63 | { 64 | setupWss(); 65 | return wss->trashDir(); 66 | } 67 | 68 | double machineSpeed() 69 | /* Return relative speed of machine. UCSC CSE dept. 1999 web server is 1.0 */ 70 | { 71 | setupWss(); 72 | return wss->speed(); 73 | } 74 | 75 | void envUpdate(char *name, char *value) 76 | /* Update an environment string */ 77 | { 78 | int size = strlen(name) + strlen(value) + 2; 79 | char *s = needMem(size); 80 | safef(s, size, "%s=%s", name, value); 81 | putenv(s); 82 | } 83 | 84 | void mkdirTrashDirectory(char *prefix) 85 | /* create the specified trash directory if it doesn't exist */ 86 | { 87 | struct stat buf; 88 | char trashDirName[128]; 89 | safef(trashDirName, sizeof(trashDirName), "%s/%s", trashDir(), prefix); 90 | if (stat(trashDirName,&buf)) 91 | { 92 | int result = mkdir (trashDirName, S_IRWXU | S_IRWXG | S_IRWXO); 93 | if (0 != result) 94 | errnoAbort("failed to create directory %s", trashDirName); 95 | } 96 | } 97 | 98 | 99 | void makeDirsOnPath(char *pathName) 100 | /* Create directory specified by pathName. If pathName contains 101 | * slashes, create directory at each level of path if it doesn't 102 | * already exist. Abort with error message if there's a problem. 103 | * (It's not considered a problem for the directory to already 104 | * exist. 
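 * In effect this gives the behavior of a shell 'mkdir -p' for pathName.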
) */ 105 | { 106 | 107 | /* shortcut for paths that already exist */ 108 | if (fileExists(pathName)) 109 | return; 110 | 111 | /* Make local copy of pathName. */ 112 | int len = strlen(pathName); 113 | char pathCopy[len+1]; 114 | strcpy(pathCopy, pathName); 115 | 116 | /* Tolerate double-slashes in path, everyone else does it. */ 117 | 118 | /* Start at root if it's an absolute path name. */ 119 | char *s = pathCopy, *e; 120 | while (*s++ == '/') 121 | /* do nothing */; 122 | 123 | /* Step through it one slash at a time 124 | * making directory if possible, else dying. */ 125 | for (; !isEmpty(s); s = e) 126 | { 127 | /* Find end of this section and terminate string there. */ 128 | e = strchr(s, '/'); 129 | if (e != NULL) 130 | *e = 0; 131 | makeDir(pathCopy); 132 | if (e != NULL) 133 | *e++ = '/'; 134 | } 135 | } 136 | 137 | -------------------------------------------------------------------------------- /lisa/regpotential/portimpl.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * Copyright (C) 2000 Jim Kent. This source code may be freely used * 3 | * for personal, academic, and non-profit purposes. Commercial use * 4 | * permitted only by explicit agreement with Jim Kent (jim_kent@pacbell.net) * 5 | *****************************************************************************/ 6 | /* Implement portable stuff.... */ 7 | 8 | /* There is one of the following structures for each web server 9 | * we support. During run time looking at the environment variable 10 | * SERVER_SOFTWARE we decide which of these to use. */ 11 | struct webServerSpecific 12 | { 13 | char *name; 14 | 15 | /* Make a good name for a temp file. */ 16 | void (*makeTempName)(struct tempName *tn, char *base, char *suffix); 17 | 18 | /* Return directory to look for cgi in. */ 19 | char * (*cgiDir)(); 20 | 21 | #ifdef NEVER 22 | /* Return cgi suffix. */ 23 | char * (*cgiSuffix)(); 24 | #endif /* NEVER */ 25 | 26 | /* Return relative speed of CPU. (UCSC CSE 1999 FTP machine is 1.0) */ 27 | double (*speed)(); 28 | 29 | /* The relative path to trash directory for CGI binaries */ 30 | char * (*trashDir)(); 31 | 32 | }; 33 | 34 | 35 | extern struct webServerSpecific wssMicrosoftII, wssMicrosoftPWS, wssDefault, 36 | wssLinux, wssCommandLine, wssBrcMcw; 37 | 38 | char *rTempName(char *dir, char *base, char *suffix); 39 | /* Make a temp name that's almost certainly unique. */ 40 | -------------------------------------------------------------------------------- /lisa/regpotential/pybw.c: -------------------------------------------------------------------------------- 1 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 2 | #include 3 | #include "pybw.h" 4 | #define RP_DOC "Summarize data from bigwig file as regulatory potential returns numpy array of scores. 
bwfile_name, chrom_list, start_list, end_list, strand_list, weights, option (mean,max,min)" 5 | 6 | static PyObject *RPError; 7 | 8 | static PyObject * 9 | getrp(PyObject *self, PyObject *args) // self is the module object 10 | { 11 | PyObject *bigwigfileObj; /* bigwig file name strings */ 12 | PyObject *bedfile; /* bed file name strings */ 13 | PyObject *outfile; 14 | PyObject *decay; 15 | PyObject *left; 16 | PyObject *right; 17 | char *bigWigFile; 18 | char *bed; 19 | char *out; 20 | double d; 21 | int l; 22 | int r; 23 | #if PY_MAJOR_VERSION >= 3 24 | #define PyInt_Type PyLong_Type 25 | #define PyString_Type PyBytes_Type 26 | #define PyInt_AsLong PyLong_AsLong 27 | #define PyString_AsString PyBytes_AsString 28 | #endif 29 | //if (! PyArg_ParseTuple( args, "O!O!O!O!O!O!", &PyString_Type, &bigwigfileObj, &PyString_Type, &bedfile, &PyString_Type, &outfile, &PyFloat_Type, &decay, &PyInt_Type, &left, &PyInt_Type, &right)) { 30 | if (! PyArg_ParseTuple( args, "sssO!O!O!", &bigWigFile, &bed, &out, &PyFloat_Type, &decay, &PyInt_Type, &left, &PyInt_Type, &right)) { 31 | printf("%s %s %s %f %d %d \n", bigWigFile, bed, out, d, l, r); 32 | PyErr_SetString(RPError, "something bad happened!!!"); 33 | return NULL; 34 | } 35 | //bigWigFile = PyString_AsString(bigwigfileObj); 36 | //bed = PyString_AsString(bedfile); 37 | //out = PyString_AsString(outfile); 38 | d = PyFloat_AsDouble(decay); 39 | l = PyInt_AsLong(left); 40 | r = PyInt_AsLong(right); 41 | printf("%s %s %s %f %d %d \n", bigWigFile, bed, out, d, l, r); 42 | 43 | bigWigAverageOverBed(bigWigFile, bed, out, d, l, r); 44 | 45 | Py_INCREF(Py_None); 46 | return Py_None; 47 | } 48 | 49 | static PyMethodDef myMethods[] = { 50 | { "getrp", getrp, METH_VARARGS, RP_DOC }, 51 | { NULL, NULL, 0, NULL } 52 | }; 53 | 54 | #if PY_MAJOR_VERSION >= 3 55 | static struct PyModuleDef moduledef = { 56 | PyModuleDef_HEAD_INIT, 57 | "_bw", 58 | "epigenomics data RP module", 59 | -1, 60 | myMethods, 61 | NULL, 62 | NULL, 63 | NULL, 64 | NULL 65 | }; 66 | 67 | #define INITERROR return NULL 68 | //PyObject * 69 | PyMODINIT_FUNC 70 | PyInit__bw(void) 71 | #else 72 | #define INITERROR return 73 | //PyMODINIT_FUNC init_bw(void) 74 | void init_bw(void) 75 | #endif 76 | { 77 | #if PY_MAJOR_VERSION >= 3 78 | PyObject *m = PyModule_Create(&moduledef); 79 | #else 80 | PyObject *m = Py_InitModule("_bw", myMethods); 81 | #endif 82 | if (m == NULL) 83 | INITERROR; 84 | 85 | RPError = PyErr_NewException("_bw.Error", NULL, NULL); 86 | Py_INCREF(RPError); 87 | PyModule_AddObject(m, "rperror", RPError); 88 | /* import_array(); */ 89 | #if PY_MAJOR_VERSION >= 3 90 | return m; 91 | #endif 92 | } 93 | -------------------------------------------------------------------------------- /lisa/regpotential/pybw.h: -------------------------------------------------------------------------------- 1 | 2 | int bigWigSummary(char *bigWigFile, char *chrom, int start, int end, int dataPoints, double *summaryValues, char *summaryType); 3 | 4 | //void bigWigAverageOverBed(char *inBw, char *inBed, char *outTab, float d); 5 | 6 | void bigWigAverageOverBed(char *inBw, char *inBed, char *outTab, float alpha, int left, int right); 7 | -------------------------------------------------------------------------------- /lisa/regpotential/servBrcMcw.c: -------------------------------------------------------------------------------- 1 | /* Stuff that's specific for .brc.mcw.edu server goes here. 
2 | * 3 | * This file is copyright 2004 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | #include "hash.h" 11 | 12 | 13 | static char *__trashDir = "/trash"; 14 | 15 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 16 | /* Figure out a temp name, and how CGI and HTML will access it. */ 17 | { 18 | char *tname; 19 | 20 | tname = rTempName(__trashDir, base, suffix); 21 | strcpy(tn->forCgi, tname); 22 | strcpy(tn->forHtml, tname); 23 | } 24 | 25 | static char *_cgiDir() 26 | { 27 | return "/cgi-bin/"; 28 | } 29 | 30 | static char *_trashDir() 31 | { 32 | return __trashDir; 33 | } 34 | 35 | static double _speed() 36 | { 37 | return 3.0; 38 | } 39 | 40 | struct webServerSpecific wssBrcMcw = 41 | { 42 | "default", 43 | _makeTempName, 44 | _cgiDir, 45 | _speed, 46 | _trashDir, 47 | }; 48 | -------------------------------------------------------------------------------- /lisa/regpotential/servCrunx.c: -------------------------------------------------------------------------------- 1 | /* Stuff that's specific for local linux server goes here. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | 11 | 12 | static char *__trashDir = "/home/httpd/html/trash"; 13 | 14 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 15 | /* Figure out a temp name, and how CGI and HTML will access it. */ 16 | { 17 | char *tname; 18 | char *tempDirCgi = __trashDir; 19 | char *tempDirHtml = "/trash"; 20 | int tlcLen = strlen(tempDirCgi); 21 | int tlhLen = strlen(tempDirHtml); 22 | 23 | tname = rTempName(tempDirCgi, base, suffix); 24 | strcpy(tn->forCgi, tname); 25 | memcpy(tn->forHtml, tempDirHtml, tlhLen); 26 | strcpy(tn->forHtml+tlhLen, tn->forCgi+tlcLen); 27 | } 28 | 29 | static char *_cgiDir() 30 | { 31 | return "../cgi-bin/"; 32 | } 33 | 34 | static char *_trashDir() 35 | { 36 | return __trashDir; 37 | } 38 | 39 | static double _speed() 40 | { 41 | return 3.0; 42 | } 43 | 44 | struct webServerSpecific wssLinux = 45 | { 46 | "linux", 47 | _makeTempName, 48 | _cgiDir, 49 | _speed, 50 | _trashDir, 51 | }; 52 | -------------------------------------------------------------------------------- /lisa/regpotential/servcis.c: -------------------------------------------------------------------------------- 1 | /* Stuff that's specific for Comp Science dept. web server goes here. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | #include "hash.h" 11 | 12 | 13 | static char *__trashDir = "../trash"; 14 | 15 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 16 | /* Figure out a temp name, and how CGI and HTML will access it. 
*/ 17 | { 18 | char *tname; 19 | 20 | tname = rTempName(__trashDir, base, suffix); 21 | strcpy(tn->forCgi, tname); 22 | strcpy(tn->forHtml, tname); 23 | } 24 | 25 | static char *_cgiDir() 26 | { 27 | return "../cgi-bin/"; 28 | } 29 | 30 | static char *_trashDir() 31 | { 32 | return __trashDir; 33 | } 34 | 35 | static double _speed() 36 | { 37 | return 3.0; 38 | } 39 | 40 | 41 | struct webServerSpecific wssDefault = 42 | { 43 | "default", 44 | _makeTempName, 45 | _cgiDir, 46 | _speed, 47 | _trashDir, 48 | }; 49 | -------------------------------------------------------------------------------- /lisa/regpotential/servcl.c: -------------------------------------------------------------------------------- 1 | /* "Web Server" for command line execution. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | 11 | 12 | static char *__trashDir = "."; 13 | 14 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 15 | /* Figure out a temp name, and how CGI and HTML will access it. */ 16 | { 17 | char *tname = rTempName(__trashDir, base, suffix); 18 | strcpy(tn->forCgi, tname); 19 | strcpy(tn->forHtml, tn->forCgi); 20 | } 21 | 22 | static char *_cgiDir() 23 | { 24 | char *jkwebDir; 25 | if ((jkwebDir = getenv("JKWEB")) == NULL) 26 | return ""; 27 | else 28 | return jkwebDir; 29 | } 30 | 31 | static char *_trashDir() 32 | { 33 | return __trashDir; 34 | } 35 | 36 | static double _speed() 37 | { 38 | return 1.0; 39 | } 40 | 41 | 42 | struct webServerSpecific wssCommandLine = 43 | { 44 | "commandLine", 45 | _makeTempName, 46 | _cgiDir, 47 | _speed, 48 | _trashDir, 49 | }; 50 | -------------------------------------------------------------------------------- /lisa/regpotential/servmsII.c: -------------------------------------------------------------------------------- 1 | /* Stuff that's specific for the MS II Web Server goes here. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. */ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | 11 | 12 | static char *__trashDir = "..\\trash"; 13 | 14 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 15 | /* Figure out a temp name, and how CGI and HTML will access it. */ 16 | { 17 | long tempIx = incCounterFile("tcounter"); 18 | sprintf(tn->forCgi, "%s\\%s%ld%s", __trashDir, base, tempIx, suffix); 19 | sprintf(tn->forHtml, "%s\\%s%ld%s", __trashDir, base, tempIx, suffix); 20 | } 21 | 22 | static char *_cgiDir() 23 | { 24 | return ""; 25 | } 26 | 27 | static char *_trashDir() 28 | { 29 | return __trashDir; 30 | } 31 | 32 | static double _speed() 33 | { 34 | return 2.5; 35 | } 36 | 37 | 38 | struct webServerSpecific wssMicrosoftII = 39 | { 40 | "Microsoft-IIS", 41 | _makeTempName, 42 | _cgiDir, 43 | _speed, 44 | _trashDir, 45 | }; 46 | -------------------------------------------------------------------------------- /lisa/regpotential/servpws.c: -------------------------------------------------------------------------------- 1 | /* Stuff that's specific for the Personal Web Server goes here. 2 | * 3 | * This file is copyright 2002 Jim Kent, but license is hereby 4 | * granted for all use - public, private or commercial. 
*/ 5 | 6 | #include "common.h" 7 | #include "portable.h" 8 | #include "portimpl.h" 9 | #include "obscure.h" 10 | 11 | 12 | static char *__trashDir = "..\\trash"; 13 | 14 | static void _makeTempName(struct tempName *tn, char *base, char *suffix) 15 | /* Figure out a temp name, and how CGI and HTML will access it. */ 16 | { 17 | long tempIx = incCounterFile("tcounter"); 18 | sprintf(tn->forCgi, "%s\\%s%ld%s", __trashDir, base, tempIx, suffix); 19 | sprintf(tn->forHtml, "%s\\%s%ld%s", __trashDir, base, tempIx, suffix); 20 | } 21 | 22 | static char *_cgiDir() 23 | { 24 | return "../cgi-bin/"; 25 | } 26 | 27 | static char *_trashDir() 28 | { 29 | return __trashDir; 30 | } 31 | 32 | static double _speed() 33 | { 34 | return 1.25; 35 | } 36 | 37 | struct webServerSpecific wssMicrosoftPWS = 38 | { 39 | "Microsoft-PWS", 40 | _makeTempName, 41 | _cgiDir, 42 | _speed, 43 | _trashDir, 44 | }; 45 | -------------------------------------------------------------------------------- /lisa/regpotential/sig.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * Copyright (C) 2000 Jim Kent. This source code may be freely used * 3 | * for personal, academic, and non-profit purposes. Commercial use * 4 | * permitted only by explicit agreement with Jim Kent (jim_kent@pacbell.net) * 5 | *****************************************************************************/ 6 | /* Sig.h - signatures that start various binary files. */ 7 | #ifndef SIG_H 8 | #define SIG_H 9 | 10 | #define aliSig 0xCDAB8245 11 | /* Binary alignment file. */ 12 | 13 | #define alxSig 0xA1B1C1D3 14 | /* Index into binary alignment file, sorted by start base offset. */ 15 | 16 | #define pgoSig 0x690 17 | /* Index into GDF file, sorted by start base offset. Signature is 32 bit. */ 18 | 19 | #define cdoSig 0xCD01 20 | /* Index into c2g text file, sorted by start base offset. 32 bit signature. */ 21 | 22 | #define xaoSig 0xA0B0C0D0 23 | /* Index into xeno alignment, sorted by start base offset. 32 bit signature. */ 24 | 25 | #define glSig 0xF1E2D3C4 26 | /* Binary gene file, sorted by chromosome and then starting offset. */ 27 | 28 | /* IX sig is int ixSig[4] = {0x693F8ED1, 0x7EDA1C32, 0x4BA58983, 0x277CB89C,}; 29 | * These are made by snofMake, and are indexes sorted by name. */ 30 | 31 | /* XI - same as IX but on big-endian (or is it little-endian) archetectures. */ 32 | 33 | #define nt4Signature 0x12345678 34 | /* Signature at the beginning of an nt4 file - 2 bit a nucleotide binary file. */ 35 | 36 | #define lm2Signature 0x12131416 37 | /* Signature at the beginning of a lm2 file - a 2nd order markov model for nucleotides. */ 38 | 39 | #define oocSig 0x584155f2 40 | /* Signature of file that contains over-represented oligomers for patSpace 41 | * algorithm. */ 42 | 43 | #define oocSigSwapped 0xf2554158 44 | /* Signature of file that contains over-represented oligomers for patSpace 45 | * algorithm. */ 46 | 47 | #define fofSig 0x13410da8 48 | /* Signature into fof type index file (that can index multiple external files). */ 49 | 50 | #define nibSig 0x6BE93D3A 51 | /* Signature into nib file (4 bits per nucleotide DNA file) */ 52 | 53 | #define qacSig 0x32b67998 54 | /* Signature of qac file (compressed quality file) */ 55 | 56 | #define caqSig 0x9879b632 57 | /* Signature of byte-swapped qac file. 
*/ 58 | 59 | #define twoBitSig 0x1A412743 60 | /* Signature into 2bit file (2 bits per nucleotide DNA file) plus 61 | * information on N and masked bases. */ 62 | 63 | #define twoBitSwapSig 0x4327411A 64 | /* Signature of byte-swapped two-bit file. */ 65 | 66 | #define chromGraphSig 0x4528421C 67 | /* Signature of chromGraph binary data file */ 68 | 69 | #define chromGraphSwapSig 0x1C422845 70 | /* Signature of byte-swapped chromGraph binary data file */ 71 | 72 | #define genomeRangeTreeSig 0xf7fb8104 73 | /* Signature of genomeRangeTree binary data file */ 74 | 75 | #define genomeRangeTreeSwapSig 0x0481fbf7 76 | /* Signature of genomeRangeTree binary data file */ 77 | 78 | #define bptSig 0x78CA8C91 79 | /* Signature of generic b+ tree index file. */ 80 | 81 | #define bptSwapped 0x918CCA78 82 | /* Signature of generic b+ tree index file. */ 83 | 84 | #define cirTreeSig 0x2468ACE0 85 | /* Signature of a chromosome id r-tree index file. */ 86 | 87 | #define crTreeSig 0x2369ADE1 88 | /* Signature of a chromosome r-tree index file. */ 89 | 90 | #define bigWigSig 0x888FFC26 91 | /* Signature for a big wig file. */ 92 | 93 | #define bigBedSig 0x8789F2EB 94 | /* Signature for a big bed file. */ 95 | 96 | #define udcBitmapSig 0x4187E2F6 97 | /* Signature for a url data cache bitmap file. */ 98 | 99 | #endif /* SIG_H */ 100 | 101 | 102 | -------------------------------------------------------------------------------- /lisa/regpotential/sqlNum.h: -------------------------------------------------------------------------------- 1 | /* sqlNum.h - routines to convert from ascii to 2 | * unsigned/integer a bit more quickly than atoi. 3 | * Called sqlNum because it was first developed for use with 4 | * SQL databases, which tend to require a lot of conversion from 5 | * string to binary representation of numbers. In particular the 6 | * code generator AutoSQL puts in lots of calls to these routines 7 | * into it's parsers. Other parser in the source tree have come 8 | * to use these too though since they are fast and have good error 9 | * checking. 10 | * 11 | * This file is copyright 2002 Jim Kent, but license is hereby 12 | * granted for all use - public, private or commercial. */ 13 | 14 | #ifndef SQLNUM_H 15 | #define SQLNUM_H 16 | 17 | /* get off_t */ 18 | #include 19 | 20 | unsigned sqlUnsigned(char *s); 21 | /* Convert series of digits to unsigned integer about 22 | * twice as fast as atoi (by not having to skip white 23 | * space or stop except at the null byte.) */ 24 | 25 | unsigned sqlUnsignedInList(char **pS); 26 | /* Convert series of digits to unsigned integer about 27 | * twice as fast as atoi (by not having to skip white 28 | * space or stop except at the null byte.) 29 | * All of string is number. Number may be delimited by a comma. 30 | * Returns the position of the delimiter or the terminating 0. */ 31 | 32 | unsigned long sqlUnsignedLong(char *s); 33 | /* Convert series of digits to unsigned long about 34 | * twice as fast as atol (by not having to skip white 35 | * space or stop except at the null byte.) */ 36 | 37 | unsigned long sqlUnsignedLongInList(char **pS); 38 | /* Convert series of digits to unsigned long about 39 | * twice as fast as atol (by not having to skip white 40 | * space or stop except at the null byte.) 41 | * All of string is number. Number may be delimited by a comma. 42 | * Returns the position of the delimiter or the terminating 0. */ 43 | 44 | int sqlSigned(char *s); 45 | /* Convert string to signed integer. Unlike atol assumes 46 | * all of string is number. 
*/ 47 | 48 | int sqlSignedInList(char **pS); 49 | /* Convert string to signed integer. Unlike atol assumes 50 | * all of string is number. Number may be delimited by a comma. 51 | * Returns the position of the delimiter or the terminating 0. */ 52 | 53 | long long sqlLongLong(char *s); 54 | /* Convert string to a long long. Unlike atol assumes all of string is 55 | * number. */ 56 | 57 | long long sqlLongLongInList(char **pS); 58 | /* Convert string to a long long. Unlike atol, assumes 59 | * all of string is number. Number may be delimited by a comma. 60 | * Returns the position of the delimiter or the terminating 0. */ 61 | 62 | float sqlFloat(char *s); 63 | /* Convert string to a float. Assumes all of string is number 64 | * and aborts on an error. */ 65 | 66 | float sqlFloatInList(char **pS); 67 | /* Convert string to a float. Assumes all of string is number 68 | * and aborts on an error. 69 | * Number may be delimited by a comma. 70 | * Returns the position of the delimiter or the terminating 0. */ 71 | 72 | double sqlDouble(char *s); 73 | /* Convert string to a double. Assumes all of string is number 74 | * and aborts on an error. */ 75 | 76 | double sqlDoubleInList(char **pS); 77 | /* Convert string to a double. Assumes all of string is number 78 | * and aborts on an error. 79 | * Number may be delimited by a comma. 80 | * Returns the position of the delimiter or the terminating 0. */ 81 | 82 | #endif /* SQLNUM_H */ 83 | 84 | -------------------------------------------------------------------------------- /lisa/regpotential/tokenizer.h: -------------------------------------------------------------------------------- 1 | /* tokenizer - A tokenizer structure that will chop up file into 2 | * tokens. It is aware of quoted strings and otherwise tends to return 3 | * white-space or punctuated-separated words, with punctuation in 4 | * a separate token. This is used by autoSql. */ 5 | 6 | #ifndef TOKENIZER_H 7 | #define TOKENIZER_H 8 | 9 | struct tokenizer 10 | /* This handles reading in tokens. */ 11 | { 12 | bool reuse; /* True if want to reuse this token. */ 13 | bool eof; /* True at end of file. */ 14 | int leadingSpaces; /* Number of leading spaces before token. */ 15 | struct lineFile *lf; /* Underlying file. */ 16 | char *curLine; /* Current line of text. */ 17 | char *linePt; /* Start position within current line. */ 18 | char *string; /* String value of token */ 19 | int sSize; /* Size of string. */ 20 | int sAlloc; /* Allocated string size. */ 21 | /* Some variables set after tokenizerNew to control details of 22 | * parsing. */ 23 | bool leaveQuotes; /* Leave quotes in string. */ 24 | bool uncommentC; /* Take out C (and C++) style comments. */ 25 | bool uncommentShell; /* Take out # style comments. */ 26 | }; 27 | 28 | struct tokenizer *tokenizerNew(char *fileName); 29 | /* Return a new tokenizer. */ 30 | 31 | struct tokenizer *tokenizerOnLineFile(struct lineFile *lf); 32 | /* Create a new tokenizer on open lineFile. */ 33 | 34 | void tokenizerFree(struct tokenizer **pTkz); 35 | /* Tear down a tokenizer. */ 36 | 37 | void tokenizerReuse(struct tokenizer *tkz); 38 | /* Reuse token. */ 39 | 40 | int tokenizerLineCount(struct tokenizer *tkz); 41 | /* Return line of current token. */ 42 | 43 | char *tokenizerFileName(struct tokenizer *tkz); 44 | /* Return name of file. */ 45 | 46 | char *tokenizerNext(struct tokenizer *tkz); 47 | /* Return token's next string (also available as tkz->string) or 48 | * NULL at EOF. 
This string will be overwritten with the next call 49 | * to tokenizerNext, so cloneString if you need to save it. */ 50 | 51 | void tokenizerErrAbort(struct tokenizer *tkz, char *format, ...); 52 | /* Print error message followed by file and line number and 53 | * abort. */ 54 | 55 | void tokenizerNotEnd(struct tokenizer *tkz); 56 | /* Squawk if at end. */ 57 | 58 | char *tokenizerMustHaveNext(struct tokenizer *tkz); 59 | /* Get next token, which must be there. */ 60 | 61 | void tokenizerMustMatch(struct tokenizer *tkz, char *string); 62 | /* Require next token to match string. Return next token 63 | * if it does, otherwise abort. */ 64 | 65 | #endif /* TOKENIZER_H */ 66 | 67 | -------------------------------------------------------------------------------- /lisa/regpotential/vGfx.c: -------------------------------------------------------------------------------- 1 | /* vGfx - interface to polymorphic graphic object 2 | * that currently can either be a memory buffer or 3 | * a postScript file. */ 4 | 5 | /* Copyright (C) 2011 The Regents of the University of California 6 | * See README in this or parent directory for licensing information. */ 7 | 8 | #include "common.h" 9 | #include "vGfx.h" 10 | 11 | 12 | 13 | /* Most of the implementation of this is in macros in vGfx.h. */ 14 | 15 | void vgClose(struct vGfx **pVg) 16 | /* Close down virtual graphics object, and finish writing it to file. */ 17 | { 18 | struct vGfx *vg = *pVg; 19 | if (vg != NULL) 20 | { 21 | vg->close(&vg->data); 22 | freez(pVg); 23 | } 24 | } 25 | 26 | struct vGfx *vgHalfInit(int width, int height) 27 | /* Close down virtual graphics object, and finish writing it to file. */ 28 | { 29 | struct vGfx *vg; 30 | AllocVar(vg); 31 | vg->width = width; 32 | vg->height = height; 33 | return vg; 34 | } 35 | 36 | int vgFindRgb(struct vGfx *vg, struct rgbColor *rgb) 37 | /* Find color index corresponding to rgb color. */ 38 | { 39 | return vgFindColorIx(vg, rgb->r, rgb->g, rgb->b); 40 | } 41 | 42 | Color vgContrastingColor(struct vGfx *vg, int backgroundIx) 43 | /* Return black or white whichever would be more visible over 44 | * background. */ 45 | { 46 | struct rgbColor c = vgColorIxToRgb(vg, backgroundIx); 47 | int val = (int)c.r + c.g + c.g + c.b; 48 | if (val > 512) 49 | return MG_BLACK; 50 | else 51 | return MG_WHITE; 52 | } 53 | 54 | -------------------------------------------------------------------------------- /lisa/regpotential/vGfxPrivate.h: -------------------------------------------------------------------------------- 1 | /* vGfx private - stuff that the implementers of 2 | * a vGfx need to know about, but not the clients. */ 3 | 4 | /* Copyright (C) 2010 The Regents of the University of California 5 | * See README in this or parent directory for licensing information. */ 6 | 7 | 8 | struct vGfx *vgHalfInit(int width, int height); 9 | /* Return a partially initialized vGfx structure. 10 | * Generally not called by clients.*/ 11 | 12 | void vgMgMethods(struct vGfx *vg); 13 | /* Fill in virtual graphics methods for memory based drawing. */ 14 | 15 | /* A bunch of things to make the type-casting easier. 16 | * This is a price you pay for object oriented 17 | * polymorphism in C... 
*/ 18 | 19 | typedef void (*vg_close)(void **pV); 20 | typedef void (*vg_dot)(void *v, int x, int y, int colorIx); 21 | typedef int (*vg_getDot)(void *v, int x, int y); 22 | typedef void (*vg_box)(void *v, int x, int y, 23 | int width, int height, int colorIx); 24 | typedef void (*vg_line)(void *v, 25 | int x1, int y1, int x2, int y2, int colorIx); 26 | typedef void (*vg_text)(void *v, int x, int y, int colorIx, void *font, 27 | char *text); 28 | typedef void (*vg_textRight)(void *v, int x, int y, int width, int height, 29 | int colorIx, void *font, char *text); 30 | typedef void (*vg_textCentered)(void *v, int x, int y, int width, int height, 31 | int colorIx, void *font, char *text); 32 | typedef int (*vg_findColorIx)(void *v, int r, int g, int b); 33 | typedef struct rgbColor (*vg_colorIxToRgb)(void *v, int colorIx); 34 | typedef void (*vg_setClip)(void *v, int x, int y, int width, int height); 35 | typedef void (*vg_setWriteMode)(void *v, unsigned int writeMode); 36 | typedef void (*vg_unclip)(void *v); 37 | typedef void (*vg_verticalSmear)(void *v, 38 | int xOff, int yOff, int width, int height, 39 | Color *dots, boolean zeroClear); 40 | typedef void (*vg_fillUnder)(void *v, int x1, int y1, 41 | int x2, int y2, int bottom, Color color); 42 | typedef void (*vg_drawPoly)(void *v, struct gfxPoly *poly, Color color, boolean filled); 43 | typedef void (*vg_setHint)(void *v, char *hint, char *value); 44 | typedef char * (*vg_getHint)(void *v, char *hint); 45 | typedef int (*vg_getFontPixelHeight)(void *v, void *font); 46 | typedef int (*vg_getFontStringWidth)(void *v, void *font, char *string); 47 | 48 | -------------------------------------------------------------------------------- /lisa/regpotential/verbose.c: -------------------------------------------------------------------------------- 1 | /* verbose.c - write out status messages according to the 2 | * current verbosity level. These messages go to stderr. */ 3 | 4 | /* Copyright (C) 2011 The Regents of the University of California 5 | * See README in this or parent directory for licensing information. */ 6 | 7 | #include "common.h" 8 | #include "portable.h" 9 | #include "verbose.h" 10 | 11 | 12 | static int logVerbosity = 1; /* The level of log verbosity. 0 is silent. */ 13 | static FILE *logFile; /* File to log to. */ 14 | 15 | static boolean checkedDotsEnabled = FALSE; /* have we check for dot output 16 | * being enabled? */ 17 | static boolean dotsEnabled = FALSE; /* is dot output enabled? */ 18 | 19 | void verboseVa(int verbosity, char *format, va_list args) 20 | /* Log with at given verbosity vprintf formatted args. */ 21 | { 22 | if (verbosity <= logVerbosity) 23 | { 24 | if (logFile == NULL) 25 | logFile = stderr; 26 | vfprintf(logFile, format, args); 27 | fflush(logFile); 28 | } 29 | } 30 | 31 | void verbose(int verbosity, char *format, ...) 32 | /* Write printf formatted message to log (which by 33 | * default is stderr) if global verbose variable 34 | * is set to verbosity or higher. */ 35 | { 36 | va_list args; 37 | va_start(args, format); 38 | verboseVa(verbosity, format, args); 39 | va_end(args); 40 | } 41 | 42 | static long lastTime = -1; // previous call time. 43 | 44 | void verboseTimeInit(void) 45 | /* Initialize or reinitialize the previous time for use by verboseTime. */ 46 | { 47 | lastTime = clock1000(); 48 | } 49 | 50 | void verboseTime(int verbosity, char *label, ...) 51 | /* Print label and how long it's been since last call. 
Start time can be 52 | * initialized with verboseTimeInit, otherwise the elapsed time will be 53 | * zero. */ 54 | { 55 | assert(label != NULL); // original version allowed this, but breaks some GCCs 56 | if (lastTime < 0) 57 | verboseTimeInit(); 58 | long time = clock1000(); 59 | va_list args; 60 | va_start(args, label); 61 | verboseVa(verbosity, label, args); 62 | verbose(verbosity, ": %ld millis\n", time - lastTime); 63 | lastTime = time; 64 | va_end(args); 65 | } 66 | 67 | 68 | boolean verboseDotsEnabled() 69 | /* check if outputting of happy dots are enabled. They will be enabled if the 70 | * verbosity is > 0, stderr is a tty and we don't appear to be running an 71 | * emacs shell. */ 72 | { 73 | if (!checkedDotsEnabled) 74 | { 75 | if (logFile == NULL) 76 | logFile = stderr; 77 | dotsEnabled = (logVerbosity > 0) && isatty(fileno(logFile)); 78 | if (dotsEnabled) 79 | { 80 | /* check for an possible emacs shell */ 81 | char *emacs = getenv("emacs"); 82 | char *term = getenv("TERM"); 83 | if ((emacs != NULL) && (emacs[0] == 't')) 84 | dotsEnabled = FALSE; 85 | else if ((term != NULL) && sameString(term, "dumb")) 86 | dotsEnabled = FALSE; 87 | } 88 | checkedDotsEnabled = TRUE; 89 | } 90 | return dotsEnabled; 91 | } 92 | 93 | void verboseDot() 94 | /* Write I'm alive dot (at verbosity level 1) */ 95 | { 96 | if (verboseDotsEnabled()) 97 | verbose(1, "."); 98 | } 99 | 100 | void verboseSetLevel(int verbosity) 101 | /* Set verbosity level in log. 0 for no logging, 102 | * higher number for increasing verbosity. */ 103 | { 104 | logVerbosity = verbosity; 105 | checkedDotsEnabled = FALSE; /* force rechecking of dots enabled */ 106 | } 107 | 108 | int verboseLevel(void) 109 | /* Get verbosity level. */ 110 | { 111 | return logVerbosity; 112 | } 113 | 114 | void verboseSetLogFile(char *name) 115 | /* Set logFile for verbose messages overrides stderr. */ 116 | { 117 | if (sameString(name, "stdout")) 118 | logFile = stdout; 119 | else if (sameString(name, "stderr")) 120 | logFile = stderr; 121 | else 122 | logFile = mustOpen(name, "w"); 123 | } 124 | 125 | FILE *verboseLogFile() 126 | /* Get the verbose log file. */ 127 | { 128 | if (logFile == NULL) 129 | logFile = stderr; 130 | return logFile; 131 | } 132 | -------------------------------------------------------------------------------- /lisa/regpotential/verbose.h: -------------------------------------------------------------------------------- 1 | /* verbose.h - write out status messages according to the 2 | * current verbosity level. These messages go to stderr. */ 3 | 4 | #ifndef VERBOSE_H 5 | #define VERBOSE_H 6 | 7 | void verbose(int verbosity, char *format, ...) 8 | /* Write printf formatted message to log (which by 9 | * default is stderr) if global verbose variable 10 | * is set to verbosity or higher. */ 11 | #if defined(__GNUC__) 12 | __attribute__((format(printf, 2, 3))) 13 | #endif 14 | ; 15 | 16 | void verboseVa(int verbosity, char *format, va_list args); 17 | /* Log with at given verbosity vprintf formatted args. */ 18 | 19 | void verboseTimeInit(void); 20 | /* Initialize or reinitialize the previous time for use by verboseTime. */ 21 | 22 | void verboseTime(int verbosity, char *label, ...) 23 | /* Print label and how long it's been since last call. Start time can be 24 | * initialized with verboseTimeInit, otherwise the elapsed time will be 25 | * zero. 
*/ 26 | #if defined(__GNUC__) 27 | __attribute__((format(printf, 2, 3))) 28 | #endif 29 | ; 30 | 31 | void verboseDot(); 32 | /* Write I'm alive dot (at verbosity level 1) */ 33 | 34 | boolean verboseDotsEnabled(); 35 | /* check if outputting of happy dots are enabled. They will be enabled if the 36 | * verbosity is > 0, stderr is a tty and we don't appear to be running an 37 | * emacs shell. */ 38 | 39 | int verboseLevel(void); 40 | /* Get verbosity level. */ 41 | 42 | void verboseSetLevel(int verbosity); 43 | /* Set verbosity level in log. 0 for no logging, 44 | * higher number for increasing verbosity. */ 45 | 46 | void verboseSetLogFile(char *name); 47 | /* Set logFile for verbose messages overrides stderr. */ 48 | 49 | FILE *verboseLogFile(); 50 | /* Get the verbose log file. */ 51 | 52 | #endif /* VERBOSE_H */ 53 | 54 | -------------------------------------------------------------------------------- /lisa/regpotential/zlibFace.c: -------------------------------------------------------------------------------- 1 | /* Wrappers around zlib to make interfacing to it a bit easier. */ 2 | 3 | /* Copyright (C) 2009 The Regents of the University of California 4 | * See README in this or parent directory for licensing information. */ 5 | 6 | #include "common.h" 7 | #include 8 | 9 | static char *zlibErrorMessage(int err) 10 | /* Convert error code to errorMessage */ 11 | { 12 | switch (err) 13 | { 14 | case Z_STREAM_END: 15 | return "zlib stream end"; 16 | case Z_NEED_DICT: 17 | return "zlib need dictionary"; 18 | case Z_ERRNO: 19 | return "zlib errno"; 20 | case Z_STREAM_ERROR: 21 | return "zlib data error"; 22 | case Z_DATA_ERROR: 23 | return "zlib data error"; 24 | case Z_MEM_ERROR: 25 | return "zlib mem error"; 26 | case Z_BUF_ERROR: 27 | return "zlib buf error"; 28 | case Z_VERSION_ERROR: 29 | return "zlib version error"; 30 | case Z_OK: 31 | return NULL; 32 | default: 33 | { 34 | static char msg[128]; 35 | safef(msg, sizeof(msg), "zlib error code %d", err); 36 | return msg; 37 | } 38 | } 39 | } 40 | 41 | size_t zCompress( 42 | void *uncompressed, /* Start of area to compress. */ 43 | size_t uncompressedSize, /* Size of area to compress. */ 44 | void *compBuf, /* Where to put compressed bits */ 45 | size_t compBufSize) /* Size of compressed bits - calculate using zCompBufSize */ 46 | /* Compress data from memory to memory. Returns size after compression. */ 47 | { 48 | uLongf compSize = compBufSize; 49 | int err = compress((Bytef*)compBuf, &compSize, (Bytef*)uncompressed, (uLong)uncompressedSize); 50 | if (err != 0) 51 | errAbort("Couldn't zCompress %lld bytes: %s", 52 | (long long)uncompressedSize, zlibErrorMessage(err)); 53 | return compSize; 54 | } 55 | 56 | size_t zCompBufSize(size_t uncompressedSize) 57 | /* Return size of buffer needed to compress something of given size uncompressed. */ 58 | { 59 | return 1.001*uncompressedSize + 13; 60 | } 61 | 62 | size_t zUncompress( 63 | void *compressed, /* Compressed area */ 64 | size_t compressedSize, /* Size after compression */ 65 | void *uncompBuf, /* Where to put uncompressed bits */ 66 | size_t uncompBufSize) /* Max size of uncompressed bits. */ 67 | /* Uncompress data from memory to memory. Returns size after decompression. 
*/ 68 | { 69 | uLongf uncSize = uncompBufSize; 70 | int err = uncompress(uncompBuf, &uncSize, compressed, compressedSize); 71 | if (err != 0) 72 | errAbort("Couldn't zUncompress %lld bytes: %s", 73 | (long long)compressedSize, zlibErrorMessage(err)); 74 | return uncSize; 75 | } 76 | 77 | void zSelfTest(int count) 78 | /* Run an internal diagnostic. */ 79 | { 80 | bits32 testData[count]; 81 | int uncSize = count*sizeof(bits32); 82 | int i; 83 | for (i=0; i&1>>{log}" 24 | -------------------------------------------------------------------------------- /lisa/rules/baseline.rule: -------------------------------------------------------------------------------- 1 | rule lisa_baseline_motif_99: 2 | input: 3 | background = "{sample}.background_gene.1000", 4 | foreground="{sample}.foreground_gene" 5 | output: 6 | #../PhaseQ_Figures/AR_motif99_baseline.csv 7 | "{sample}_motif99_baseline.csv" 8 | message: "lisa baseline for motif hit number" 9 | benchmark: 10 | "{sample}.motif99.baseline.benchmark.txt" 11 | log: "{sample}.log" 12 | params: species=config["species"], prefix="{sample}", 13 | cov=config['covariates'] 14 | shell: 15 | "lisa_baseline run --species {params.species} --prefix {params.prefix} --background {input.background} --foreground {input.foreground} --dtype motif99 2>&1>>{log}" 16 | 17 | rule lisa_baseline_chipseqpeak: 18 | input: 19 | background = "{sample}.background_gene.1000", 20 | foreground="{sample}.foreground_gene" 21 | output: 22 | "{sample}_chipseq_baseline.csv" 23 | message: "lisa baseline for chip-seq peak number" 24 | benchmark: 25 | "{sample}.chipseq.baseline.benchmark.txt" 26 | log: "{sample}.log" 27 | params: species=config["species"], prefix="{sample}", 28 | cov=config['covariates'] 29 | shell: 30 | "lisa_baseline run --species {params.species} --prefix {params.prefix} --background {input.background} --foreground {input.foreground} --dtype chipseq 2>&1>>{log}" 31 | -------------------------------------------------------------------------------- /lisa/rules/combine_chipseq.rule: -------------------------------------------------------------------------------- 1 | """ https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html Input Functions and unpack 2 | """ 3 | import os 4 | 5 | def get_combine_command(label): 6 | return "lisa_combine_ranks -prefix {wildcards.sample}_%s {input}" % label 7 | 8 | def get_inputs(method): 9 | if method == 'beta': 10 | #return "{sample}.lisa_direct.csv" 11 | return "{sample}.3000.lisa_direct.csv" 12 | elif method == 'knockout': 13 | return expand("{{sample}}.{epigenome}.chipseq.p_value.csv", epigenome=config['epigenome']) 14 | elif method == 'all': 15 | return "{sample}.3000.lisa_direct.csv", expand("{{sample}}.{epigenome}.chipseq.p_value.csv", epigenome=config['epigenome']) 16 | 17 | rule lisa_combine_chipseq_ranks: 18 | input: 19 | get_inputs(config['method']) 20 | output: 21 | "{sample}_chipseq_cauchy_combine_dedup.csv", 22 | "{sample}_chipseq_fisher_combine_dedup.csv" 23 | message: "combine p values of TF from ChIP-seq" 24 | shell: 25 | get_combine_command('chipseq') 26 | -------------------------------------------------------------------------------- /lisa/rules/combine_motif.rule: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | def get_combine_command(label): 4 | return "lisa_combine_ranks -prefix {wildcards.sample}_%s {input}" % label 5 | 6 | def get_inputs(method): 7 | if method == 'knockout' or method == 'all': 8 | return 
expand("{{sample}}.{epigenome}.motif99.p_value.csv", epigenome=config['epigenome']) 9 | 10 | rule lisa_combine_motif_ranks: 11 | input: 12 | get_inputs(config['method']) 13 | output: 14 | "{sample}_motif_cauchy_combine_dedup.csv", 15 | "{sample}_motif_fisher_combine_dedup.csv" 16 | message: "combine p values of TF from motif" 17 | shell: 18 | get_combine_command('motif') 19 | -------------------------------------------------------------------------------- /lisa/rules/entropy.rule: -------------------------------------------------------------------------------- 1 | def get_entropy_command(cov, dtype, new_h5_count): 2 | frame="lisa_rank_tfs entropy --species {params.species} --epigenome {wildcards.epigenome} --coefficient {input.coef} --background {input.background} --foreground {input.foreground} --prefix {params.prefix} --dtype %s " % dtype 3 | if cov: 4 | frame+=" --covariates=True " 5 | else: 6 | frame+=" --covariates=False " 7 | 8 | if new_h5_count != None: 9 | frame+=" --new_h5 %s " % new_h5_count 10 | if new_h5_count == None: 11 | frame+=" --new_h5=None " 12 | return frame 13 | 14 | 15 | rule lisa_entropy_motif_99: 16 | input: 17 | coef="{sample}.{epigenome}.coefs.csv", 18 | background = "{sample}.background_gene.300", 19 | foreground="{sample}.foreground_gene" 20 | output: 21 | "{sample}.{epigenome}.motif99.entropy_rank.csv" 22 | message: "lisa cluster motif KL divergene ..." 23 | benchmark: 24 | "{sample}.{epigenome}.motif99.entropy.benchmark.txt" 25 | log: "{sample}.log" 26 | params: species=config["species"], prefix="{sample}" 27 | shell: 28 | get_entropy_command(config['covariates'], 'motif99', config['new_count_h5']) 29 | 30 | rule lisa_entropy_chipseqpeak: 31 | input: 32 | coef="{sample}.{epigenome}.coefs.csv", 33 | background = "{sample}.background_gene.300", 34 | foreground="{sample}.foreground_gene" 35 | output: 36 | "{sample}.{epigenome}.chipseq.entropy_rank.csv" 37 | message: "lisa cluster chip-seq KL divergene ..." 
38 | benchmark: 39 | "{sample}.{epigenome}.chipseq.entropy.benchmark.txt" 40 | log: "{sample}.log" 41 | params: species=config["species"], prefix="{sample}", 42 | shell: 43 | get_entropy_command(config['covariates'], 'chipseq', config['new_count_h5']) 44 | -------------------------------------------------------------------------------- /lisa/rules/fastq.rule: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | rule lisa_bwa_aln: 4 | input: "%s/{sample}" % (config['input_root']) 5 | output: 6 | '%s/%s/{sample}.sai' % (config['prefix'], config['epigenome']) 7 | message: "lisa mapping fastq file" 8 | params: species=config["species"], 9 | epigenome=config['epigenome'], 10 | prefix=config['prefix'], 11 | index=config['index'] 12 | threads: 4 13 | shell: 14 | "bwa aln -q 5 -l 32 -k 2 -t {threads} {params.index} {input} > {output}" 15 | 16 | rule lisa_get_bam: 17 | input: 18 | sai='%s/%s/{sample}.sai' % (config['prefix'], config['epigenome']), 19 | fastq='%s/{sample}'%(config['input_root']) 20 | output: 21 | bam='%s/%s/{sample}.bam' % (config['prefix'], config['epigenome']), 22 | bam_u='%s/%s/{sample}_se.bam' % (config['prefix'], config['epigenome']) 23 | message: "lisa output BAM file" 24 | params: species=config["species"], 25 | epigenome=config['epigenome'], 26 | prefix=config['prefix'], 27 | index=config['index'], 28 | output_uprefix='%s/%s/{sample}_se' % (config['prefix'], config['epigenome']), 29 | output_prefix='%s/%s/{sample}' % (config['prefix'], config['epigenome']) 30 | shell: 31 | """ 32 | bwa samse {params.index} {input.sai} {input.fastq} > {input.sai}.sam 33 | samtools view -q 1 -Sb {input.sai}.sam > {input.sai}.bam 34 | samtools sort -m 5000000000 {input.sai}.bam {params.output_prefix} 35 | samtools rmdup -s {output.bam} {output.bam_u} 36 | samtools index {output.bam_u} 37 | rm {input.sai}.sam 38 | """ 39 | 40 | rule lisa_get_bigwig: 41 | input: 42 | '%s/%s/{sample}_se.bam' % (config['prefix'], config['epigenome']) 43 | output: 44 | '%s/%s/{sample}.bigwig' % (config['prefix'], config['epigenome']) 45 | threads: 4 46 | shell: 47 | "bamCoverage -b {input} -p {threads} -e 146 --binSize 8 --scaleFactor 1 --normalizeUsingRPKM -o {output}" 48 | -------------------------------------------------------------------------------- /lisa/rules/hdf5.rule: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | rule lisa_reg_potential: 4 | input: "{sample}" 5 | output: 6 | '{sample}.%s.%s.reg_potential.h5' % (config['prefix'], config['epigenome']) 7 | message: "lisa compute regulatory potential from bigwig" 8 | benchmark: 9 | "{sample}.benchmark.txt" 10 | log: "{sample}.log" 11 | params: species=config["species"], 12 | epigenome=config['epigenome'], 13 | prefix=config['prefix'] 14 | shell: "lisa_bw2hdf get_regpotential_hdf --species {params.species} --epigenome {params.epigenome} --prefix {params.prefix} {input} 2>&1>>{log}" 15 | 16 | rule lisa_read_count: 17 | input: "{sample}" 18 | output: 19 | '{sample}.%s.%s.1kb_read_count.h5' % (config['prefix'], config['epigenome']) 20 | message: "lisa compute read count from bigwig" 21 | log: "{sample}.log" 22 | params: species=config["species"], 23 | epigenome=config['epigenome'], 24 | prefix=config['prefix'] 25 | shell: "lisa_bw2hdf get_readcount_hdf --species {params.species} --epigenome {params.epigenome} --prefix {params.prefix} {input} 2>&1>>{log}" 26 | 27 | rule lisa_merge_reg_potential: 28 | input: 29 | expand('{sample}.%s.%s.reg_potential.h5' % 
(config['prefix'], config['epigenome']), 30 | sample=config['bigwigs']) 31 | output: 32 | '%s.%s.reg.h5' % (config['prefix'], config['epigenome']) 33 | message: "lisa compute reg potential from bigwig" 34 | params: species=config["species"], 35 | epigenome=config['epigenome'], 36 | prefix=config['prefix'] 37 | shell: 38 | "lisa_bw2hdf merge_reg_potential_hdf --species {params.species} --epigenome {params.epigenome} --prefix {params.prefix} {input}" 39 | 40 | rule lisa_merge_read_count: 41 | input: 42 | expand('{sample}.%s.%s.1kb_read_count.h5' % (config['prefix'], config['epigenome']), 43 | sample=config['bigwigs']) 44 | output: 45 | "%s.%s.readcount.h5" % (config['prefix'], config['epigenome']) 46 | message: "lisa compute read count from bigwig" 47 | params: species=config["species"], 48 | epigenome=config['epigenome'], 49 | prefix=config['prefix'] 50 | shell: 51 | "lisa_bw2hdf merge_readcount_hdf --species {params.species} --epigenome {params.epigenome} --prefix {params.prefix} {input}" 52 | 53 | -------------------------------------------------------------------------------- /lisa/rules/knockout.rule: -------------------------------------------------------------------------------- 1 | def get_knockout_command(cov, dtype, new_h5_rp, new_h5_count): 2 | frame="lisa_rank_tfs knockout --species {params.species} --epigenome {wildcards.epigenome} --coefficient {input.coef} --background {input.background} --foreground {input.foreground} --prefix {params.prefix} --dtype %s " % dtype 3 | if cov: 4 | frame+=" --covariates=True " 5 | else: 6 | frame+=" --covariates=False " 7 | 8 | if new_h5_rp != None: 9 | frame+=" --new_h5_rp %s --new_h5_count %s " % (new_h5_rp, new_h5_count) 10 | if new_h5_rp == None: 11 | frame+=" --new_h5_rp=None --new_h5_count=None " 12 | 13 | frame += " --only_newhdf5=%s" % (str(config['only_newhdf5'])) 14 | frame += ' >> {log} ' 15 | return frame 16 | 17 | rule lisa_in_silico_knockout_motif_99: 18 | input: 19 | coef="{sample}.{epigenome}.coefs.csv", 20 | #background = "{sample}.background_gene.300", 21 | background = "{sample}.background_gene.1000", 22 | foreground="{sample}.foreground_gene" 23 | output: 24 | "{sample}.{epigenome}.motif99.csv", 25 | "{sample}.{epigenome}.motif99.p_value.csv" 26 | message: "lisa In silico Knockout motif..." 27 | benchmark: 28 | "{sample}.{epigenome}.motif99.knockout.benchmark.txt" 29 | log: "{sample}.{epigenome}.log" 30 | params: species=config["species"], prefix="{sample}" 31 | shell: 32 | get_knockout_command(config['covariates'], 'motif99', 33 | config['new_rp_h5'], config['new_count_h5']) 34 | 35 | rule lisa_in_silico_knockout_tf_chipseqpeak: 36 | input: 37 | coef="{sample}.{epigenome}.coefs.csv", 38 | #background = "{sample}.background_gene.300", 39 | background = "{sample}.background_gene.1000", 40 | foreground="{sample}.foreground_gene" 41 | output: 42 | "{sample}.{epigenome}.chipseq.csv", 43 | "{sample}.{epigenome}.chipseq.p_value.csv" 44 | message: "lisa In silico Knockout chip-seq ..." 
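    # Sketch of the generated in-silico knockout command (assuming covariates=False,
    # new_rp_h5=None and only_newhdf5=False in the config): get_knockout_command() turns
    # the shell directive below into roughly:
    #   lisa_rank_tfs knockout --species {species} --epigenome {epigenome} \
    #     --coefficient {sample}.{epigenome}.coefs.csv \
    #     --background {sample}.background_gene.1000 --foreground {sample}.foreground_gene \
    #     --prefix {sample} --dtype chipseq --covariates=False \
    #     --new_h5_rp=None --new_h5_count=None --only_newhdf5=False >> {log}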
45 | benchmark: 46 | "{sample}.{epigenome}.chipseq.knockout.benchmark.txt" 47 | log: "{sample}.{epigenome}.log" 48 | params: species=config["species"], prefix="{sample}", 49 | shell: 50 | get_knockout_command(config['covariates'], 'chipseq', 51 | config['new_rp_h5'], config['new_count_h5']) 52 | -------------------------------------------------------------------------------- /lisa/rules/lisa_direct.rule: -------------------------------------------------------------------------------- 1 | rule lisa_direct_beta: 2 | input: 3 | #background = "{sample}.background_gene.300", 4 | background = "{sample}.background_gene.3000", 5 | foreground="{sample}.foreground_gene" 6 | output: 7 | "{sample}.3000.lisa_direct.csv" 8 | message: "lisa rank TF from peak directly..." 9 | benchmark: 10 | "{sample}.lisa_direct.benchmark.txt" 11 | log: "{sample}.log" 12 | params: species=config["species"], prefix="{sample}.3000" 13 | shell: 14 | "lisa_rank_tfs direct --species {params.species} --background {input.background} --foreground {input.foreground} --prefix {params.prefix} 2>&1>>{log}" 15 | 16 | 17 | rule lisa_direct_beta2: 18 | input: 19 | background = "{sample}.background_gene.1000", 20 | foreground="{sample}.foreground_gene" 21 | output: 22 | "{sample}.1000.lisa_direct.csv" 23 | message: "lisa rank TF from peak directly..." 24 | benchmark: 25 | "{sample}.lisa_direct.benchmark.txt" 26 | log: "{sample}.log" 27 | params: species=config["species"], prefix="{sample}.1000" 28 | shell: 29 | "lisa_rank_tfs direct --species {params.species} --background {input.background} --foreground {input.foreground} --prefix {params.prefix} 2>&1>>{log}" 30 | -------------------------------------------------------------------------------- /lisa/rules/model.rule: -------------------------------------------------------------------------------- 1 | def get_command(cov, new_h5): 2 | frame="lisa_model --sample_number {params.sample} --species {params.species} --epigenome {params.epigenome} --gene_set {input.gene_set} --prefix {params.prefix} --foreground {input.foreground} --background {input.background} " 3 | if cov: 4 | frame+=" --covariates=True " 5 | else: 6 | frame+=" --covariates=False " 7 | 8 | if new_h5 != None: 9 | frame+=" --new_h5 %s " % new_h5 10 | if new_h5 == None: 11 | frame+=" --new_h5=None " 12 | 13 | frame+=" --only_newhdf5=%s " % (str(config['only_newhdf5'])) 14 | return frame 15 | 16 | checkpoint lisa_regress: 17 | input: 18 | gene_set = "{sample}", 19 | background = "{sample}.background_gene.3000", 20 | foreground = "{sample}.foreground_gene" 21 | output: 22 | "{sample}.{epigenome}.coefs.csv", 23 | "{sample}.{epigenome}.lisa_predicted_rp.csv" 24 | message: "lisa regression steps..." 
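    # Sketch of the regression command (assuming covariates=False, new_rp_h5=None and
    # only_newhdf5=False): get_command() expands the shell directive below to roughly:
    #   lisa_model --sample_number {sample_number} --species {species} \
    #     --epigenome {epigenome} --gene_set {sample} --prefix {sample}.{epigenome} \
    #     --foreground {sample}.foreground_gene --background {sample}.background_gene.3000 \
    #     --covariates=False --new_h5=None --only_newhdf5=False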
25 | benchmark: 26 | "{sample}.{epigenome}.benchmark.txt" 27 | log: "{sample}.{epigenome}.log" 28 | params: species=config["species"], 29 | epigenome="{epigenome}", 30 | prefix="{sample}.{epigenome}", 31 | sample=config["sample_number"] 32 | shell: 33 | get_command(config['covariates'], config['new_rp_h5']) 34 | -------------------------------------------------------------------------------- /lisa/utils.py: -------------------------------------------------------------------------------- 1 | """ lisa utlity functions """ 2 | from multiprocessing import Pool, cpu_count 3 | import math 4 | import numpy as np 5 | 6 | from scipy.stats import wilcoxon, ks_2samp 7 | import scipy 8 | import pandas as pd 9 | 10 | def multiple_apply(func, df, x, y, num_processes=None): 11 | ''' Apply a function separately to each column in a dataframe, in parallel.''' 12 | # If num_processes is not specified, default to minimum(#columns, #machine-cores) 13 | if num_processes==None: 14 | #num_processes = min(df.shape[1], cpu_count()) 15 | num_processes = 5 16 | 17 | # 'with' context manager takes care of pool.close() and pool.join() for us 18 | with Pool(num_processes) as pool: 19 | # we need a sequence of columns to pass pool.map 20 | seq = [[df[col_name][x].values, df[col_name][y].values] for col_name in df.columns] 21 | # pool.map returns results as a list 22 | results_list = pool.map(func, seq) 23 | # return list of processed columns, concatenated together as a new dataframe 24 | return pd.DataFrame(results_list, index=df.columns) 25 | 26 | 27 | def convert_name(name): 28 | try: 29 | name = name.decode('utf-8').replace("tf_", "") 30 | except: 31 | name = name.replace("tf_", "") 32 | return name 33 | 34 | def one_side_ks_test(x, y): 35 | """ http://stackoverflow.com/questions/16296225/one-sided-wilcoxon-signed-rank-test-using-scipy 36 | So, to get one-side p value, you just need prob/2. or 1-prob/2. 
37 | 38 | here: one-side significant less x < y 39 | """ 40 | test = ks_2samp(x, y) 41 | d = test[0] 42 | p = test[1]/2 43 | return p 44 | 45 | def mannwhitneyu_test(x,y,how="two-sided"): 46 | try: 47 | return scipy.stats.mannwhitneyu(x,y,alternative=how)[1] 48 | except: 49 | return 1 50 | 51 | def binarize_gene_set(gene_set, *args): 52 | """ gene_set: one gene per line 53 | """ 54 | #print(gene_set) 55 | refseq, symbol = args 56 | with open(gene_set) as fin: 57 | gene_set = list(set([line.strip().upper() for line in fin])) 58 | gene_vec = np.zeros(len(refseq)) 59 | if len(np.intersect1d(refseq, gene_set)) > 5: 60 | #print('input refseq ...') 61 | gene_vec[np.in1d(refseq, gene_set)] = 1 62 | elif len(np.intersect1d(symbol, gene_set)) > 5: 63 | #print('input symbol ...') 64 | gene_vec[np.in1d(symbol, gene_set)] = 1 65 | else: 66 | raise Exception("no genes found in referenence...") 67 | return gene_vec 68 | 69 | class Weight: 70 | """ Exponential decay function """ 71 | def __init__(self, bin_length=1000): 72 | padding = int(1e5) # TSS +/- 100kb 73 | assert bin_length > 0 74 | assert (2*padding+bin_length)%bin_length == 0 75 | 76 | self.bin_length = bin_length 77 | self.bin_num = (2*padding+bin_length)/bin_length # bin number 78 | 79 | distances = np.array([z + bin_length/2 for z in 80 | range(int(-padding-bin_length/2), 81 | int(padding+bin_length/2), bin_length)], 82 | dtype=np.float32) 83 | self.alpha = -math.log(1.0/3.0)*10 # 1e5/1e4, 1e4: half decay 84 | self.balance_weight(distances) # weight 85 | 86 | def get_weight(self): 87 | """ get the weight """ 88 | return self.weight 89 | 90 | def get_binnum(self): 91 | """ get the bin number around TSS """ 92 | return self.bin_num 93 | 94 | def balance_weight(self, distances): 95 | """ function to balance weight according the TSS and bin center offset 96 | """ 97 | weight = np.exp(-np.fabs(distances) * self.alpha/1e5) 98 | self.weight = 2*weight/ (1+weight) 99 | -------------------------------------------------------------------------------- /lisa/workflows/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa/workflows/__init__.py -------------------------------------------------------------------------------- /lisa/workflows/cluster.json: -------------------------------------------------------------------------------- 1 | { 2 | "__default__" : 3 | { 4 | "queue" : "general", 5 | "nCPUs" : "3", 6 | "memory" : "16g", 7 | "time" : "24:00:00", 8 | "name" : "{rule}.{wildcards.sample}", 9 | "output" : "logs/cluster/{rule}.{wildcards.sample}.%j.out", 10 | "error" : "logs/cluster/{rule}.{wildcards.sample}.%j.err" 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /lisa/workflows/sbatch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export PATH=/n/home04/xiaoleliu/ChiLin/alvin/xiaoleliu_lab/marge2/phaseI_init/miniconda3/bin:$PATH 4 | source activate lisa 5 | 6 | mkdir -p logs/cluster 7 | snakemake --unlock 8 | snakemake -j 3 --cluster-config cluster.json --immediate-submit --cluster "sbatch --time={cluster.time} --mem={cluster.memory} --partition={cluster.queue} --cpus-per-task={cluster.nCPUs} -J {cluster.name} -o {cluster.output} -e {cluster.error}" 9 | 10 | -------------------------------------------------------------------------------- /lisa/workflows/sbatch_dependency.py: 
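SLURM wrapper used with Snakemake's --immediate-submit mode (see the docstring below for the invocation): it pulls the per-rule time/memory/queue/CPU settings from the cluster.json-backed job properties via read_job_properties(), submits the generated job script with sbatch, adds "-d afterok:<job ids>" when upstream job ids were passed in through the {dependencies} placeholder, and prints the new SLURM job id back to Snakemake so downstream jobs can depend on it.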
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Submit this clustering script for sbatch to snakemake with: 4 | snakemake -j 99 --debug --immediate-submit --cluster-config cluster.json --cluster 'sbatch_script.py {dependencies}' 5 | """ 6 | ## In order to submit all the jobs to the moab queuing system, one needs to write a wrapper. 7 | import sys 8 | import subprocess 9 | import re 10 | import os 11 | from snakemake.utils import read_job_properties 12 | import argparse 13 | 14 | parser = argparse.ArgumentParser(description='Snakemake script') 15 | parser.add_argument("dependencies", nargs="*", help="{{dependencies}} string given by snakemake\n") 16 | parser.add_argument("snakescript", help="Snakemake generated shell script with commands to execute snakemake rule\n") 17 | 18 | args = parser.parse_args() 19 | 20 | dependencies = args.dependencies 21 | jobscript = args.snakescript 22 | print(dependencies, file=sys.stderr) 23 | print(jobscript, file=sys.stderr) 24 | 25 | job_properties = read_job_properties(jobscript) 26 | # access property defined in the cluster configuration file (Snakemake >=3.6.0), cluster.json 27 | time = job_properties["cluster"]["time"] 28 | cpu = job_properties["cluster"]["nCPUs"] 29 | mem = job_properties["cluster"]["memory"] 30 | queue = job_properties["cluster"]["queue"] 31 | name = job_properties["cluster"]["name"] 32 | output = job_properties["cluster"]["output"] 33 | error = job_properties["cluster"]["error"] 34 | 35 | # all figure out job dependencies, the last argument is the jobscript which is baked in snakemake 36 | if dependencies == None or len(dependencies) < 1: 37 | deps = " " 38 | else: 39 | deps = " -d " + ','.join(["afterok:%s" % d for d in dependencies]) 40 | 41 | print(job_properties['rule'], file=sys.stderr) 42 | if job_properties['rule'].startswith('merge'): 43 | cmdline = 'sbatch --time={time} {deps} --mem=50 --partition={queue} --cpus-per-task=1 -J {name} -o {output} -e {error} --open-mode=append {job}'.format(name=name, time = time, queue=queue, output=output, error=error, deps=deps, job=jobscript) 44 | else: 45 | cmdline = 'sbatch --time={time} {deps} --mem={mem} --partition={queue} --cpus-per-task={cpu} -J {name} -o {output} -e {error} --open-mode=append {job}'.format(mem=mem, cpu=cpu, name=name, time = time, queue=queue, output=output, error=error, deps=deps, job=jobscript) 46 | 47 | popenrv = subprocess.Popen(cmdline, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True).communicate() 48 | 49 | print(cmdline, file=sys.stderr) 50 | #(b'Submitted batch job 86634327\n', None) 51 | print(popenrv, file=sys.stderr) 52 | print("%i" % int(popenrv[0].strip().split()[-1]), file=sys.stderr) 53 | print("%i" % int(popenrv[0].strip().split()[-1])) 54 | -------------------------------------------------------------------------------- /lisa/workflows/sbatch_dependency.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export PATH=/n/home04/xiaoleliu/ChiLin/alvin/xiaoleliu_lab/marge2/phaseI_init/miniconda3/bin:$PATH 4 | source activate lisa 5 | 6 | mkdir -p logs/cluster 7 | #snakemake --unlock 8 | #parallel simple job 9 | #snakemake -j 150 --cluster-config ../cluster.json --immediate-submit --cluster "sbatch --time={cluster.time} --mem={cluster.memory} --partition={cluster.queue} --cpus-per-task={cluster.nCPUs} -J {cluster.name} -o {cluster.output} -e {cluster.error} --open-mode=append" 10 | 11 | #with 
dependencies/multi-dependencies on 12 | split -d -l 100 ../creeds_tf.txt ../creeds_tf.txt. 13 | for i in ../creeds_tf.txt.*;do 14 | echo "------" 15 | echo $i 16 | echo "------" 17 | snakemake --config gene_list=${i} -j 50 --immediate-submit --cluster-config ../cluster.json --cluster "export PATH=/n/home04/xiaoleliu/ChiLin/alvin/xiaoleliu_lab/marge2/phaseI_init/miniconda3/bin:$PATH;source activate lisa; ../sbatch_script.py {dependencies}" 18 | break 19 | done 20 | 21 | -------------------------------------------------------------------------------- /lisa_docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = lisa 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /lisa_docs/source/FAQ.rst: -------------------------------------------------------------------------------- 1 | 2 | Issues 3 | --------- 4 | Genes in the gene set should not be less than 20. 5 | -------------------------------------------------------------------------------- /lisa_docs/source/Installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | =============== 3 | 4 | Mac 5 | --------- 6 | 7 | .. code-block:: bash 8 | :linenos: 9 | 10 | brew install openssl 11 | export C_INCLUDE_PATH=${C_INCLUDE_PATH}:/usr/local/Cellar/openssl/your_version/include 12 | export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/Cellar/openssl/your_version/lib/" 13 | 14 | Linux 15 | --------- 16 | 17 | .. code-block:: bash 18 | :linenos: 19 | 20 | sudo apt-get install openssl 21 | 22 | Install conda python 3.6 23 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 24 | 25 | Follow the instruction: https://conda.io/miniconda.html to install python 3.6. 26 | 27 | other dependency 28 | ~~~~~~~~~~~~~~~~~~~ 29 | 30 | .. code-block:: bash 31 | :linenos: 32 | 33 | wget -c https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 34 | bash Miniconda3-latest-Linux-x86_64.sh 35 | 36 | .. code-block:: bash 37 | :linenos: 38 | 39 | # install miniconda3 40 | export PATH="${HOME}/miniconda3/bin:$PATH" 41 | conda install anaconda-client 42 | conda create -n lisa anaconda python=3 43 | source activate lisa 44 | conda install -c anaconda openssl 45 | conda install -c anaconda curl 46 | 47 | conda config --add channels defaults 48 | conda config --add channels conda-forge 49 | conda config --add channels bioconda 50 | conda install blas mkl-service 51 | 52 | # this is for curl and openssl header files 53 | export C_INCLUDE_PATH=${C_INCLUDE_PATH}:/usr/include/:${HOME}/.local/include:${HOME}/miniconda3/envs/lisa/include 54 | 55 | pip install deeptools 56 | pip install theano 57 | pip install fire 58 | pip install psutil 59 | pip install numpy 60 | pip install scipy 61 | pip install sklearn 62 | 63 | 64 | Install the module using: 65 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 66 | 67 | .. 
code-block:: bash 68 | :linenos: 69 | 70 | git clone https://github.com/qinqian/lisa 71 | cd lisa 72 | python setup.py install --user 73 | 74 | 75 | Get dependent data 76 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 77 | The related chromatin profile dataset will be released later, use LISA_ now. 78 | 79 | .. _LISA: http://lisa.cistrome.org 80 | -------------------------------------------------------------------------------- /lisa_docs/source/_static/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/1.png -------------------------------------------------------------------------------- /lisa_docs/source/_static/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/2.png -------------------------------------------------------------------------------- /lisa_docs/source/_static/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/3.png -------------------------------------------------------------------------------- /lisa_docs/source/_static/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/4.png -------------------------------------------------------------------------------- /lisa_docs/source/_static/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/5.png -------------------------------------------------------------------------------- /lisa_docs/source/_static/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/6.png -------------------------------------------------------------------------------- /lisa_docs/source/_static/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/7.png -------------------------------------------------------------------------------- /lisa_docs/source/_static/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/8.png -------------------------------------------------------------------------------- /lisa_docs/source/_static/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_docs/source/_static/9.png -------------------------------------------------------------------------------- /lisa_docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. lisa documentation master file, created by 2 | sphinx-quickstart on Fri Jul 28 19:59:12 2017. 
3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to lisa's documentation! 7 | ================================ 8 | 9 | LISA involve four methods to discover potential enhancer and rank TFs. If get stuck, try to create issues at https://github.com/qinqian/lisa. 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | :caption: Contents: 14 | 15 | Installation 16 | Tutorial 17 | 18 | Indices and tables 19 | ================== 20 | 21 | * :ref:`genindex` 22 | * :ref:`modindex` 23 | * :ref:`search` 24 | -------------------------------------------------------------------------------- /lisa_web/generate_heatmap_js.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from clustergrammer import Network 3 | import numpy as np 4 | import collections 5 | import argparse 6 | 7 | p = argparse.ArgumentParser() 8 | p.add_argument('-d') 9 | args = p.parse_args() 10 | 11 | net = Network() 12 | delta = pd.read_csv(args.d, index_col=0) 13 | status = delta.iloc[:-1, -1].values.reshape(-1) 14 | delta_f = delta.iloc[:-1, :-1] 15 | 16 | tf = delta_f.columns.map(lambda x: x.split('|')[1]) 17 | tf_dict = collections.OrderedDict() 18 | for i, t in enumerate(tf): 19 | tf_dict[t] = tf_dict.get(t, []) + [i] 20 | ids = [] 21 | for t in tf_dict: 22 | ids.append(tf_dict[t][:3]) 23 | ids = np.concatenate(ids) 24 | 25 | delta_f = delta_f.iloc[:, ids] 26 | 27 | target, = np.where(status == 1) 28 | cont, = np.where(status == 0) 29 | 30 | print(target.shape) 31 | if len(target) < 100: 32 | target_n = len(target) 33 | else: 34 | target_n = 100 35 | 36 | index = np.concatenate([np.random.choice(target, target_n), np.random.choice(cont, 100)]) 37 | status = status[index] 38 | delta_f = delta_f.iloc[index, :50] 39 | 40 | ann = pd.read_table('/data/home/qqin/01_Projects/Programming/dc2/scripts/hg38_best_dc_tfcr_basedon_frip_peak_dhs_all_nonhm_nonca.xls') 41 | 42 | ann = ann.iloc[:, [0, 6, 8]] 43 | 44 | ann_dict = {} 45 | for i in range(ann.shape[0]): 46 | ann_dict[str(ann.iloc[i, 0])]= ann.iloc[i, 1:] 47 | 48 | tf = delta_f.columns.map(lambda x: "TF: %s" % x.split('|')[1]) 49 | genes = delta_f.index.map(lambda x:x.split(':')[-1]) 50 | 51 | genes, index = np.unique(genes, return_index=True) 52 | status = status[index] 53 | 54 | tfs = [] 55 | for i,j in enumerate(tf): 56 | tfs.append("%s.%s" % (j, i)) 57 | 58 | ids = delta_f.columns.map(lambda x:x.split('|')[0]) 59 | fout = open("%s_heatmap_matrix.txt" % args.d, 'w') 60 | fout.write("\t\t%s\n" % ('\t'.join(tfs))) 61 | 62 | cls = [] 63 | for i in ids: 64 | if ann_dict.get(i, ['NA'])[0] == 'NA': 65 | cls.append("Cell Line: %s" % ('NA')) 66 | else: 67 | cls.append("Cell Line: %s" % (ann_dict[i][0])) 68 | fout.write("\t\t%s\n" % ('\t'.join(cls))) 69 | 70 | ts = [] 71 | for i in ids: 72 | if ann_dict.get(i, ['NA', 'NA'])[1] == 'NA': 73 | ts.append("Tissue: %s" % ('NA')) 74 | else: 75 | ts.append("Tissue: %s" % (ann_dict[i][1])) 76 | fout.write("\t\t%s\n" % ('\t'.join(ts))) 77 | 78 | for i in range(status.shape[0]): 79 | fout.write('%s\t%s\t%s\n' % ("Gene: %s"% genes[i], "Input Gene: %s" % status[i], '\t'.join(delta_f.iloc[i, :].map(str)))) 80 | fout.close() 81 | 82 | net.load_file("%s_heatmap_matrix.txt" % args.d) 83 | net.cluster() 84 | net.write_json_to_file('viz', '%s_mult_view.json' % args.d) 85 | 86 | -------------------------------------------------------------------------------- /lisa_web/lisa_scatter.py: 
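Small plotting helper with hard-coded input file names: it merges the lisa_direct p-value tables of an up- and a down-regulated gene set on their first column, then uses plotly to draw an interactive scatter of -log10(p) for one gene set against the other, with the merged identifier column shown as hover text.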
-------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot 4 | from plotly import tools 5 | 6 | from plotly.graph_objs import Scatter, Heatmap 7 | 8 | up=pd.read_csv('3_down.gene_symbol.lisa_direct.csv', header=None) 9 | dn=pd.read_csv('3_up.gene_symbol.lisa_direct.csv', header=None) 10 | 11 | print(up.head()) 12 | final = up.merge(dn, on=0) 13 | print(final.head()) 14 | trace0 = Scatter(x=-np.log10(final.iloc[:, 1]), 15 | y=-np.log10(final.iloc[:, 2]), mode= 'markers', 16 | marker= dict(size= 9, 17 | opacity= 0.9, 18 | line = dict(width = 0.8) 19 | ), 20 | text=final.iloc[:, 0], 21 | xaxis="Up-regulated gene set results", yaxis="Down-regulated gene set results") 22 | 23 | plot([trace0], filename='test.html') 24 | -------------------------------------------------------------------------------- /lisa_web/lisa_web.conf: -------------------------------------------------------------------------------- 1 | 2 | WSGIDaemonProcess lisa user=qqin group=lab threads=8 3 | WSGIScriptAlias / /project/Cistrome/LISA/lisa_web/lisa_web.wsgi 4 | WSGIScriptReloading On 5 | 6 | ServerName lisa.cistrome.org 7 | DocumentRoot /project/Cistrome/LISA/lisa_web/ 8 | LogLevel Debug 9 | CustomLog /project/Cistrome/LISA/access.log combined 10 | ErrorLog /project/Cistrome/LISA/error.log 11 | ServerSignature On 12 | 13 | 14 | require all granted 15 | 16 | 17 | 18 | AllowOverride AuthConfig Limit Indexes Options 19 | Options +ExecCGI -MultiViews +SymLinksIfOwnerMatch 20 | Require ip 155.52.47.121 21 | Require ip 127 22 | 23 | Require all granted 24 | 25 | 26 | Alias /data5/lisa_browser /project/Cistrome/LISA/lisa_bw 27 | 28 | 29 | AuthType Basic 30 | AuthName "Restricted Content" 31 | AuthUserFile /project/Cistrome/LISA/lisa_web/cistromedb_data/.htpasswd 32 | Require user lisa 33 | AllowOverride AuthConfig Limit Indexes Options 34 | Options +ExecCGI -MultiViews +SymLinksIfOwnerMatch 35 | # 36 | # Require all granted 37 | # 38 | 39 | Alias /cistromedb_data /project/Cistrome/LISA/lisa_web/cistromedb_data 40 | 41 | 42 | -------------------------------------------------------------------------------- /lisa_web/lisa_web.wsgi: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | sys.stdout = sys.stderr 4 | p = '/data/home/qqin/lisa_web' 5 | 6 | activate_this = os.path.join('/data/home/qqin/rabit/rabitqqin/', 'bin', 'activate_this.py') 7 | execfile(activate_this, dict(__file__=activate_this)) 8 | 9 | sys.path.append(p) 10 | 11 | from lisa_web import app as application 12 | 13 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/__init__.pyc -------------------------------------------------------------------------------- /lisa_web/lisa_web/__init__.py~: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from logging.handlers import RotatingFileHandler 4 | import time 5 | import numpy as np 6 | 7 | from flask import Flask, render_template, redirect, url_for, send_from_directory 8 | from flask import request 9 | from flask_bootstrap import Bootstrap 10 | 11 | 12 | from flask_wtf import FlaskForm 13 | from 
flask_wtf.file import FileField, FileRequired 14 | from werkzeug.utils import secure_filename 15 | 16 | from wtforms import StringField 17 | from wtforms.validators import DataRequired 18 | 19 | class RabitForm(FlaskForm): 20 | name = StringField('Job Name', validators=[DataRequired()]) 21 | gene = FileField('Select Rabit input file', validators=[FileRequired()], render_kw={'multiple': True, 'data-preview-file-type':"text"}) 22 | 23 | # initialize an application 24 | app = Flask(__name__, instance_relative_config = True) 25 | app.config['UPLOADED_PATH'] = 'upload' 26 | app.secret_key = 's3cr3t' # crsf 27 | 28 | # debug mode on 29 | app.debug = True 30 | if not app.debug: 31 | app.logger.setLevel(logging.INFO) 32 | handler = RotatingFileHandler('log', maxBytes=10000000, backupCount=20) 33 | formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s: %(message)s") 34 | handler.setLevel(logging.INFO) 35 | handler.setFormatter(formatter) 36 | app.logger.addHandler(handler) 37 | 38 | @app.errorhandler(500) 39 | def internal_error(exception): 40 | app.logger.exception(exception) 41 | return "Sorry internal program error", 500 42 | 43 | @app.errorhandler(404) 44 | def page_not_found(e): 45 | return render_template('404.html'), 404 46 | 47 | @app.route('/', methods=['GET', 'POST']) 48 | def upload_file(): 49 | form = RabitForm() 50 | if form.validate_on_submit(): 51 | f = form.gene.data 52 | filename = secure_filename(f.filename) 53 | data = os.path.join(app.config['UPLOADED_PATH'], "%s.%s" % (f.filename, time.time())) 54 | f.save(data) 55 | app.logger.info("%s uploaded at %s" % (data, time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))) 56 | os.system('cp %s download' % data) 57 | 58 | app.logger.info("lisa modeling finished %s" % (data, time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))) 59 | output = "%s.o.t" % data 60 | with open("download/%s.txt" % os.path.basename(output), 'w') as out: 61 | with open(output) as inf: 62 | n = 0 63 | lines = inf.readlines() 64 | sample = np.array(lines[0].split()) 65 | t_vals = np.array(map(float, lines[1].split()[1:])) 66 | index = np.argsort(t_vals)[::-1] 67 | sample = sample[index] 68 | t_vals = t_vals[index] 69 | for i,j in zip(sample, t_vals): 70 | print >>out, "%s\t%s" % (i, j) 71 | return redirect(url_for('custom_download', filename="%s.txt" % os.path.basename(output))) 72 | return render_template('index.html', form = form) 73 | 74 | @app.route('/success', methods=['GET', 'POST']) 75 | def sucess(): 76 | return '

Succeed
' 77 | 78 | # add new static folder 79 | @app.route('/img/') 80 | def custom_static(filename): 81 | return send_from_directory('img', filename) 82 | 83 | # add new static folder 84 | @app.route('/download/') 85 | def custom_download(filename): 86 | return send_from_directory('download', filename) 87 | 88 | # interface 89 | Bootstrap(app) 90 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/check_genename.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def check_available_genes(genes, species='hg38'): 4 | if genes[0].startswith('ENSG'): 5 | genes = list(map(lambda x: x.split('.')[0], genes)) 6 | if species == 'hg38': 7 | ensemble = pd.read_csv('/project/Cistrome/LISA/lisa_web/download/Homo97_Ensembl.txt', sep='\t') 8 | else: 9 | ensemble = pd.read_csv('/project/Cistrome/LISA/lisa_web/download/Mus97_Ensembl.txt', sep='\t') 10 | ensemble.iloc[:, 0] = ensemble.iloc[:, 0].map(lambda x: x.split('.')[0]) 11 | symbols = ensemble.loc[ensemble.iloc[:, 0].isin(genes), 'gene_name'] 12 | return list(set(symbols)) 13 | else: 14 | return list(set(genes)) 15 | 16 | def clean_empty_lins(genes): 17 | filtered_genes = filter(lambda x:x!='', genes) 18 | return list(set(filtered_genes)) 19 | 20 | 21 | if __name__ == '__main__': 22 | # print(clean_empty_lins(['a', 'b', 'c', ''])) 23 | # print(check_available_genes(['ENSG00000174837', 24 | # 'ENSG00000232702', 25 | # 'ENSG00000172738'], 'human')) 26 | print(check_available_genes(['AR', 27 | 'FOXA1', 28 | 'TP53'], 'mouse')) 29 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/fonts/FontAwesome.otf -------------------------------------------------------------------------------- /lisa_web/lisa_web/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /lisa_web/lisa_web/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /lisa_web/lisa_web/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /lisa_web/lisa_web/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /lisa_web/lisa_web/form.py: -------------------------------------------------------------------------------- 1 | from 
flask_wtf import FlaskForm 2 | from flask_wtf.file import FileField, FileRequired 3 | from wtforms import TextAreaField, BooleanField, SubmitField, SelectMultipleField, SelectField, StringField 4 | from wtforms.validators import DataRequired, Required, length, optional, Email 5 | from wtforms.fields.html5 import EmailField 6 | 7 | class LISAForm(FlaskForm): 8 | genes = TextAreaField('Genes', validators=[Required()]) 9 | labels = StringField('labels', validators=[optional()]) 10 | 11 | genes2 = TextAreaField('Genes2', validators=[optional()]) 12 | labels2 = StringField('labels 2', validators=[optional()]) 13 | 14 | background = TextAreaField('Background', validators=[optional()]) 15 | 16 | name = StringField('Job Name', validators=[optional()]) ## change to optional and give out a warning information 17 | mail = EmailField('Optional email', validators=[optional(), Email()]) 18 | method = SelectField("Methods", 19 | choices=[('knockout', 'ISD-RP for both motif and ChIP-seq'), 20 | ('beta', 'TF ChIP-seq Peak-RP'), 21 | ('all', 'All')], 22 | default='all') 23 | mark = SelectField("Chromatin profile", 24 | choices=[('H3K27ac', 'H3K27ac'), 25 | ('DNase', 'DNase-seq'), 26 | ('All', 'All'), 27 | #('H3K4me3', 'H3K4me3'), 28 | #('H3K27me3', 'H3K27me3'), 29 | #('H3K4me1', 'H3K4me1') 30 | #('ATAC-seq', 'ATAC-seq'), 31 | ], validators=[Required()], default='All') 32 | 33 | species = SelectField("Species", choices=[('hg38', 'Human'), ('mm10', 'Mouse')], default='hg38') 34 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/form.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/form.pyc -------------------------------------------------------------------------------- /lisa_web/lisa_web/images/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/images/1.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/images/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/images/2.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/images/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/images/3.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/images/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/images/4.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/images/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/images/5.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/images/6.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/images/6.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/images/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/images/7.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/images/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/images/8.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/mail.py: -------------------------------------------------------------------------------- 1 | import smtplib 2 | from email.mime.base import MIMEBase 3 | from email.mime.multipart import MIMEMultipart # 3.0 4 | from email.mime.text import MIMEText 5 | 6 | #from email.Utils import COMMASPACE, formatdate 7 | import datetime 8 | from email import encoders 9 | 10 | COMMASPACE = ', ' 11 | 12 | 13 | def send_localhost_mail(resultOpt, subject, to, html, attachment, server="localhost"): 14 | msg = MIMEMultipart('alternative') 15 | fro = 'lisa@cistrome.org' 16 | msg['From'] = fro 17 | msg['To'] = COMMASPACE.join(to) 18 | #msg['Date'] = formatdate(localtime=True) 19 | msg['Subject'] = subject 20 | 21 | if resultOpt == 'html': 22 | msg.attach( MIMEText(html, 'html') ) 23 | else: 24 | import tempfile 25 | try: 26 | temp = tempfile.TemporaryFile() 27 | temp.write(attachment) 28 | temp.seek(0) 29 | part = MIMEBase('application', "octet-stream") 30 | part.set_payload( temp.read() ) 31 | encoders.encode_base64(part) 32 | part.add_header('Content-Disposition', 'attachment; filename="%s"' 33 | % subject + ".xls") 34 | msg.attach(part) 35 | finally: 36 | temp.close() 37 | 38 | smtp = smtplib.SMTP(server) 39 | smtp.sendmail(fro, to, msg.as_string()) 40 | smtp.close() 41 | 42 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | #python generate_gallery.py 4 | 5 | #python generate_gallery2.py 6 | 7 | while read line; do 8 | fs=($line) 9 | echo ${fs[2]} | tr ',' '\n' > ${fs[0]}_${fs[1]}.txt 10 | done < <(cut -f 1,9,12 lisa_results_meta_table_mouse_with_gene_sets.xls | sed 1d | sort -k 1 | uniq) 11 | 12 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/1.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/2.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/3.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/4.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/5.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/6.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/7.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/8.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/Figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/Figure1.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/Figure2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/Figure2.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/Figure3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/Figure3.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/Figure4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/Figure4.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/Figure5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/Figure5.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/Figure6.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/Figure6.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/Figure7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/Figure7.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/Figure8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/Figure8.png -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/MACRO_ape_all_cistrome_pwm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | 4 | mkdir -p macro_ape_cistrome 5 | for i in cistrome/*pwm;do 6 | pi=$(basename ${i/.pwm/}) 7 | for j in cistrome/*pwm;do 8 | pj=$(basename ${j/.pwm/}) 9 | java -cp ape-2.0.1.jar ru.autosome.macroape.EvalSimilarity $i $j 1>macro_ape_cistrome/${pi}_${pj}_macro_ape.txt 10 | done 11 | done 12 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/gallery.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/gallery.js -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/gallery.js~: -------------------------------------------------------------------------------- 1 | // The table generation function 2 | function tabulate(cl, data, columns, interact) { 3 | var table = d3.select("." 
+ cl).append("table") 4 | .attr("class", "table compact hover row-border tab" + cl), 5 | thead = table.append("thead"), 6 | tbody = table.append("tbody").attr("class", "tbody"); 7 | 8 | // append the header row 9 | thead.append("tr") 10 | .selectAll("th") 11 | .data(columns) 12 | .enter() 13 | .append("th") 14 | .text(function(column) { return column; }); 15 | // create a row for each object in the data 16 | var rows = tbody.selectAll("tr") 17 | .data(data) 18 | .enter() 19 | .append("tr"); 20 | 21 | // create a cell in each row for each column 22 | var cells = rows.selectAll("td") 23 | .data(function(row) { 24 | return columns.map(function(column) { 25 | return {column: column, value: row[column]}; 26 | }); 27 | }) 28 | .enter() 29 | .append("td") 30 | .style({ 31 | "vertical-align": "middle" 32 | }) 33 | .attr({ 34 | data_id: function(d) { return d.value.split(';')[0].split('|')[0]; } 35 | }) 36 | .html(function(d) { 37 | if (d.column != 'Transcription Factor') { 38 | if (interact){ 39 | a = d.value; 40 | a = a.split(';'); 41 | if (a.length == 2) { 42 | return a[1]; 43 | } else { 44 | return d.value; 45 | } 46 | } else { 47 | if (cl == "tf2") { 48 | if (d.value.split(';').length==2) { 49 | return d.value.split(';')[1] + ""; 50 | } else { 51 | return ""; 52 | } 53 | } // for motifs 54 | else { 55 | a = d.value.split('|'); 56 | if (a.length == 2) { 57 | return a[1]; 58 | } else { 59 | return d.value; 60 | } 61 | } 62 | } 63 | } 64 | return d.value; 65 | }); 66 | 67 | if (cl != "tfl") { 68 | 69 | if (cl != "tf2") { 70 | $('.tab' + cl).ready(function() { 71 | $('.tab'+ cl).DataTable({ 72 | "order": [], 73 | }); 74 | }); 75 | } else { 76 | $('.tab' + cl).ready(function() { 77 | $('.tab'+ cl).DataTable({ 78 | "order": [], 79 | "columnDefs": [ 80 | { "width": "20%", "targets": 0 } 81 | ] 82 | }); 83 | }); 84 | } 85 | } 86 | return table; 87 | } 88 | 89 | function update_progress(status_url, status_div, div_heatmap_data) { 90 | // /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random 91 | $(".gallery").ready({ 92 | d3.csv('/gallery/lisa_results_meta_table_human_with_gene_sets.csv', function(error, d) { 93 | tabulate('gallery', d, ["Transcription Factor", "1st Sample p-value", "2nd Sample p-value", "3rd Sample p-value", "4th Sample p-value", "5th Sample p-value"], false, 'gallery'); 94 | }); 95 | }); 96 | }; 97 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/hzome_functions.js: -------------------------------------------------------------------------------- 1 | function ini_hzome(root_id){ 2 | 3 | // save gene data to global variable 4 | gene_data = {}; 5 | 6 | function get_mouseover(root_tip, gene_symbol){ 7 | 8 | // not sure if this is necessary 9 | if ( d3.select(root_tip + '_row_tip').classed(gene_symbol) ){ 10 | get_request(root_tip, gene_symbol); 11 | } 12 | 13 | } 14 | 15 | function get_request(root_tip, ini_gene_symbol){ 16 | 17 | var gene_symbol; 18 | if (ini_gene_symbol.indexOf(' ') > 0){ 19 | gene_symbol = ini_gene_symbol.split(' ')[0]; 20 | } else if (ini_gene_symbol.indexOf('_') > 0){ 21 | gene_symbol = ini_gene_symbol.split('_')[0]; 22 | } 23 | else { 24 | gene_symbol = ini_gene_symbol; 25 | } 26 | 27 | var base_url = 'https://amp.pharm.mssm.edu/Harmonizome/api/1.0/gene/'; 28 | var url = base_url + gene_symbol; 29 | 30 | $.get(url, function(data) { 31 | 32 | data = JSON.parse(data); 33 | 34 | // save data for repeated use 35 | gene_data[gene_symbol] = {} 36 | gene_data[gene_symbol].name = 
data.name; 37 | gene_data[gene_symbol].description = data.description; 38 | 39 | set_tooltip(data, root_tip, ini_gene_symbol); 40 | 41 | return data; 42 | 43 | }); 44 | } 45 | 46 | function set_tooltip(data, root_tip, gene_symbol){ 47 | 48 | if (data.name != undefined){ 49 | 50 | d3.selectAll(root_tip + '_row_tip') 51 | .html(function(){ 52 | var sym_name = gene_symbol + ': ' + data.name; 53 | var full_html = '

' + sym_name + ' ' + ' ' + 54 | data.description + '
'; 55 | return full_html; 56 | }); 57 | } 58 | } 59 | 60 | 61 | function gene_info(root_tip, gene_info){ 62 | 63 | var gene_symbol = gene_info.name; 64 | 65 | if (_.has(gene_data, gene_symbol)){ 66 | var inst_data = gene_data[gene_symbol]; 67 | set_tooltip(inst_data, root_tip, gene_symbol); 68 | } else{ 69 | setTimeout(get_mouseover, 250, root_tip, gene_symbol); 70 | } 71 | 72 | } 73 | 74 | hzome = {} 75 | 76 | hzome.gene_info = gene_info; 77 | hzome.gene_data = gene_data; 78 | hzome.get_mouseover = get_mouseover; 79 | hzome.get_request = get_request; 80 | 81 | return hzome; 82 | 83 | } 84 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/lisa.css~: -------------------------------------------------------------------------------- 1 | body { padding-top: 70px; } 2 | 3 | 4 | textarea { 5 | resize: none; 6 | max-width: 280px; 7 | max-height: 800px; 8 | height: 300px; 9 | overflow-y:hidden; 10 | } 11 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/lisa.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liulab-dfci/lisa/c60207a9e58efe943374aa7e002f61ee29042532/lisa_web/lisa_web/static/lisa.jpg -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/lisa2.css: -------------------------------------------------------------------------------- 1 | .fixed-top { 2 | background-color: #2B3B61; 3 | color: #FFFFFF; 4 | padding-top: 15px; 5 | padding-bottom: 19px; 6 | height: 50px; 7 | } 8 | 9 | .fixed-bottom { 10 | background-color: #2B3B61; 11 | color: #FFFFFF; 12 | height: 52px; 13 | padding-top: 5px; 14 | padding-bottom: 10px; 15 | } 16 | 17 | .nav-a { 18 | color: #FFFFFF; 19 | text-decoration: blink; 20 | background-color: transparent; 21 | } 22 | 23 | * { 24 | margin: 0; 25 | } 26 | 27 | .body3 { 28 | padding-top: 32px; 29 | padding-bottom: 30px; 30 | font-family: lato, sans-serif; 31 | font-size: 14px; 32 | font-weight: normal; 33 | background-color: #fff; 34 | } 35 | 36 | .body2 { 37 | padding-top: 35px; 38 | padding-bottom: 30px; 39 | font-family: lato, sans-serif; 40 | font-size: 14px; 41 | font-weight: normal; 42 | background-color: #fff; 43 | } 44 | 45 | .body { 46 | padding-top: 0px; 47 | padding-bottom: 120px; 48 | margin-bottom: 120px; 49 | font-family: lato, sans-serif; 50 | font-size: 14px; 51 | font-weight: normal; 52 | height: 690px; 53 | background-color: #fff; 54 | } 55 | 56 | #navbar-example { 57 | // position: relative; 58 | z-index: 998; 59 | position: fixed; 60 | top: 248px; 61 | margin-left: 35px; 62 | width: 220px; 63 | left: 101px; 64 | } 65 | 66 | .progress { 67 | width: 100%; 68 | text-align: center; 69 | } 70 | 71 | .tf1, 72 | .tf2, 73 | .tf0, 74 | .tf, 75 | { 76 | height: 480px; 77 | overflow: auto; 78 | } 79 | 80 | #spyOnThis { 81 | height: 100%; 82 | width: 100%; 83 | position: relative; 84 | overflow-y: scroll; 85 | } 86 | 87 | .tfl 88 | { 89 | height: 600px; 90 | width: 100%; 91 | } 92 | 93 | .footer, 94 | .title 95 | { 96 | font-weight: bold; 97 | color: #444; 98 | text-align:center; 99 | background-color: #eee; 100 | border-top: 2px solid #444; 101 | border-bottom: 2px solid #444; 102 | } 103 | 104 | .genes2 { 105 | resize: none; 106 | height: 113px; 107 | } 108 | .genes { 109 | resize: none; 110 | height: 113px; 111 | } 112 | 113 | /* /\* .sp { *\/ */ 114 | /* /\* margin: 12px 12px 12px 12px; *\/ */ 115 | /* /\* text-align: center; 
*\/ */ 116 | /* /\* } *\/ */ 117 | 118 | 119 | /* /\* .main_content { *\/ */ 120 | /* /\* border-bottom: 2px solid #444; *\/ */ 121 | /* /\* text-align: center; *\/ */ 122 | /* /\* } *\/ */ 123 | 124 | .run { 125 | margin: 0px 24px 5px 2px; 126 | } 127 | 128 | /* .bd-example-modal-lg { */ 129 | /* width: 1000px; */ 130 | /* } */ 131 | 132 | td { vertical-align:middle; } 133 | td:hover { 134 | cursor: pointer; 135 | } 136 | 137 | img { 138 | opacity: 0.9; 139 | filter: alpha(opacity=90); /* For IE8 and earlier */ 140 | } 141 | 142 | .inspector_attrib_row { 143 | border-bottom: 1px #d6d6d6 solid; 144 | border-right: 1px #d6d6d6 solid 145 | } 146 | 147 | .circle-col { 148 | width: 12.5%; 149 | position: relative; 150 | float: left; 151 | } 152 | 153 | .circle { 154 | background: #A9A9A9; 155 | height: 23px; 156 | width: 23px; 157 | border-radius: 50%; 158 | margin: 0 auto; 159 | } 160 | 161 | div { 162 | display: block; 163 | } 164 | 165 | .green { 166 | background-color: #27ae60; 167 | } 168 | 169 | .red { 170 | background-color: #c0392b; 171 | } 172 | 173 | .btn-align { 174 | padding: 6px 12px; 175 | line-height: 1.42857143; 176 | vertical-align: middle; 177 | 178 | } 179 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/load_clustergram.js: -------------------------------------------------------------------------------- 1 | /* 2 | Example files 3 | */ 4 | 5 | var hzome = ini_hzome(); 6 | 7 | make_clust('mult_view.json'); 8 | 9 | var about_string = 'Zoom, scroll, and click buttons to interact with the clustergram. '; 10 | 11 | function make_clust(inst_network){ 12 | 13 | d3.json(inst_network, function(network_data){ 14 | 15 | // define arguments object 16 | var args = { 17 | root: '#container-id-1', 18 | 'network_data': network_data, 19 | 'about':about_string, 20 | 'row_tip_callback':hzome.gene_info, 21 | 'col_tip_callback':test_col_callback, 22 | 'tile_tip_callback':test_tile_callback, 23 | 'dendro_callback':dendro_callback, 24 | 'matrix_update_callback':matrix_update_callback, 25 | 'cat_update_callback': cat_update_callback, 26 | 'sidebar_width':150, 27 | // 'ini_view':{'N_row_var':20} 28 | // 'ini_expand':true 29 | }; 30 | 31 | resize_container(args); 32 | 33 | d3.select(window).on('resize',function(){ 34 | resize_container(args); 35 | cgm.resize_viz(); 36 | }); 37 | 38 | cgm = Clustergrammer(args); 39 | 40 | check_setup_enrichr(cgm); 41 | 42 | d3.select(cgm.params.root + ' .wait_message').remove(); 43 | 44 | }); 45 | 46 | } 47 | 48 | function matrix_update_callback(){ 49 | 50 | if (genes_were_found[this.root]){ 51 | enr_obj[this.root].clear_enrichr_results(false); 52 | } 53 | } 54 | 55 | function cat_update_callback(){ 56 | console.log('callback to run after cats are updated'); 57 | } 58 | 59 | function test_tile_callback(tile_data){ 60 | var row_name = tile_data.row_name; 61 | var col_name = tile_data.col_name; 62 | 63 | } 64 | 65 | function test_col_callback(col_data){ 66 | var col_name = col_data.name; 67 | } 68 | 69 | function dendro_callback(inst_selection){ 70 | 71 | var inst_rc; 72 | var inst_data = inst_selection.__data__; 73 | 74 | // toggle enrichr export section 75 | if (inst_data.inst_rc === 'row'){ 76 | d3.select('.enrichr_export_section') 77 | .style('display', 'block'); 78 | } else { 79 | d3.select('.enrichr_export_section') 80 | .style('display', 'none'); 81 | } 82 | 83 | } 84 | 85 | function resize_container(args){ 86 | 87 | var screen_width = window.innerWidth; 88 | var screen_height = window.innerHeight - 20; 
89 | 90 | d3.select(args.root) 91 | .style('width', screen_width+'px') 92 | .style('height', screen_height+'px'); 93 | } 94 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/plot.R: -------------------------------------------------------------------------------- 1 | library(data.table) 2 | AR <- fread('MC00468.pwm.1kb') 3 | E2F2 <- fread('MS00712.pwm.1kb') 4 | GR <- fread('MC00170.pwm.1kb') 5 | 6 | AE <- cbind(AR$V2, E2F2$V2) 7 | AE <- AE[apply(AE, 1, function(x) all(x<10000)),] 8 | 9 | AG <- cbind(AR$V2, GR$V2) 10 | AG <- AG[apply(AG, 1, function(x) all(x<10000)),] 11 | 12 | print(head(AE)) 13 | print(head(AG)) 14 | 15 | png('motif_scatterplot.png', width=1500, height=800) 16 | par(mfrow=c(1,2), font=2, cex=1) 17 | plot(AE[,1], AE[,2], pch=19, col='blue', xlab='AR', ylab='E2F2') 18 | plot(AG[,1], AG[,2], pch=19, col='blue', xlab='AR', ylab='GR') 19 | dev.off() 20 | 21 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in cistrome/*pwm 4 | do 5 | #if [ ! -s ${i}.100bp.bin.npy ] 6 | if [ ! -s ${i}.100bp ] 7 | then 8 | #python bin/seqpos2 -f hg38_window100bp_both10bp.fa -p $i -o ${i}.100bp 9 | python bin/seqpos2 -f mm10_window100bp_both10bp.fa -p $i -o ${i}.100bp 10 | fi 11 | done 12 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/static/send_to_Enrichr.js: -------------------------------------------------------------------------------- 1 | function send_to_Enrichr(options) { // http://amp.pharm.mssm.edu/Enrichr/#help 2 | var defaultOptions = { 3 | description: "", 4 | popup: false 5 | }; 6 | 7 | if (typeof options.description == 'undefined') 8 | options.description = defaultOptions.description; 9 | if (typeof options.popup == 'undefined') 10 | options.popup = defaultOptions.popup; 11 | if (typeof options.list == 'undefined') 12 | alert('No genes defined.'); 13 | 14 | var form = document.createElement('form'); 15 | form.setAttribute('method', 'post'); 16 | form.setAttribute('action', 'https://amp.pharm.mssm.edu/Enrichr/enrich'); 17 | if (options.popup) 18 | form.setAttribute('target', '_blank'); 19 | form.setAttribute('enctype', 'multipart/form-data'); 20 | 21 | var listField = document.createElement('input'); 22 | listField.setAttribute('type', 'hidden'); 23 | listField.setAttribute('name', 'list'); 24 | listField.setAttribute('value', options.list); 25 | form.appendChild(listField); 26 | 27 | var descField = document.createElement('input'); 28 | descField.setAttribute('type', 'hidden'); 29 | descField.setAttribute('name', 'description'); 30 | descField.setAttribute('value', options.description); 31 | form.appendChild(descField); 32 | 33 | document.body.appendChild(form); 34 | form.submit(); 35 | document.body.removeChild(form); 36 | } -------------------------------------------------------------------------------- /lisa_web/lisa_web/templates/404.html: -------------------------------------------------------------------------------- 1 |

not found...
2 | -------------------------------------------------------------------------------- /lisa_web/lisa_web/templates/index.html~: -------------------------------------------------------------------------------- 1 | {% extends "bootstrap/base.html" %} 2 | {% import "bootstrap/fixes.html" as fixes %} 3 | {% import "bootstrap/wtf.html" as wtf %} 4 | 5 | {% block head %} 6 | {{super()}} 7 | {{fixes.ie8()}} 8 | {% endblock %} 9 | 10 | {% block metas %} 11 | 12 | {% endblock %} 13 | {% block title %}LISA{% endblock %} 14 | {% block html_attribs %} lang="en"{% endblock %} 15 | 16 | {% block styles %} 17 | {{ super() }} 18 | 19 | {% endblock %} 20 | 21 | 22 | {% block scripts %} 23 | {{ super() }} 24 | 25 | {% endblock %} 26 | 27 | 28 | {% block navbar %} 29 | {% endblock %} 30 | 31 | {% block content %} 32 |
33 | LISA online: a web server for ranking TF from large-scale epigenome data 34 | 35 | 36 | 37 | 38 | {{ form.hidden_tag() }} 39 | {{ wtf.form_errors(form, hiddens="only") }} 40 | {{ wtf.form_field(form.genes) }} 41 | {{ wtf.form_field(form.mark) }} 42 | 43 | 44 | 45 | 46 | 47 | 48 |
49 | {% endblock %} 50 | -------------------------------------------------------------------------------- /lisa_web/lisa_web_requirement.txt: -------------------------------------------------------------------------------- 1 | Flask==0.12.1 2 | Flask-Bootstrap==3.3.7.1 3 | Flask-HTTPAuth==3.2.4 4 | Flask-Mail==0.9.1 5 | Flask-Script==2.0.5 6 | Flask-WTF==0.14.2 7 | Jinja2==2.9.6 8 | MarkupSafe==1.0 9 | WTForms==2.1 10 | Werkzeug==0.12.1 11 | amqp==2.2.1 12 | argparse==1.2.1 13 | billiard==3.5.0.3 14 | blinker==1.4 15 | celery==4.1.0 16 | click==6.7 17 | dominate==2.3.1 18 | itsdangerous==0.24 19 | kombu==4.1.0 20 | numpy==1.12.1 21 | pandas==0.20.3 22 | python-dateutil==2.6.1 23 | pytz==2017.2 24 | redis==2.10.5 25 | six==1.10.0 26 | vine==1.1.4 27 | visitor==0.1.3 28 | wsgiref==0.1.2 29 | -------------------------------------------------------------------------------- /lisa_web/output_profile_regulatory_potential.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Extract a regulatory potential (RP) matrix for selected samples from a LISA HDF5 store; the function wrapper and its name are inferred from the original fragment.""" 3 | 4 | import os 5 | import sys 6 | import h5py 7 | import numpy as np 8 | import pandas as pd 9 | 10 | def load_rp_profile(h5, high_quality_ids): 11 | # return a genes x samples RP DataFrame restricted to the requested sample ids 12 | with h5py.File(h5) as store: 13 | gene_annotation = np.array(list(map(lambda x: x.decode('utf-8'), 14 | store['RefSeq'][...]))) 15 | ids = list(map(lambda x: x.decode('utf-8').split('_')[0], 16 | store['IDs'][...])) 17 | 18 | high_quality_ids = list(set(high_quality_ids) & set(ids)) 19 | map_id = {} 20 | for i, c in enumerate(ids): 21 | map_id[c] = i 22 | idx = np.array([map_id[str(i)] for i in high_quality_ids]) 23 | sort_index = np.argsort(idx) 24 | index = idx[sort_index] 25 | iid = np.array(high_quality_ids)[sort_index] 26 | return pd.DataFrame(store['RP'][:, index], columns=iid, 27 | index=gene_annotation) 28 | 29 | if __name__ == '__main__': 30 | # placeholder from the original script; supply the HDF5 path and sample ids before running 31 | # with h5py.File("") as store: 32 | # store['RP'] 33 | pass 34 | -------------------------------------------------------------------------------- /lisa_web/plotly_scatter.py: -------------------------------------------------------------------------------- 1 | #!/project/dev/qqin/miniconda3/bin/python 2 | import sys 3 | import pandas as pd 4 | import numpy as np 5 | from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot 6 | from plotly import tools 7 | from plotly.graph_objs import Scatter, Heatmap, Layout, Figure 8 | 9 | up_r = sys.argv[1] 10 | dn_r = sys.argv[2] 11 | prefix = sys.argv[3] 12 | title = sys.argv[4] 13 | labels1 = sys.argv[5] 14 | labels2 = sys.argv[6] 15 | 16 | up=pd.read_csv(up_r, header=0) 17 | if '1' in up.columns: 18 | up=up.sort_values(by='1') 19 | if '0.1' in up.columns: 20 | up=up.sort_values(by='0.1') 21 | up.loc[:, 'name'] = up.iloc[:, 0].map(lambda x:x.split('|')[1]) 22 | up.drop_duplicates('name', inplace=True, keep='first') 23 | print(up.head()) 24 | 25 | dn=pd.read_csv(dn_r, header=0) 26 | if '1' in dn.columns: 27 | dn=dn.sort_values(by='1') 28 | if '0.1' in dn.columns: 29 | dn=dn.sort_values(by='0.1') 30 | dn.loc[:, 'name'] = dn.iloc[:, 0].map(lambda x:x.split('|')[1]) 31 | dn.drop_duplicates('name', inplace=True, keep='first') 32 | print(dn.head()) 33 | 34 | final = up.merge(dn, on='name', how='outer') 35 | final = final.loc[(final.iloc[:, 1]<=0.05) | (final.iloc[:, 4]<=0.05), :] 36 | xlim = -np.log10(np.min(final.iloc[:, 1]))*1.2 37 | ylim = -np.log10(np.min(final.iloc[:, 4]))*1.2 38 | print(xlim) 39 | print(ylim) 40 | 41 | #final.iloc[np.where(pd.isnull(final.iloc[:, 4]))[0], 3] = 1 42 | #final.iloc[np.where(pd.isnull(final.iloc[:, 1]))[0], 1] = 1 43 | top_index = np.union1d(np.argsort(final.iloc[:, 1])[:10], np.argsort(final.iloc[:, 4])[:10]) 44 | final_top = 
final.iloc[top_index, :] 45 | 46 | final = final.drop(final.index[top_index]) 47 | x = -np.log10(final.iloc[:, 1]) 48 | y = -np.log10(final.iloc[:, 4]) 49 | 50 | top_trace0 = Scatter(x=x, 51 | y=y, 52 | name='other TF with p-value < 0.01', 53 | mode='markers', 54 | text=final.iloc[:, 0], 55 | marker= dict(size= 8, 56 | opacity= 0.7, 57 | )) 58 | 59 | x = -np.log10(final_top.iloc[:, 1]) 60 | y = -np.log10(final_top.iloc[:, 4]) 61 | trace1 = Scatter(x=x, 62 | y=y, 63 | name='top TFs', 64 | # mode='markers+text', 65 | mode='markers', 66 | marker=dict(size= 6, 67 | opacity= 0.8), 68 | textfont=dict( 69 | family='sans serif', 70 | size=18, 71 | color='black' 72 | ), 73 | text = list(map(lambda x: "%s\n%s" % ('Cistrome ID|TF', x), final_top.iloc[:, 0])), 74 | hoverinfo = 'text', 75 | textposition='top right') 76 | 77 | layout = Layout( 78 | title=title, 79 | xaxis=dict( 80 | title='-log10(p-value) of Gene Set 1' if labels1.strip() == '' else '-log10(p-value) of %s' % labels1, 81 | showgrid=False, 82 | titlefont=dict( 83 | family='Arial', 84 | size=18), 85 | rangemode='tozero', 86 | range=[0, xlim] 87 | ), 88 | yaxis=dict( 89 | title='-log10(p-value) of Gene Set 2' if labels2.strip() == '' else '-log10(p-value) of %s' % labels2, 90 | showgrid=False, 91 | titlefont=dict( 92 | family='Arial', 93 | size=18 94 | ), 95 | rangemode='tozero', 96 | range=[0, ylim] 97 | ), 98 | hovermode = 'closest', 99 | width=850, 100 | height=650 101 | ) 102 | 103 | fig = Figure(data=[top_trace0, trace1], layout=layout) 104 | plot(fig, filename='%s.html' % prefix, show_link=False, auto_open=False) 105 | -------------------------------------------------------------------------------- /lisa_web/run-redis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ ! 
-d redis-stable/src ]; then 3 | curl -O http://download.redis.io/redis-stable.tar.gz 4 | tar xvzf redis-stable.tar.gz 5 | rm redis-stable.tar.gz 6 | fi 7 | cd redis-stable 8 | make 9 | src/redis-server 10 | -------------------------------------------------------------------------------- /lisa_web/run.py: -------------------------------------------------------------------------------- 1 | from flask_script import Manager 2 | from lisa_web import app 3 | 4 | manager = Manager(app) 5 | 6 | if __name__ == '__main__': 7 | manager.run() 8 | -------------------------------------------------------------------------------- /lisa_web/run.sh: -------------------------------------------------------------------------------- 1 | #AR 2 | python make_session.py /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/265_up.gene_symbol.H3K27ac.coefs.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/265_up.gene_symbol.H3K27ac.chipseq.p_value.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/265_up.gene_symbol.foreground_gene > AR_session.txt 3 | 4 | 5 | #BCL6 6 | python make_session.py /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/27_up.gene_symbol.H3K27ac.coefs.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/27_up.gene_symbol.H3K27ac.chipseq.p_value.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/27_up.gene_symbol.foreground_gene > BCL6_session.txt 7 | 8 | # ESR1 9 | python make_session.py /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/85_down.gene_symbol.H3K27ac.coefs.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/85_down.gene_symbol.H3K27ac.chipseq.p_value.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/85_down.gene_symbol.foreground_gene > ESR1_session.txt 10 | 11 | 12 | python make_session.py /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/139_up.gene_symbol.H3K27ac.coefs.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/139_up.gene_symbol.H3K27ac.chipseq.p_value.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/139_up.gene_symbol.foreground_gene > 139_session.txt 13 | 14 | # REST 15 | python make_session.py /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/272_up.gene_symbol.H3K27ac.coefs.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/272_up.gene_symbol.H3K27ac.chipseq.p_value.csv /data5/home/chenfei/JingyuFan/data_collection/MARGE/LISA_figures/human_ebi_random/272_up.gene_symbol.foreground_gene > REST_session.txt 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /lisa_web/run_browser.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | #python make_session.py upload/2017-08-22_0830220.34508.H3K27ac.coefs.csv upload/2017-08-18_1107470.10356.txt.H3K27ac.chipseq.p_value.csv upload/2017-08-24_0952320.67641.txt.foreground_gene 3 | links=$1 4 | links=${links/.coefs.csv/} 5 | 6 | ## http://lisa.cistrome.org//data5/lisa_browser/test.bed 7 | 8 | shuf /project/Cistrome/LISA/lisa_web/upload/$3 | head -20 > /project/Cistrome/LISA/lisa_web/upload/${3}.20 9 | python make_session.py 
/project/Cistrome/LISA/lisa_web/upload/$1 /project/Cistrome/LISA/lisa_web/upload/$2 /project/Cistrome/LISA/lisa_web/upload/${3}.20 > /project/Cistrome/LISA/lisa_web/upload/${links}.url 10 | 11 | -------------------------------------------------------------------------------- /lisa_web/run_celery.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | celery worker -A lisa_web.celery --loglevel=info -E -c 8 4 | 5 | -------------------------------------------------------------------------------- /lisa_web/run_heatmap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python generate_heatmap_js.py -d $1 3 | -------------------------------------------------------------------------------- /lisa_web/run_lisa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | source /project/dev/qqin/miniconda3/bin/activate base 4 | 5 | outdir=$3 6 | mkdir -p /project/Cistrome/LISA/lisa_web/upload/${outdir} 7 | cd /project/Cistrome/LISA/lisa_web/upload/${outdir} 8 | 9 | ###nice -n 15 lisa model --web=True --new_rp_h5=None --new_count_h5=None --species hg38 --epigenome '['H3K27ac']' --cluster=False --covariates=False --random=True --prefix test --threads 3 AR.symbol 10 | 11 | nice -n 15 lisa model --method="$5" --web=True --new_rp_h5=None --new_count_h5=None --species $1 --epigenome "$2" --cluster=False --covariates=False --random=True --stat_background_number=300 --background=$6 --prefix ${outdir} --threads 4 $4 12 | 13 | echo "accomplished!!.." >> /project/Cistrome/LISA/lisa_web/upload/${outdir}_snakemake_output.txt 14 | sleep 1 15 | 16 | -------------------------------------------------------------------------------- /lisa_web/run_lisa2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | source /project/dev/qqin/miniconda3/bin/activate base 4 | 5 | outdir=$3 6 | mkdir -p /project/Cistrome/LISA/lisa_web/upload/${outdir} 7 | cd /project/Cistrome/LISA/lisa_web/upload/${outdir} 8 | 9 | nice -n 15 lisa model --method="$4" --web=True --new_rp_h5=None --new_count_h5=None --species $1 --epigenome "$2" --cluster=False --covariates=False --random=True --prefix ${outdir} --background=$7 --stat_background_number=300 --threads 4 $5 $6 10 | 11 | echo "accomplished!!.." 
>> /project/Cistrome/LISA/lisa_web/upload/${outdir}_snakemake_output.txt 12 | sleep 1 13 | 14 | -------------------------------------------------------------------------------- /lisa_web/run_plot.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | source /project/dev/qqin/miniconda3/bin/activate base 3 | /project/dev/qqin/miniconda3/bin/python /project/Cistrome/LISA/lisa_web/plotly_scatter.py $1 $2 $3 $4 $5 $6 4 | -------------------------------------------------------------------------------- /lisa_web/test.sh: -------------------------------------------------------------------------------- 1 | nice -n 15 lisa model --web=True --new_rp_h5=None --new_count_h5=None --species hg38 --epigenome '['H3K27ac']' --cluster=False --covariates=False --random=True --prefix test --threads 3 AR.symbol 2 | 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """lisa: a bioinformatics software 2 | epigenome analysis to rank TFs from gene set 3 | """ 4 | import os 5 | from glob import glob 6 | from setuptools import setup, find_packages, Extension 7 | from numpy.distutils.misc_util import get_numpy_include_dirs 8 | 9 | def get_extension(): 10 | """ get extension for computing regulatory potential from bigwig """ 11 | bigwig_src = ['bigWigRegPotential.c', 'pybw.c', 12 | 'ffScore.c', 'rbTree.c', 'rangeTree.c', 'ffAli.c', 'ffAliHelp.c', 'fuzzyFind.c', 'bwgValsOnChrom.c', 'tokenizer.c', 'asParse.c', 'aliType.c', 'dnaseq.c', 'dnautil.c', 'gfxPoly.c', 'psl.c', 'binRange.c', 'sqlList.c', 'basicBed.c', 'bPlusTree.c','base64.c','bbiRead.c','bbiWrite.c', 'bits.c','bwgQuery.c','cheapcgi.c','cirTree.c','common.c','dlist.c','dystring.c','errAbort.c','hash.c','hex.c','hmmstats.c','https.c','intExp.c','internet.c','kxTok.c','linefile.c','localmem.c','memalloc.c','mime.c','net.c','obscure.c','options.c','osunix.c','pipeline.c','portimpl.c','servBrcMcw.c','servCrunx.c','servcis.c','servcl.c','servmsII.c','servpws.c','sqlNum.c','udc.c','verbose.c','wildcmp.c','zlibFace.c'] 13 | 14 | bigwig_src = list(map(lambda x: os.path.join('lisa', 'regpotential', x), bigwig_src)) 15 | ext = Extension('lisa._bw', 16 | sources=bigwig_src, 17 | extra_compile_args=['-O3', '-std=c99'], #, '-Wall'], 18 | libraries=['ssl', 'z', 'crypto']) 19 | return ext 20 | 21 | def main(): 22 | """setup entry 23 | """ 24 | setup( 25 | name='lisa', 26 | version='1.0', 27 | url='http://lisa.cistrome.org', 28 | author='Qian Qin', 29 | description=__doc__, 30 | packages=find_packages(), 31 | ext_modules=[get_extension(), ], 32 | include_dirs=['lisa/regpotential'] + get_numpy_include_dirs(), 33 | install_requires=['numpy==1.15.1', #'matplotlib', 'seaborn', 34 | 'scikit-learn', 'theano', 'fire', 35 | 'h5py', 'pandas', 36 | 'scipy', 37 | 'snakemake', 'PyYAML', 'yappi', 'mpmath' 38 | ], 39 | include_package_data=True, 40 | package_data={'lisa': ['rules/*', 'workflows/*', 'lisa.ini']}, 41 | scripts=glob('bin/*'), 42 | classifiers=[ 43 | 'Environment :: Console', 44 | 'Operating System :: POSIX', 45 | "Programming Language :: Python :: 3", 46 | "Topic :: Scientific/Engineering :: Bio-Informatics"], 47 | keywords='ChIP-seq', 48 | license='OTHER', 49 | zip_safe=False) 50 | 51 | if __name__ == '__main__': 52 | main() 53 | --------------------------------------------------------------------------------
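Build note: a minimal sketch of a local source build to accompany setup.py above, not an official procedure. It assumes a Linux host with a C toolchain plus zlib and OpenSSL development headers, since the lisa._bw extension is compiled with -std=c99 and linked against -lssl, -lz and -lcrypto, and it installs numpy first because setup.py imports numpy.distutils at build time (the pinned numpy==1.15.1 still ships it). The apt package names below are assumptions for a Debian/Ubuntu host; adjust for other distributions.

    # assumed Debian/Ubuntu prerequisites for the C extension (-lssl -lz -lcrypto)
    sudo apt-get install build-essential zlib1g-dev libssl-dev
    # numpy must be present before the build: setup.py imports numpy.distutils
    pip install --user numpy==1.15.1
    # compile the lisa._bw extension, then install as in lisa_docs/source/Installation.rst
    python setup.py build_ext --inplace
    python setup.py install --user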