├── .gitignore ├── LICENSE ├── README.md ├── array_tools.py ├── data_tools.py ├── hic_oe.py ├── linear_algebra.py ├── minimds.py ├── minimds_inter.py ├── plotting.py ├── requirements.txt ├── scripts ├── README.txt ├── bin_bed.py ├── chrom_from_tar.sh ├── chromosome3d_input.py ├── chromsde_input.py ├── distance_decay.py ├── figures │ ├── fig1.py │ ├── fig1.sh │ ├── fig10.py │ ├── fig10.sh │ ├── fig2.py │ ├── fig2.sh │ ├── fig4.py │ ├── fig4.sh │ ├── fig5.py │ ├── fig5.sh │ ├── fig6.py │ ├── fig6.sh │ ├── fig7.py │ ├── fig7.sh │ ├── fig8.py │ ├── fig8.sh │ ├── fig9.py │ ├── fig9.sh │ ├── sup1.py │ ├── sup1.sh │ ├── sup2.py │ ├── sup2.sh │ ├── sup3.py │ └── sup3.sh ├── get_chrom_sizes.py ├── get_gm12878.sh ├── get_gm12878_inter.sh ├── heatmap.py ├── hicpro │ ├── HindIII_resfrag_hg19_downsampled.bed │ ├── hic.matrix │ ├── hicpro_interchromosomal.sh │ └── hicpro_to_bedpe.py ├── hsa_input.py ├── install_chromosome3d.sh ├── install_chromsde.sh ├── install_hsa.sh ├── install_mogen.sh ├── misc.py ├── mogen_input.py ├── normalize.py ├── normalize.sh ├── parameters │ ├── parameters_chr10_10kb.txt │ ├── parameters_chr11_10kb.txt │ ├── parameters_chr12_10kb.txt │ ├── parameters_chr13_10kb.txt │ ├── parameters_chr14_10kb.txt │ ├── parameters_chr15_10kb.txt │ ├── parameters_chr16_10kb.txt │ ├── parameters_chr17_10kb.txt │ ├── parameters_chr18_10kb.txt │ ├── parameters_chr19_10kb.txt │ ├── parameters_chr1_10kb.txt │ ├── parameters_chr20_10kb.txt │ ├── parameters_chr21_10kb.txt │ ├── parameters_chr22_100kb.txt │ ├── parameters_chr22_10kb.txt │ ├── parameters_chr2_10kb.txt │ ├── parameters_chr3_10kb.txt │ ├── parameters_chr4_10kb.txt │ ├── parameters_chr5_10kb.txt │ ├── parameters_chr6_10kb.txt │ ├── parameters_chr7_10kb.txt │ ├── parameters_chr8_10kb.txt │ ├── parameters_chr9_10kb.txt │ └── parameters_chrX_10kb.txt ├── run_chromsde.m ├── run_chromsde_100kb.m ├── run_chromsde_rep1.m ├── run_chromsde_rep2.m ├── simple_tad.py ├── struct_to_pdb.py ├── structure_to_xyz.sh └── test.sh ├── tad.py └── tools.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Lila Rieber & Shaun Mahony 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # miniMDS 2 | 3 | miniMDS is a tool for inferring and plotting 3D structures from normalized Hi-C data, using a novel approximation to multidimensional scaling (MDS). It produces a single 3D structure from a Hi-C BED file, representing an ensemble average of chromosome conformations within the population of cells. Using parallelization, it is able to process high-resolution data quickly with limited memory requirements. The human genome can be inferred at kilobase-resolution within several hours on a desktop computer. Standard MDS results in inaccuracies for sparse high-resolution data, but miniMDS focuses on local substructures to achieve greater accuracy. miniMDS also supports interchromosomal structural inference. Together with Mayavi, miniMDS produces publication-quality images and gifs. 4 | 5 | ## Update 9/27/18 6 | 7 | Major improvements in miniMDS. Please pull code for latest version. 8 | 9 | ## Citation 10 | 11 | Rieber, L., & Mahony, S. (2017). miniMDS: 3D structural inference from high-resolution Hi-C data. Bioinformatics, 33(14), i261-i266. 12 | 13 | ## Installation 14 | 15 | Requirements: 16 | * python (must be python 3 for plotting, otherwise 2.7 is fine) 17 | * Python dependencies can be installed using 18 | ``pip install -r requirements.txt`` 19 | * The following optional dependencies can be installed manually: 20 | * [mayavi](http://docs.enthought.com/mayavi/mayavi/installation.html#installing-with-pip) (for plotting) 21 | * [ImageMagick](https://www.imagemagick.org/script/index.php) (for creating gifs) 22 | 23 | ## Testing 24 | 25 | Please run test.sh (in the scripts directory) and report any issues. 26 | 27 | ## TLDR 28 | 29 | ``python minimds.py [Hi-C BED path]`` 30 | 31 | ## Usage 32 | 33 | ### Input file format 34 | 35 | miniMDS uses intra- or inter-chromosomal BED files as input. Data must be normalized prior to use (for example, using [HiC-Pro](http://nservant.github.io/HiC-Pro/)). 36 | 37 | Format: 38 | 39 | >chrA bin1\_start bin1\_end chrB bin2\_start bin2\_end normalized\_contact\_frequency 40 | 41 | Example - chr22 intra-chromosomal data at 10-Kbp resolution: 42 | 43 | >chr22 16050000 16060000 chr22 16050000 16060000 12441.5189291 44 | > 45 | >... 46 | 47 | Do NOT include lines with 0 counts, e.g. 48 | 49 | >chr22 16050000 16060000 chr22 16050000 16060000 0 50 | 51 | ### Intra-chromosomal miniMDS 52 | 53 | Intra-chromosomal analysis is performed using minimds.py. 54 | 55 | To view help: 56 | 57 | ``python minimds.py -h`` 58 | 59 | By default, full MDS is used: 60 | 61 | ``python minimds.py GM12878_combined_22_5kb.bed`` 62 | 63 | To use partitioned MDS: 64 | 65 | ``python minimds.py --partitioned GM12878_combined_22_5kb.bed`` 66 | 67 | By default structures are saved to [PREFIX]_structure.tsv, e.g. GM12878_combined_22_100kb.bed would output GM12878_combined_22_100kb_structure.tsv. You can use the -o option with a custom path where you want to save the structure. 68 | 69 | ``python minimds.py -o test_structure.tsv GM12878_combined_22_5kb.bed`` 70 | 71 | Structures are saved to tsv files. The header contains the name of the chromosome, the resolution, and the starting genomic coordinate. Each line in the file contains the genomic bin number followed by the 3D coordinates (with "nan" for missing data). 72 | 73 | Example - chr22 at 10-Kbp resolution: 74 | 75 | >chr22 76 | > 77 | >10000 78 | > 79 | >16050000 80 | > 81 | >0 0.589878298045 0.200029092422 0.182515056542 82 | > 83 | >1 0.592088232028 0.213915817254 0.186657230841 84 | > 85 | >2 nan nan nan 86 | > 87 | >... 88 | 89 | 0 corresponds to the bin 16050000-16060000, 1 corresponds to the bin 16060000-16070000, etc. 90 | 91 | #### Parameters (optional) 92 | 93 | #### Resolution ratio 94 | 95 | miniMDS first infers a global intrachromosomal structure at low resolution, which it uses as a scaffold for high-resolution inference. By default a resolution ratio of 10 is used. So if your input file is 100-kb resolution, a 1-Mb structure will be used for approximation. The resolution ratio can be changed with the l option. 96 | 97 | ``python minimds.py -l 20 GM12878_combined_22_5kb.bed`` 98 | 99 | The value you choose depends on your tradeoff between speed and accuracy (but must be an integer). Lower resolutions (i.e. higher ratios) are faster but less accurate. 100 | 101 | ##### Controlling the number of partitions 102 | 103 | The miniMDS algorithm creates partitions in the high-resolution data and performs MDS on each partition individually. A greater number of partitions can increase speed but also reduce accuracy. On the other hand, for very sparse data a greater number of partitions can actually increase accuracy. If your output appears "clumpy", increase the number of partitions. 104 | 105 | The number of partitions cannot be set directly because partitions are created empirically to maximize clustering of the data. However, the degree of clustering of the data can be tweaked with the following parameters: 106 | 107 | >-m: minimum partition size (as a fraction of the data). Default = 0.05 108 | > 109 | >-p: smoothing parameter (between 0 and 1). Default = 0.1 110 | 111 | Make these parameters smaller to increase the number of partitions. For very high resolution data (such as 5-Kbp), m=0.01 and p=0.01 is recommended: 112 | 113 | ``python minimds.py -m 0.01 -p 0.01 GM12878_combined_22_5kb.bed`` 114 | 115 | You can limit the maximum RAM (in Kb) used by any given partition using -r (default = 32000000): 116 | 117 | ``python minimds.py -r 50000 GM12878_combined_22_5kb.bed`` 118 | 119 | ##### Number of threads 120 | 121 | miniMDS uses multithreading to achieve greater speed. By default, 3 threads are requested, because this is safe for standard 4-core desktop computers. However, the number of threads used will never exceed the number of processors or the number of partitions, regardless of what is requested. You can change the number of requested threads using -n. 122 | 123 | For example, to run miniMDS with four threads: 124 | 125 | ``python minimds.py -n 4 GM12878_combined_22_5kb.bed`` 126 | 127 | ##### Scaling factor 128 | 129 | The scaling factor a describes the assumed relationship between contact frequencies and physical distances: distance = contact_frequency^(-1/a). The default value is 4, based on Wang et al 2016. You can change the scaling factor using -a. 130 | 131 | ``python minimds.py -a 3 GM12878_combined_22_5kb.bed`` 132 | 133 | a can be any value >1, including non-integer. 134 | 135 | A secondary scaling factor is used for short-range interactions. The default value is 2.5. You can change this using -a2. (Reducing this can help with "clumping" in the structure.) 136 | 137 | ``python minimds.py -a2 2 GM12878_combined_22_5kb.bed`` 138 | 139 | ##### Prior 140 | 141 | Exponential decay in contact frequency with genomic separation is a hallmark of Hi-C data. To reduce noise, miniMDS corrects contact frequencies with a distance-decay prior. The default prior weight is 0.05. You can change the weight using -w. 142 | 143 | ``python minimds.py -w 0 GM12878_combined_22_5kb.bed`` 144 | 145 | w can be any value between 0 and 1. 146 | 147 | ##### Classical MDS 148 | 149 | Classical MDS (cMDS), also called principal coordinates analysis, is a variant of MDS that is faster under certain circumstances. The miniMDS tool supports cMDS but NOT with partitioned MDS. Use the --classical option. 150 | 151 | ``python minimds.py --classical GM12878_combined_22_5kb.bed`` 152 | 153 | This mode is mainly used for testing. 154 | 155 | ### Inter-chromosomal miniMDS 156 | 157 | Inter-chromosomal analysis is performed using minimds_inter.py 158 | 159 | To view help: 160 | 161 | ``python minimds_inter.py -h`` 162 | 163 | The usage of minimds_inter.py is similar to minimds.py, however inter-chromosomal files are required in addition to intra-chromosomal. To avoid entering filenames separately for each chromosome, you must name your files using a standard format. 164 | 165 | Intra-chromosomal format: 166 | 167 | >{prefix}\_{ChrA}\_{resolution}{kb or Mb}.bed 168 | 169 | Example: 170 | 171 | >GM12878_combined_22_100kb.bed 172 | 173 | Inter-chromosomal format: 174 | 175 | >{prefix}\_{ChrA}\_{ChrB}_{resolution}{kb or mb}.bed 176 | 177 | where A is before B in: 178 | 179 | >1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X 180 | 181 | Example: 182 | 183 | >GM12878_combined_21_22_100kb.bed 184 | 185 | Enter the prefix, inter-chromosomal resolution, and intra-chromosomal resolution: 186 | 187 | ``python minimds_inter.py [prefix] [inter-chromosomal resolution] [intra-chromosomal resolution]`` 188 | 189 | For example, if your files are stored in the directory _data_: 190 | 191 | ``python minimds_inter.py data/GM12878_combined 1000000 10000`` 192 | 193 | Because of the challenges of inter-chromosomal inference, it is recommended that a resolution no greater than 1-Mbp be used for human inter-chromosomal data. 194 | 195 | #### Other parameters (optional) 196 | 197 | All of the parameters from minimds.py are also available for minimds_inter.py 198 | 199 | ###### Specifying chromosomes 200 | 201 | By default, minimds_inter.py uses all human chromosomes other than Y. You can specify any number of chromosomes (in order) using the option -c. 202 | 203 | To perform interchromosomal analysis on chromosomes 1 and 2: 204 | 205 | ``python minimds_inter.py -c 1 -c 2 data/GM12878_combined 1000000 10000`` 206 | 207 | You can specify a different number of autosomes using -C. To perform interchromosomal analysis on all yeast autosomes: 208 | 209 | ``python minimds_inter.py -C 16 my_yeast_dir 100000 10000`` 210 | 211 | ### Plotting 212 | 213 | Read a structure: 214 | 215 | import data_tools 216 | structure = data_tools.structure_from_file("GM12878_combined_22_100kb_structure.tsv") 217 | 218 | Create an interactive 3D plot in Mayavi. (Mayavi allows you to rotate the image and save a view.) 219 | 220 | import plotting 221 | plotting.plot_structure_interactive(structure, color=(0,0.5,0.7), radius=0.01, enrichments=my_enrichments) 222 | 223 | If _radius_ is not selected, the to-scale radius of heterochromatin is used. 224 | 225 | _enrichments_ is a vector with a numerical value for each bin in the structure (i.e. bins that do not have a nan coordinate). For example, this could represent ChIP-seq enrichments for each bin. This option overrides _color_ and will use a rainbow colormap, with blue representing low values and red representing high values. 226 | 227 | Multiple structures can be plotted simultaneously: 228 | 229 | chroms = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, "X") 230 | structures = [data_tools.structure_from_file(f"GM12878_combined_{chrom}_100kb_structure.tsv") for chrom in chroms] 231 | plotting.plot_structures_interactive(structures) 232 | 233 | plotting.py has 23 built-in colors designed to be maximally different to the human eye. By default, these colors are used when plotting multiple structures. You can also specify a list of colors: 234 | 235 | chroms = (1, 2) 236 | structures = [data_tools.structure_from_file(f"GM12878_combined_{chrom}_100kb_structure.tsv") for chrom in chroms] 237 | plotting.plot_structures_interactive(structures, colors=[(1,0,0), (0,0,1)]) 238 | 239 | plot_structures_interactive and plot_structures_gif output a file "structures_legend.png" showing the colors for each structure. 240 | 241 | _all_enrichments_ is a list of enrichments, e.g. 242 | 243 | plotting.plot_structures_interactive(structures, all_enrichments=[enrichments1, enrichments2]) 244 | 245 | The radius can also be specified, as above. 246 | 247 | The option _cut_ creates a cross-section of the plot. For example, this is useful for viewing the interior of the nucleus. 248 | 249 | chroms = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, "X") 250 | structures = [data_tools.structure_from_file(f"GM12878_combined_{chrom}_100kb_structure.tsv" for chrom in chroms)] 251 | plotting.plot_structures_interactive(structures, cut=True) 252 | 253 | A plot can be saved as a gif: 254 | 255 | ``plotting.plot_structure_gif(structure, struct, color=(1,0,0), radius=None, increment=10)`` 256 | 257 | will create struct.gif 258 | 259 | A smaller value of _increment_ will lead to a smoother gif. Increments must be a factor of 360. 260 | 261 | Multiple structures can also be plotted in a single gif: 262 | 263 | ``plotting.plot_structures_gif(structures, struct, colors=default_colors, radius=None, increment=10)`` 264 | 265 | ## Troubleshooting 266 | 267 | ### miniMDS won't complete due to an error 268 | 269 | The majority of user errors are due to problems in formatting the input file (see below). In particular, make sure that there are no lines in the input file with 0 counts. This can lead to issues such as empty rows in matrices. If your input file looks fine, please post the error in the issues tab. We try to respond promptly. 270 | 271 | ### Output structure looks bad 272 | 273 | The art of Hi-C analysis involves developing an intuition for whether a structure looks good or bad. There are several reasons a bad structure can occur. 274 | 275 | #### Over-partitioning 276 | 277 | If your dataset is small and not sparse (small chromosomes, low-resolution, high-coverage), partitioning is less beneficial. First, the computational efficiency is less necessary. Second, partitioning loses information. We recommend testing your dataset with full MDS first. Only if this is computationally intractable or the output structure looks bad (see under-partitioning) do we recommend partitioned MDS. 278 | 279 | ``python minimds.py --full [Hi-C BED path]`` 280 | 281 | Rao GM12878 chr22 250-kb resolution looks better with full MDS. Signs of over-partitioning include outliers and a clumpy or incoherent structure. 282 | 283 | Even if you use partitioned MDS, you can reduce the number of partitions to avoid over-partitioning. Increasing the values of the -m or -p parameters (see below) will reduce the number of partitions. 284 | 285 | #### Under-partitioning 286 | 287 | Many datasets will output a dense spherical structure if full MDS is used, such as Rao GM12878 chr22 10-kb resolution. In theses cases partitioned MDS can be used to produce a more defined structure. If there are too few partitions, the partitions themselves may appear dense and clumpy. Decreasing the values of the -m or -p parameters (see below) will increase the number of partitions, further improving the structure. 288 | 289 | #### Resolution is too high 290 | 291 | Though miniMDS allows structural inference to be achieved at greater resolutions, the degree of improvement will depend on the quality of input data. When performing structural inference, many Hi-C datasets must be processed at lower resolution than for other types of analysis. If miniMDS won't produce good structures at any parameter setting, take a look at the sparsity of your dataset, which will determine its optimal resolution. Sparsity can be estimated as the number of (nonzero) lines in the input file. For example, mesenchymal allele-phased chr22 40-kb resolution structures look spherical, regardless of whether they were generated from full MDS or partitioned MDS, or with an increased partition number. We see that the input file has only 28,239 lines, compared to 347,273 lines in the Rao GM12878 file for the same chromosome at the same resolution (a gold-standard dataset). Thus we reduce the resolution of the input file using bin_bed.py in the scripts folder: 292 | 293 | ``python bin_bed.py [input file (higher resolution)] [desired low resolution (bp)] [output file (lower resolution)]`` 294 | 295 | ``python bin_bed.py mesenchymal_22_40kb.bed 500000 mesenchymal_22_500kb.bed`` 296 | 297 | The lower-resolution file has 2181 lines, compared to 2551 in Rao GM12878. 298 | 299 | #### Normalization problems 300 | 301 | Most datasets should produce okay structures at 1-Mb resolution using full MDS. If not, there could be an issue with normalization, which sometimes produces artifacts. As a sanity check, try inferring structures using raw (un-normalized) data. For a good dataset, this should produce okay structures other than a few outliers. 302 | 303 | #### Data quality problems 304 | 305 | If your raw low-resolution structures look bad, there may be a deeper problem with the data. A simple QC metric is the distance decay, the rapid decrease in contact frequency with linear genomic distance. This can be plotted using distance_decay.py in the scripts folder. 306 | 307 | ``python distance_decay.py [Hi-C bed file]`` 308 | -------------------------------------------------------------------------------- /array_tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def contactToDist(contactMat, alpha): 4 | """Convert contact matrix to distance matrix.""" 5 | distMat = np.zeros_like(contactMat) 6 | numRows = len(contactMat) 7 | for i in range(numRows): 8 | for j in range(i+1): 9 | if contactMat[i,j] != 0: 10 | distMat[i,j] = contactMat[i,j]**(-1./alpha) 11 | return distMat 12 | 13 | def makeSymmetric(mat): 14 | """Make below-diagonal matrix symmetric, in place""" 15 | for row in range(len(mat)): 16 | for col in range(row): 17 | mat[col,row] = mat[row,col] 18 | -------------------------------------------------------------------------------- /data_tools.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | from tools import Tracker 4 | from linear_algebra import * 5 | import array_tools as at 6 | from tad import * 7 | from hic_oe import get_expected 8 | 9 | class ChromParameters(object): 10 | """Basic information on chromosome, inferred from input file""" 11 | def __init__(self, minPos, maxPos, res, name): 12 | self.minPos = minPos #minimum genomic coordinate 13 | self.maxPos = maxPos #maximum genomic coordinate 14 | self.res = res #resolution (bp) 15 | self.name = name #e.g. "chr22" 16 | 17 | def getLength(self): 18 | """Number of possible loci""" 19 | return int((self.maxPos - self.minPos)/self.res) + 1 20 | 21 | def getAbsoluteIndex(self, genCoord): 22 | """Converts genomic coordinate into absolute index. Absolute indexing includes empty (zero) points.""" 23 | if genCoord < self.minPos or genCoord > self.maxPos + self.res: 24 | return None 25 | else: 26 | return int((genCoord - self.minPos)/self.res) 27 | 28 | def getGenCoord(self, abs_index): 29 | """Converts absolute index into genomic coordinate""" 30 | return self.minPos + self.res * abs_index 31 | 32 | def reduceRes(self, resRatio): 33 | """Creates low-res version of this chromosome""" 34 | lowRes = self.res * resRatio 35 | lowMinPos = (self.minPos/lowRes)*lowRes #approximate at low resolution 36 | lowMaxPos = (self.maxPos/lowRes)*lowRes 37 | return ChromParameters(lowMinPos, lowMaxPos, lowRes, self.name) 38 | 39 | class Structure(object): 40 | """Intrachromosomal structure of points or substructures in 3-D space""" 41 | def __init__(self, points, structures, chrom, offset): 42 | self.points = points 43 | if len(structures) == 0 or structures is None: 44 | self.structures = [] 45 | else: 46 | self.setstructures(structures) 47 | self.chrom = chrom #chromosome parameters 48 | self.offset = offset #absolute indexing offset (for substructures only) 49 | 50 | def getCoords(self): 51 | return [point.pos for point in self.getPoints()] 52 | 53 | def setCoords(self, coords): 54 | for coord, abs_index in zip(coords, self.nonzero_abs_indices()): 55 | self.points[abs_index - self.offset].pos = coord 56 | 57 | def nonzero_abs_indices(self): 58 | """Absolute indices for all non-zero points.""" 59 | return np.array([point.absolute_index for point in self.getPoints()]) 60 | 61 | def nonzero_bins_whole_chrom(self): 62 | """Nonzero bin numbers with indexing relative to chromosome position 0 (not chrom.minPos)""" 63 | return self.nonzero_abs_indices() + int(self.chrom.minPos/self.chrom.res) 64 | 65 | def getPoints(self): 66 | """All non-zero points""" 67 | return self.points[np.where(self.points != 0)[0]] 68 | 69 | def subsamplePoints(self, start_abs_index, end_abs_index): 70 | """Set structure's points to only include start_abs_index through end_abs_index""" 71 | points = self.points[start_abs_index:end_abs_index+1] 72 | self.chrom.maxPos = self.chrom.getGenCoord(end_abs_index) 73 | self.chrom.minPos = self.chrom.getGenCoord(start_abs_index) 74 | #re-index 75 | for abs_index in np.where(points != 0)[0]: 76 | points[abs_index].absolute_index = abs_index 77 | self.points = points 78 | self.set_rel_indices() 79 | 80 | def getGenCoords(self): 81 | """Non-zero genomic coordinates of structure""" 82 | return [self.chrom.getGenCoord(abs_index) for abs_index in self.nonzero_abs_indices()] 83 | 84 | def get_rel_index(self, genCoord): 85 | """Converts genomic coordinate into relative index.""" 86 | abs_index = self.chrom.getAbsoluteIndex(genCoord) 87 | if abs_index is None: 88 | return None 89 | else: 90 | abs_index -= self.offset 91 | if abs_index >= 0 and abs_index < len(self.points): 92 | point = self.points[abs_index] 93 | if point == 0: 94 | return None 95 | else: 96 | return point.relative_index 97 | else: 98 | return None 99 | 100 | def setstructures(self, structures): 101 | self.structures = structures 102 | self.points = np.zeros(max([max(structure.nonzero_abs_indices()) for structure in structures]) + 1, dtype=object) #reset 103 | for structure in self.structures: 104 | for point in structure.points: 105 | if point != 0: 106 | self.points[point.absolute_index] = point 107 | 108 | def createSubstructure(self, points, offset): 109 | """Creates substructure containing points""" 110 | substructure = Structure(points, [], self.chrom, offset) 111 | #substructure.set_rel_indices() 112 | self.structures.append(substructure) 113 | 114 | def transform(self, r, t): 115 | """Rotates by r; translates by t""" 116 | if r is None: #default: no rotation 117 | r = np.mat(np.identity(3)) 118 | if t is None: #default: no translation 119 | t = np.mat(np.zeros(3)).T 120 | a = np.mat(self.getCoords()) 121 | n = len(a) 122 | a_transformed = np.array(((r*a.T) + np.tile(t, (1, n))).T) 123 | for i, abs_index in enumerate(self.nonzero_abs_indices()): 124 | self.points[abs_index - self.offset].pos = a_transformed[i] 125 | 126 | def write(self, outpath): 127 | with open(outpath, "w") as out: 128 | out.write(self.chrom.name + "\n") 129 | out.write(str(self.chrom.res) + "\n") 130 | out.write(str(self.chrom.minPos) + "\n") 131 | abs_index = self.offset 132 | for point in self.points: 133 | if point == 0: 134 | out.write("\t".join((str(abs_index), "nan", "nan", "nan")) + "\n") 135 | else: 136 | out.write("\t".join((str(abs_index), str(point.pos[0]), str(point.pos[1]), str(point.pos[2]))) + "\n") 137 | abs_index += 1 138 | out.close() 139 | 140 | def set_rel_indices(self): 141 | """Relative indexing is index relative to non-zero points only""" 142 | for i, abs_index in enumerate(self.nonzero_abs_indices()): 143 | assert abs_index >= self.offset 144 | self.points[abs_index - self.offset].relative_index = i 145 | 146 | def rescale(self): 147 | """Rescale radius of gyration of structure to 1""" 148 | rg = radius_of_gyration(self) 149 | for i, point in enumerate(self.points): 150 | if point != 0: 151 | x, y, z = point.pos 152 | self.points[i].pos = (x/rg, y/rg, z/rg) 153 | 154 | class Point(object): 155 | """Point in 3-D space""" 156 | def __init__(self, pos, chrom, absolute_index, relative_index): 157 | self.pos = pos #3D coordinates 158 | self.chrom = chrom #chromosome parameters 159 | self.absolute_index = absolute_index #index relative to all points in structure (including null/zero points) 160 | self.relative_index = relative_index #index relative to only non-zero points 161 | 162 | def structureFromBed(path, size=None, chrom=None, start=None, end=None, offset=0, chrom_order=None): 163 | """Initializes structure from intrachromosomal BED file.""" 164 | if chrom is None: 165 | chrom = chromFromBed(path) 166 | 167 | if start is None: 168 | start = chrom.minPos 169 | 170 | if end is None: 171 | end = chrom.maxPos 172 | 173 | structure = Structure([], [], chrom, offset) 174 | structure.points = np.zeros(int((end - start)/chrom.res) + 1, dtype=object) #true if locus should be added 175 | 176 | if size is not None: 177 | tracker = Tracker("Identifying loci", size) 178 | 179 | #add loci 180 | with open(path) as listFile: 181 | for line in listFile: 182 | line = line.strip().split() 183 | pos1 = int(line[1]) 184 | pos2 = int(line[4]) 185 | 186 | #intrachromosomal 187 | if chrom_order is None: 188 | if pos1 >= start and pos1 <= end and pos2 >= start and pos2 <= end: 189 | abs_index1 = structure.chrom.getAbsoluteIndex(pos1) 190 | abs_index2 = structure.chrom.getAbsoluteIndex(pos2) 191 | if abs_index1 != abs_index2: #non-self-interacting 192 | structure.points[int((pos1 - start)/chrom.res)] = Point((0,0,0), structure.chrom, abs_index1, 0) 193 | structure.points[int((pos2 - start)/chrom.res)] = Point((0,0,0), structure.chrom, abs_index2, 0) 194 | elif chrom_order == 1: 195 | if pos1 >= start and pos1 <= end: 196 | abs_index = structure.chrom.getAbsoluteIndex(pos1) 197 | structure.points[int((pos1 - start)/chrom.res)] = Point((0,0,0), structure.chrom, abs_index, 0) 198 | elif chrom_order == 2: 199 | if pos2 >= start and pos2 <= end: 200 | abs_index = structure.chrom.getAbsoluteIndex(pos2) 201 | structure.points[int((pos2 - start)/chrom.res)] = Point((0,0,0), structure.chrom, abs_index, 0) 202 | else: 203 | sys.exit("Invalid chrom_order") 204 | 205 | if size is not None: 206 | tracker.increment() 207 | 208 | structure.set_rel_indices() 209 | 210 | return structure 211 | 212 | def round_down(pos, res): 213 | return int(np.floor(float(pos)/res)) * res 214 | 215 | def round_up(pos, res): 216 | return int(np.ceil(float(pos)/res)) * res 217 | 218 | def chromFromBed(path, return_both=False): 219 | """Initialize ChromParams from BED file""" 220 | minPos1 = sys.float_info.max 221 | maxPos1 = 0 222 | minPos2 = sys.float_info.max 223 | maxPos2 = 0 224 | 225 | with open(path) as infile: 226 | for i, line in enumerate(infile): 227 | line = line.split() 228 | pos1 = int(line[1]) 229 | pos2 = int(line[4]) 230 | 231 | if i == 0: #get info from first line 232 | name1 = line[0] 233 | res1 = (int(line[2]) - pos1) 234 | name2 = line[3] 235 | res2 = int(line[5]) - pos2 236 | 237 | if pos1 < minPos1: 238 | minPos1 = pos1 239 | if pos1 > maxPos1: 240 | maxPos1 = pos1 241 | 242 | if pos2 < minPos2: 243 | minPos2 = pos2 244 | if pos2 > maxPos2: 245 | maxPos2 = pos2 246 | 247 | if return_both: 248 | return ChromParameters(round_down(minPos1, res1), round_up(maxPos1, res1), res1, name1), \ 249 | ChromParameters(round_down(minPos2, res2), round_up(maxPos2, res2), res2, name2) 250 | else: 251 | return ChromParameters(round_down(min((minPos1, minPos2)), res1), round_up(max((maxPos1,maxPos2)), res1), res1, name1) 252 | 253 | def matFromBed(path, size=None, structure1=None, structure2=None): 254 | """Converts BED file to matrix. Only includes loci in structure.""" 255 | if structure1 is None: 256 | structure1 = structureFromBed(path, size) 257 | 258 | #intrachromosomal 259 | if structure2 is None: 260 | structure2 = structure1 261 | intrachromosomal = True 262 | else: 263 | intrachromosomal = False 264 | 265 | mat = np.zeros((len(structure1.nonzero_abs_indices()), len(structure2.nonzero_abs_indices()))) 266 | 267 | if size is not None: 268 | tracker = Tracker("Filling matrix", size) 269 | 270 | with open(path) as infile: 271 | for line in infile: 272 | line = line.strip().split() 273 | loc1 = int(line[1]) 274 | loc2 = int(line[4]) 275 | index1 = structure1.get_rel_index(loc1) 276 | index2 = structure2.get_rel_index(loc2) 277 | if index1 is not None and index2 is not None: 278 | val = float(line[6]) 279 | mat[index1, index2] += val 280 | if intrachromosomal: 281 | mat[index2, index1] += val 282 | if size is not None: 283 | tracker.increment() 284 | 285 | rowsums = np.array([sum(row) for row in mat]) 286 | if len(np.where(rowsums == 0)[0]) > 0: 287 | print(np.array(structure.getGenCoords())[np.where(rowsums == 0)[0]]) 288 | assert len(np.where(rowsums == 0)[0]) == 0 289 | 290 | return mat 291 | 292 | def highToLow(highstructure, resRatio): 293 | """Reduces resolution of structure""" 294 | lowChrom = highstructure.chrom.reduceRes(resRatio) 295 | 296 | low_n = int(len(highstructure.points)/resRatio) + 1 297 | 298 | lowstructure = Structure(np.zeros(low_n, dtype=object), [], lowChrom, highstructure.offset/resRatio) 299 | 300 | allPointsToMerge = [[] for i in range(low_n)] 301 | 302 | for highPoint in highstructure.getPoints(): 303 | #pointsToMerge = [] 304 | high_abs_index = highPoint.absolute_index - highstructure.offset 305 | low_abs_index = int(high_abs_index/resRatio) 306 | allPointsToMerge[low_abs_index].append(highPoint) 307 | 308 | index = lowstructure.offset 309 | for i, pointsToMerge in enumerate(allPointsToMerge): 310 | if len(pointsToMerge) > 0: 311 | meanCoord = np.mean(np.array([point.pos for point in pointsToMerge]), axis=0) 312 | lowstructure.points[i] = Point(meanCoord, lowChrom, i + lowstructure.offset, index) 313 | index += 1 314 | 315 | return lowstructure 316 | 317 | def structure_from_file(path): 318 | hasMore = True 319 | with open(path) as infile: 320 | name = infile.readline().strip() 321 | res = int(float(infile.readline().strip())) 322 | minPos = int(float(infile.readline().strip())) 323 | chrom = ChromParameters(minPos, None, res, name) 324 | structure = Structure([], [], chrom, 0) 325 | index = 0 326 | while hasMore: 327 | line = infile.readline().strip().split() 328 | if len(line) == 0: 329 | hasMore = False 330 | else: 331 | num = int(line[0]) 332 | if line[1] == "nan": 333 | point = 0 334 | else: 335 | x = float(line[1]) 336 | y = float(line[2]) 337 | z = float(line[3]) 338 | point = Point((x,y,z), chrom, num, index) 339 | index += 1 340 | structure.points.append(point) 341 | 342 | structure.points = np.array(structure.points) 343 | structure.chrom.maxPos = structure.chrom.minPos + structure.chrom.res*num #max pos is last point num 344 | return structure 345 | 346 | def make_compatible(structures): 347 | """Enforce that points be shared by all structures""" 348 | gen_coord_dict = {} 349 | for i, structure in enumerate(structures): 350 | for gen_coord in structure.getGenCoords(): 351 | if gen_coord in gen_coord_dict: 352 | gen_coord_dict[gen_coord] += 1 353 | else: 354 | gen_coord_dict[gen_coord] = 1 355 | 356 | consensus = [] 357 | n = len(structures) 358 | for gen_coord in gen_coord_dict.keys(): 359 | if gen_coord_dict[gen_coord] == n: 360 | consensus.append(gen_coord) 361 | 362 | consensus = np.sort(consensus) 363 | 364 | for structure in structures: 365 | new_chrom = ChromParameters(consensus[0], consensus[-1] + structure.chrom.res, structure.chrom.res, structure.chrom.name) 366 | new_points = np.zeros(new_chrom.getLength(), dtype=object) 367 | for i, gen_coord in enumerate(consensus): 368 | old_abs_index = structure.chrom.getAbsoluteIndex(gen_coord) 369 | new_abs_index = new_chrom.getAbsoluteIndex(gen_coord) 370 | pos = structure.points[old_abs_index - structure.offset].pos 371 | new_points[new_abs_index - structure.offset] = Point(pos, new_chrom, new_abs_index, i) 372 | structure.points = new_points 373 | structure.chrom = new_chrom 374 | 375 | def consensus_chrom(chroms): 376 | """Enforce that chromosomes have same range""" 377 | consensus_res = chroms[0].res 378 | consensus_name = chroms[0].name 379 | for chrom in chroms: 380 | assert chrom.res == consensus_res 381 | assert chrom.name == consensus_name 382 | minPos = max([chrom.minPos for chrom in chroms]) 383 | maxPos = min([chrom.maxPos for chrom in chroms]) 384 | return ChromParameters(minPos, maxPos, consensus_res, consensus_name) 385 | 386 | def make_points_compatible(structures): 387 | """Enforce that points be shared by all structures. Don't change ChromParameters.""" 388 | gen_coord_dict = {} 389 | for i, structure in enumerate(structures): 390 | for gen_coord in structure.getGenCoords(): 391 | if gen_coord in gen_coord_dict: 392 | gen_coord_dict[gen_coord] += 1 393 | else: 394 | gen_coord_dict[gen_coord] = 1 395 | 396 | consensus = [] 397 | n = len(structures) 398 | for gen_coord in gen_coord_dict.keys(): 399 | if gen_coord_dict[gen_coord] == n: 400 | consensus.append(gen_coord) 401 | 402 | consensus = np.sort(consensus) 403 | 404 | for structure in structures: 405 | new_points = np.zeros(structure.chrom.getLength(), dtype=object) 406 | for i, gen_coord in enumerate(consensus): 407 | abs_index = structure.chrom.getAbsoluteIndex(gen_coord) 408 | pos = structure.points[abs_index - structure.offset].pos 409 | new_points[abs_index - structure.offset] = Point(pos, structure.chrom, abs_index, i) 410 | structure.points = new_points 411 | 412 | def transform(trueLow, highSubstructure, res_ratio): 413 | #approximate as low resolution 414 | inferredLow = highToLow(highSubstructure, res_ratio) 415 | 416 | scaling_factor = radius_of_gyration(trueLow)/radius_of_gyration(inferredLow) 417 | for i, point in enumerate(inferredLow.points): 418 | if point != 0: 419 | x, y, z = point.pos 420 | inferredLow.points[i].pos = (x*scaling_factor, y*scaling_factor, z*scaling_factor) 421 | 422 | #recover the transformation for inferred from true low structure 423 | r, t = getTransformation(inferredLow, trueLow) 424 | t /= scaling_factor 425 | 426 | #transform high structure 427 | highSubstructure.transform(r, t) 428 | 429 | def distmat(path, structure, size=None, alpha=4, weight=0.05): 430 | contactMat = matFromBed(path, size, structure) 431 | 432 | assert len(structure.nonzero_abs_indices()) == len(contactMat) 433 | 434 | expected = get_expected(contactMat) 435 | distMat = np.zeros_like(contactMat) 436 | for i in range(len(contactMat)): 437 | for j in range(i): 438 | corrected = (1-weight)*contactMat[i,j] + weight*expected[i-j-1] 439 | if corrected != 0: 440 | dist = corrected**(-1./alpha) 441 | distMat[i,j] = dist 442 | distMat[j,i] = dist 443 | 444 | rowsums = np.array([sum(row) for row in distMat]) 445 | assert len(np.where(rowsums == 0)[0]) == 0 446 | 447 | distMat = distMat/np.mean(distMat) #normalize 448 | 449 | return distMat 450 | 451 | def size_from_bed(path): 452 | with open(path) as in_file: 453 | for i, line in enumerate(in_file): 454 | pass 455 | return i 456 | -------------------------------------------------------------------------------- /hic_oe.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def oe(mat): 4 | n = len(mat) 5 | 6 | tots = np.zeros(n-1) 7 | counts = np.zeros(n-1) 8 | 9 | for i in range(n): 10 | for j in range(i): 11 | observed = mat[i,j] 12 | if observed != 0: 13 | s = i - j 14 | tots[s - 1] += observed 15 | counts[s - 1] += 1 16 | 17 | avgs = np.zeros(n-1) 18 | for i, count in enumerate(counts): 19 | if count != 0: 20 | avgs[i] = tots[i]/count 21 | 22 | oe_mat = np.zeros_like(mat) 23 | 24 | for i in range(n): 25 | for j in range(i): 26 | observed = mat[i,j] 27 | s = i - j 28 | expected = avgs[s - 1] 29 | if expected != 0: 30 | oe_mat[i,j] = observed/expected 31 | 32 | return oe_mat 33 | 34 | def get_expected(mat): 35 | n = len(mat) 36 | 37 | tots = np.zeros(n-1) 38 | counts = np.zeros(n-1) 39 | 40 | for i in range(n): 41 | for j in range(i): 42 | observed = mat[i,j] 43 | if observed != 0: 44 | s = i - j 45 | tots[s - 1] += observed 46 | counts[s - 1] += 1 47 | 48 | avgs = np.zeros(n-1) 49 | for i, count in enumerate(counts): 50 | if count != 0: 51 | avgs[i] = tots[i]/count 52 | 53 | return avgs 54 | -------------------------------------------------------------------------------- /linear_algebra.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def getTransformation(structure1, structure2): 4 | """Recovers transformation needed to align structure1 with structure2. Modified from http://nghiaho.com/?page_id=671""" 5 | abs_indices1 = structure1.nonzero_abs_indices() 6 | abs_indices2 = structure2.nonzero_abs_indices() 7 | 8 | intersection = [num for num in abs_indices1 if num in abs_indices2] 9 | 10 | a = [] #will hold 3D coords 11 | b = [] 12 | for num in intersection: 13 | a.append(structure1.points[int(num)-int(structure1.offset)].pos) 14 | b.append(structure2.points[int(num)-int(structure2.offset)].pos) 15 | 16 | a = np.mat(a) 17 | b = np.mat(b) 18 | 19 | n = a.shape[0] #number of points 20 | 21 | centroid_a = np.mean(a, axis=0) 22 | centroid_b = np.mean(b, axis=0) 23 | 24 | #center the points 25 | aa = a - np.tile(centroid_a, (n, 1)) 26 | bb = b - np.tile(centroid_b, (n, 1)) 27 | 28 | h = np.transpose(aa) * bb 29 | 30 | u, s, vt = np.linalg.svd(h) 31 | 32 | r = vt.T * u.T 33 | 34 | t = -r*centroid_a.T + centroid_b.T 35 | 36 | return r, t 37 | 38 | def calcDistance(coord1, coord2): 39 | """Euclidean distance between coordinates""" 40 | return ((coord1[0] - coord2[0])**2 + (coord1[1] - coord2[1])**2 + (coord1[2] - coord2[2])**2)**(1./2) 41 | 42 | def radius_of_gyration(structure): 43 | coords = np.array(structure.getCoords()) 44 | centroid = np.mean(coords, axis=0) 45 | dist_sum = sum([calcDistance(coord, centroid) for coord in coords]) 46 | return dist_sum/len(coords) 47 | 48 | def cmds(distMat): 49 | """Modified from http://www.nervouscomputer.com/hfs/cmdscale-in-python/""" 50 | # Number of points 51 | n = len(distMat) 52 | 53 | # Centering matrix 54 | h = np.eye(n) - np.ones((n, n))/n 55 | 56 | # YY^T 57 | b = -h.dot(distMat**2).dot(h)/2 58 | 59 | # Diagonalize 60 | evals, evecs = np.linalg.eigh(b) 61 | 62 | # Sort by eigenvalue in descending order 63 | idx = np.argsort(evals)[::-1] 64 | evals = evals[idx] 65 | evecs = evecs[:,idx] 66 | 67 | return np.array([evecs[:,0]*evals[0]**(1./2), evecs[:,1]*evals[1]**(1./2), evecs[:,2]*evals[2]**(1./2)]).T 68 | 69 | def change_coordinate_system(n, coords): 70 | """Rotate 3-D coords such that vector n aligns to z-axis""" 71 | origin = [0,0,0] 72 | len_n = calcDistance(n, origin) 73 | n_hat = n/len_n 74 | i_hat = [1,0,0] #x-axis 75 | j_hat = [0,1,0] #y-axis 76 | k_hat = [0,0,1] #z-axis 77 | theta = np.arccos(np.dot(k_hat, n_hat)) 78 | b = np.cross(k_hat, n_hat) 79 | len_b = calcDistance(b, origin) 80 | b_hat = b/len_b 81 | 82 | q0 = np.cos(theta/2) 83 | q1 = np.sin(theta/2)*b_hat[0] 84 | q2 = np.sin(theta/2)*b_hat[1] 85 | q3 = np.sin(theta/2)*b_hat[2] 86 | 87 | Q = np.matrix([[q0**2 + q1**2 - q2**2 - q3**2, 2*(q1*q2 - q0*q3), 2*(q1*q3 + q0*q2)], [2*(q2*q1 + q0*q3), q0**2 - q1**2 + q2**2 - q3**2, 2*(q2*q3 - q0*q1)], [2*(q3*q1 - q0*q2), 2*(q3*q2 + q0*q1), q0**2 - q1**2 - q2**2 + q3**2]]) 88 | 89 | i_hat = np.matrix(i_hat).transpose() 90 | j_hat = np.matrix(j_hat).transpose() 91 | k_hat = np.matrix(k_hat).transpose() 92 | 93 | u_hat = Q*i_hat 94 | v_hat = Q*j_hat 95 | w_hat = Q*k_hat 96 | 97 | new_coords = np.zeros_like(coords) 98 | for i in range(len(coords)): 99 | p = coords[i] 100 | new_coords[i] = [np.dot(p, u_hat), np.dot(p, v_hat), np.dot(p, w_hat)] 101 | return new_coords 102 | -------------------------------------------------------------------------------- /minimds.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | from sklearn import manifold 4 | import argparse 5 | import multiprocessing as mp 6 | import data_tools as dt 7 | import array_tools as at 8 | import tad 9 | import linear_algebra as la 10 | import tools 11 | from matplotlib import pyplot as plt 12 | from hic_oe import get_expected 13 | 14 | def infer_structure(contactMat, structure, alpha, num_threads, weight, classical=False): 15 | """Infers 3D coordinates for one structure""" 16 | assert len(structure.nonzero_abs_indices()) == len(contactMat) 17 | 18 | expected = get_expected(contactMat) 19 | for i in range(len(contactMat)): 20 | for j in range(i): 21 | contactMat[i,j] = (1-weight)*contactMat[i,j] + weight*expected[i-j-1] 22 | 23 | at.makeSymmetric(contactMat) 24 | rowsums = np.array([sum(row) for row in contactMat]) 25 | assert len(np.where(rowsums == 0)[0]) == 0 26 | 27 | distMat = at.contactToDist(contactMat, alpha) 28 | at.makeSymmetric(distMat) 29 | 30 | distMat = distMat/np.mean(distMat) #normalize 31 | 32 | if classical: #classical MDS 33 | coords = la.cmds(distMat) 34 | else: 35 | coords = manifold.MDS(n_components=3, metric=True, random_state=np.random.RandomState(), verbose=0, dissimilarity="precomputed", n_jobs=num_threads).fit_transform(distMat) 36 | 37 | structure.setCoords(coords) 38 | 39 | def fullMDS(path, classical, alpha, num_threads, weight): 40 | """MDS without partitioning""" 41 | structure = dt.structureFromBed(path) 42 | contactMat = dt.matFromBed(path, structure1=structure) 43 | infer_structure(contactMat, structure, alpha, num_threads, weight, classical) 44 | return structure 45 | 46 | def mds_partition(high_substructure, low_substructure, path, alpha2, weight, res_ratio): 47 | if len(high_substructure.getPoints()) > 0: #skip empty 48 | #perform MDS individually 49 | structure_contactMat = dt.matFromBed(path, structure1=high_substructure) #contact matrix for this structure only 50 | infer_structure(structure_contactMat, high_substructure, alpha2, 1, weight) 51 | 52 | #approximate as low resolution 53 | inferred_low = dt.highToLow(high_substructure, res_ratio) 54 | 55 | #rescale 56 | scaling_factor = la.radius_of_gyration(low_substructure)/la.radius_of_gyration(inferred_low) 57 | for i, point in enumerate(inferred_low.points): 58 | if point != 0: 59 | x, y, z = point.pos 60 | inferred_low.points[i].pos = (x*scaling_factor, y*scaling_factor, z*scaling_factor) 61 | 62 | #recover the transformation for inferred from true low structure 63 | r, t = la.getTransformation(inferred_low, low_substructure) 64 | t /= scaling_factor 65 | 66 | #transform high structure 67 | high_substructure.transform(r, t) 68 | 69 | print(f"MDS performed on substructure") 70 | 71 | return high_substructure 72 | 73 | def partitionedMDS(path, args): 74 | """Partitions structure into substructures and performs MDS""" 75 | domainSmoothingParameter = args[0] 76 | minSizeFraction = args[1] 77 | maxmemory = args[2] 78 | num_threads = args[3] 79 | alpha = args[4] 80 | res_ratio = args[5] 81 | alpha2 = args[6] 82 | weight = args[7] 83 | 84 | #create low-res structure 85 | print("Begininning partitioned MDS") 86 | low_chrom = dt.chromFromBed(path) 87 | print("Initialized low-resolution chromosome") 88 | low_chrom.res *= res_ratio 89 | lowstructure = dt.structureFromBed(path, chrom=low_chrom) #low global structure 90 | print("Initialized low-resolution structure") 91 | 92 | #get TADs 93 | low_contactMat = dt.matFromBed(path, structure1=lowstructure) 94 | low_tads = tad.getDomains(low_contactMat, lowstructure, domainSmoothingParameter, minSizeFraction) #low substructures, defined on relative indices not absolute indices 95 | tad.substructuresFromTads(lowstructure, low_tads) 96 | print("Identified TADs") 97 | 98 | #create high-res chrom 99 | #size, res = dt.basicParamsFromBed(path) 100 | highChrom = dt.ChromParameters(lowstructure.chrom.minPos, lowstructure.chrom.maxPos, low_chrom.res/res_ratio, lowstructure.chrom.name) 101 | 102 | #create high-res structure 103 | highstructure = dt.Structure([], [], highChrom, 0) 104 | 105 | #initialize high-res substructures 106 | high_substructures = [] 107 | low_gen_coords = lowstructure.getGenCoords() 108 | offset = 0 #initialize 109 | for i, low_tad in enumerate(low_tads): 110 | start_gen_coord = low_gen_coords[low_tad[0]] 111 | if i == len(low_tads) - 1: #for last tad, avoid rounding error 112 | end_gen_coord = highstructure.chrom.maxPos 113 | else: 114 | end_gen_coord = low_gen_coords[low_tad[1]] 115 | high_substructure = dt.structureFromBed(path, None, highChrom, start_gen_coord, end_gen_coord, offset) 116 | high_substructures.append(high_substructure) 117 | offset += len(high_substructure.points) #update 118 | offset -= 1 119 | 120 | highstructure.setstructures(high_substructures) 121 | print("Initialized high-resolution structure") 122 | 123 | infer_structure(low_contactMat, lowstructure, alpha, num_threads, weight) 124 | print("Low-resolution MDS complete") 125 | 126 | num_substructures = len(highstructure.structures) 127 | print(f"Performing MDS on {num_substructures} substructures") 128 | with mp.Pool(processes=min((num_threads, mp.cpu_count(), num_substructures))) as pool: #don't exceed number of requested threads, available threads, or structures 129 | 130 | high_substructures = [pool.apply_async(mds_partition, (high_substructure, low_substructure, path, alpha2, weight, res_ratio)) \ 131 | for high_substructure, low_substructure in zip(highstructure.structures, lowstructure.structures)] 132 | 133 | highstructure.setstructures([high_substructure.get() for high_substructure in high_substructures]) 134 | 135 | highstructure.set_rel_indices() 136 | 137 | return highstructure 138 | 139 | def main(): 140 | parser = argparse.ArgumentParser(description="Reconstruct 3D coordinates from normalized intrachromosomal Hi-C BED files.") 141 | parser.add_argument("path", help="path to intrachromosomal Hi-C BED file") 142 | parser.add_argument("--classical", action="store_true", help="use classical MDS (default: metric MDS)") 143 | parser.add_argument("--partitioned", action="store_true", help="use partitioned MDS (default: full MDS)") 144 | parser.add_argument("-l", type=int, help="low resolution/high resolution", default=10) 145 | parser.add_argument("-p", type=float, default=0.1, help="domain size parameter: larger value means fewer structures created (for partitioned MDS only)") 146 | parser.add_argument("-m", type=float, default=0.05, help="minimum domain size parameter: prevents structures from being too small (for partitioned MDS only)") 147 | parser.add_argument("-o", help="path to output file") 148 | parser.add_argument("-r", default=32000000, help="maximum RAM to use (in kb)") 149 | parser.add_argument("-n", type=int, default=3, help="number of threads") 150 | parser.add_argument("-a", type=float, default=4, help="alpha factor for converting contact frequencies to physical distances") 151 | parser.add_argument("-a2", type=float, default=2.5, help="short-range alpha factor for converting contact frequencies to physical distances") 152 | parser.add_argument("-w", type=float, default=0.05, help="weight of distance decay prior") 153 | args = parser.parse_args() 154 | 155 | if args.partitioned: 156 | params = (args.p, args.m, args.r, args.n, args.a, args.l, args.a2, args.w) 157 | names = ("Domain size parameter", "Minimum domain size", "Maximum memory", "Number of threads", "Alpha", "Resolution ratio", "Short-range alpha", "Weight") 158 | intervals = ((0, 1), (0, 1), (0, None), (0, None), (0, None), (1, None), (0, None), (0, 1)) 159 | if not tools.args_are_valid(params, names, intervals): 160 | sys.exit(1) 161 | 162 | structure = partitionedMDS(args.path, params) 163 | 164 | else: #not partitioned 165 | structure = fullMDS(args.path, args.classical, args.a, args.n, args.w) 166 | 167 | if args.o: 168 | structure.write(args.o) 169 | else: 170 | prefix = args.path.split(".bed")[0] 171 | structure.write(prefix + "_structure.tsv") 172 | 173 | if __name__ == "__main__": 174 | main() 175 | -------------------------------------------------------------------------------- /minimds_inter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import data_tools as dt 3 | import linear_algebra as la 4 | import sys 5 | from sklearn import manifold 6 | import tools 7 | import argparse 8 | import minimds as mm 9 | import array_tools as at 10 | 11 | def infer_structures(contactMat, structures, offsets, alpha, num_threads, classical=False): 12 | """Infers 3D coordinates for multiple structures with same contact matrix""" 13 | assert sum([len(structure.nonzero_abs_indices()) for structure in structures]) == len(contactMat) 14 | 15 | #at.makeSymmetric(contactMat) 16 | rowsums = np.array([sum(row) for row in contactMat]) 17 | assert len(np.where(rowsums == 0)[0]) == 0 18 | 19 | distMat = at.contactToDist(contactMat, alpha) 20 | at.makeSymmetric(distMat) 21 | #rowsums = np.array([sum(row) for row in distMat]) 22 | #assert len(np.where(rowsums == 0)[0]) == 0 23 | 24 | if classical: #classical MDS 25 | coords = la.cmds(distMat) 26 | else: 27 | coords = manifold.MDS(n_components=3, metric=True, random_state=np.random.RandomState(), verbose=0, dissimilarity="precomputed", n_jobs=num_threads).fit_transform(distMat) 28 | 29 | for offset, structure in zip(offsets, structures): 30 | structure.setCoords(coords[offset:offset+len(structure.getPoints())]) 31 | 32 | def get_inter_mat(prefix, inter_res_string, intra_res_string, structures, offsets): 33 | names = [structure.chrom.name for structure in structures] 34 | n = len(names) 35 | for i in range(n): 36 | if names[i].startswith("chr"): 37 | names[i] = names[i][3:len(names[i])] #remove "chr" 38 | 39 | #fill matrix 40 | total_len = sum([len(structure.getPoints()) for structure in structures]) 41 | mat = np.zeros((total_len, total_len)) 42 | for i in range(n): 43 | for j in range(i+1): 44 | if i == j: 45 | path = "{}_{}_{}.bed".format(prefix, names[i], intra_res_string) 46 | else: 47 | path = "{}_{}_{}_{}.bed".format(prefix, names[j], names[i], inter_res_string) 48 | print("Reading {}".format(path)) 49 | with open(path) as bed: 50 | for line in bed: 51 | line = line.strip().split() 52 | loc1 = int(line[4]) 53 | loc2 = int(line[1]) 54 | index1 = structures[i].get_rel_index(loc1) 55 | index2 = structures[j].get_rel_index(loc2) 56 | if index1 is not None and index2 is not None: 57 | row = index1 + offsets[i] 58 | col = index2 + offsets[j] 59 | count = float(line[6]) 60 | mat[row, col] += count 61 | mat[col, row] += count 62 | bed.close() 63 | return mat 64 | 65 | def interMDS(names, prefix, inter_res, intra_res, partitioned, args): 66 | inter_res_string = tools.get_res_string(inter_res) 67 | intra_res_string = tools.get_res_string(intra_res) 68 | 69 | #get low-res structures from intra files 70 | low_structures = [] 71 | for name in names: 72 | path = "{}_{}_{}.bed".format(prefix, name, intra_res_string) 73 | chrom = dt.chromFromBed(path) 74 | #reduce res 75 | chrom.res = inter_res 76 | chrom.minPos = int(np.floor(float(chrom.minPos)/chrom.res)) * chrom.res #round 77 | chrom.maxPos = int(np.ceil(float(chrom.maxPos)/chrom.res)) * chrom.res 78 | low_structures.append(dt.structureFromBed(path, chrom=chrom)) 79 | 80 | #for correct indexing 81 | n = len(names) 82 | offsets = np.zeros(n, dtype=int) 83 | for i in range(1, n): 84 | offsets[i] = offsets[i-1] + len(low_structures[i-1].getPoints()) 85 | 86 | inter_mat = get_inter_mat(prefix, inter_res_string, intra_res_string, low_structures, offsets) 87 | 88 | #perform MDS at low resolution on all chroms 89 | infer_structures(inter_mat, low_structures, offsets, args[3], args[4]) 90 | 91 | #perform MDS at high resolution on each chrom 92 | high_structures = [] 93 | inferred_low_structures = [] 94 | ts = [] 95 | for true_low, name in zip(low_structures, names): 96 | path = "{}_{}_{}.bed".format(prefix, name, intra_res_string) 97 | if partitioned: 98 | high_structure = mm.partitionedMDS(path, args) 99 | else: 100 | high_structure = mm.fullMDS(path, False, args[4], args[3], args[7]) 101 | high_structures.append(high_structure) 102 | inferred_low = dt.highToLow(high_structure, true_low.chrom.res/high_structure.chrom.res) 103 | inferred_low_structures.append(inferred_low) 104 | 105 | #rescale 106 | rescaling_factor = la.radius_of_gyration(true_low)/la.radius_of_gyration(inferred_low) 107 | rescaled_coords = [rescaling_factor * coord for coord in inferred_low.getCoords()] 108 | for i, point in enumerate(inferred_low.getPoints()): 109 | point.pos = rescaled_coords[i] 110 | 111 | r, t = la.getTransformation(inferred_low, true_low) 112 | high_structure.transform(r, None) #do not translate now (need to rescale) 113 | ts.append(t) 114 | 115 | #translate (with rescaling) 116 | low_rgs = np.array([la.radius_of_gyration(structure) for structure in low_structures]) 117 | high_rgs = np.array([la.radius_of_gyration(structure) for structure in high_structures]) 118 | scaling_factor = np.mean(high_rgs/low_rgs) 119 | for high_structure, t in zip(high_structures, ts): 120 | high_structure.transform(None, scaling_factor*t) #rescale translation 121 | 122 | return high_structures 123 | 124 | def main(): 125 | parser = argparse.ArgumentParser(description="Reconstruct 3D coordinates from normalized interchromosomal Hi-C BED files.") 126 | parser.add_argument("prefix", help="prefix of Hi-C BED files") 127 | parser.add_argument("inter_res", type=int, help="resolution of interchromosomal BED files (bp)") 128 | parser.add_argument("intra_res", type=int, help="resolution of intrachromosomal BED files (bp)") 129 | parser.add_argument("--partitioned", action="store_true", help="use partitioned MDS (default: full MDS)") 130 | parser.add_argument("-c", action="append", default=[], help="names of chromosomes to use, e.g. 1 (default: all human chromosomes other than Y)") 131 | parser.add_argument("-C", type=int, help="number of autosomes") 132 | parser.add_argument("-l", type=int, help="low resolution/high resolution", default=10) 133 | parser.add_argument("-p", type=float, default=0.1, help="domain size parameter: larger value means fewer structures created (for partitioned MDS only)") 134 | parser.add_argument("-m", type=float, default=0.05, help="minimum domain size parameter: prevents structures from being too small (for partitioned MDS only)") 135 | parser.add_argument("-o", help="prefix of output file") 136 | parser.add_argument("-r", default=32000000, help="maximum RAM to use (in kb)") 137 | parser.add_argument("-n", type=int, default=3, help="Number of threads") 138 | parser.add_argument("-a", type=float, default=4, help="alpha factor for converting contact frequencies to physical distances") 139 | parser.add_argument("-a2", type=float, default=2.5, help="short-range alpha factor for converting contact frequencies to physical distances") 140 | parser.add_argument("-w", type=float, default=0.05, help="weight of distance decay prior") 141 | args = parser.parse_args() 142 | 143 | if len(args.c) == 0: 144 | if args.C: 145 | chrom_names = range(1, args.C+1) 146 | else: 147 | chrom_names = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, "X"] 148 | else: 149 | chrom_names = args.c 150 | 151 | 152 | params = (args.p, args.m, args.r, args.n, args.a, args.l, args.a2, args.w) 153 | names = ("Domain size parameter", "Minimum domain size", "Maximum memory", "Number of threads", "Alpha", "Resolution ratio", "Short-range alpha", "Weight") 154 | intervals = ((0, 1), (0, 1), (0, None), (0, None), (1, None), (1, None), (1, None), (None, 1)) 155 | if not tools.args_are_valid(params, names, intervals): 156 | sys.exit(1) 157 | 158 | structures = interMDS(chrom_names, args.prefix, args.inter_res, args.intra_res, args.partitioned, params) 159 | 160 | if args.o: 161 | for structure in structures: 162 | structure.write("{}_{}_{}_structure.tsv".format(args.o, structure.chrom.name.strip("chr"), tools.get_res_string(structure.chrom.res))) 163 | else: 164 | for structure in structures: 165 | structure.write("{}_{}_{}_structure.tsv".format(args.prefix, structure.chrom.name.strip("chr"), tools.get_res_string(structure.chrom.res))) 166 | 167 | if __name__ == "__main__": 168 | main() 169 | -------------------------------------------------------------------------------- /plotting.py: -------------------------------------------------------------------------------- 1 | from mayavi import mlab 2 | import numpy as np 3 | import linear_algebra as la 4 | import os 5 | import sys 6 | from matplotlib import pyplot as plt 7 | 8 | #from Rippe 2001 9 | kl = 289 #Kuhn length (nm) 10 | bpPerKL = 30000. #base pairs per Kuhn length 11 | chromatinDiameter = 30 #diameter of heterochromatin (nm) 12 | 13 | default_colors = np.array([[255,0,0], [0,255,238], [255,238,0], [0,102,255], [255,0,170], [255,102,0], [204,255,0], [0,238,255], [0,68,255], [255,0,102], [255,136,0], [0,255,34], [0,204,255], [34,0,255], [255,0,68], [255,170,0], [0,255,136], [0,170,255], [204,0,255], [255,204,0], [0,255,204], [0,136,255], [255,0,238]])/255. 14 | default_colors = [tuple(color) for color in default_colors] #convert to tuple 15 | 16 | def plot_structures_interactive(structures, all_enrichments=None, colors=default_colors, radius=None, cut=False, out_path=None): 17 | mlab.close(all=True) 18 | mlab.figure(bgcolor=(1,1,1)) 19 | if radius is None: 20 | radius = calculateRadius(structures) 21 | for i, structure in enumerate(structures): 22 | coords = np.array(structure.getCoords()) 23 | xs = coords[:,0] 24 | ys = coords[:,1] 25 | zs = coords[:,2] 26 | if cut: 27 | midpoint = np.mean(xs) 28 | indices = np.where(xs > midpoint)[0] 29 | xs = xs[indices] 30 | ys = ys[indices] 31 | zs = zs[indices] 32 | if all_enrichments is not None: 33 | mlab.plot3d(xs, ys, zs, all_enrichments[i], tube_radius=radius, colormap="bwr") 34 | else: 35 | mlab.plot3d(xs, ys, zs, tube_radius=radius, color=colors[i]) 36 | if out_path: 37 | mlab.savefig(out_path) 38 | mlab.show() 39 | 40 | fig, ax = plt.subplots() 41 | ax.axis("off") 42 | for i, color in enumerate(colors): 43 | plt.text(0.5, i, "structure {}".format(i+1), color=color) 44 | plt.savefig("structures_legend") 45 | plt.close() 46 | 47 | def plot_structure_interactive(structure, enrichments=None, color=(1,0,0), radius=None, out_path=None): 48 | if radius is None: 49 | radius = calculateRadius([structure]) 50 | coords = np.array(structure.getCoords()) 51 | xs = coords[:,0] 52 | ys = coords[:,1] 53 | zs = coords[:,2] 54 | mlab.figure(bgcolor=(1,1,1)) 55 | if enrichments is not None: 56 | mlab.plot3d(xs, ys, zs, enrichments, tube_radius=radius, colormap="bwr") 57 | else: 58 | mlab.plot3d(xs, ys, zs, tube_radius=radius, color=color) 59 | if out_path: 60 | mlab.savefig(out_path) 61 | mlab.show() 62 | 63 | def plot_structures_gif(structures, outname, all_enrichments=None, colors=default_colors, radius=None, increment=10): 64 | if 360%increment != 0: 65 | print("Error. Increment must be factor of 360.") 66 | sys.exit(0) 67 | if radius is None: 68 | radius = calculateRadius(structures) 69 | mlab.figure(bgcolor=(1,1,1)) 70 | for i in range(0, 360, increment): 71 | for j, structure in enumerate(structures): 72 | coords = np.array(structure.getCoords()) 73 | if all_enrichments is not None: 74 | s = mlab.plot3d(coords[:,0], coords[:,1], coords[:,2], all_enrichments[j], tube_radius=radius, colormap="bwr") 75 | else: 76 | s = mlab.plot3d(coords[:,0], coords[:,1], coords[:,2], tube_radius=radius, color=colors[j]) 77 | mlab.view(i) 78 | mlab.savefig("{}_{:>03}.png".format(outname, i)) 79 | 80 | mlab.close() 81 | os.system("convert {}_*.png {}.gif".format(outname, outname)) 82 | os.system("rm {}_*.png".format(outname)) 83 | 84 | fig, ax = plt.subplots() 85 | ax.axis("off") 86 | for i, color in enumerate(colors): 87 | plt.text(0.5, i, "structure {}".format(i+1), color=color) 88 | plt.savefig("structures_legend") 89 | plt.close() 90 | 91 | def plot_structure_gif(structure, outname, enrichments=None, color=(1,0,0), radius=None, increment=10): 92 | if 360%increment != 0: 93 | print("Error. Increment must be factor of 360.") 94 | sys.exit(0) 95 | if radius is None: 96 | radius = calculateRadius([structure]) 97 | coords = np.array(structure.getCoords()) 98 | mlab.figure(bgcolor=(1,1,1)) 99 | if enrichments is not None: 100 | s = mlab.plot3d(coords[:,0], coords[:,1], coords[:,2], enrichments, tube_radius=radius, colormap="bwr") 101 | else: 102 | s = mlab.plot3d(coords[:,0], coords[:,1], coords[:,2], tube_radius=radius, color=color) 103 | for i in range(0, 360, increment): 104 | mlab.view(i) 105 | mlab.savefig("{}_{:>03}.png".format(outname, i)) 106 | 107 | mlab.close() 108 | os.system("convert {}_*.png {}.gif".format(outname, outname)) 109 | os.system("rm {}_*.png".format(outname)) 110 | 111 | def calculateRadius(structures): 112 | """Calculate to-scale radius based on Kuhn length and diameter of chromatin""" 113 | conversionFactors = np.zeros(len(structures)) 114 | for j, structure in enumerate(structures): 115 | totDist = 0 116 | coords = structure.getCoords() 117 | n = len(coords) 118 | for i in range(1, n): 119 | totDist += la.calcDistance(coords[i-1], coords[i]) 120 | avgDist = totDist/(n-1) #average distance between neighboring loci 121 | physicalDist = kl * (structure.chrom.res/bpPerKL)**(1./2) #physical distance between neighboring loci (nm) 122 | conversionFactors[j] = avgDist/physicalDist 123 | conversionFactor = np.mean(conversionFactors) 124 | return chromatinDiameter/2 * conversionFactor 125 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.5.2 2 | numpy==1.22.0 3 | scikit-learn==1.5.0 4 | scipy==1.10.0 -------------------------------------------------------------------------------- /scripts/README.txt: -------------------------------------------------------------------------------- 1 | Run the bash scripts to recreate the figures from the paper. 2 | 3 | For example, fig1.sh will create the files Fig1A.png and Fig1B.png. sup1.sh will create supplemental fig. 1. 4 | 5 | Requirements: 6 | - matplotlib 7 | - scipy 8 | - MOGEN requires Java 9 | - ChromSDE requires MATLAB 10 | - HSA requires R 11 | 12 | Chromosome3D can't be installed programmatically, so it must be installed according to the instructions here (https://github.com/multicom-toolbox/Chromosome3D) prior to running fig4.sh, fig5.sh, sup1.sh, sup2.sh, or sup3.sh. 13 | 14 | 4/6/17: The ChromSDE website is currently inaccessible. Relevant portions of code have been commented out. 15 | -------------------------------------------------------------------------------- /scripts/bin_bed.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("..") 3 | import data_tools as dt 4 | import numpy as np 5 | 6 | path = sys.argv[1] 7 | res = int(sys.argv[2]) 8 | outpath = sys.argv[3] 9 | 10 | chrom1, chrom2 = dt.chromFromBed(path, True) 11 | 12 | chrom1.res = res 13 | chrom1.minPos = dt.round_down(chrom1.minPos, res) 14 | chrom1.maxPos = dt.round_up(chrom1.maxPos, res) 15 | struct1 = dt.structureFromBed(path, chrom=chrom1, chrom_order=1) 16 | points1 = struct1.getPoints() 17 | 18 | chrom2.res = res 19 | chrom2.minPos = dt.round_down(chrom2.minPos, res) 20 | chrom2.maxPos = dt.round_up(chrom2.maxPos, res) 21 | struct2 = dt.structureFromBed(path, chrom=chrom2, chrom_order=2) 22 | points2 = struct2.getPoints() 23 | 24 | mat = dt.matFromBed(path, structure1=struct1, structure2=struct2) 25 | 26 | with open(outpath, "w") as out: 27 | for i in range(mat.shape[0]): 28 | gen_coord1 = chrom1.getGenCoord(points1[i].absolute_index) 29 | for j in range(mat.shape[1]): 30 | if mat[i,j] != 0: 31 | gen_coord2 = chrom2.getGenCoord(points2[j].absolute_index) 32 | out.write("\t".join((chrom1.name, str(gen_coord1), str(gen_coord1 + res), chrom2.name, str(gen_coord2), str(gen_coord2 + res), str(mat[i,j])))) 33 | out.write("\n") -------------------------------------------------------------------------------- /scripts/chrom_from_tar.sh: -------------------------------------------------------------------------------- 1 | RES=$1 2 | CHROM=$2 3 | TAR=$3 4 | DATA_DIR=$4 5 | PREFIX=$5 6 | 7 | RES_KB=$(($RES/1000)) 8 | 9 | if [ $RES_KB -lt 1000 ] 10 | then 11 | RES_STRING=$RES_KB"kb" 12 | else 13 | RES_STRING=$(($RES_KB/1000))"mb" 14 | fi 15 | 16 | OUT_DIR=$PREFIX/$RES_STRING"_resolution_intrachromosomal"/chr$CHROM 17 | test ! -d $DATA_DIR/$OUT_DIR && (tar -C $DATA_DIR -xzf $TAR $OUT_DIR) 18 | 19 | test ! -s $DATA_DIR/$PREFIX"_"$CHROM"_"$RES_KB$RES_STRING.bed && (python normalize.py $DATA_DIR/$PREFIX $RES $CHROM) -------------------------------------------------------------------------------- /scripts/chromosome3d_input.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | sys.path.append("..") 4 | import data_tools as dt 5 | 6 | in_path = sys.argv[1] 7 | out_path = sys.argv[2] 8 | 9 | contactMat = dt.matFromBed(in_path) 10 | np.savetxt(out_path, contactMat, delimiter="\t") 11 | -------------------------------------------------------------------------------- /scripts/chromsde_input.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | sys.path.append("..") 4 | import data_tools as dt 5 | 6 | in_path = sys.argv[1] 7 | mat_path = sys.argv[2] 8 | id_path = sys.argv[3] 9 | 10 | structure = dt.structureFromBed(in_path, None, None) 11 | contactMat = dt.matFromBed(in_path, structure1=structure) 12 | n = len(contactMat) 13 | maxNumDigits = int(np.ceil(np.log10(np.amax(contactMat)))) 14 | formatstring = "%" + str(maxNumDigits) + "d" 15 | np.savetxt(mat_path, contactMat, formatstring, delimiter="\t") 16 | 17 | name = structure.chrom.name 18 | name = name[3:len(name)] #remove "chr" 19 | with open(id_path, "w") as out: 20 | for i, point in enumerate(structure.getPoints()): 21 | out.write("\t".join((name, str(structure.chrom.minPos + point.num*structure.chrom.res), str(structure.chrom.minPos + (point.num+1)*structure.chrom.res), str(i+1))) + "\n") -------------------------------------------------------------------------------- /scripts/distance_decay.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("..") 3 | import data_tools as dt 4 | from matplotlib import pyplot as plt 5 | import numpy as np 6 | 7 | mat = dt.matFromBed(sys.argv[1]) 8 | 9 | n = len(mat) 10 | 11 | tots = np.zeros(n-1) 12 | counts = np.zeros_like(tots) 13 | 14 | for i in range(n): 15 | for j in range(i): 16 | s = i - j 17 | if mat[i,j] != 0: 18 | tots[s-1] += mat[i,j] 19 | counts[s-1] += 1 20 | 21 | avgs = np.zeros_like(tots) 22 | 23 | for i, (tot, count), in enumerate(zip(tots, counts)): 24 | if count != 0: 25 | avgs[i] = tot/count 26 | 27 | plt.plot(range(n-1), avgs) 28 | plt.xlabel("Separation (number of bins)") 29 | plt.ylabel("Average contact frequency") 30 | plt.show() 31 | -------------------------------------------------------------------------------- /scripts/figures/fig1.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("..") 3 | import data_tools as dt 4 | from tools import Tracker 5 | import simple_tad as tad 6 | import heatmap as hm 7 | import numpy as np 8 | 9 | def matFromDixon(path, chrom): 10 | """Creates contact matrix from Dixon tsv data""" 11 | numBins = chrom.getLength() 12 | mat = np.zeros((numBins, numBins)) 13 | tracker = Tracker("Reading " + path, chrom.size) 14 | with open(path) as infile: 15 | for line in infile: 16 | line = line.strip().split() 17 | pos1 = int(line[0]) 18 | pos2 = int(line[1]) 19 | if pos1 != pos2: 20 | if pos1 >= chrom.minPos and pos1 <= chrom.maxPos and pos2 >= chrom.minPos and pos2 <= chrom.maxPos: 21 | bin1 = chrom.getAbsoluteIndex(pos1) 22 | bin2 = chrom.getAbsoluteIndex(pos2) 23 | if bin1 > bin2: 24 | row = bin1 25 | col = bin2 26 | else: 27 | row = bin1 28 | col = bin2 29 | mat[row, col] += 1 30 | tracker.increment() 31 | infile.close() 32 | return mat 33 | 34 | def plotDixon(mat): 35 | tads = [[0,8], [8,38], [38,52], [52,78], [78,97], [97,115], [115,127]] 36 | outpath = "Fig1A" 37 | hm.heatMapFromMat(mat, 100, tads, outpath) 38 | 39 | def plotMovingAverage(mat): 40 | smoothingFactor = 5 41 | outpath = "Fig1B" 42 | tads = tad.getDomains(mat, smoothingFactor, 0) 43 | hm.heatMapFromMat(mat, 100, tads, outpath) 44 | 45 | minPos = 49000000 #from Dixon 46 | maxPos = 54066692 #from Dixon 47 | res = 40000 #from Dixon 48 | name = "chr22" 49 | size = 30949158 50 | path = "mESC_chr6.tsv" 51 | 52 | chrom = dt.ChromParameters(minPos, maxPos, res, name, size) 53 | 54 | mat = matFromDixon(path, chrom) 55 | plotDixon(mat) 56 | plotMovingAverage(mat) 57 | -------------------------------------------------------------------------------- /scripts/figures/fig1.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | if [ ! -e mESC_chr6.tsv ] 4 | then 5 | wget ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE35nnn/GSE35156/suppl/GSE35156_GSM862720_J1_mESC_HindIII_ori_HiC.nodup.hic.summary.txt.gz 6 | gunzip GSE35156_GSM862720_J1_mESC_HindIII_ori_HiC.nodup.hic.summary.txt.gz 7 | cat GSE35156_GSM862720_J1_mESC_HindIII_ori_HiC.nodup.hic.summary.txt | awk ' $2 == "chr6" && $5 == "chr6" {print $3"\t"$6} ' > mESC_chr6.tsv 8 | rm GSE35156_GSM862720_J1_mESC_HindIII_ori_HiC.nodup.hic.summary.txt* 9 | fi 10 | python fig1.py 11 | -------------------------------------------------------------------------------- /scripts/figures/fig10.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("..") 3 | import plotting as plot 4 | import data_tools as dt 5 | 6 | chroms = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, "X"] 7 | structures = [dt.structure_from_file("hic_data/GM12878_combined_{}_10kb_structure.tsv".format(chrom)) for chrom in chroms] 8 | plot.plot_structures_interactive(structures, out_path="Fig10.png") 9 | -------------------------------------------------------------------------------- /scripts/figures/fig10.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | ./get_gm12878.sh 10000 0 4 | ./get_gm12878_inter.sh 1000000 5 | 6 | python ../minimds_inter.py --partitioned -l 10 -o hic_data/GM12878_combined hic_data/GM12878_combined 1000000 10000 7 | python3.6 fig10.py 8 | -------------------------------------------------------------------------------- /scripts/figures/fig2.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("..") 3 | from data_tools import ChromParameters 4 | from tools import Tracker 5 | import heatmap as hm 6 | import simple_tad as tad 7 | import numpy as np 8 | 9 | def matFromDixon(path, chrom): 10 | """Creates contact matrix from Dixon tsv data""" 11 | numBins = chrom.getLength() 12 | mat = np.zeros((numBins, numBins)) 13 | tracker = Tracker("Reading " + path, chrom.size) 14 | with open(path) as infile: 15 | for line in infile: 16 | line = line.strip().split() 17 | pos1 = int(line[0]) 18 | pos2 = int(line[1]) 19 | if pos1 != pos2: 20 | if pos1 >= chrom.minPos and pos1 <= chrom.maxPos and pos2 >= chrom.minPos and pos2 <= chrom.maxPos: 21 | bin1 = chrom.getAbsoluteIndex(pos1) 22 | bin2 = chrom.getAbsoluteIndex(pos2) 23 | if bin1 > bin2: 24 | row = bin1 25 | col = bin2 26 | else: 27 | row = bin1 28 | col = bin2 29 | mat[row, col] += 1 30 | tracker.increment() 31 | infile.close() 32 | return mat 33 | 34 | def plotLevels(mat): 35 | smoothingFactors = [1, 2, 3, 8, 33] #these smoothing factors were selected to demonstrate to best demonstrate TAD levels 36 | domainsToInclude = [range(1, 15), [2,3,4,5], [7], [1,6], [3]] #selected domains from these smoothing factors to maximize prettiness 37 | all_tads = [] 38 | for i in range(len(smoothingFactors)): 39 | smoothingFactor = smoothingFactors[i] 40 | indices = domainsToInclude[i] 41 | tads = tad.getDomains(mat, smoothingFactor, 0) 42 | for index in indices: 43 | all_tads.append(tads[index]) 44 | hm.heatMapFromMat(mat, 100, all_tads, "Fig2") #all levels combined 45 | 46 | minPos = 49000000 #from Dixon 47 | maxPos = 54066692 #from Dixon 48 | res = 40000 #from Dixon 49 | name = "chr22" 50 | size = 30949158 51 | path = "mESC_chr6.tsv" 52 | 53 | chrom = ChromParameters(minPos, maxPos, res, name, size) 54 | 55 | mat = matFromDixon(path, chrom) 56 | plotLevels(mat) 57 | -------------------------------------------------------------------------------- /scripts/figures/fig2.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | if [ ! -e mESC_chr6.tsv ] 4 | then 5 | wget ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE35nnn/GSE35156/suppl/GSE35156_GSM862720_J1_mESC_HindIII_ori_HiC.nodup.hic.summary.txt.gz 6 | gunzip GSE35156_GSM862720_J1_mESC_HindIII_ori_HiC.nodup.hic.summary.txt.gz 7 | cat GSE35156_GSM862720_J1_mESC_HindIII_ori_HiC.nodup.hic.summary.txt | awk ' $2 == "chr6" && $5 == "chr6" {print $3"\t"$6} ' > mESC_chr6.tsv 8 | rm GSE35156_GSM862720_J1_mESC_HindIII_ori_HiC.nodup.hic.summary.txt* 9 | fi 10 | python fig2.py 11 | -------------------------------------------------------------------------------- /scripts/figures/fig4.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from matplotlib import pyplot as plt 3 | import misc 4 | 5 | #labels = ("Chromosome3D", "mMDS", "cMDS", "miniMDS", "MOGEN", "ChromSDE") 6 | labels = ("mMDS", "cMDS", "miniMDS", "MOGEN") 7 | x_pos = np.arange(len(labels)) 8 | 9 | #with open("chromosome3d_chr22_10kb_time.txt") as in_file: 10 | # chromosomethreed_time = float(in_file.readline().strip())/60 #time in minutes 11 | #in_file.close() 12 | 13 | with open("mmds_chr22_10kb_time.txt") as in_file: 14 | mmds_time = float(in_file.readline().strip())/60 #time in minutes 15 | in_file.close() 16 | 17 | with open("cmds_chr22_10kb_time.txt") as in_file: 18 | cmds_time = float(in_file.readline().strip())/60 #time in minutes 19 | in_file.close() 20 | 21 | with open("minimds_chr22_10kb_time.txt") as in_file: 22 | minimds_time = float(in_file.readline().strip())/60 #time in minutes 23 | in_file.close() 24 | 25 | with open("mogen_chr22_10kb_time.txt") as in_file: 26 | mogen_time = float(in_file.readline().strip())/60 #time in minutes 27 | in_file.close() 28 | 29 | #with open("chromsde_chr22_10kb_time.txt") as in_file: 30 | # chromsde_time = float(in_file.readline().strip())/60 #time in minutes 31 | #in_file.close() 32 | 33 | #times = [chromosomethreed_time, mmds_time, cmds_time, minimds_time, mogen_time, chromsde_time] 34 | times = [mmds_time, cmds_time, minimds_time, mogen_time] 35 | 36 | #colors = ["y", "r", "g", "b", "m", "blueviolet"] 37 | colors = ["r", "g", "b", "m"] 38 | 39 | rects = plt.bar(x_pos, times, align="center", color = colors) 40 | plt.yscale("log", subsy=[]) 41 | plt.tick_params(top=False,bottom=False,right=False,left=False, labelbottom=False) 42 | #plt.legend((rects[0], rects[1], rects[2], rects[3], rects[4], rects[5]), labels, fontsize=9, loc=0) 43 | plt.legend((rects[0], rects[1], rects[2], rects[3]), labels, fontsize=9, loc=0) 44 | plt.ylabel("Computational time (minutes)") 45 | 46 | plt.savefig("Fig4.png") 47 | -------------------------------------------------------------------------------- /scripts/figures/fig4.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | TIME=/usr/bin/time 4 | 5 | bash get_gm12878.sh 100000 22 6 | bash get_gm12878.sh 10000 22 7 | 8 | BEDPATH=hic_data/GM12878_combined_22_10kb.bed 9 | 10 | #Chromosome3D 11 | 12 | #create input 13 | #INPUT_PATH=Chromosome3D/input/GM12878_combined_22_10kb.txt 14 | 15 | #if [ ! -e $INPUT_PATH] 16 | # then 17 | # python chromosome3d_input.py $BEDPATH $INPUT_PATH 18 | #fi 19 | 20 | #run 21 | #$TIME -o chromosome3d_chr22_10kb_time.txt -f %e perl Chromosome3D/chromosome3D.pl -i $INPUT_PATH -o Chromosome3D/output_models/chr22_10kb -m 1 22 | 23 | #mMDS 24 | 25 | $TIME -o mmds_chr22_10kb_time.txt -f %e python ../minimds.py $BEDPATH 26 | 27 | #cMDS 28 | 29 | $TIME -o cmds_chr22_10kb_time.txt -f %e python ../minimds.py --classical $BEDPATH 30 | 31 | #miniMDS 32 | 33 | $TIME -o minimds_chr22_10kb_time.txt -f %e python ../minimds.py --partitioned $BEDPATH 34 | 35 | #MOGEN 36 | 37 | #install 38 | bash install_mogen.sh 39 | 40 | #create input 41 | INPUT_PATH=MOGEN/examples/hiC/input/GM12878_combined_22_10kb.tsv 42 | if [ ! -e $INPUT_PATH ] 43 | then 44 | python mogen_input.py $BEDPATH $INPUT_PATH 45 | fi 46 | 47 | #run 48 | $TIME -o mogen_chr22_10kb_time.txt -f %e java -jar MOGEN/examples/hiC/3DGenerator.jar parameters_chr22_10kb.txt 49 | 50 | #ChromSDE 51 | 52 | #install 53 | #bash install_chromsde.sh 54 | 55 | #create input 56 | #CONTACTS_PATH=ChromSDE/chr22_10kb_contacts.dat 57 | #IDS_PATH=ChromSDE/chr22_10kb_ids.dat 58 | 59 | #if [ ! -e $CONTACTS_PATH ] || [ ! -e $IDS_PATH ] 60 | # then 61 | # python chromsde_input.py $BEDPATH $CONTACTS_PATH $IDS_PATH 62 | #fi 63 | 64 | #cd ChromSDE 65 | 66 | #run 67 | #$TIME -o chromsde_chr22_10kb_time.txt -f %e matlab -nodisplay -nosplash -nodesktop -r "run('run_chromsde('chr22_10kb_contacts.dat', 'chr22_10kb_ids.dat')')" > "../"$CHROMSDE_OUT 68 | 69 | #cd .. 70 | 71 | python fig4.py 72 | -------------------------------------------------------------------------------- /scripts/figures/fig5.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | import misc 3 | import numpy as np 4 | import sys 5 | sys.path.append("..") 6 | import data_tools as dt 7 | 8 | def rep_correlation(coords1, coords2): 9 | dists1 = misc.distsFromCoords(coords1) 10 | dists2 = misc.distsFromCoords(coords2) 11 | 12 | return misc.pearson(dists1, dists2) 13 | 14 | 15 | #labels = ("Chromosome3D", "mMDS", "miniMDS", "MOGEN", "HSA", "ChromSDE") 16 | labels = ("mMDS", "miniMDS", "MOGEN", "HSA") 17 | n = len(labels) 18 | rs = np.zeros(n) 19 | 20 | #Chromosome3D 21 | #coords1 = np.loadtxt("Chromosome3D/output_models/chr22_10kb_rep1/rep1_coords.tsv") 22 | #coords2 = np.loadtxt("Chromosome3D/output_models/chr22_10kb_rep1/rep2_coords.tsv") 23 | #rs[0] = rep_correlation(coords1, coords2) 24 | 25 | #mMDS 26 | coords1 = dt.structure_from_file("hic_data/GM12878_combined_22_10kb_mmds_rep1.tsv").getCoords() 27 | coords2 = dt.structure_from_file("hic_data/GM12878_combined_22_10kb_mmds_rep2.tsv").getCoords() 28 | #rs[1] = rep_correlation(coords1, coords2) 29 | rs[0] = rep_correlation(coords1, coords2) 30 | 31 | #miniMDS 32 | coords1 = dt.structure_from_file("hic_data/GM12878_combined_22_10kb_minimds_rep1.tsv").getCoords() 33 | coords2 = dt.structure_from_file("hic_data/GM12878_combined_22_10kb_minimds_rep2.tsv").getCoords() 34 | #rs[2] = rep_correlation(coords1, coords2) 35 | rs[1] = rep_correlation(coords1, coords2) 36 | 37 | #MOGEN 38 | coords1 = np.loadtxt("MOGEN/examples/hiC/output/GM12878_combined_22_10kb_rep1_coords.tsv") 39 | coords2 = np.loadtxt("MOGEN/examples/hiC/output/GM12878_combined_22_10kb_rep2_coords.tsv") 40 | #rs[3] = rep_correlation(coords1, coords2) 41 | rs[2] = rep_correlation(coords1, coords2) 42 | 43 | #HSA 44 | coords1 = np.loadtxt("hsa/GM12878_combined_22_10kb_rep1_coords.txt") 45 | coords2 = np.loadtxt("hsa/GM12878_combined_22_10kb_rep2_coords.txt") 46 | #rs[4] = rep_correlation(coords1, coords2) 47 | rs[3] = rep_correlation(coords1, coords2) 48 | 49 | #ChromSDE 50 | #coords1 = np.loadtxt("ChromSDE/GM12878_combined_22_10kb_rep1_coords.tsv") 51 | #coords2 = np.loadtxt("ChromSDE/GM12878_combined_22_10kb_rep2_coords.tsv") 52 | #rs[5] = rep_correlation(coords1, coords2) 53 | 54 | x_pos = range(n) 55 | colors = ["y", "r", "b", "c", "m", "blueviolet"] 56 | rects = plt.bar(x_pos, rs, align="center", color = colors) 57 | plt.tick_params(top=False,bottom=False,right=False,left=False, labelbottom=False) 58 | #plt.legend((rects[0], rects[1], rects[2], rects[3], rects[4], rects[5]), labels, fontsize=8, loc=3) 59 | plt.legend((rects[0], rects[1], rects[2], rects[3]), labels, fontsize=8, loc=3) 60 | plt.ylabel("Correlation between iterations") 61 | 62 | plt.savefig("Fig5.png") 63 | -------------------------------------------------------------------------------- /scripts/figures/fig5.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | bash get_gm12878.sh 100000 22 4 | bash get_gm12878.sh 10000 22 5 | 6 | BEDPATH=hic_data/GM12878_combined_22_10kb.bed 7 | 8 | #Chromosome3D 9 | 10 | #create input 11 | #INPUT_PATH=Chromosome3D/input/GM12878_combined_22_10kb.txt 12 | 13 | #if [ ! -e $INPUT_PATH ] 14 | # then 15 | # python chromosome3d_input.py $BEDPATH $INPUT_PATH 16 | #fi 17 | 18 | #rep 1 19 | #perl Chromosome3D/chromosome3D.pl -i $INPUT_PATH -o Chromosome3D/output_models/chr22_10kb_rep1 -m 1 20 | 21 | #rep 2 22 | #perl Chromosome3D/chromosome3D.pl -i $INPUT_PATH -o Chromosome3D/output_models/chr22_10kb_rep2 -m 1 23 | 24 | #process output 25 | #cat Chromosome3D/output_models/chr22_10kb_rep1/GM12878_combined_22_10kb_1.pdb | awk '$1 == "ATOM" {print $6"\t"$7"\t"$8}' > Chromosome3D/output_models/chr22_10kb_rep1/rep1_coords.tsv 26 | #cat Chromosome3D/output_models/chr22_10kb_rep2/GM12878_combined_22_10kb_1.pdb | awk '$1 == "ATOM" {print $6"\t"$7"\t"$8}' > Chromosome3D/output_models/chr22_10kb_rep2/rep2_coords.tsv 27 | 28 | #mMDS 29 | 30 | #rep 1 31 | python ../minimds.py -o hic_data/GM12878_combined_22_10kb_mmds_rep1.tsv $BEDPATH 32 | 33 | #rep 2 34 | python ../minimds.py -o hic_data/GM12878_combined_22_10kb_mmds_rep2.tsv $BEDPATH 35 | 36 | #miniMDS 37 | 38 | #rep 1 39 | python ../minimds.py --partitioned -p 0.01 -m 0.01 -o hic_data/GM12878_combined_22_10kb_minimds_rep1.tsv $BEDPATH 40 | 41 | #rep 2 42 | python ../minimds.py --partitioned -p 0.01 -m 0.01 -o hic_data/GM12878_combined_22_10kb_minimds_rep2.tsv $BEDPATH 43 | 44 | #MOGEN 45 | 46 | #install 47 | bash install_mogen.sh 48 | 49 | #create input 50 | INPUT_PATH=MOGEN/examples/hiC/input/GM12878_combined_22_10kb.tsv 51 | if [ ! -e $INPUT_PATH ] 52 | then 53 | python mogen_input.py $BEDPATH $INPUT_PATH 54 | fi 55 | 56 | #rep 1 57 | java -jar MOGEN/examples/hiC/3DGenerator.jar parameters_chr22_10kb.txt 58 | 59 | #rep 2 60 | java -jar MOGEN/examples/hiC/3DGenerator.jar parameters_chr22_10kb.txt 61 | 62 | #process output 63 | REP_NUM=1 64 | for f in MOGEN/examples/hiC/output/GM12878_combined_22_10kb_*.pdb 65 | do 66 | cat $f | awk '$1 == "ATOM" {print $6"\t"$7"\t"$8}' > "MOGEN/examples/hiC/output/GM12878_combined_22_10kb_rep"$REP_NUM"_coords.tsv" 67 | REP_NUM=$(($REP_NUM+1)) 68 | done 69 | 70 | #HSA 71 | 72 | #install 73 | bash install_hsa.sh 74 | 75 | #create input 76 | INPUT_PATH=hsa/GM12878_combined_22_10kb.tsv 77 | 78 | if [ ! -e $INPUT_PATH ] 79 | then 80 | python hsa_input.py $BEDPATH $INPUT_PATH 81 | fi 82 | 83 | cd hsa 84 | 85 | #rep 1 86 | Rscript myR.R GM12878_combined_22_10kb.tsv 0 GM12878_combined_22_10kb_rep1_coords 1 87 | 88 | #rep 2 89 | Rscript myR.R GM12878_combined_22_10kb.tsv 0 GM12878_combined_22_10kb_rep2_coords 1 90 | 91 | cd .. 92 | 93 | #ChromSDE 94 | 95 | #install 96 | #bash install_chromsde.sh 97 | 98 | #create input 99 | #CONTACTS_PATH=ChromSDE/chr22_10kb_contacts.dat 100 | #IDS_PATH=ChromSDE/chr22_10kb_ids.dat 101 | 102 | #if [ ! -e $CONTACTS_PATH ] || [ ! -e $IDS_PATH ] 103 | # then 104 | # python chromsde_input.py $BEDPATH $CONTACTS_PATH $IDS_PATH 105 | #fi 106 | # 107 | #cd ChromSDE 108 | 109 | #rep 1 110 | #matlab -nodisplay -nosplash -nodesktop -r "run('run_chromsde_rep1')" 111 | 112 | #rep 2 113 | #matlab -nodisplay -nosplash -nodesktop -r "run('run_chromsde_rep2')" 114 | 115 | #process output 116 | #cat contacts_rep1.pos.pdb | awk '$1 == "ATOM" {print $6"\t"$7"\t"$8}' > GM12878_combined_22_10kb_rep1_coords.tsv 117 | #cat contacts_rep2.pos.pdb | awk '$1 == "ATOM" {print $6"\t"$7"\t"$8}' > GM12878_combined_22_10kb_rep2_coords.tsv 118 | 119 | #cd .. 120 | 121 | python fig5.py 122 | -------------------------------------------------------------------------------- /scripts/figures/fig6.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from matplotlib import pyplot as plt 3 | 4 | res_kb = 10 5 | 6 | chrom_sizes = np.loadtxt("chrom_sizes_{}kb.txt".format(res_kb)) 7 | 8 | mmds_times = [] 9 | with open("mmds_{}kb_times.txt".format(res_kb)) as in_file: 10 | for line in in_file: 11 | mmds_times.append(float(line.strip())/60) 12 | in_file.close() 13 | 14 | cmds_times = [] 15 | with open ("cmds_{}kb_times.txt".format(res_kb)) as in_file: 16 | for line in in_file: 17 | cmds_times.append(float(line.strip())/60) 18 | in_file.close() 19 | 20 | minimds_times = [] 21 | with open("minimds_{}kb_times.txt".format(res_kb)) as in_file: 22 | for line in in_file: 23 | minimds_times.append(float(line.strip())/60) 24 | in_file.close() 25 | 26 | mogen_times = [] 27 | with open("mogen_{}kb_times.txt".format(res_kb)) as in_file: 28 | for line in in_file: 29 | mogen_times.append(float(line.strip())/60) 30 | in_file.close() 31 | 32 | fig = plt.figure() 33 | ax = fig.add_subplot(111, frameon=False) 34 | ax.plot(chrom_sizes, mmds_times, linestyle="None", marker="o", markerfacecolor="r", mec="r", markersize=10, label="Standard metric MDS") 35 | ax.plot(chrom_sizes, cmds_times, linestyle="None", marker="o", markerfacecolor="g", mec="g", markersize=10, label="Classical MDS") 36 | ax.plot(chrom_sizes, minimds_times, linestyle="None", marker="o", markerfacecolor="b", mec="b", markersize=10, label="miniMDS") 37 | ax.plot(chrom_sizes, mogen_times, linestyle="None", marker="o", markerfacecolor="m", mec="m", markersize=10, label="MOGEN") 38 | x_offset = 1000 #small number to prevent things from getting cut off 39 | y_offset = 5 40 | xmin = min(chrom_sizes) - x_offset 41 | xmax = max(chrom_sizes) + x_offset 42 | ymin = 0 - y_offset 43 | ymax = max((max(mmds_times), max(cmds_times), max(minimds_times))) + y_offset 44 | plt.axis([xmin, xmax, ymin, ymax]) 45 | plt.axvline(x=xmin, ymin=0, ymax=1, color="k", lw=4) 46 | plt.axhline(y=ymin, xmin=0, xmax=1, color="k", lw=4) 47 | plt.tick_params(direction="out", top=False, right=False, length=12, width=3, pad=10, labelsize=14) 48 | plt.xlabel("Number of genomic loci", fontsize=16) 49 | plt.ylabel("Time (minutes)", fontsize=16) 50 | plt.legend(loc=0, numpoints=1) 51 | plt.tight_layout() 52 | plt.savefig("Fig6.png".format(res_kb)) 53 | -------------------------------------------------------------------------------- /scripts/figures/fig6.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | TIME=/usr/bin/time 4 | 5 | #results files 6 | MINI_OUT=minimds_10kb_times.txt 7 | MMDS_OUT=mmds_10kb_times.txt 8 | CMDS_OUT=cmds_10kb_times.txt 9 | MOGEN_OUT=mogen_10kb_times.txt 10 | 11 | #reset 12 | if [ -e $MINI_OUT ] 13 | then 14 | rm $MINI_OUT 15 | fi 16 | 17 | if [ -e $MMDS_OUT ] 18 | then 19 | rm $MMDS_OUT 20 | fi 21 | 22 | if [ -e $CMDS_OUT ] 23 | then 24 | rm $CMDS_OUT 25 | fi 26 | 27 | if [ -e $MOGEN_OUT ] 28 | then 29 | rm $MOGEN_OUT 30 | fi 31 | 32 | #get data 33 | bash get_gm12878.sh 10000 0 34 | bash get_gm12878.sh 100000 0 35 | 36 | #install MOGEN 37 | bash install_mogen.sh 38 | 39 | #run MDS 40 | for CHROM in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X 41 | do 42 | BEDPATH="hic_data/GM12878_combined_"$CHROM"_10kb.bed" 43 | $TIME -o $MINI_OUT -a -f %e python ../minimds.py --partitioned $BEDPATH 44 | $TIME -o $MMDS_OUT -a -f %e python ../minimds.py $BEDPATH 45 | $TIME -o $CMDS_OUT -a -f %e python ../minimds.py --classical $BEDPATH 46 | 47 | 48 | INPUT_PATH="MOGEN/examples/hiC/input/GM12878_combined_"$CHROM"_10kb.tsv" 49 | if [ ! -e $INPUT_PATH ] 50 | then 51 | python mogen_input.py $BEDPATH $INPUT_PATH 52 | fi 53 | $TIME -o $MOGEN_OUT -a -f %e java -jar MOGEN/examples/hiC/3DGenerator.jar "parameters_chr"$CHROM"_10kb.txt" 54 | done 55 | 56 | if [ ! -e chrom_sizes_10kb.txt ] || [ ! -s chrom_size_10kb.txt ] 57 | then 58 | python get_chrom_sizes.py 10 59 | fi 60 | 61 | #plot 62 | python fig6.py 63 | -------------------------------------------------------------------------------- /scripts/figures/fig7.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from matplotlib import pyplot as plt 3 | 4 | res_kb = 10 5 | 6 | chrom_sizes = np.loadtxt("chrom_sizes_{}kb.txt".format(res_kb)) 7 | 8 | mmds_memories = np.loadtxt("mmds_{}kb_memory.txt".format(res_kb))/10**6 9 | cmds_memories = np.loadtxt("cmds_{}kb_memory.txt".format(res_kb))/10**6 10 | minimds_memories = np.loadtxt("minimds_{}kb_memory.txt".format(res_kb))/10**6 11 | mogen_memories = np.loadtxt("mogen_{}kb_memory.txt".format(res_kb))/10**6 12 | 13 | fig = plt.figure() 14 | ax = fig.add_subplot(111, frameon=False) 15 | ax.plot(chrom_sizes, mmds_memories, linestyle="None", marker="o", markerfacecolor="r", mec="r", markersize=10, label="Standard metric MDS") 16 | ax.plot(chrom_sizes, cmds_memories, linestyle="None", marker="o", markerfacecolor="g", mec="g", markersize=10, label="Classical MDS") 17 | ax.plot(chrom_sizes, minimds_memories, linestyle="None", marker="o",markerfacecolor="b", mec="b", markersize=10, label="miniMDS") 18 | ax.plot(chrom_sizes, mogen_memories, linestyle="None", marker="o",markerfacecolor="m", mec="m", markersize=10, label="MOGEN") 19 | x_offset = 300 #small number to prevent things from getting cut off 20 | y_offset = 3 21 | xmin = min(chrom_sizes) - x_offset 22 | xmax = max(chrom_sizes) + x_offset 23 | ymin = 0 - y_offset 24 | ymax = max((max(mmds_memories), max(cmds_memories), max(minimds_memories))) + y_offset 25 | plt.axis([xmin, xmax, ymin, ymax]) 26 | plt.axvline(x=xmin, ymin=0, ymax=1, color="k", lw=4) 27 | plt.axhline(y=ymin, xmin=0, xmax=1, color="k", lw=4) 28 | plt.tick_params(direction="out", top=False, right=False, length=12,width=3, pad=10, labelsize=14) 29 | plt.xlabel("Number of genomic loci", fontsize=16) 30 | plt.ylabel("Computational memory (Gb)", fontsize=16) 31 | plt.legend(loc=0, numpoints=1) 32 | plt.tight_layout() 33 | plt.savefig("Fig7.png") 34 | -------------------------------------------------------------------------------- /scripts/figures/fig7.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | #parameters 4 | TIME=/usr/bin/time 5 | DOMAIN_SIZE_PARAMETER=0.01 6 | MIN_DOMAIN_SIZE=0.01 7 | 8 | #results files 9 | MINI_OUT=minimds_10kb_memory.txt 10 | MMDS_OUT=mmds_10kb_memory.txt 11 | CMDS_OUT=cmds_10kb_memory.txt 12 | MOGEN_OUT=mogen_10kb_memory.txt 13 | 14 | #reset 15 | if [ -e $MINI_OUT ] 16 | then 17 | rm $MINI_OUT 18 | fi 19 | 20 | if [ -e $MMDS_OUT ] 21 | then 22 | rm $MMDS_OUT 23 | fi 24 | 25 | if [ -e $CMDS_OUT ] 26 | then 27 | rm $CMDS_OUT 28 | fi 29 | 30 | if [ -e $MOGEN_OUT ] 31 | then 32 | rm $MOGEN_OUT 33 | fi 34 | 35 | #install MOGEN 36 | bash install_mogen.sh 37 | 38 | #get data 39 | bash get_gm12878.sh $RES 0 40 | bash get_gm12878.sh 100000 0 41 | 42 | #run algorithms 43 | for CHROM in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X 44 | do 45 | BEDPATH="hic_data/GM12878_combined_"$CHROM"_10kb.bed" 46 | $TIME -f "%M" -o $CMDS_OUT -a python ../minimds.py --classical $BEDPATH 47 | $TIME -f "%M" -o $MMDS_OUT -a python ../minimds.py $BEDPATH 48 | $TIME -f "%M" -o $MINI_OUT -a python ../minimds.py --partitioned -p $DOMAIN_SIZE_PARAMETER -m $MIN_DOMAIN_SIZE $BEDPATH 49 | 50 | INPUT_PATH="MOGEN/examples/hiC/input/GM12878_combined_"$CHROM"_10kb.tsv" 51 | if [ ! -e $INPUT_PATH ] 52 | then 53 | python mogen_input.py $BEDPATH $INPUT_PATH 54 | fi 55 | $TIME -f "%M" -o $MOGEN_OUT -a java -jar MOGEN/examples/hiC/3DGenerator.jar "parameters_chr"$CHROM"_10kb.txt" 56 | done 57 | 58 | if [ ! -e chrom_sizes_10kb.txt ] || [ ! -s chrom_size_10kb.txt ] 59 | then 60 | python get_chrom_sizes.py 10 61 | fi 62 | 63 | #plot 64 | python fig7.py 65 | -------------------------------------------------------------------------------- /scripts/figures/fig8.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | import numpy as np 3 | import sys 4 | sys.path.append("..") 5 | import data_tools as dt 6 | import array_tools as at 7 | from scipy import stats as st 8 | import misc 9 | 10 | chroms = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, "X") 11 | n = len(chroms) 12 | 13 | mmds_rs = np.zeros(n) 14 | cmds_rs = np.zeros(n) 15 | minimds_rs = np.zeros(n) 16 | mogen_rs = np.zeros(n) 17 | 18 | for i, chrom in enumerate(chroms): 19 | bedpath = "hic_data/GM12878_combined_{}_10kb.bed".format(chrom) 20 | 21 | mmds_structure = dt.structure_from_file("hic_data/GM12878_combined_{}_10kb_mmds_coords.tsv".format(chrom)) 22 | contactMat = dt.matFromBed(bedpath, structure1=mmds_structure) 23 | mmds_true_mat = at.contactToDist(contactMat) 24 | at.makeSymmetric(mmds_true_mat) 25 | for j in range(len(mmds_true_mat)): #remove diagonal 26 | mmds_true_mat[j,j] = 0 27 | mmds_distMat = misc.distMat(mmds_structure) 28 | mmds_rs[i] = misc.pearson(mmds_true_mat, mmds_distMat) 29 | 30 | cmds_structure = dt.structure_from_file("hic_data/GM12878_combined_{}_10kb_cmds_coords.tsv".format(chrom)) 31 | contactMat = dt.matFromBed(bedpath, structure1=cmds_structure) 32 | cmds_true_mat = at.contactToDist(contactMat) 33 | at.makeSymmetric(cmds_true_mat) 34 | for j in range(len(cmds_true_mat)): #remove diagonal 35 | cmds_true_mat[j,j] = 0 36 | cmds_distMat = misc.distMat(cmds_structure) 37 | cmds_rs[i] = misc.pearson(cmds_true_mat, cmds_distMat) 38 | 39 | minimds_structure = dt.structure_from_file("hic_data/GM12878_combined_{}_10kb_minimds_coords.tsv".format(chrom)) 40 | contactMat = dt.matFromBed(bedpath, structure1=minimds_structure) 41 | minimds_true_mat = at.contactToDist(contactMat) 42 | at.makeSymmetric(minimds_true_mat) 43 | for j in range(len(minimds_true_mat)): #remove diagonal 44 | minimds_true_mat[j,j] = 0 45 | minimds_distMat = misc.distMat(minimds_structure) 46 | minimds_rs[i] = misc.pearson(minimds_true_mat, minimds_distMat) 47 | 48 | mogen_coords = np.loadtxt("MOGEN/examples/hiC/output/GM12878_combined_{}_10kb_rep1_coords.tsv".format(chrom)) 49 | mogen_distMat = misc.distsFromCoords(mogen_coords) 50 | mogen_rs[i] = misc.pearson(mmds_true_mat, mogen_distMat) #mMDS and MOGEN use the same matrix input procedure 51 | 52 | chrom_sizes = np.loadtxt("chrom_sizes_10kb.txt") 53 | 54 | fig = plt.figure() 55 | ax = fig.add_subplot(111, frameon=False) 56 | ax.plot(chrom_sizes, mmds_rs, linestyle="None", marker="o", markerfacecolor="r", mec="r", markersize=10, label="Standard metric MDS") 57 | ax.plot(chrom_sizes, cmds_rs, linestyle="None", marker="o", markerfacecolor="g", mec="g", markersize=10, label="Classical MDS") 58 | ax.plot(chrom_sizes, minimds_rs, linestyle="None", marker="o",markerfacecolor="b", mec="b", markersize=10, label="miniMDS") 59 | ax.plot(chrom_sizes, mogen_rs, linestyle="None", marker="o",markerfacecolor="m", mec="m", markersize=10, label="MOGEN") 60 | x_offset = 1000 #small number to prevent things from getting cut off 61 | y_offset = 0.01 62 | xmin = min(chrom_sizes) - x_offset 63 | xmax = max(chrom_sizes) + x_offset 64 | ymin = 0 - y_offset 65 | ymax = 0.8 66 | plt.axis([xmin, xmax, ymin, ymax]) 67 | plt.axvline(x=xmin, ymin=0, ymax=1, color="k", lw=4) 68 | plt.axhline(y=ymin, xmin=0, xmax=1, color="k", lw=4) 69 | plt.tick_params(direction="out", top=False, right=False, length=12, width=3, pad=10, labelsize=14) 70 | plt.xlabel("Number of genomic loci", fontsize=16) 71 | plt.ylabel("Correlation between input distances and output distances", fontsize=12) 72 | plt.legend(loc=0, numpoints=1) 73 | plt.tight_layout() 74 | plt.savefig("Fig8.png") -------------------------------------------------------------------------------- /scripts/figures/fig8.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | bash install_mogen.sh 4 | 5 | bash get_gm12878.sh 100000 0 6 | bash get_gm12878.sh 10000 0 7 | 8 | for CHROM in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X 9 | do 10 | BEDPATH="hic_data/GM12878_combined_"$CHROM"_10kb.bed" 11 | python ../minimds.py -o "hic_data/GM12878_combined_"$CHROM"_10kb_mmds_coords.tsv" $BEDPATH 12 | python ../minimds.py --classical -o "hic_data/GM12878_combined_"$CHROM"_10kb_cmds_coords.tsv" $BEDPATH 13 | python ../minimds.py --partitioned -o "hic_data/GM12878_combined_"$CHROM"_10kb_minimds_coords.tsv" -p 0.001 $BEDPATH 14 | 15 | INPUT_PATH="MOGEN/examples/hiC/input/GM12878_combined_"$CHROM"_"$RES_KB"kb.tsv" 16 | if [ ! -e $INPUT_PATH ] 17 | then 18 | python mogen_input.py $BEDPATH $INPUT_PATH 19 | fi 20 | java -jar MOGEN/examples/hiC/3DGenerator.jar "parameters_chr"$CHROM"_10kb.txt" 21 | 22 | #process MOGEN output 23 | REP_NUM=1 24 | for f in "MOGEN/examples/hiC/output/GM12878_combined_"$CHROM"_10kb_"*".pdb" 25 | do 26 | cat $f | awk '$1 == "ATOM" {print $6"\t"$7"\t"$8}' > "MOGEN/examples/hiC/output/GM12878_combined_"$CHROM"_10kb_rep"$REP_NUM"_coords.tsv" 27 | REP_NUM=$(($REP_NUM+1)) 28 | done 29 | done 30 | 31 | python fig8.py 32 | -------------------------------------------------------------------------------- /scripts/figures/fig9.py: -------------------------------------------------------------------------------- 1 | from mayavi import mlab 2 | import sys 3 | sys.path.append("..") 4 | import data_tools as dt 5 | import plotting as plot 6 | import linear_algebra as la 7 | import numpy as np 8 | 9 | def plot_coords_interactive(coords, res, color=(1,0,0), radius=None, out_path=None): 10 | if radius is None: 11 | radius = calculateRadius(coords, res) 12 | xs = coords[:,0] 13 | ys = coords[:,1] 14 | zs = coords[:,2] 15 | mlab.figure(bgcolor=(1,1,1)) 16 | mlab.plot3d(xs, ys, zs, tube_radius=radius, color=color) 17 | if out_path is not None: 18 | mlab.savefig(out_path) 19 | mlab.show() 20 | 21 | def calculateRadius(coords, res): 22 | """Calculate to-scale radius based on Kuhn length and diameter of chromatin""" 23 | #from Rippe (2001) 24 | kl = 289 #Kuhn length (nm) 25 | bpPerKL = 30000. #base pairs per Kuhn length 26 | chromatinDiameter = 30 #diameter of heterochromatin (nm) 27 | 28 | totDist = 0 29 | count = 0 30 | n = len(coords) 31 | for i in range(1, n): 32 | totDist += la.calcDistance(coords[i-1], coords[i]) 33 | count += 1 34 | avgDist = totDist/count #average distance between neighboring loci 35 | physicalDist = kl * (res/bpPerKL)**(1./2) #physical distance between neighboring loci (nm) 36 | conversionFactor = avgDist/physicalDist 37 | return chromatinDiameter/2 * conversionFactor 38 | 39 | mmds_structure = dt.structure_from_file("hic_data/GM12878_combined_22_10kb_mmds_coords.tsv") 40 | cmds_structure = dt.structure_from_file("hic_data/GM12878_combined_22_10kb_cmds_coords.tsv") 41 | minimds_structure = dt.structure_from_file("hic_data/GM12878_combined_22_10kb_minimds_coords.tsv") 42 | 43 | mmds_res = mmds_structure.chrom.res 44 | cmds_res = cmds_structure.chrom.res 45 | minimds_res = minimds_structure.chrom.res 46 | 47 | assert mmds_res == cmds_res == minimds_res 48 | 49 | res = mmds_res 50 | 51 | plot.plot_structure_interactive(mmds_structure, out_path="Fig9A.png") 52 | plot.plot_structure_interactive(cmds_structure, out_path="Fig9B.png") 53 | plot.plot_structure_interactive(minimds_structure, out_path="Fig9C.png") 54 | plot_coords_interactive(np.loadtxt("MOGEN/examples/hiC/output/GM12878_combined_22_10kb_rep1_coords.tsv"), res, out_path="Fig9D.png") 55 | -------------------------------------------------------------------------------- /scripts/figures/fig9.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | bash install_mogen.sh 4 | 5 | bash get_gm12878.sh 100000 22 6 | bash get_gm12878.sh 10000 22 7 | 8 | BEDPATH=hic_data/GM12878_combined_22_10kb.bed 9 | 10 | python ../minimds.py -o hic_data/GM12878_combined_22_10kb_mmds_coords.tsv $BEDPATH 11 | python ../minimds.py --classical -o hic_data/GM12878_combined_22_10kb_cmds_coords.tsv $BEDPATH 12 | python ../minimds.py --partitioned -o hic_data/GM12878_combined_22_10kb_minimds_coords.tsv $BEDPATH 13 | 14 | INPUT_PATH=MOGEN/examples/hiC/input/GM12878_combined_22_10kb.tsv 15 | if [ ! -e $INPUT_PATH ] 16 | then 17 | python mogen_input.py $BEDPATH $INPUT_PATH 18 | fi 19 | 20 | java -jar MOGEN/examples/hiC/3DGenerator.jar parameters_chr22_10kb.txt 21 | 22 | #process MOGEN output 23 | REP_NUM=1 24 | for f in MOGEN/examples/hiC/output/GM12878_combined_22_10kb_*.pdb 25 | do 26 | cat $f | awk '$1 == "ATOM" {print $6"\t"$7"\t"$8}' > "MOGEN/examples/hiC/output/GM12878_combined_22_10kb_rep"$REP_NUM"_coords.tsv" 27 | REP_NUM=$(($REP_NUM+1)) 28 | done 29 | 30 | python fig9.py 31 | -------------------------------------------------------------------------------- /scripts/figures/sup1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from matplotlib import pyplot as plt 3 | import misc 4 | 5 | #labels = ("Chromosome3D", "mMDS", "cMDS", "miniMDS", "MOGEN", "HSA", "ChromSDE") 6 | labels = ("mMDS", "cMDS", "miniMDS", "MOGEN", "HSA") 7 | x_pos = np.arange(len(labels)) 8 | 9 | #with open("chromosome3d_chr22_100kb_time.txt") as in_file: 10 | # chromosomethreed_time = float(in_file.readline().strip())/60 #time in minutes 11 | #in_file.close() 12 | 13 | with open("mmds_chr22_100kb_time.txt") as in_file: 14 | mmds_time = float(in_file.readline().strip())/60 #time in minutes 15 | in_file.close() 16 | 17 | with open("cmds_chr22_100kb_time.txt") as in_file: 18 | cmds_time = float(in_file.readline().strip())/60 #time in minutes 19 | in_file.close() 20 | 21 | with open("minimds_chr22_100kb_time.txt") as in_file: 22 | minimds_time = float(in_file.readline().strip())/60 #time in minutes 23 | in_file.close() 24 | 25 | with open("mogen_chr22_100kb_time.txt") as in_file: 26 | mogen_time = float(in_file.readline().strip())/60 #time in minutes 27 | in_file.close() 28 | 29 | with open("hsa_chr22_100kb_time.txt") as in_file: 30 | hsa_time = float(in_file.readline().strip())/60 #time in minutes 31 | in_file.close() 32 | 33 | #with open("chromsde_chr22_100kb_time.txt") as in_file: 34 | # chromsde_time = float(in_file.readline().strip())/60 #time in minutes 35 | #in_file.close() 36 | 37 | #times = [chromosomethreed_time, mmds_time, cmds_time, minimds_time, mogen_time, hsa_time, chromsde_time] 38 | times = [mmds_time, cmds_time, minimds_time, mogen_time, hsa_time] 39 | 40 | #colors = ["y", "r", "g", "b", "m", "c", "blueviolet"] 41 | colors = ["r", "g", "b", "m", "c"] 42 | 43 | rects = plt.bar(x_pos, times, align="center", color = colors) 44 | plt.yscale("log", subsy=[]) 45 | plt.tick_params(top=False,bottom=False,right=False,left=False, labelbottom=False) 46 | #plt.legend((rects[0], rects[1], rects[2], rects[3], rects[4], rects[5], rects[6]), labels, fontsize=9, loc=0) 47 | plt.legend((rects[0], rects[1], rects[2], rects[3], rects[4]), labels, fontsize=9, loc=0) 48 | plt.ylabel("Computational time (minutes)") 49 | 50 | plt.savefig("Sup1.png") 51 | -------------------------------------------------------------------------------- /scripts/figures/sup1.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | TIME=/usr/bin/time 4 | 5 | bash get_gm12878.sh 1000000 22 6 | bash get_gm12878.sh 100000 22 7 | 8 | BEDPATH=hic_data/GM12878_combined_22_100kb.bed 9 | 10 | #Chromosome3D 11 | 12 | #create input 13 | INPUT_PATH=Chromosome3D/input/GM12878_combined_22_100kb.txt 14 | 15 | if [ ! -e $INPUT_PATH ] 16 | then 17 | python chromosome3d_input.py $BEDPATH $INPUT_PATH 18 | fi 19 | 20 | #run 21 | $TIME -f %e -o chromosome3d_chr22_100kb_time.txt perl Chromosome3D/chromosome3D.pl -i $INPUT_PATH -o Chromosome3D/output_models/chr22_100kb -m 1 22 | 23 | #mMDS 24 | 25 | $TIME -f %e -o mmds_chr22_100kb_time.txt python ../minimds.py $BEDPATH 26 | 27 | #cMDS 28 | 29 | $TIME -f %e -o cmds_chr22_100kb_time.txt python ../minimds.py --classical $BEDPATH 30 | 31 | #miniMDS 32 | 33 | $TIME -f %e -o minimds_chr22_100kb_time.txt python ../minimds.py -l hic_data/GM12878_combined_22_1mb.bed -p 0.01 -m 0.01 $BEDPATH 34 | 35 | #MOGEN 36 | 37 | #install 38 | bash install_mogen.sh 39 | 40 | #create input 41 | INPUT_PATH=MOGEN/examples/hiC/input/GM12878_combined_22_100kb.tsv 42 | 43 | if [ ! -e $INPUT_PATH ] 44 | then 45 | python mogen_input.py $BEDPATH $INPUT_PATH 46 | fi 47 | 48 | #run 49 | $TIME -f %e -o mogen_chr22_100kb_time.txt java -jar MOGEN/examples/hiC/3DGenerator.jar parameters_chr22_100kb.txt 50 | 51 | #HSA 52 | 53 | #install 54 | bash install_hsa.sh 55 | 56 | #create input 57 | INPUT_PATH=hsa/GM12878_combined_22_100kb.tsv 58 | 59 | if [ ! -e $INPUT_PATH ] 60 | then 61 | python hsa_input.py $BEDPATH $INPUT_PATH 62 | fi 63 | 64 | cd hsa 65 | 66 | #run 67 | $TIME -f %e -o ../hsa_chr22_100kb_time.txt Rscript myR.R GM12878_combined_22_100kb.tsv 0 GM12878_combined_22_100kb_coords.tsv 1 68 | 69 | cd .. 70 | 71 | #ChromSDE 72 | 73 | #install 74 | #bash install_chromsde.sh 75 | 76 | #create input 77 | #CONTACTS_PATH=ChromSDE/chr22_100kb_contacts.dat 78 | #IDS_PATH=ChromSDE/chr22_100kb_ids.dat 79 | 80 | #if [ ! -e $CONTACTS_PATH ] || [ ! -e $IDS_PATH ] 81 | # then 82 | # python chromsde_input.py $BEDPATH $CONTACTS_PATH $IDS_PATH 83 | #fi 84 | 85 | #cd ChromSDE 86 | 87 | #run 88 | #$TIME -f %e -o chromsde_chr22_100kb_time.txt matlab -nodisplay -nosplash -nodesktop -r "run('run_chromsde_100kb(22)')" 89 | 90 | #cd .. 91 | 92 | python get_chrom_sizes.py 10 93 | 94 | python sup1.py 95 | -------------------------------------------------------------------------------- /scripts/figures/sup2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from matplotlib import pyplot as plt 3 | 4 | with open("chromosome3d_chr22_100kb_memory.txt") as in_file: 5 | chromthreed_mem = float(in_file.readline().strip()) 6 | in_file.close() 7 | 8 | with open("mmds_chr22_100kb_memory.txt") as in_file: 9 | mmds_mem = float(in_file.readline().strip()) 10 | in_file.close() 11 | 12 | with open("cmds_chr22_100kb_memory.txt") as in_file: 13 | cmds_mem = float(in_file.readline().strip()) 14 | in_file.close() 15 | 16 | with open("minimds_chr22_100kb_memory.txt") as in_file: 17 | minimds_mem = float(in_file.readline().strip()) 18 | in_file.close() 19 | 20 | with open("mogen_chr22_100kb_memory.txt") as in_file: 21 | mogen_mem = float(in_file.readline().strip()) 22 | in_file.close() 23 | 24 | #with open("chromsde_chr22_100kb_memory.txt") as in_file: 25 | # chromsde_mem = float(in_file.readline().strip()) 26 | #in_file.close() 27 | 28 | #labels = ("Chromosome3D", "mMDS", "cMDS", "miniMDS", "MOGEN", "ChromSDE") 29 | labels = ("Chromosome3D", "mMDS", "cMDS", "miniMDS", "MOGEN") 30 | x_pos = np.arange(len(labels)) 31 | #memory = [chromthreed_mem, mmds_mem, cmds_mem, minimds_mem, mogen_mem, chromsde_mem] 32 | memory = [chromthreed_mem, mmds_mem, cmds_mem, minimds_mem, mogen_mem] 33 | 34 | #colors = ["y", "r", "g", "b", "m", "blueviolet"] 35 | colors = ["y", "r", "g", "b", "m"] 36 | 37 | rects = plt.bar(x_pos, memory, align="center", color = colors) 38 | plt.title("100-Kbp resolution", fontsize=12) 39 | plt.yscale("log", subsy=[]) 40 | plt.tick_params(top=False,bottom=False,right=False,left=False, labelbottom=False) 41 | #plt.legend((rects[0], rects[1], rects[2], rects[3], rects[4], rects[5]), labels, fontsize=9, loc=0) 42 | plt.legend((rects[0], rects[1], rects[2], rects[3], rects[4]), labels, fontsize=9, loc=0) 43 | plt.ylabel("Memory usage (Mb)") 44 | plt.savefig("Sup2.png") 45 | -------------------------------------------------------------------------------- /scripts/figures/sup2.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | TIME=/usr/bin/time 4 | 5 | BEDPATH=hic_data/GM12878_combined_22_100kb.bed 6 | 7 | #Chromosome3D 8 | 9 | #create input 10 | INPUT_PATH=Chromosome3D/input/GM12878_combined_22_100kb.txt 11 | 12 | if [ ! -e $INPUT_PATH ] 13 | then 14 | python chromosome3d_input.py $BEDPATH $INPUT_PATH 15 | fi 16 | 17 | #run 18 | $TIME -f "%M" -o chromosome3d_chr22_100kb_memory.txt perl Chromosome3D/chromosome3D.pl -i $INPUT_PATH -o Chromosome3D/output_models/chr22_100kb -m 1 19 | 20 | #mMDS 21 | 22 | $TIME -f "%M" -o mmds_chr22_100kb_memory.txt python ../minimds.py $BEDPATH 23 | 24 | #cMDS 25 | 26 | $TIME -f "%M" -o cmds_chr22_100kb_memory.txt python ../minimds.py --classical $BEDPATH 27 | 28 | #miniMDS 29 | 30 | $TIME -f "%M" -o minimds_chr22_100kb_memory.txt python ../minimds.py -l hic_data/GM12878_combined_22_100kb.bed $BEDPATH 31 | 32 | #MOGEN 33 | 34 | #install 35 | bash install_mogen.sh 36 | 37 | #create input 38 | INPUT_PATH=MOGEN/examples/hiC/input/GM12878_combined_22_100kb.tsv 39 | 40 | if [ ! -e $INPUT_PATH ] 41 | then 42 | python mogen_input.py $BEDPATH $INPUT_PATH 43 | fi 44 | 45 | #run 46 | $TIME -f "%M" -o mogen_chr22_100kb_memory.txt java -jar MOGEN/examples/hiC/3DGenerator.jar parameters_chr22_100kb.txt 47 | 48 | #ChromSDE 49 | 50 | #install 51 | #bash install_chromsde.sh 52 | 53 | #create input 54 | #CONTACTS_PATH=ChromSDE/chr22_100kb_contacts.dat 55 | #IDS_PATH=ChromSDE/chr22_100kb_ids.dat 56 | 57 | #if [ ! -e $CONTACTS_PATH ] || [ ! -e $IDS_PATH ] 58 | # then 59 | # python chromsde_input.py $BEDPATH $CONTACTS_PATH $IDS_PATH 60 | #fi 61 | 62 | #cd ChromSDE 63 | 64 | #run 65 | #$TIME -f "%M" -o chromsde_chr22_100kb_memory.txt matlab -nodisplay -nosplash -nodesktop -r "run('run_chromsde('chr22_100kb_contacts.dat', 'chr22_100kb_ids.dat')')" 66 | 67 | #cd .. 68 | 69 | python sup2.py 70 | -------------------------------------------------------------------------------- /scripts/figures/sup3.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | import numpy as np 3 | import sys 4 | sys.path.append("..") 5 | import data_tools as dt 6 | import array_tools as at 7 | import misc 8 | 9 | bedpath = "hic_data/GM12878_combined_22_100kb.bed" 10 | 11 | mmds_structure = dt.structure_from_file("hic_data/GM12878_combined_22_100kb_mmds_coords.tsv") 12 | contactMat = dt.matFromBed(bedpath, mmds_structure) 13 | mmds_true_mat = at.contactToDist(contactMat) 14 | at.makeSymmetric(mmds_true_mat) 15 | for j in range(len(mmds_true_mat)): #remove diagonal 16 | mmds_true_mat[j,j] = 0 17 | mmds_distMat = misc.distMat(mmds_structure) 18 | mmds_r = misc.pearson(mmds_true_mat, mmds_distMat) 19 | 20 | cmds_structure = dt.structure_from_file("hic_data/GM12878_combined_22_100kb_cmds_coords.tsv") 21 | contactMat = dt.matFromBed(bedpath, cmds_structure) 22 | cmds_true_mat = at.contactToDist(contactMat) 23 | at.makeSymmetric(cmds_true_mat) 24 | for j in range(len(cmds_true_mat)): #remove diagonal 25 | cmds_true_mat[j,j] = 0 26 | cmds_distMat = misc.distMat(cmds_structure) 27 | cmds_r = misc.pearson(cmds_true_mat, cmds_distMat) 28 | 29 | minimds_structure = dt.structure_from_file("hic_data/GM12878_combined_22_100kb_minimds_coords.tsv") 30 | contactMat = dt.matFromBed(bedpath, minimds_structure) 31 | minimds_true_mat = at.contactToDist(contactMat) 32 | at.makeSymmetric(minimds_true_mat) 33 | for j in range(len(minimds_true_mat)): #remove diagonal 34 | minimds_true_mat[j,j] = 0 35 | minimds_distMat = misc.distMat(minimds_structure) 36 | minimds_r = misc.pearson(minimds_true_mat, minimds_distMat) 37 | 38 | mogen_coords = np.loadtxt("MOGEN/examples/hiC/output/GM12878_combined_22_100kb_rep1_coords.tsv") 39 | mogen_distMat = misc.distsFromCoords(mogen_coords) 40 | mogen_r = misc.pearson(mmds_true_mat, mogen_distMat) #mMDS and MOGEN use the same matrix input procedure 41 | 42 | hsa_coords = np.loadtxt("hsa/GM12878_combined_22_100kb_coords.txt") 43 | hsa_distMat = misc.distsFromCoords(hsa_coords) 44 | hsa_r = misc.pearson(mmds_true_mat, hsa_distMat) 45 | 46 | #chromthreed_coords = np.loadtxt("Chromosome3D/output_models/chr22_100kb/chr22_100kb_coords.tsv") 47 | #chromthreed_distMat = misc.distsFromCoords(chromthreed_coords) 48 | #chromthreed_r = misc.pearson(mmds_true_mat, chromthreed_distMat) 49 | 50 | #chromsde_coords = np.loadtxt("ChromSDE/GM12878_combined_22_100kb_coords.tsv") 51 | #chromsde_distMat = misc.distsFromCoords(chromsde_coords) 52 | #chromsde_r = misc.pearson(mmds_true_mat, chromsde_distMat) 53 | 54 | #labels = ("Chromosome3D", "mMDS", "cMDS", "miniMDS", "MOGEN", "HSA", "ChromSDE") 55 | labels = ("mMDS", "cMDS", "miniMDS", "MOGEN", "HSA") 56 | x_pos = np.arange(len(labels)) 57 | #rs = [chromthreed_r, mmds_r, cmds_r, minimds_r, mogen_r, hsa_r, chromsde_r] 58 | rs = [mmds_r, cmds_r, minimds_r, mogen_r, hsa_r] 59 | #colors = ["y", "r", "g", "b", "c", "m", "blueviolet"] 60 | colors = ["r", "g", "b", "c", "m"] 61 | rects = plt.bar(x_pos, rs, align="center", color = colors) 62 | plt.title("100-Kbp resolution", fontsize=12) 63 | plt.tick_params(top=False,bottom=False,right=False,left=False, labelbottom=False) 64 | #plt.legend((rects[0], rects[1], rects[2], rects[3], rects[4], rects[5], rects[6]), labels, fontsize=9, loc=0) 65 | plt.legend((rects[0], rects[1], rects[2], rects[3], rects[4]), labels, fontsize=9, loc=0) 66 | plt.ylabel("Correlation between input distances and output distances") 67 | plt.savefig("Sup3.png") 68 | -------------------------------------------------------------------------------- /scripts/figures/sup3.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | bash get_gm12878.sh 1000000 22 4 | bash get_gm12878.sh 100000 22 5 | 6 | BEDPATH=hic_data/GM12878_combined_22_100kb.bed 7 | 8 | #Chromosome3D 9 | 10 | #create input 11 | INPUT_PATH=Chromosome3D/input/GM12878_combined_22_100kb.txt 12 | 13 | if [ ! -e $INPUT_PATH ] 14 | then 15 | python chromosome3d_input.py $BEDPATH $INPUT_PATH 16 | fi 17 | 18 | #run 19 | perl Chromosome3D/chromosome3D.pl -i $INPUT_PATH -o Chromosome3D/output_models/chr22_100kb -m 1 20 | 21 | #process output 22 | cat Chromosome3D/output_models/chr22_100kb/GM12878_combined_22_100kb_1.pdb | awk '$1 == "ATOM" {print $6"\t"$7"\t"$8}' > Chromosome3D/output_models/chr22_100kb/chr22_100kb_coords.tsv 23 | 24 | #mMDS 25 | 26 | python ../minimds.py -o hic_data/GM12878_combined_22_100kb_mmds_coords.tsv $BEDPATH 27 | 28 | #cMDS 29 | 30 | python ../minimds.py --classical -o hic_data/GM12878_combined_22_100kb_cmds_coords.tsv $BEDPATH 31 | 32 | #miniMDS 33 | 34 | python ../minimds.py -l hic_data/GM12878_combined_22_1mb.bed -p 0.01 -m 0.01 -o hic_data/GM12878_combined_22_100kb_minimds_coords.tsv $BEDPATH 35 | 36 | #MOGEN 37 | 38 | #install 39 | bash install_mogen.sh 40 | 41 | #create input 42 | INPUT_PATH=MOGEN/examples/hiC/input/GM12878_combined_22_100kb.tsv 43 | 44 | if [ ! -e $INPUT_PATH ] 45 | then 46 | python mogen_input.py $BEDPATH $INPUT_PATH 47 | fi 48 | 49 | #run 50 | java -jar MOGEN/examples/hiC/3DGenerator.jar parameters_chr22_100kb.txt 51 | 52 | #process output 53 | REP_NUM=1 54 | for f in MOGEN/examples/hiC/output/GM12878_combined_22_100kb_*.pdb 55 | do 56 | cat $f | awk '$1 == "ATOM" {print $6"\t"$7"\t"$8}' > "MOGEN/examples/hiC/output/GM12878_combined_22_100kb_rep"$REP_NUM"_coords.tsv" 57 | REP_NUM=$(($REP_NUM+1)) 58 | done 59 | 60 | #HSA 61 | 62 | #install 63 | bash install_hsa.sh 64 | 65 | #create input 66 | INPUT_PATH=hsa/GM12878_combined_22_100kb.tsv 67 | 68 | if [ ! -e $INPUT_PATH ] 69 | then 70 | python hsa_input.py $BEDPATH $INPUT_PATH 71 | fi 72 | 73 | cd hsa 74 | 75 | #run 76 | Rscript myR.R GM12878_combined_22_100kb.tsv 0 GM12878_combined_22_100kb_coords 1 77 | 78 | cd .. 79 | 80 | #ChromSDE 81 | 82 | #install 83 | #bash install_chromsde.sh 84 | 85 | #create input 86 | #CONTACTS_PATH=ChromSDE/chr22_100kb_contacts.dat 87 | #IDS_PATH=ChromSDE/chr22_100kb_ids.dat 88 | 89 | #python chromsde_input.py $BEDPATH $CONTACTS_PATH $IDS_PATH 90 | 91 | #cd ChromSDE 92 | 93 | #run 94 | #matlab -nodisplay -nosplash -nodesktop -r "run('run_chromsde_100kb(22)')" 95 | 96 | #process output 97 | #cat contacts_100kb.pos.pdb | awk '$1 == "ATOM" {print $6"\t"$7"\t"$8}' > GM12878_combined_22_100kb_coords.tsv 98 | 99 | #cd .. 100 | 101 | python sup3.py 102 | -------------------------------------------------------------------------------- /scripts/get_chrom_sizes.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("..") 3 | import data_tools as dt 4 | 5 | res_kb = int(sys.argv[1]) 6 | with open("chrom_sizes_{}kb.txt".format(res_kb), "w") as out: 7 | for chrom in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, "X"]: 8 | structure = dt.structure_from_file("hic_data/GM12878_combined_chr{}_{}kb_structure.tsv".format(chrom, res_kb)) 9 | out.write(str(len(structure.getPoints())) + "\n") 10 | out.close() 11 | -------------------------------------------------------------------------------- /scripts/get_gm12878.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | RES=$1 4 | CHROM=$2 5 | 6 | DATA_DIR=hic_data 7 | mkdir -p $DATA_DIR 8 | 9 | PREFIX=GM12878_combined 10 | TAR=$DATA_DIR/GSE63525_${PREFIX}_intrachromosomal_contact_matrices.tar.gz 11 | 12 | test ! -s $TAR && (curl ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE63nnn/GSE63525/suppl/GSE63525%5FGM12878%5Fcombined%5Fintrachromosomal%5Fcontact%5Fmatrices%2Etar%2Egz -o $TAR) 13 | 14 | #all human chromosomes 15 | if [ $CHROM -eq 0 ] 16 | then 17 | for CHROM in `seq 23` 18 | do 19 | bash chrom_from_tar.sh $RES $CHROM $TAR $DATA_DIR $PREFIX 20 | done 21 | 22 | bash chrom_from_tar.sh $RES X $TAR $DATA_DIR $PREFIX 23 | 24 | #selected chromosome 25 | else 26 | bash chrom_from_tar.sh $RES $CHROM $TAR $DATA_DIR $PREFIX 27 | fi -------------------------------------------------------------------------------- /scripts/get_gm12878_inter.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | RES=$1 4 | 5 | mkdir -p hic_data 6 | 7 | cd hic_data 8 | 9 | if [ ! -e GSE63525_GM12878_combined_interchromosomal_contact_matrices.tar.gz ] 10 | then 11 | curl ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE63nnn/GSE63525/suppl/GSE63525_GM12878_combined_interchromosomal_contact_matrices.tar.gz -o GSE65325_GM12878_combined_interchromosomal_contact_matrices.tar.gz 12 | fi 13 | 14 | RES_KB=$(($RES/1000)) 15 | 16 | if [ $RES_KB -lt 1000 ] 17 | then 18 | RES_STRING=$RES_KB"kb" 19 | else 20 | RES_STRING=$(($RES_KB/1000))"mb" 21 | fi 22 | 23 | DIR=$RES_STRING"_resolution_interchromosomal" 24 | 25 | if [ ! -e GM12878_combined_interchromosomal/$DIR ] 26 | then 27 | tar xzf GSE63525_GM12878_combined_interchromosomal_contact_matrices.tar.gz GM12878_combined_interchromosomal/$DIR 28 | fi 29 | 30 | cd .. 31 | 32 | CHROMS=(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X) 33 | 34 | for i in `seq 0 $((${#CHROMS[@]}-1))` 35 | do 36 | CHROM1=${CHROMS[$i]} 37 | for j in `seq 0 $(($i-1))` 38 | do 39 | CHROM2=${CHROMS[$j]} 40 | python normalize.py hic_data/GM12878_combined_interchromosomal $RES $CHROM1 --chrom2 $CHROM2 41 | mv hic_data/GM12878_combined_interchromosomal_${CHROM2}_${CHROM1}_${RES_STRING}.bed hic_data/GM12878_combined_${CHROM2}_${CHROM1}_${RES_STRING}.bed 42 | done 43 | done 44 | -------------------------------------------------------------------------------- /scripts/heatmap.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | from matplotlib import pyplot as plt 3 | import numpy as np 4 | 5 | def threshold(mat, value): 6 | """Cuts off values above threshold for ease of visualization of heatmap""" 7 | n = len(mat) 8 | thresholded = np.zeros_like(mat) 9 | for i in range(n): 10 | for j in range(len(mat[0])): 11 | thresholded[i,j] = min((mat[i,j], value)) 12 | return thresholded 13 | 14 | def createHeatmap(mat, domains, outpath, colors=None): 15 | # Plot 16 | fig, ax = plt.subplots() 17 | plt.pcolormesh(mat, cmap=plt.cm.Reds) 18 | 19 | # turn off the frame 20 | ax.set_frame_on(False) 21 | 22 | # want a more natural, table-like display 23 | ax.invert_yaxis() 24 | 25 | #turn off ticks 26 | ax.set_xticklabels([]) 27 | ax.set_yticklabels([]) 28 | for t in ax.xaxis.get_major_ticks(): 29 | t.tick1On = False 30 | t.tick2On = False 31 | for t in ax.yaxis.get_major_ticks(): 32 | t.tick1On = False 33 | t.tick2On = False 34 | 35 | #plot domain boundaries 36 | if domains is not None: 37 | if colors is None: #default is black 38 | colors = ["k" for domain in domains] 39 | 40 | for (domain, color) in zip(domains, colors): 41 | lowerBound = domain[0] 42 | upperBound = domain[1] 43 | plt.plot([lowerBound, upperBound], [lowerBound, lowerBound], c=color, lw=1) #horizontal 44 | plt.plot([lowerBound, upperBound], [upperBound, upperBound], c=color, lw=1) #lower horizontal 45 | plt.plot([upperBound, upperBound], [lowerBound, upperBound], c=color, lw=1) #vertical 46 | plt.plot([lowerBound, lowerBound], [lowerBound, upperBound], c=color, lw=1) #left vertical 47 | 48 | if outpath is not None: 49 | plt.savefig(outpath) 50 | 51 | else: 52 | plt.show() 53 | 54 | def heatMapFromMat(mat, maxvalue=None, tads=None, outpath=None, colors=None): 55 | if maxvalue is not None: 56 | mat = threshold(mat, maxvalue) 57 | createHeatmap(mat, tads, outpath, colors) 58 | -------------------------------------------------------------------------------- /scripts/hicpro/HindIII_resfrag_hg19_downsampled.bed: -------------------------------------------------------------------------------- 1 | chr1 42169793 42170284 HIC_chr1_9335 0 + 2 | chr1 45000708 45002062 HIC_chr1_10052 0 + 3 | chr1 54275477 54278186 HIC_chr1_12487 0 + 4 | chr1 58909017 58911877 HIC_chr1_13920 0 + 5 | chr1 59186221 59188177 HIC_chr1_13998 0 + 6 | chr1 63399728 63401618 HIC_chr1_15255 0 + 7 | chr1 64879375 64879614 HIC_chr1_15711 0 + 8 | chr1 72860515 72866568 HIC_chr1_18192 0 + 9 | chr1 74413875 74414875 HIC_chr1_18696 0 + 10 | chr1 76359648 76365153 HIC_chr1_19353 0 + 11 | chr1 80117089 80117670 HIC_chr1_20532 0 + 12 | chr1 81473769 81474058 HIC_chr1_20973 0 + 13 | chr1 81522334 81529049 HIC_chr1_20989 0 + 14 | chr1 83828851 83829505 HIC_chr1_21851 0 + 15 | chr1 86952092 86952937 HIC_chr1_22890 0 + 16 | chr1 88984098 88984341 HIC_chr1_23618 0 + 17 | chr1 96336515 96345707 HIC_chr1_25845 0 + 18 | chr1 97824605 97824726 HIC_chr1_26344 0 + 19 | chr1 110080431 110083450 HIC_chr1_30212 0 + 20 | chr1 112865147 112867697 HIC_chr1_31044 0 + 21 | chr1 115256195 115258531 HIC_chr1_31717 0 + 22 | chr1 121258157 121258212 HIC_chr1_33510 0 + 23 | chr1 154823841 154827390 HIC_chr1_36542 0 + 24 | chr1 158275621 158286817 HIC_chr1_37346 0 + 25 | chr1 161215610 161218180 HIC_chr1_38201 0 + 26 | chr1 176351242 176356275 HIC_chr1_42880 0 + 27 | chr1 179719412 179720898 HIC_chr1_43927 0 + 28 | chr1 182616435 182619647 HIC_chr1_44760 0 + 29 | chr1 198159562 198161126 HIC_chr1_49712 0 + 30 | chr1 198576660 198579071 HIC_chr1_49858 0 + 31 | chr1 204603975 204608737 HIC_chr1_51438 0 + 32 | chr1 210877472 210882749 HIC_chr1_53178 0 + 33 | chr1 232518393 232524400 HIC_chr1_59535 0 + 34 | chr1 248607843 248612002 HIC_chr1_64251 0 + 35 | chr2 15804735 15805111 HIC_chr2_4472 0 + 36 | chr2 21841547 21841946 HIC_chr2_6279 0 + 37 | chr2 25875354 25883998 HIC_chr2_7396 0 + 38 | chr2 28905428 28917599 HIC_chr2_8159 0 + 39 | chr2 29899574 29900240 HIC_chr2_8444 0 + 40 | chr2 31045334 31048837 HIC_chr2_8785 0 + 41 | chr2 39030276 39032116 HIC_chr2_11160 0 + 42 | chr2 42969082 42969131 HIC_chr2_12409 0 + 43 | chr2 45239041 45239816 HIC_chr2_13049 0 + 44 | chr2 57996341 57997394 HIC_chr2_17048 0 + 45 | chr2 58479826 58479840 HIC_chr2_17201 0 + 46 | chr2 75463003 75466784 HIC_chr2_22164 0 + 47 | chr2 75702099 75707374 HIC_chr2_22238 0 + 48 | chr2 76386199 76389144 HIC_chr2_22441 0 + 49 | chr2 83226371 83228086 HIC_chr2_24637 0 + 50 | chr2 85425229 85443983 HIC_chr2_25298 0 + 51 | chr2 86546457 86554034 HIC_chr2_25536 0 + 52 | chr2 89290482 89294953 HIC_chr2_26268 0 + 53 | chr2 103293266 103294909 HIC_chr2_28928 0 + 54 | chr2 104009651 104010169 HIC_chr2_29155 0 + 55 | chr2 114042576 114045949 HIC_chr2_31961 0 + 56 | chr2 117212400 117213042 HIC_chr2_32986 0 + 57 | chr2 119703712 119707233 HIC_chr2_33777 0 + 58 | chr2 128214911 128220500 HIC_chr2_36228 0 + 59 | chr2 129130516 129132086 HIC_chr2_36432 0 + 60 | chr2 134435721 134436401 HIC_chr2_37975 0 + 61 | chr2 144537433 144541703 HIC_chr2_41197 0 + 62 | chr2 147557592 147562247 HIC_chr2_42218 0 + 63 | chr2 149105536 149112428 HIC_chr2_42766 0 + 64 | chr2 154994724 154996371 HIC_chr2_44672 0 + 65 | chr2 170562797 170567441 HIC_chr2_49706 0 + 66 | chr2 180890038 180892108 HIC_chr2_52915 0 + 67 | chr2 191063973 191065687 HIC_chr2_56213 0 + 68 | chr2 191324711 191328087 HIC_chr2_56279 0 + 69 | chr2 208012721 208013180 HIC_chr2_61449 0 + 70 | chr2 213578277 213579004 HIC_chr2_63318 0 + 71 | chr2 219989045 219999992 HIC_chr2_65252 0 + 72 | chr2 226352664 226355918 HIC_chr2_67152 0 + 73 | chr2 228553760 228559261 HIC_chr2_67847 0 + 74 | chr2 231037959 231039190 HIC_chr2_68622 0 + 75 | chr3 1098844 1099312 HIC_chr3_355 0 + 76 | chr3 2271561 2271736 HIC_chr3_731 0 + 77 | chr3 15833275 15834662 HIC_chr3_4524 0 + 78 | chr3 25265003 25268693 HIC_chr3_7628 0 + 79 | chr3 30259560 30265154 HIC_chr3_9307 0 + 80 | chr3 41583886 41591015 HIC_chr3_12730 0 + 81 | chr3 52277038 52280554 HIC_chr3_15336 0 + 82 | chr3 61252312 61254281 HIC_chr3_17949 0 + 83 | chr3 61683774 61685546 HIC_chr3_18068 0 + 84 | chr3 79853052 79855152 HIC_chr3_23828 0 + 85 | chr3 81986762 81988075 HIC_chr3_24470 0 + 86 | chr3 83285897 83286598 HIC_chr3_24932 0 + 87 | chr3 98469801 98471893 HIC_chr3_28845 0 + 88 | chr3 101659789 101660960 HIC_chr3_29892 0 + 89 | chr3 129306258 129309774 HIC_chr3_38336 0 + 90 | chr3 130581881 130581887 HIC_chr3_38680 0 + 91 | chr3 141745708 141746824 HIC_chr3_42038 0 + 92 | chr3 147543597 147545199 HIC_chr3_43971 0 + 93 | chr3 157683262 157684207 HIC_chr3_47265 0 + 94 | chr3 165496405 165501813 HIC_chr3_49777 0 + 95 | chr3 165655173 165658552 HIC_chr3_49834 0 + 96 | chr3 165872624 165872887 HIC_chr3_49909 0 + 97 | chr3 172729128 172731334 HIC_chr3_52101 0 + 98 | chr3 173120240 173121409 HIC_chr3_52218 0 + 99 | chr3 173909195 173909856 HIC_chr3_52485 0 + 100 | chr3 175677752 175680289 HIC_chr3_53070 0 + 101 | chr3 178143067 178145302 HIC_chr3_53820 0 + 102 | chr3 182341050 182346434 HIC_chr3_55138 0 + 103 | chr3 189414766 189416858 HIC_chr3_57130 0 + 104 | chr3 190911077 190915781 HIC_chr3_57611 0 + 105 | chr3 191528028 191530246 HIC_chr3_57807 0 + 106 | chr3 197265572 197265761 HIC_chr3_59457 0 + 107 | chr4 6553998 6560485 HIC_chr4_1372 0 + 108 | chr4 27161431 27163733 HIC_chr4_7609 0 + 109 | chr4 29727427 29728977 HIC_chr4_8422 0 + 110 | chr4 41458825 41461836 HIC_chr4_11995 0 + 111 | chr4 58690364 58695393 HIC_chr4_16386 0 + 112 | chr4 60289143 60295003 HIC_chr4_16864 0 + 113 | chr4 65827455 65833129 HIC_chr4_18586 0 + 114 | chr4 75244368 75246086 HIC_chr4_21548 0 + 115 | chr4 90738937 90739525 HIC_chr4_26631 0 + 116 | chr4 92117746 92118494 HIC_chr4_27116 0 + 117 | chr4 93758468 93759083 HIC_chr4_27614 0 + 118 | chr4 101767871 101778345 HIC_chr4_30233 0 + 119 | chr4 103426812 103435916 HIC_chr4_30759 0 + 120 | chr4 113567830 113568685 HIC_chr4_34095 0 + 121 | chr4 118380243 118382892 HIC_chr4_35648 0 + 122 | chr4 119690576 119692114 HIC_chr4_36083 0 + 123 | chr4 122954811 122956632 HIC_chr4_37150 0 + 124 | chr4 127975606 127976928 HIC_chr4_38810 0 + 125 | chr4 130751067 130757977 HIC_chr4_39746 0 + 126 | chr4 135433881 135436254 HIC_chr4_41214 0 + 127 | chr4 141042309 141051844 HIC_chr4_43008 0 + 128 | chr4 145287890 145288134 HIC_chr4_44404 0 + 129 | chr4 155734924 155739358 HIC_chr4_47695 0 + 130 | chr4 163491164 163491661 HIC_chr4_50205 0 + 131 | chr4 172122067 172134678 HIC_chr4_52977 0 + 132 | chr4 182718368 182726318 HIC_chr4_56352 0 + 133 | chr4 183876940 183877375 HIC_chr4_56732 0 + 134 | chr4 184531646 184544285 HIC_chr4_56933 0 + 135 | chr4 188399922 188400945 HIC_chr4_58094 0 + 136 | chr5 1566851 1568849 HIC_chr5_275 0 + 137 | chr5 2623528 2623921 HIC_chr5_530 0 + 138 | chr5 18246247 18248510 HIC_chr5_5216 0 + 139 | chr5 32547294 32552713 HIC_chr5_9604 0 + 140 | chr5 39455763 39462184 HIC_chr5_11889 0 + 141 | chr5 42599227 42602642 HIC_chr5_12986 0 + 142 | chr5 44843937 44844019 HIC_chr5_13697 0 + 143 | chr5 46215543 46216228 HIC_chr5_14494 0 + 144 | chr5 49860994 49867039 HIC_chr5_14848 0 + 145 | chr5 54431477 54434902 HIC_chr5_16336 0 + 146 | chr5 60562825 60563835 HIC_chr5_18293 0 + 147 | chr5 61301361 61307273 HIC_chr5_18554 0 + 148 | chr5 63550978 63553399 HIC_chr5_19291 0 + 149 | chr5 68047135 68048324 HIC_chr5_20752 0 + 150 | chr5 69002559 69004234 HIC_chr5_20986 0 + 151 | chr5 75531227 75532283 HIC_chr5_22984 0 + 152 | chr5 82628817 82631688 HIC_chr5_25203 0 + 153 | chr5 88170950 88171279 HIC_chr5_26998 0 + 154 | chr5 103166204 103176426 HIC_chr5_31830 0 + 155 | chr5 121604269 121609386 HIC_chr5_37701 0 + 156 | chr5 127350398 127355609 HIC_chr5_39609 0 + 157 | chr5 132010404 132013729 HIC_chr5_41014 0 + 158 | chr5 134795313 134796424 HIC_chr5_41731 0 + 159 | chr5 142512793 142513336 HIC_chr5_43848 0 + 160 | chr5 144788682 144789893 HIC_chr5_44565 0 + 161 | chr5 146668858 146671413 HIC_chr5_45151 0 + 162 | chr6 2978348 2981202 HIC_chr6_841 0 + 163 | chr6 14587944 14589836 HIC_chr6_4321 0 + 164 | chr6 25357977 25361640 HIC_chr6_7560 0 + 165 | chr6 62614488 62627213 HIC_chr6_17535 0 + 166 | chr6 64060085 64064068 HIC_chr6_17956 0 + 167 | chr6 64702499 64703116 HIC_chr6_18176 0 + 168 | chr6 67229947 67230329 HIC_chr6_18975 0 + 169 | chr6 68731649 68732706 HIC_chr6_19442 0 + 170 | chr6 70922622 70923910 HIC_chr6_20136 0 + 171 | chr6 73145757 73147670 HIC_chr6_20873 0 + 172 | chr6 82362925 82363780 HIC_chr6_23715 0 + 173 | chr6 83182997 83187039 HIC_chr6_23998 0 + 174 | chr6 85900022 85902179 HIC_chr6_24871 0 + 175 | chr6 95232735 95238108 HIC_chr6_27804 0 + 176 | chr6 99208581 99216379 HIC_chr6_29064 0 + 177 | chr6 101710661 101711407 HIC_chr6_29878 0 + 178 | chr6 102920032 102920192 HIC_chr6_30260 0 + 179 | chr6 109475093 109475155 HIC_chr6_32363 0 + 180 | chr6 112969711 112976744 HIC_chr6_33354 0 + 181 | chr6 127324521 127329973 HIC_chr6_38033 0 + 182 | chr6 129879116 129883815 HIC_chr6_38923 0 + 183 | chr6 142099416 142104355 HIC_chr6_42745 0 + 184 | chr6 144309163 144314827 HIC_chr6_43436 0 + 185 | chr6 149182001 149183892 HIC_chr6_44975 0 + 186 | chr6 149520701 149521829 HIC_chr6_45087 0 + 187 | chr6 155279681 155283372 HIC_chr6_46743 0 + 188 | chr6 170572351 170573455 HIC_chr6_51160 0 + 189 | chr7 18607089 18607679 HIC_chr7_5090 0 + 190 | chr7 31964460 31967196 HIC_chr7_9288 0 + 191 | chr7 35808016 35816300 HIC_chr7_10462 0 + 192 | chr7 51500071 51511634 HIC_chr7_14890 0 + 193 | chr7 67867071 67872445 HIC_chr7_19130 0 + 194 | chr7 68388846 68390307 HIC_chr7_19264 0 + 195 | chr7 78077056 78078189 HIC_chr7_21620 0 + 196 | chr7 91659616 91661136 HIC_chr7_26004 0 + 197 | chr7 96231457 96240386 HIC_chr7_27457 0 + 198 | chr7 98498254 98500640 HIC_chr7_28075 0 + 199 | chr7 109079114 109079528 HIC_chr7_30849 0 + 200 | chr7 112186261 112189276 HIC_chr7_31869 0 + 201 | chr7 113708279 113716417 HIC_chr7_32367 0 + 202 | chr7 121608913 121612007 HIC_chr7_34925 0 + 203 | chr7 131921863 131922943 HIC_chr7_37990 0 + 204 | chr7 146618952 146621913 HIC_chr7_42252 0 + 205 | chr7 147276393 147286787 HIC_chr7_42462 0 + 206 | chr7 148513332 148518226 HIC_chr7_42872 0 + 207 | chr7 155169026 155172426 HIC_chr7_44535 0 + 208 | chr8 1076276 1081978 HIC_chr8_291 0 + 209 | chr8 12777298 12777771 HIC_chr8_3577 0 + 210 | chr8 28263339 28264498 HIC_chr8_8153 0 + 211 | chr8 32778928 32780377 HIC_chr8_9540 0 + 212 | chr8 40064666 40065594 HIC_chr8_11642 0 + 213 | chr8 63649874 63649900 HIC_chr8_17956 0 + 214 | chr8 68034610 68036815 HIC_chr8_19367 0 + 215 | chr8 75153877 75162062 HIC_chr8_21660 0 + 216 | chr8 79766471 79772386 HIC_chr8_23107 0 + 217 | chr8 82609630 82610642 HIC_chr8_24016 0 + 218 | chr8 87764397 87765564 HIC_chr8_25599 0 + 219 | chr8 91464270 91467009 HIC_chr8_26764 0 + 220 | chr8 93339044 93343625 HIC_chr8_27392 0 + 221 | chr8 108237596 108242565 HIC_chr8_31984 0 + 222 | chr8 119514318 119514337 HIC_chr8_35664 0 + 223 | chr8 122748816 122749335 HIC_chr8_36740 0 + 224 | chr8 132546569 132549676 HIC_chr8_39730 0 + 225 | chr8 133164844 133165721 HIC_chr8_39922 0 + 226 | chr8 133955611 133962687 HIC_chr8_40166 0 + 227 | chr9 1814402 1816730 HIC_chr9_520 0 + 228 | chr9 8889149 8890973 HIC_chr9_2786 0 + 229 | chr9 19188796 19189065 HIC_chr9_6070 0 + 230 | chr9 25696484 25697287 HIC_chr9_8156 0 + 231 | chr9 28246694 28256683 HIC_chr9_9012 0 + 232 | chr9 29598838 29600048 HIC_chr9_9473 0 + 233 | chr9 38557631 38566365 HIC_chr9_11916 0 + 234 | chr9 40064320 40064657 HIC_chr9_12299 0 + 235 | chr9 46920420 46920654 HIC_chr9_14008 0 + 236 | chr9 66828379 66833142 HIC_chr9_14469 0 + 237 | chr9 77750099 77752745 HIC_chr9_17494 0 + 238 | chr9 104258816 104265431 HIC_chr9_25256 0 + 239 | chr9 121399605 121406395 HIC_chr9_30329 0 + 240 | chr9 134410242 134413221 HIC_chr9_33382 0 + 241 | chr9 135660347 135681582 HIC_chr9_33696 0 + 242 | chr10 12254286 12255011 HIC_chr10_3436 0 + 243 | chr10 13866010 13867628 HIC_chr10_3863 0 + 244 | chr10 26208657 26216466 HIC_chr10_7504 0 + 245 | chr10 30731532 30732466 HIC_chr10_8781 0 + 246 | chr10 44025285 44027173 HIC_chr10_11671 0 + 247 | chr10 63004730 63007419 HIC_chr10_17348 0 + 248 | chr10 63838578 63842667 HIC_chr10_17639 0 + 249 | chr10 67838848 67846051 HIC_chr10_18810 0 + 250 | chr10 73932237 73933614 HIC_chr10_20422 0 + 251 | chr10 86263438 86264050 HIC_chr10_23961 0 + 252 | chr10 90338497 90340303 HIC_chr10_25145 0 + 253 | chr10 101963462 101975696 HIC_chr10_28587 0 + 254 | chr10 123866285 123886209 HIC_chr10_35019 0 + 255 | chr10 129343471 129353735 HIC_chr10_36512 0 + 256 | chr10 133851615 133853251 HIC_chr10_37730 0 + 257 | chr11 20798761 20802132 HIC_chr11_5788 0 + 258 | chr11 21267227 21267913 HIC_chr11_5963 0 + 259 | chr11 23254433 23255089 HIC_chr11_6588 0 + 260 | chr11 56203041 56203386 HIC_chr11_16750 0 + 261 | chr11 67927312 67927714 HIC_chr11_19471 0 + 262 | chr11 69874735 69875266 HIC_chr11_19869 0 + 263 | chr11 74020649 74024905 HIC_chr11_20818 0 + 264 | chr11 80412395 80414828 HIC_chr11_22573 0 + 265 | chr11 80528096 80530810 HIC_chr11_22607 0 + 266 | chr11 80794803 80800316 HIC_chr11_22714 0 + 267 | chr11 85847103 85849840 HIC_chr11_24326 0 + 268 | chr11 98938360 98940308 HIC_chr11_28517 0 + 269 | chr11 100289887 100294848 HIC_chr11_28963 0 + 270 | chr11 100724759 100731218 HIC_chr11_29101 0 + 271 | chr11 101247149 101252020 HIC_chr11_29288 0 + 272 | chr11 101429769 101433115 HIC_chr11_29358 0 + 273 | chr11 105072660 105073048 HIC_chr11_30533 0 + 274 | chr11 114597683 114601847 HIC_chr11_33435 0 + 275 | chr11 119330474 119332652 HIC_chr11_34705 0 + 276 | chr11 130927949 130928656 HIC_chr11_38105 0 + 277 | chr12 2285699 2291740 HIC_chr12_580 0 + 278 | chr12 4073492 4079590 HIC_chr12_1042 0 + 279 | chr12 9808741 9808991 HIC_chr12_2569 0 + 280 | chr12 11515195 11515204 HIC_chr12_3125 0 + 281 | chr12 30149344 30150759 HIC_chr12_9013 0 + 282 | chr12 38151544 38151715 HIC_chr12_10883 0 + 283 | chr12 38216863 38220072 HIC_chr12_10990 0 + 284 | chr12 38700198 38703470 HIC_chr12_11400 0 + 285 | chr12 39732081 39733699 HIC_chr12_11729 0 + 286 | chr12 42458015 42458666 HIC_chr12_12636 0 + 287 | chr12 48456740 48457042 HIC_chr12_14390 0 + 288 | chr12 48596543 48599775 HIC_chr12_14433 0 + 289 | chr12 64520155 64526821 HIC_chr12_18842 0 + 290 | chr12 67807446 67808705 HIC_chr12_19913 0 + 291 | chr12 74729632 74734176 HIC_chr12_22176 0 + 292 | chr12 78955877 78958295 HIC_chr12_23583 0 + 293 | chr12 84111284 84116052 HIC_chr12_25285 0 + 294 | chr12 85145360 85145795 HIC_chr12_25623 0 + 295 | chr12 85288286 85293405 HIC_chr12_25669 0 + 296 | chr12 107623553 107626526 HIC_chr12_32757 0 + 297 | chr12 114184500 114184816 HIC_chr12_34238 0 + 298 | chr12 120260713 120265081 HIC_chr12_35879 0 + 299 | chr12 123455276 123460716 HIC_chr12_36485 0 + 300 | chr13 19854665 19855545 HIC_chr13_298 0 + 301 | chr13 36125980 36126480 HIC_chr13_5076 0 + 302 | chr13 60992575 60995178 HIC_chr13_13012 0 + 303 | chr13 75802209 75807274 HIC_chr13_17749 0 + 304 | chr13 95684308 95686731 HIC_chr13_23967 0 + 305 | chr13 98535368 98537660 HIC_chr13_24789 0 + 306 | chr13 102663745 102663848 HIC_chr13_25994 0 + 307 | chr14 23002355 23010468 HIC_chr14_1117 0 + 308 | chr14 32192138 32203674 HIC_chr14_3917 0 + 309 | chr14 34163349 34166911 HIC_chr14_4578 0 + 310 | chr14 45036791 45037011 HIC_chr14_7894 0 + 311 | chr14 51370032 51373097 HIC_chr14_9899 0 + 312 | chr14 58516271 58518501 HIC_chr14_12111 0 + 313 | chr14 63737094 63738112 HIC_chr14_13852 0 + 314 | chr14 75805562 75806111 HIC_chr14_17221 0 + 315 | chr14 89340821 89344300 HIC_chr14_21397 0 + 316 | chr14 91082367 91094626 HIC_chr14_21837 0 + 317 | chr14 99462596 99468274 HIC_chr14_24146 0 + 318 | chr15 35654744 35658760 HIC_chr15_4028 0 + 319 | chr15 35837136 35838929 HIC_chr15_4092 0 + 320 | chr15 36943983 36945300 HIC_chr15_4426 0 + 321 | chr15 46314816 46325814 HIC_chr15_7009 0 + 322 | chr15 47834699 47836201 HIC_chr15_7515 0 + 323 | chr15 51982879 51984007 HIC_chr15_8782 0 + 324 | chr15 52534376 52534477 HIC_chr15_8952 0 + 325 | chr15 55861018 55862707 HIC_chr15_9999 0 + 326 | chr15 60691995 60697797 HIC_chr15_11451 0 + 327 | chr15 62395786 62395793 HIC_chr15_11999 0 + 328 | chr15 63851338 63856703 HIC_chr15_12419 0 + 329 | chr15 84675927 84676241 HIC_chr15_17934 0 + 330 | chr15 102149425 102156729 HIC_chr15_22831 0 + 331 | chr16 12284290 12285314 HIC_chr16_2564 0 + 332 | chr16 51050480 51052697 HIC_chr16_9510 0 + 333 | chr16 51147215 51152686 HIC_chr16_9530 0 + 334 | chr16 51326602 51327897 HIC_chr16_9579 0 + 335 | chr16 52730892 52732916 HIC_chr16_10032 0 + 336 | chr16 52741530 52741987 HIC_chr16_10036 0 + 337 | chr16 69490610 69491181 HIC_chr16_14747 0 + 338 | chr16 73701885 73701979 HIC_chr16_15835 0 + 339 | chr16 76552682 76553418 HIC_chr16_16598 0 + 340 | chr16 76846898 76848270 HIC_chr16_16699 0 + 341 | chr16 78084747 78087066 HIC_chr16_17061 0 + 342 | chr16 78527220 78527303 HIC_chr16_17175 0 + 343 | chr17 10254822 10256781 HIC_chr17_2119 0 + 344 | chr17 17974428 17979242 HIC_chr17_4225 0 + 345 | chr17 21507291 21514070 HIC_chr17_4992 0 + 346 | chr17 29406024 29407099 HIC_chr17_6239 0 + 347 | chr17 36035129 36044988 HIC_chr17_7994 0 + 348 | chr17 40038838 40048128 HIC_chr17_8835 0 + 349 | chr17 46814524 46814532 HIC_chr17_10314 0 + 350 | chr17 51048949 51050582 HIC_chr17_11446 0 + 351 | chr17 53332882 53333758 HIC_chr17_12181 0 + 352 | chr17 67310319 67314881 HIC_chr17_15877 0 + 353 | chr17 77263682 77267521 HIC_chr17_18264 0 + 354 | chr17 80415326 80421115 HIC_chr17_18772 0 + 355 | chr17 80723069 80723109 HIC_chr17_18838 0 + 356 | chr18 9981398 9981990 HIC_chr18_3009 0 + 357 | chr18 10077974 10081828 HIC_chr18_3047 0 + 358 | chr18 26282045 26283867 HIC_chr18_6893 0 + 359 | chr18 30137165 30137774 HIC_chr18_8148 0 + 360 | chr18 31344611 31345192 HIC_chr18_8546 0 + 361 | chr18 39799080 39799927 HIC_chr18_11228 0 + 362 | chr18 47853125 47854539 HIC_chr18_13689 0 + 363 | chr18 51359004 51359547 HIC_chr18_14799 0 + 364 | chr18 63999241 64000670 HIC_chr18_18683 0 + 365 | chr18 70273346 70274325 HIC_chr18_20633 0 + 366 | chr18 72548873 72550369 HIC_chr18_21304 0 + 367 | chr18 76422347 76426387 HIC_chr18_22422 0 + 368 | chr19 13898968 13908792 HIC_chr19_2024 0 + 369 | chr19 16565616 16575047 HIC_chr19_2478 0 + 370 | chr19 32931503 32935090 HIC_chr19_6561 0 + 371 | chr19 55030276 55032293 HIC_chr19_10748 0 + 372 | chr20 14415032 14415279 HIC_chr20_4145 0 + 373 | chr20 17714101 17714863 HIC_chr20_5146 0 + 374 | chr20 19450056 19450426 HIC_chr20_5612 0 + 375 | chr20 21569814 21575612 HIC_chr20_6218 0 + 376 | chr20 22498054 22501459 HIC_chr20_6500 0 + 377 | chr20 45916136 45921344 HIC_chr20_11544 0 + 378 | chr20 55788097 55789866 HIC_chr20_14123 0 + 379 | chr20 58640482 58641384 HIC_chr20_14891 0 + 380 | chr20 61753371 61763603 HIC_chr20_15518 0 + 381 | chr20 62300798 62305938 HIC_chr20_15591 0 + 382 | chr21 16161771 16163514 HIC_chr21_1002 0 + 383 | chr21 19949252 19949702 HIC_chr21_2205 0 + 384 | chr21 28046487 28048117 HIC_chr21_4620 0 + 385 | chr21 31498144 31504872 HIC_chr21_5757 0 + 386 | chr21 38716322 38718323 HIC_chr21_7854 0 + 387 | chr21 43491643 43494000 HIC_chr21_9188 0 + 388 | chr21 44996486 45000061 HIC_chr21_9474 0 + 389 | chr22 34008937 34010119 HIC_chr22_3912 0 + 390 | chr22 35694199 35697633 HIC_chr22_4421 0 + 391 | chr22 36204695 36211637 HIC_chr22_4534 0 + 392 | chr22 42593220 42603311 HIC_chr22_5880 0 + 393 | chr22 43160539 43167539 HIC_chr22_5987 0 + 394 | chrX 14295117 14295548 HIC_chrX_3716 0 + 395 | chrX 15352040 15355041 HIC_chrX_4088 0 + 396 | chrX 16959610 16960437 HIC_chrX_4563 0 + 397 | chrX 22232182 22232914 HIC_chrX_6129 0 + 398 | chrX 22747557 22750164 HIC_chrX_6296 0 + 399 | chrX 30170420 30171484 HIC_chrX_8585 0 + 400 | chrX 33108947 33113190 HIC_chrX_9551 0 + 401 | chrX 55784028 55787095 HIC_chrX_15869 0 + 402 | chrX 57710179 57711189 HIC_chrX_16476 0 + 403 | chrX 62982073 62985693 HIC_chrX_17434 0 + 404 | chrX 63548004 63548268 HIC_chrX_17616 0 + 405 | chrX 86789684 86795138 HIC_chrX_24551 0 + 406 | chrX 108050660 108054387 HIC_chrX_31070 0 + 407 | chrX 110293719 110296504 HIC_chrX_31788 0 + 408 | chrX 111095164 111095470 HIC_chrX_32052 0 + 409 | chrX 120326492 120328829 HIC_chrX_34805 0 + 410 | chrX 128019004 128019080 HIC_chrX_37178 0 + 411 | chrX 134116159 134117137 HIC_chrX_38961 0 + 412 | chrX 135665554 135667250 HIC_chrX_39398 0 + 413 | chrX 135751000 135757638 HIC_chrX_39421 0 + 414 | chrX 141026808 141026926 HIC_chrX_40991 0 + 415 | chrX 152120242 152120512 HIC_chrX_44315 0 + 416 | chrX 152741048 152742145 HIC_chrX_44471 0 + 417 | chrY 15555268 15559519 HIC_chrY_3216 0 + 418 | chrY 16390765 16395926 HIC_chrY_3480 0 + 419 | chrY 24490023 24493584 HIC_chrY_5996 0 + 420 | chrY 25911603 25911853 HIC_chrY_6408 0 + 421 | chrY 28692918 28700354 HIC_chrY_7285 0 + 422 | -------------------------------------------------------------------------------- /scripts/hicpro/hicpro_interchromosomal.sh: -------------------------------------------------------------------------------- 1 | BED_PATH=$1 2 | MAT_PATH=$2 3 | INTER_RES_KB=$3 4 | INTRA_RES_KB=$4 5 | INTER_RES=$((INTER_RES_KB*1000)) 6 | INTRA_RES=$((INTRA_RES_KB*1000)) 7 | 8 | BEDPE_PATH=all.bed 9 | 10 | test ! -s $BEDPE_PATH && (python3 hicpro_to_bedpe.py $BED_PATH $MAT_PATH $BEDPE_PATH) 11 | 12 | PREFIX=${MAT_PATH%.matrix} 13 | 14 | CHROMS=(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X) 15 | NUM_CHROMS=${#CHROMS[@]} 16 | 17 | #interchromosomal 18 | for i in `seq $((NUM_CHROMS-1))` 19 | do 20 | CHROM1=${CHROMS[$i]} 21 | 22 | for j in `seq 0 $((i-1))` 23 | do 24 | CHROM2=${CHROMS[$j]} 25 | UNBINNED=${PREFIX}_${CHROM2}_$CHROM1.bed 26 | test ! -s $UNBINNED && (cat $BEDPE_PATH | awk -v chrom1=chr$CHROM1 -v chrom2=chr$CHROM2 '($1 == chrom1 && $4 == chrom2) || ($4 == chrom1 && $1 == chrom2) {print}' > $UNBINNED) 27 | BINNED=${PREFIX}_${CHROM2}_${CHROM1}_${INTER_RES_KB}kb.bed 28 | test ! -s $BINNED && (python3 bin_bed.py $UNBINNED $INTER_RES $BINNED) 29 | done 30 | 31 | done 32 | 33 | #intrachromosomal 34 | for i in `seq 0 $((NUM_CHROMS-1))` 35 | do 36 | CHROM=${CHROMS[$i]} 37 | UNBINNED=${PREFIX}_$CHROM.bed 38 | test ! -s $UNBINNED && (cat $BEDPE_PATH | awk -v chrom=chr$CHROM '$1 == chrom && $4 == chrom {print}' > $UNBINNED) 39 | BINNED=${PREFIX}_${CHROM}_${INTRA_RES_KB}kb.bed 40 | test ! -s $BINNED && (python3 bin_bed.py $UNBINNED $INTRA_RES $BINNED) 41 | done 42 | 43 | python3 ../minimds_inter.py $PREFIX $INTER_RES $INTRA_RES -------------------------------------------------------------------------------- /scripts/hicpro/hicpro_to_bedpe.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | bed_path = sys.argv[1] 4 | mat_path = sys.argv[2] 5 | bedpe_path = sys.argv[3] 6 | 7 | with open(bed_path) as in_file: 8 | name_to_interval = {} 9 | for line in in_file: 10 | line = line.strip().split("\t") 11 | name_to_interval[line[3]] = "\t".join((line[0], line[1], line[2])) 12 | 13 | with open(mat_path) as in_file, open(bedpe_path, "w") as out_file: 14 | for line in in_file: 15 | name1, name2, count = line.split("\t") 16 | interval1 = name_to_interval[name1] 17 | interval2 = name_to_interval[name2] 18 | out_file.write("\t".join((interval1, interval2, count))) -------------------------------------------------------------------------------- /scripts/hsa_input.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | sys.path.append("..") 4 | import data_tools as dt 5 | 6 | inpath = sys.argv[1] 7 | outpath = sys.argv[2] 8 | 9 | structure = dt.structureFromBed(inpath) 10 | contactMat = dt.matFromBed(inpath, structure1=structure) 11 | n = len(contactMat) 12 | fullMat = np.zeros((n, n+2)) 13 | 14 | #locus IDs 15 | for i, pointNum in enumerate(structure.getPointNums()): 16 | fullMat[i,0] = structure.chrom.minPos + structure.chrom.res * pointNum 17 | fullMat[i,1] = structure.chrom.minPos + structure.chrom.res * (pointNum + 1) 18 | 19 | fullMat[:,2:n+2] = contactMat 20 | 21 | maxNumDigits = int(np.ceil(np.log10(np.amax(fullMat)))) 22 | formatstring = "%" + str(maxNumDigits) + "d" 23 | np.savetxt(outpath, fullMat, formatstring, delimiter="\t") -------------------------------------------------------------------------------- /scripts/install_chromosome3d.sh: -------------------------------------------------------------------------------- 1 | if [ ! -e Chromosome3D ] 2 | then 3 | git clone https://github.com/multicom-toolbox/Chromosome3D.git 4 | fi 5 | -------------------------------------------------------------------------------- /scripts/install_chromsde.sh: -------------------------------------------------------------------------------- 1 | if [ ! -e ChromSDE ] 2 | then 3 | wget http://biogpu.ddns.comp.nus.edu.sg/~chipseq/ChromSDE/ChromSDE_program2.2.zip 4 | unzip ChromSDE_program2.2.zip 5 | rm ChromSDE_program2.2.zip 6 | mv program ChromSDE 7 | mv *.m ChromSDE 8 | fi 9 | -------------------------------------------------------------------------------- /scripts/install_hsa.sh: -------------------------------------------------------------------------------- 1 | if [ ! -e hsa ] 2 | then 3 | wget http://ouyanglab.jax.org/hsa/HSA.zip 4 | unzip HSA.zip 5 | rm HSA.zip 6 | fi 7 | -------------------------------------------------------------------------------- /scripts/install_mogen.sh: -------------------------------------------------------------------------------- 1 | if [ ! -e MOGEN ] 2 | then 3 | git clone https://github.com/BDM-Lab/MOGEN.git 4 | fi 5 | -------------------------------------------------------------------------------- /scripts/misc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import stats as st 3 | import sys 4 | sys.path.append("..") 5 | import linear_algebra as la 6 | 7 | def distsFromCoords(coords): 8 | """Creates distance matrix from 3D coords""" 9 | n = len(coords) 10 | distMat = np.zeros((n,n)) 11 | for i in range(n): 12 | for j in range(i): 13 | distMat[i,j] = la.calcDistance(coords[i], coords[j]) 14 | return distMat 15 | 16 | def pearson(mat1, mat2): 17 | """Root mean square error between two matrices, ignoring zeroes""" 18 | assert mat1.shape == mat2.shape 19 | #convert to vectors 20 | vec1 = mat1.flatten() 21 | vec2 = mat2.flatten() 22 | 23 | #remove zeroes 24 | nonzero = [i for i in range(len(vec1)) if vec1[i] != 0 and vec2[i] != 0] 25 | vec1 = vec1[nonzero] 26 | vec2 = vec2[nonzero] 27 | 28 | r, p = st.pearsonr(vec1, vec2) 29 | return r 30 | 31 | def parse_time(time_string): 32 | split = time_string.split("m") 33 | mins = int(split[0]) 34 | secs = float(split[1].split("s")[0]) 35 | return mins + secs/60 36 | 37 | def distMat(structure): 38 | """Creates distance matrix from structure""" 39 | points = structure.getPoints() 40 | numPoints = len(points) 41 | mat = np.zeros((numPoints, numPoints)) 42 | for i in range(numPoints): 43 | for j in range(i): 44 | mat[i,j] = la.calcDistance(points[i].pos, points[j].pos) 45 | return mat 46 | -------------------------------------------------------------------------------- /scripts/mogen_input.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("..") 3 | import data_tools as dt 4 | 5 | in_path = sys.argv[1] 6 | out_path = sys.argv[2] 7 | 8 | chrom = dt.chromFromBed(in_path) 9 | 10 | with open(in_path) as in_file: 11 | with open(out_path, "w") as out_file: 12 | for line in in_file: 13 | line = line.strip().split() 14 | loc1 = int(line[1]) 15 | loc2 = int(line[4]) 16 | abs_index1 = chrom.getAbsoluteIndex(loc1) 17 | abs_index2 = chrom.getAbsoluteIndex(loc2) 18 | out_file.write("\t".join((str(abs_index1), str(abs_index2), line[6])) + "\n") 19 | out_file.close() 20 | in_file.close() 21 | -------------------------------------------------------------------------------- /scripts/normalize.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("..") 3 | import numpy as np 4 | import argparse 5 | import tools 6 | import os 7 | 8 | def get_chrom_num(chrom): 9 | if chrom == "X": 10 | return 23 11 | else: 12 | return int(chrom) 13 | 14 | def normalize(chrom1, chrom2, rawpath, krpath1, krpath2, res, outpath): 15 | kr1 = np.loadtxt(krpath1) 16 | if krpath2 is None: 17 | kr2 = kr1 18 | else: 19 | kr2 = np.loadtxt(krpath2) 20 | with open(rawpath) as raw: 21 | with open(outpath, "w") as out: 22 | for line in raw: 23 | line = line.split() 24 | loc1 = line[0] 25 | loc2 = line[1] 26 | norm1 = kr1[int(int(loc1)/res)] 27 | norm2 = kr2[int(int(loc2)/res)] 28 | if not np.isnan(norm1) and not np.isnan(norm2): 29 | out.write("\t".join((chrom1, loc1, str(int(loc1) + res), chrom2, loc2, str(int(loc2) + res), str(float(line[2])/(norm1 * norm2)))) + "\n") 30 | out.close() 31 | raw.close() 32 | 33 | def normalize_inter(hic_id, res, chrom_a, chrom_b): 34 | res_string = tools.get_res_string(res) 35 | 36 | if get_chrom_num(chrom_a) < get_chrom_num(chrom_b): 37 | chrom1 = chrom_a 38 | chrom2 = chrom_b 39 | else: 40 | chrom1 = chrom_b 41 | chrom2 = chrom_a 42 | 43 | rawpath = "{}/{}_resolution_interchromosomal/chr{}_chr{}/MAPQGE30/chr{}_{}_{}.RAWobserved".format(hic_id, res_string, chrom1, chrom2, chrom1, chrom2, res_string) 44 | krpath1 = "{}/{}_resolution_interchromosomal/chr{}_chr{}/MAPQGE30/chr{}_{}.KRnorm".format(hic_id, res_string, chrom1, chrom2, chrom1, res_string) 45 | if not os.path.isfile(krpath1): 46 | krpath1 = "{}/{}_resolution_interchromosomal/chr{}_chr{}/MAPQGE30/chr{}_{}.VCnorm".format(hic_id, res_string, chrom1, chrom2, chrom1, res_string) 47 | krpath2 = "{}/{}_resolution_interchromosomal/chr{}_chr{}/MAPQGE30/chr{}_{}.KRnorm".format(hic_id, res_string, chrom1, chrom2, chrom2, res_string) 48 | if not os.path.isfile(krpath2): 49 | krpath2 = "{}/{}_resolution_interchromosomal/chr{}_chr{}/MAPQGE30/chr{}_{}VCnorm".format(hic_id, res_string, chrom1, chrom2, chrom2, res_string) 50 | outpath = "{}_{}_{}_{}.bed".format(hic_id, chrom1, chrom2, res_string) 51 | chromstring1 = "chr" + chrom1 52 | chromstring2 = "chr" + chrom2 53 | normalize(chromstring1, chromstring2, rawpath, krpath1, krpath2, res, outpath) 54 | 55 | def normalize_intra(hic_id, res, chrom): 56 | res_string = tools.get_res_string(res) 57 | 58 | rawpath = "{}/{}_resolution_intrachromosomal/chr{}/MAPQGE30/chr{}_{}.RAWobserved".format(hic_id, res_string, chrom, chrom, res_string) 59 | krpath = "{}/{}_resolution_intrachromosomal/chr{}/MAPQGE30/chr{}_{}.KRnorm".format(hic_id, res_string, chrom, chrom, res_string) 60 | if not os.path.isfile(krpath): 61 | krpath = "{}/{}_resolution_intrachromosomal/chr{}/MAPQGE30/chr{}_{}.VCnorm".format(hic_id, res_string, chrom, chrom, res_string) 62 | outpath = "{}_{}_{}.bed".format(hic_id, chrom, res_string) 63 | chromstring = "chr" + chrom 64 | normalize(chromstring, chromstring, rawpath, krpath, None, res, outpath) 65 | 66 | def main(): 67 | parser = argparse.ArgumentParser(description="Normalize Hi-C files using Knight-Ruiz method.") 68 | parser.add_argument("hic_id", help="e.g. GM12878_combined") 69 | parser.add_argument("res", type=int, help="resolution (bp)") 70 | parser.add_argument("chrom1", help="first chromosome (e.g. 1)") 71 | parser.add_argument("--chrom2", help="second chromosome (e.g. 2)") 72 | args = parser.parse_args() 73 | 74 | if args.chrom2 is None: 75 | normalize_intra(args.hic_id, args.res, args.chrom1) 76 | else: 77 | normalize_inter(args.hic_id, args.res, args.chrom1, args.chrom2) 78 | 79 | if __name__ == "__main__": 80 | main() 81 | -------------------------------------------------------------------------------- /scripts/normalize.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | for CHROM in 2 3 4 5 6 7 8 10 11 12 13 14 15 16 17 18 19 20 4 | do 5 | echo $CHROM 6 | python normalize.py /data/drive1/hic_data/K562 25000 $CHROM 7 | done 8 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr10_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_10_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr11_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_11_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr12_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_12_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr13_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_13_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr14_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_14_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr15_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_15_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr16_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_16_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr17_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_17_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr18_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_18_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr19_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_19_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr1_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_1_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr20_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_20_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr21_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_21_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr22_100kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_22_100kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr22_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_22_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr2_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_2_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr3_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_3_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr4_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_4_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr5_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_5_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr6_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_6_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr7_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_7_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr8_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_8_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chr9_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_9_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/parameters/parameters_chrX_10kb.txt: -------------------------------------------------------------------------------- 1 | #all distances here are square distance, all 15 parameters are required and no space should be included in the value 2 | #number of structures will be generated 3 | NUM = 1 4 | 5 | #number of chromosomes 6 | NBR_OF_CHR = 1 7 | 8 | #contact with interaction frequency less than this is considered as non-contact, 9 | 10 | 11 | INTRA_IF_THRESHOLD = 0.0001 12 | 13 | #NOTICE: the following distances are in square 14 | #maximum distance between 2 adjacent points 15 | ADJACENT_DIST = 1.5 16 | #contact distance, points that are in contact should have square distance less than this 17 | #when it is large , the whole structure will be scaled down in optimization and zoom out later 18 | CONTACT_DIST = 6.0 19 | POS_MIN_DIST = 0.2 20 | NEG_MAX_DIST_INTRA = 30 21 | #NEG_MAX_DIST_INTER = 150 22 | 23 | #increase this parameter to improve contact score, (but will decrease non-contact score) 24 | POS_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_max_dist_weight_1_normal.txt 25 | #increase this parameter if adjacent points are to close to each other 26 | POS_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/pos_min_dist_weight_1_normal.txt 27 | #increase this parameter to improve non-contact score, (but will decrease contact score) 28 | NEG_MIN_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_min_dist_weight_1_normal.txt 29 | #increase this parameter to prevent the structure from spanning too much (make the structure smaller) 30 | NEG_MAX_DIST_WEIGHT_FILE = MOGEN/examples/hiC/parameters/100kb/chr1_normal/neg_max_dist_weight_1_normal.txt 31 | 32 | 33 | 34 | OUTPUT_FOLDER = MOGEN/examples/hiC/output 35 | 36 | INPUT_FILE = MOGEN/examples/hiC/input/GM12878_combined_X_10kb.tsv 37 | 38 | #set VERBOSE = true for information during optimization printed out 39 | VERBOSE = true 40 | 41 | #learning rate for the optimization process, increase the learning rate can speed up the optimization process significantly, but sometimes, the optimization may fail 42 | #if the program fails to generate structures, or the distance between 2 consecutive points are too large, try to reduce this learning rate 43 | LEARNING_RATE = 0.001 44 | #during parameter adjustment, increase LEARNING_RATE and decrease MAX_ITERATION, so that "coarse" structures can be quickly generated 45 | MAX_ITERATION = 200000 46 | -------------------------------------------------------------------------------- /scripts/run_chromsde.m: -------------------------------------------------------------------------------- 1 | function output = run_chromsde(contacts_path, ids_path) 2 | test = sparse(importdata(contacts_path)); 3 | binAnno = importdata(ids_path); 4 | ChromSDE(binAnno, test, 1) 5 | output = 0 6 | exit 7 | end 8 | 9 | -------------------------------------------------------------------------------- /scripts/run_chromsde_100kb.m: -------------------------------------------------------------------------------- 1 | function output = run_chromsde(chrom) 2 | if class(chrom) == 'double' 3 | chrom = num2str(chrom); 4 | end 5 | contacts_100kb = sparse(importdata(strcat('chr', chrom, '_100kb_contacts.dat'))); 6 | ids = importdata(strcat('chr', chrom, '_100kb_ids.dat')); 7 | ChromSDE(ids, contacts, 1) 8 | output = 0 9 | exit 10 | end 11 | 12 | -------------------------------------------------------------------------------- /scripts/run_chromsde_rep1.m: -------------------------------------------------------------------------------- 1 | function output = run_chromsde(chrom) 2 | if class(chrom) == 'double' 3 | chrom = num2str(chrom); 4 | end 5 | contacts_rep1 = sparse(importdata(strcat('chr', chrom, '_10kb_contacts.dat'))); 6 | ids = importdata(strcat('chr', chrom, '_10kb_ids.dat')); 7 | ChromSDE(ids, contacts, 1) 8 | output = 0 9 | exit 10 | end 11 | 12 | -------------------------------------------------------------------------------- /scripts/run_chromsde_rep2.m: -------------------------------------------------------------------------------- 1 | function output = run_chromsde(chrom) 2 | if class(chrom) == 'double' 3 | chrom = num2str(chrom); 4 | end 5 | contacts_rep2 = sparse(importdata(strcat('chr', chrom, '_10kb_contacts.dat'))); 6 | ids = importdata(strcat('chr', chrom, '_10kb_ids.dat')); 7 | ChromSDE(ids, contacts, 1) 8 | output = 0 9 | exit 10 | end 11 | 12 | -------------------------------------------------------------------------------- /scripts/simple_tad.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def calcScore(locNum, contactMat, numLoc): 4 | """Calculate directionality index for locus. See Dixon 2012 supplemental.""" 5 | a = 0 #initialize 6 | b = 0 7 | aCount = 0 8 | bCount = 0 9 | avg_a = 0 10 | avg_b = 0 11 | for i in range(locNum-numLoc, locNum): #upstream 12 | a += contactMat[locNum][i] 13 | aCount += 1 14 | if aCount != 0: 15 | avg_a=a/aCount 16 | for i in range(locNum+1, locNum+numLoc): #downstream 17 | b += contactMat[i][locNum] 18 | bCount += 1 19 | if bCount != 0: 20 | avg_b = b/bCount 21 | if avg_a + avg_b != 0 and avg_a != avg_b: 22 | e = (avg_a + avg_b)/2 23 | index = (avg_b - avg_a)/abs(avg_b - avg_a)*((avg_a - e)**2/e + (avg_b - e)**2/e) 24 | else: 25 | index = 0 26 | return index 27 | 28 | def allScores(contactMat, maxNumLoc): 29 | """Calculate all directionality indices for chromosome""" 30 | dirIndices=[] 31 | totNumLoc = len(contactMat) 32 | for i in range(totNumLoc): 33 | numLoc = min((maxNumLoc, totNumLoc - i, i)) #avoid going out of range of contact matrix 34 | dirIndices.append([calcScore(i, contactMat, numLoc)][0]) 35 | dirIndices = np.array(dirIndices) 36 | return dirIndices 37 | 38 | def domainsFromScores(indices, minSizeFraction): 39 | """Identify domain starts and ends from directionality indices""" 40 | numLoc = len(indices) 41 | minNumLoc = minSizeFraction*numLoc 42 | starts = [] 43 | ends = [] 44 | prevIndex = np.nan #initialization 45 | currNum = 0 46 | nextstart = 0 47 | currend = 0 48 | for index in indices: 49 | if index > 0: #downstream bias 50 | if prevIndex < 0 or prevIndex is np.nan: 51 | currstart = nextstart #start of current domain 52 | currend = currNum #end of current domain 53 | nextstart = currNum #start of next domain 54 | if currend == 0 or currend - currstart > minNumLoc: 55 | starts.append(nextstart) 56 | if prevIndex < 0: #previous is upstream 57 | if currend - currstart > minNumLoc: 58 | ends.append(currend) 59 | prevIndex = index 60 | currNum+=1 61 | domains = [] 62 | i = 0 #index of starts 63 | j = 0 #index of ends 64 | if starts[i] > 0: #if first start isn't 0 65 | domains.append((0, ends[j])) 66 | j+=1 67 | numstarts = len(starts) 68 | numends = len(ends) 69 | while i5}{atom_num2:>5}\n") -------------------------------------------------------------------------------- /scripts/structure_to_xyz.sh: -------------------------------------------------------------------------------- 1 | f=$1 2 | PREFIX=${f%.tsv} 3 | 4 | cat $f | awk 'NF == 4 && $2 != "nan" {print "C\t"$2"\t"$3"\t"$4}' > $PREFIX.xyz -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | bash get_gm12878.sh 5000 22 2 | python ../minimds.py hic_data/GM12878_combined_22_5kb.bed 3 | python ../minimds.py --partitioned hic_data/GM12878_combined_22_5kb.bed 4 | -------------------------------------------------------------------------------- /tad.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def calcScore(abs_index, points, contactMat, numPoints): 4 | """Calculates directionality score for locus. See Dixon 2012 supplemental. Positive=downstream. Negative=upstream.""" 5 | a = 0 #initialize 6 | b = 0 7 | aCount = 0 8 | bCount = 0 9 | avg_a = 0 10 | avg_b = 0 11 | currIndex = points[abs_index].relative_index 12 | 13 | #upstream 14 | upstreamIndexFound = False 15 | numPointsUpstream = numPoints #numPoints is max separation from curr point to include when calculating score 16 | while not upstreamIndexFound and numPointsUpstream > 0: 17 | if points[abs_index - numPointsUpstream] != 0: 18 | upstreamIndexFound = True 19 | minIndex = points[abs_index - numPointsUpstream].relative_index 20 | else: 21 | numPointsUpstream -= 1 22 | 23 | if upstreamIndexFound: 24 | for i in range(minIndex, currIndex): 25 | a += contactMat[currIndex][i] 26 | aCount += 1 27 | if aCount != 0: 28 | avg_a = a/aCount 29 | 30 | #downstream 31 | downstreamIndexFound = False 32 | numPointsDownstream = numPoints #numPoints is max separation from curr point to include when calculating score 33 | while not downstreamIndexFound and numPointsDownstream > 0: 34 | if points[abs_index + numPointsDownstream] != 0: 35 | downstreamIndexFound = True 36 | maxIndex = points[abs_index + numPointsDownstream].relative_index 37 | else: 38 | numPointsDownstream -= 1 39 | 40 | if downstreamIndexFound: 41 | for i in range(currIndex + 1, maxIndex): 42 | b += contactMat[i][currIndex] 43 | bCount += 1 44 | if bCount != 0: 45 | avg_b = b/bCount 46 | 47 | if aCount != 0 and bCount != 0 and avg_a != avg_b: 48 | e = (avg_a + avg_b)/2 49 | score = (avg_b-avg_a)/abs(avg_b-avg_a)*((avg_a-e)**2/e+(avg_b-e)**2/e) 50 | else: 51 | score = 0 52 | 53 | return score 54 | 55 | def allScores(contactMat, structure, maxNumPoints): 56 | """Calculates all directionality scores for chromosome""" 57 | scores = [] 58 | totNumLoc = len(contactMat) 59 | for abs_index in structure.nonzero_abs_indices(): 60 | i = structure.points[abs_index - structure.offset].relative_index 61 | numPoints = min((maxNumPoints, totNumLoc - 1 - i, i)) #avoid going out of range of contact matrix 62 | scores.append(calcScore(abs_index, structure.points, contactMat, numPoints)) 63 | return scores 64 | 65 | def domainsFromScores(scores, minSizeFraction): 66 | minNumLoc = minSizeFraction*len(scores) 67 | start = 0 #initialize 68 | prevScore = 0 69 | domains = [] 70 | for i in range(len(scores)): 71 | score = scores[i] 72 | if i == len(scores) - 1: #at end of scores, close remaining TAD 73 | end = i 74 | domains.append([start,end]) 75 | elif score > 0 and prevScore < 0 and i-start >= minNumLoc: #current is downstream, previous was upstream 76 | end = i 77 | domains.append([start,end]) 78 | start = i 79 | prevScore = score 80 | return np.array(domains) 81 | 82 | def getDomains(contactMat, structure, sizeParameter, minSizeFraction): 83 | """Identify TADs in contact matrix""" 84 | scores = allScores(contactMat, structure, 50) #50 is from Dixon 2012 supplemental 85 | smoothingFactor = max((int(len(contactMat)*sizeParameter), 1)) #must be >= 1 86 | smoothed = smoothWithMovingAverage(scores, smoothingFactor) 87 | return domainsFromScores(smoothed, minSizeFraction) 88 | 89 | def movingAverage(signal, size_of_window): 90 | """Modified from http://beauty-of-imagination.blogspot.fr/2012/09/fun-with-signal-processing-and.html""" 91 | window = np.ones(size_of_window) 92 | return np.roll(np.convolve(window/size_of_window, signal, "valid"), int(size_of_window/2)) 93 | 94 | def smoothWithMovingAverage(signal, size_of_window): 95 | smoothed = movingAverage(signal, size_of_window) 96 | signal_size = len(signal) 97 | remainder = signal[signal_size - size_of_window + 1 : signal_size] #end of signal, which can't be smoothed 98 | smoothed_remainder = np.zeros_like(remainder) 99 | remainder_size = size_of_window - 1 100 | for i in range(remainder_size): 101 | smoothed_remainder[i] = movingAverage(remainder[i:remainder_size], remainder_size-i) 102 | return np.concatenate((smoothed, smoothed_remainder)) 103 | 104 | def substructuresFromTads(structure, tads): 105 | abs_indices = structure.nonzero_abs_indices() 106 | offset = 0 #initialize 107 | for td in tads: 108 | start = abs_indices[td[0]] #convert from relative index to absolute index 109 | end = abs_indices[td[1]] 110 | points = structure.points[start-structure.offset:end-structure.offset] 111 | structure.createSubstructure(points, offset) 112 | offset = end #update 113 | -------------------------------------------------------------------------------- /tools.py: -------------------------------------------------------------------------------- 1 | """Misc. useful things""" 2 | 3 | class Tracker(object): 4 | """Tracks progress of task""" 5 | def __init__(self, name, size, currPercentage=0, count=0): 6 | self.name = name #name of task 7 | self.size = size #total size of task (e.g. number of lines in file) 8 | self.currPercentage = currPercentage #current percentage of task complete 9 | self.count = count #absolute amount of task complete (e.g. number of lines of file read) 10 | def increment(self): 11 | if self.size !=0 and self.size is not None: 12 | self.count += 1 13 | newPercentage = self.currPercentage + 1 14 | if float(self.count)/self.size >= float(newPercentage)/100: #if at least X% of the file has been read, print percentage 15 | self.currPercentage = newPercentage 16 | print("{} {}% complete".format(self.name, self.currPercentage)) 17 | 18 | def args_are_valid(args, names, intervals): 19 | valid_args = True 20 | for (arg, name, interval) in zip(args, names, intervals): 21 | lower_bound = interval[0] 22 | upper_bound = interval[1] 23 | if lower_bound is not None: 24 | if arg <= float(lower_bound): 25 | print("Error. {} must be > {}.".format(name, lower_bound)) 26 | valid_args = False 27 | if upper_bound is not None: 28 | if arg >= float(upper_bound): 29 | print("Error. {} must be < {}.".format(name, upper_bound)) 30 | valid_args = False 31 | return valid_args 32 | 33 | def get_res_string(res): 34 | """Converts resolution in bp to string (e.g. 10kb)""" 35 | res_kb = int(res/1000) 36 | if res_kb < 1000: 37 | return str(res_kb) + "kb" 38 | else: 39 | return str(res_kb/1000) + "mb" 40 | --------------------------------------------------------------------------------