├── BalancedRowsumsCalculation.ipynb
├── CalculatingFilledBinFractionByDistance.ipynb
├── ContactMapVisualizationExampleNotebook.ipynb
├── LICENSE
├── LoopStrengthRCMC.ipynb
├── MicrocompartmentLoops_PlusMin1kb.bedpe
├── PileupsRCMC.ipynb
├── README.md
├── captureprobes_mm10.bed
├── captureprobes_mm39.bed
├── loopFeatureOverlap.R
├── microcbowtie2.py
└── spikeinChIP_PE_alignment.py
/BalancedRowsumsCalculation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "# import libraries\n",
10 | "import numpy as np\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "import pandas as pd\n",
13 | "import os\n",
14 | "import cooltools\n",
15 | "import cooler"
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "## Load in microcompartment loop anchors"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 10,
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "data": {
32 | "text/html": [
33 | "
\n",
34 | "\n",
47 | "
\n",
48 | " \n",
49 | " \n",
50 | " | \n",
51 | " chr | \n",
52 | " start | \n",
53 | " end | \n",
54 | "
\n",
55 | " \n",
56 | " \n",
57 | " \n",
58 | " 0 | \n",
59 | " chr5 | \n",
60 | " 31273000 | \n",
61 | " 31275000 | \n",
62 | "
\n",
63 | " \n",
64 | " 1 | \n",
65 | " chr5 | \n",
66 | " 31295500 | \n",
67 | " 31297500 | \n",
68 | "
\n",
69 | " \n",
70 | " 2 | \n",
71 | " chr5 | \n",
72 | " 31306700 | \n",
73 | " 31308700 | \n",
74 | "
\n",
75 | " \n",
76 | " 3 | \n",
77 | " chr5 | \n",
78 | " 31310500 | \n",
79 | " 31312500 | \n",
80 | "
\n",
81 | " \n",
82 | " 4 | \n",
83 | " chr5 | \n",
84 | " 31336200 | \n",
85 | " 31338200 | \n",
86 | "
\n",
87 | " \n",
88 | " ... | \n",
89 | " ... | \n",
90 | " ... | \n",
91 | " ... | \n",
92 | "
\n",
93 | " \n",
94 | " 127 | \n",
95 | " chr8 | \n",
96 | " 85785750 | \n",
97 | " 85787750 | \n",
98 | "
\n",
99 | " \n",
100 | " 128 | \n",
101 | " chr8 | \n",
102 | " 85797300 | \n",
103 | " 85799300 | \n",
104 | "
\n",
105 | " \n",
106 | " 129 | \n",
107 | " chr8 | \n",
108 | " 85802000 | \n",
109 | " 85804000 | \n",
110 | "
\n",
111 | " \n",
112 | " 130 | \n",
113 | " chr8 | \n",
114 | " 85806500 | \n",
115 | " 85808500 | \n",
116 | "
\n",
117 | " \n",
118 | " 131 | \n",
119 | " chr8 | \n",
120 | " 85809000 | \n",
121 | " 85811000 | \n",
122 | "
\n",
123 | " \n",
124 | "
\n",
125 | "
132 rows × 3 columns
\n",
126 | "
"
127 | ],
128 | "text/plain": [
129 | " chr start end\n",
130 | "0 chr5 31273000 31275000\n",
131 | "1 chr5 31295500 31297500\n",
132 | "2 chr5 31306700 31308700\n",
133 | "3 chr5 31310500 31312500\n",
134 | "4 chr5 31336200 31338200\n",
135 | ".. ... ... ...\n",
136 | "127 chr8 85785750 85787750\n",
137 | "128 chr8 85797300 85799300\n",
138 | "129 chr8 85802000 85804000\n",
139 | "130 chr8 85806500 85808500\n",
140 | "131 chr8 85809000 85811000\n",
141 | "\n",
142 | "[132 rows x 3 columns]"
143 | ]
144 | },
145 | "execution_count": 10,
146 | "metadata": {},
147 | "output_type": "execute_result"
148 | }
149 | ],
150 | "source": [
151 | "# Read the tab-separated, header-less BED file of all unique microcompartment anchors: chromosome, start (anchor coordinate minus 1 kb) and end (anchor coordinate plus 1 kb)\n",
152 | "anchors = pd.read_table(r'FILE_PATH_TO_ANCHOR_LIST.bed', header=None, names=['chr', 'start', 'end'])\n",
153 | "anchors"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 11,
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 | "# The anchor coordinate is the integer midpoint of each BED interval; it is stored as a new 'midpoint' column, and anchorList keeps just chromosome + midpoint\n",
163 | "anchors['midpoint'] = anchors['start'].add(anchors['end']).floordiv(2)\n",
164 | "anchorList = anchors.loc[:, ['chr', 'midpoint']]\n",
165 | "\n",
166 | "# Split the anchors by capture region: the Klf1 anchors are the chr8 rows and the Ppm1g anchors are the chr5 rows\n",
167 | "anchorDataframeKlf1 = anchorList.loc[anchorList['chr'].eq('chr8')]\n",
168 | "anchorDataframePpm1g = anchorList.loc[anchorList['chr'].eq('chr5')]\n",
169 | "\n",
170 | "# Plain Python lists of midpoints, for ease of downstream use\n",
171 | "anchorListKlf1 = anchorDataframeKlf1['midpoint'].to_list()\n",
172 | "anchorListPpm1g = anchorDataframePpm1g['midpoint'].to_list()"
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {},
178 | "source": [
179 | "## Calculate rowsums and plot their distribution\n",
180 | "### Whole-region balancing calculations"
181 | ]
182 | },
183 | {
184 | "cell_type": "code",
185 | "execution_count": 47,
186 | "metadata": {},
187 | "outputs": [
188 | {
189 | "data": {
190 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAS30lEQVR4nO3dcZCc913f8fcncmxKIFjGJ6FKBplWJJEZbMJVhKalAZdKgYJMW08vbYkmVUdtx2WSmdJiM1MyHUYz7nSm01JqOpoQEFMajwpxLQKkaERppgWinBMlsWyrPqIi3Ui1LkkhTdIRSPn2j308Xkt3uud0u7q9n96vmZ3neX7P79n93t7dZ5/97fM8m6pCktSW16x1AZKk0TPcJalBhrskNchwl6QGGe6S1KDb1roAgLvvvru2b9++1mVI0rryzDPPfLaqphZbNxHhvn37dmZnZ9e6DElaV5L8wVLrHJaRpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBy4Z7kjckOTl0+0KS9yS5K8mxJC92041D2zyWZC7J6SS7x/sjSJKutmy4V9Xpqnqgqh4AvgP4MvAU8ChwvKp2AMe7ZZLsBGaA+4A9wBNJNoynfEnSYlY6LPMg8PtV9QfAXuBw134YeKib3ws8WVWXquoMMAfsGkGt0lglr9yk9W6l4T4DfKCb31xVFwC66aaufStwbmib+a7tVZIcSDKbZHZhYWGFZUiSrqd3uCe5Hfgh4D8t13WRtmu+y6+qDlXVdFVNT00tet0bSdINWsme+9uBj1fVS93yS0m2AHTTi137PHDP0HbbgPOrLVSS1N9Kwv0dvDIkA3AU2NfN7wOeHmqfSXJHknuBHcCJ1RYqSeqv1yV/k3w18H3A3x9qfhw4kmQ/cBZ4GKCqTiU5AjwHXAYeqaorI61aknRdvcK9qr4MfP1VbZ9jcPTMYv0PAgdXXZ00Zh4Zo1Z5hqokNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ3qdZy7dKsZPv69rrkykjT53HOXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqUK9wT3Jnkl9O8kKS55N8V5K7khxL8mI33TjU/7Ekc0lOJ9k9vvIlSYvpu+f+b4APV9UbgfuB54FHgeNVtQM43i2TZCcwA9wH7AGeSLJh1IVLkpa2bLgneT3w3cDPAVTVH1fVHwJ7gcNdt8PAQ938XuDJqrpUVWeAOWDXaMuWblzyyk1qVZ89928GFoCfT/KJJO9L8jpgc1VdAOimm7r+W4FzQ9vPd22vkuRAktkkswsLC6v6ISRJr9Yn3G8D3gz8bFV9O/AluiGYJSy2P3TNd9lU1aGqmq6q6ampqV7FSpL66RPu88B8VX20W/5lBmH/UpItAN304lD/e4a23wacH025kqQ+lg33qvrfwLkkb+iaHgSeA44C+7q2fcDT3fxRYCbJHUnuBXYAJ0ZatSTpuvp+QfaPAr+U5HbgM8C7GLwwHEmyHzgLPAxQVaeSHGHwAnAZeKSqroy8cukm8cuytR71CveqOglML7LqwSX6HwQO3nhZkqTV8AxVSWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1qFe4J/lfST6d5GSS2a7triTHkrzYTTcO9X8syVyS00l2j6t4SdLiVrLn/j1V9UBVTXfLjwLHq2oHcLxbJslOYAa4D9gDPJFkwwhrliQtYzXDMnuBw938YeChofYnq+pSVZ0B5oBdq3gcSdIK9Q33An4zyTNJDnRtm6vqAkA33dS1bwXODW0737W9SpIDSWaTzC4sLNxY9ZKkRd3Ws99bq+p8kk3AsSQvXKdvFmmraxqqDgGH
AKanp69ZL0m6cb323KvqfDe9CDzFYJjlpSRbALrpxa77PHDP0ObbgPOjKliStLxlwz3J65J87cvzwF8BngWOAvu6bvuAp7v5o8BMkjuS3AvsAE6MunBpLSSv3KRJ1mdYZjPwVAZ/zbcB/7GqPpzkY8CRJPuBs8DDAFV1KskR4DngMvBIVV0ZS/WSpEUtG+5V9Rng/kXaPwc8uMQ2B4GDq65OknRDPENVkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoP6XvJXWte80JduNe65S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSg3qHe5INST6R5EPd8l1JjiV5sZtuHOr7WJK5JKeT7B5H4ZKkpa1kz/3dwPNDy48Cx6tqB3C8WybJTmAGuA/YAzyRZMNoypUk9dEr3JNsA34AeN9Q817gcDd/GHhoqP3JqrpUVWeAOWDXSKqVJPXSd8/9XwP/FPjKUNvmqroA0E03de1bgXND/ea7tldJciDJbJLZhYWFldYtSbqOZcM9yV8FLlbVMz3vc7Hr79U1DVWHqmq6qqanpqZ63rUkqY8+l/x9K/BDSb4f+Crg9Un+A/BSki1VdSHJFuBi138euGdo+23A+VEWLUm6vmX33KvqsaraVlXbGXxQ+ltV9XeAo8C+rts+4Olu/igwk+SOJPcCO4ATI69ckrSk1XxZx+PAkST7gbPAwwBVdSrJEeA54DLwSFVdWXWlkqTeUnXNcPhNNz09XbOzs2tdhho2jm9imoB/Hd3ikjxTVdOLrfMMVUlqkOEuSQ3yC7KlGzQ81OMQjSaNe+6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDvLaMmjSOS/xK64l77pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBy4Z7kq9KciLJJ5OcSvLPu/a7khxL8mI33Ti0zWNJ5pKcTrJ7nD+AJOlaffbcLwHfW1X3Aw8Ae5K8BXgUOF5VO4Dj3TJJdgIzwH3AHuCJJBvGULskaQnLhnsNfLFbfG13K2AvcLhrPww81M3vBZ6sqktVdQaYA3aNsmhJ0vX1GnNPsiHJSeAicKyqPgpsrqoLAN10U9d9K3BuaPP5ru3q+zyQZDbJ7MLCwip+BEnS1XqFe1VdqaoHgG3AriTfep3ui534XYvc56Gqmq6q6ampqV7FSpL6WdHRMlX1h8BvMxhLfynJFoBuerHrNg/cM7TZNuD8aguVJPXX52iZqSR3dvN/CvjLwAvAUWBf120f8HQ3fxSYSXJHknuBHcCJEdctSbqOPleF3AIc7o54eQ1wpKo+lOR3gSNJ9gNngYcBqupUkiPAc8Bl4JGqujKe8iVJi0nVNcPhN9309HTNzs6udRlqyM2+5O8E/BvpFpTkmaqaXmydZ6hKUoMMd0lqkN/EJI3A8DCQQzSaBO65S1KDDHdJapDhLkkNMtwlqUGGuyQ1yKNl1IybfeKSNMncc5ekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yJOYpBHz8r+aBO65S1KDDHdJatCy4Z7kniT/NcnzSU4leXfXfleSY0le7KYbh7Z5LMlcktNJdo/zB5AkXavPnvtl4B9X1ZuAtwCPJNkJPAocr6odwPFumW7dDHAfsAd4IsmGcRQvSVrcsuFeVReq6uPd/P8Fnge2AnuBw123w8BD3fxe4MmqulRVZ4A5YNeI65YkXceKxtyTbAe+HfgosLmqLsDgBQDY1HXbCpwb2my+a7v6vg4kmU0yu7CwcAOlS5KW0jvck3wN8CvAe6rqC9frukjbNQeEVdWhqpququmpqam+ZUiSeugV7kleyyDYf6mqPtg1v5RkS7d+C3Cxa58H7hnafBtwfjTlSpL66HO0TICfA56vqn81tOoosK+b3wc8PdQ+k+SOJPcCO4AToytZkrSc
PmeovhX4EeDTSU52bT8BPA4cSbIfOAs8DFBVp5IcAZ5jcKTNI1V1ZdSFS5KWtmy4V9V/Z/FxdIAHl9jmIHBwFXVJklbBM1QlqUGGuyQ1yKtCal3LUgOG0i3OPXdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIA+F1Lrj4Y/S8txzl6QGGe6S1CDDXZIa5Ji7NEbDnw/UNd9HJo2Pe+6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQcuGe5L3J7mY5NmhtruSHEvyYjfdOLTusSRzSU4n2T2uwqX1JnnlJo1bnz33XwD2XNX2KHC8qnYAx7tlkuwEZoD7um2eSLJhZNVKknpZNtyr6iPA569q3gsc7uYPAw8NtT9ZVZeq6gwwB+waTamSpL5udMx9c1VdAOimm7r2rcC5oX7zXds1khxIMptkdmFh4QbL0K3CIQ1pZUb9gepi/3qLnnRdVYeqarqqpqempkZchiTd2m403F9KsgWgm17s2ueBe4b6bQPO33h5Upt8J6Jxu9FwPwrs6+b3AU8Ptc8kuSPJvcAO4MTqSpQkrdSyV4VM8gHgbcDdSeaB9wKPA0eS7AfOAg8DVNWpJEeA54DLwCNVdWVMtUuSlrBsuFfVO5ZY9eAS/Q8CB1dTlCRpdTxDVZIaZLhLUoMMd0lqkF+zJ60xv4pP4+CeuyQ1yHCXpAYZ7pLUIMfcpQni+LtGxT13SWqQe+7ShHIvXqvhnrskNchwl6QGGe6S1CDH3DWx/CKLVzj+rpUy3KV1xqBXHw7LSFKD3HOXGuEevYYZ7hqJpYLFwJHWhuGum8YXgNHzQ2ctxXDXDVtNsCy1rWE1Gn1eSK+3zhfZ9c8PVCWpQWML9yR7kpxOMpfk0XE9jiZP8spNa+9Gfh/D26zmprUzlnBPsgH4d8DbgZ3AO5LsHMdj6ebyH7ct4/59LnX/vhiM37j23HcBc1X1mar6Y+BJYO+YHktSI8bxzuBWffEY1weqW4FzQ8vzwHcOd0hyADjQLX4xyelVPN7dwGdXsf24WNfKWNfKrKu6xhGuK7zPu5P183z19E1LrRhXuC/2lL/q8/eqOgQcGsmDJbNVNT2K+xol61oZ61oZ61qZW62ucQ3LzAP3DC1vA86P6bEkSVcZV7h/DNiR5N4ktwMzwNExPZYk6SpjGZapqstJ/hHwX4ANwPur6tQ4HqszkuGdMbCulbGulbGulbml6kp5KpokNcczVCWpQYa7JDVo3YT7cpczSPK2JH+U5GR3+8lJqGuotpNJTiX5b5NQV5J/MvRcPZvkSpK7JqCur0vyq0k+2T1f7xp3TT3r2pjkqSSfSnIiybfepLren+RikmeXWJ8kP93V/akkb56Qut6Y5HeTXEryYzejpp51/e3uefpUkt9Jcv+E1LW3q+lkktkkf2HVD1pVE39j8KHs7wPfDNwOfBLYeVWftwEfmsC67gSeA76xW940CXVd1f8Hgd+ahLqAnwD+RTc/BXweuH0C6vqXwHu7+TcCx2/S39h3A28Gnl1i/fcDv8Hg3JK3AB+dkLo2AX8OOAj82M2oqWddfx7Y2M2/fYKer6/hlc9Avw14YbWPuV723Cf1cgZ96vpbwAer6ixAVV2ckLqGvQP4wITUVcDXJgmDP/jPA5cnoK6dwHGAqnoB2J5k85jroqo+wuA5WMpe4Bdr4PeAO5NsWeu6qupiVX0M+JNx13LV4y5X1+9U1f/pFn+PwTk4k1DXF6tLduB1XHXS541YL+G+2OUMti7S77u6t/O/keS+CanrW4CNSX47yTNJ3jkhdQGQ5KuBPcCvTEhdPwO8icFJb58G3l1VX5mAuj4J/DWAJLsYnPZ9U4JhGb1/17rGfgbveiZCkh9O8gLwa8DfXe39rZdwX/ZyBsDHgW+qqvuBfwv853EXRb+6bgO+A/gBYDfwz5J8ywTU9bIfBP5HVV1v73BU+tS1GzgJ/GngAeBnkrx+vGX1qutxBi/SJ4EfBT7B+N9R9LGS37U6
Sb6HQbj/+FrX8rKqeqqq3gg8BPzUau9vvYT7spczqKovVNUXu/lfB16b5O61rqvr8+Gq+lJVfRb4CDDuD3FWcvmHGW7OkAz0q+tdDIaxqqrmgDMMxrjXtK7u7+tdVfUA8E4GnwecGXNdfXipjxVK8m3A+4C9VfW5ta7nat0Qzp9ZbX6tl3Bf9nIGSb6hG6d9+W3za4Bx/+L6XGbhaeAvJrmtGwL5TuD5CaiLJF8H/KWuxpuhT11ngQe7+jYDbwA+s9Z1JbmzWwfw94CPVNUXxlxXH0eBd3ZHzbwF+KOqurDWRU2qJN8IfBD4kar6n2tdz8uS/Nmh/Hozgw/2V5Vf6+I7VGuJyxkk+Qfd+n8P/A3gHya5DPw/YGboA4o1q6uqnk/yYeBTwFeA91XVoodD3cy6uq4/DPxmVX1pnPWssK6fAn4hyacZDDn8ePeOZ63rehPwi0muMDj6af84a3pZkg8wOBLs7iTzwHuB1w7V9esMjpiZA77M4J3PmteV5BuAWeD1wFeSvIfBEUhjfUHs8Xz9JPD1wBNdll6um3ClyB51/XUGL9J/wiC//uZq88vLD0hSg9bLsIwkaQUMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSg/w/J7ivzeXkQkQAAAABJRU5ErkJggg==\n",
191 | "text/plain": [
192 | ""
193 | ]
194 | },
195 | "metadata": {
196 | "needs_background": "light"
197 | },
198 | "output_type": "display_data"
199 | }
200 | ],
201 | "source": [
202 | "# Usage: Plot different datasets & regions by updating the filename and \"region = regionsRCMC[x]\" attributes, respectively\n",
203 | "\n",
204 | "saveDir = 'DIRECTORY_PATH'\n",
205 | "fileName = 'FILE_NAME.svg' # swap out .svg for the desired output file format\n",
206 | "\n",
207 | "# Bin size (in bp) at which the balanced rowsums are calculated\n",
208 | "resolution = 250\n",
209 | "\n",
210 | "# Open the desired dataset at that resolution\n",
211 | "clr_Data = cooler.Cooler(f'FILE_PATH_TO_MCOOL::resolutions/{resolution}')\n",
212 | "\n",
213 | "# Capture regions available for region-by-region calculations\n",
214 | "## For an apples-to-apples comparison, balanced rowsums are calculated at the RCMC regions for both the RCMC dataset as well as the genome-wide Micro-C (Hsieh 2020) and Hi-C (Bonev 2017) datasets\n",
215 | "regionsRCMC = ['chr3:33804149-35704149','chr5:31257344-32382344','chr6:122451959-122876959','chr8:84846629-85856629','chr18:58032072-59034072']\n",
216 | "\n",
217 | "# Pick the region whose rowsums are calculated in this run\n",
218 | "region = regionsRCMC[3]\n",
219 | "\n",
220 | "# Fetch the balanced contact matrix for that region\n",
221 | "regionMat = clr_Data.matrix(balance=True).fetch(region)\n",
222 | "\n",
223 | "# Zero-initialised accumulators sized to the rows & columns of the contact matrix\n",
224 | "## Both the rowsums and the columnsums are accumulated; they should be the same distribution, so only 1 of the 2 is needed\n",
225 | "nRows, nCols = len(regionMat), len(regionMat[0])\n",
226 | "rowSum = np.zeros(nRows)\n",
227 | "columnSum = np.zeros(nCols)\n",
228 | "\n",
229 | "# Visit every contact bin on the diagonal or to one side of it and add its balanced value to the sums\n",
230 | "## Each off-diagonal value is also credited to its reflected twin, so one half of the matrix captures it all due to reflected symmetry\n",
231 | "for rowIdx in range(nRows):\n",
232 | "    for colIdx in range(rowIdx, nCols): # Starting at rowIdx keeps the iteration to one side of the diagonal\n",
233 | "        val = regionMat[rowIdx][colIdx] # Balanced value of the contact bin\n",
234 | "        val = 0 if np.isnan(val) else val # NaN values can cause calculation errors, so they count as 0\n",
235 | "        rowSum[rowIdx] += val\n",
236 | "        columnSum[colIdx] += val\n",
237 | "        if colIdx != rowIdx: # On the diagonal there is no reflected twin to add\n",
238 | "            rowSum[colIdx] += val\n",
239 | "            columnSum[rowIdx] += val\n",
240 | "\n",
241 | "# Plotting the rowsums and columnsums yields the same distribution, so this just runs ahead with the rowsums\n",
242 | "## Only rowsums greater than 0 are kept, which drops the empty stripes (either due to probe coverage or hard-to-map sites)\n",
243 | "rowSumFilt = [total for total in rowSum if total > 0]\n",
244 | "\n",
245 | "# Plot a histogram of the rowsums\n",
246 | "## Change the number of histogram bins as desired (100 used here)\n",
247 | "## Change the range of visualization to capture and visually center the entire distribution\n",
248 | "plt.hist(rowSumFilt,100,range=[3.75,8.25],align='mid',color='blue') # Klf1\n",
249 | "# plt.hist(rowSumFilt,100,range=[1.1,3.4],align='mid',color='blue') # Ppm1g\n",
250 | "# plt.hist(rowSumFilt,100,range=[0.3,1.0],align='mid',color='blue') # Fbn2\n",
251 | "# plt.hist(rowSumFilt,100,range=[0.5,1.3],align='mid',color='blue') # Sox2\n",
252 | "\n",
253 | "# plt.savefig(saveDir + fileName, dpi=1200)"
264 | ]
265 | },
266 | {
267 | "cell_type": "markdown",
268 | "metadata": {},
269 | "source": [
270 | "### Microcompartment-containing bins balancing calculations"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": 37,
276 | "metadata": {},
277 | "outputs": [
278 | {
279 | "data": {
280 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAKvklEQVR4nO3d0avk91nH8c/T7EqLCeRiDxqSHNcLKcRCm7DESKDUUCStYm96kYIRRFkqFlIQRL0w5B8oogi62GDF2lJoIyU0asCGUrCpSUxr4rYSSsXQwBJLmwRFSXi8mElY192d39k9c3afPa8XHHbOzu/MPPnulze//c3Mpro7AMzztis9AACXRsABhhJwgKEEHGAoAQcY6sg2HvTYsWN9/PjxbTw0wDXp6aeffrm7d/byM1sJ+PHjx/PUU09t46EBrklV9W97/RmXUACGEnCAoQQcYCgBBxhKwAGGEnCAoRa9jbCqvpvk1SRvJHm9u09scygANtvL+8B/rrtf3tokAOyJSygAQy09A+8kf1dVneRPu/vUuQdU1ckkJ5Nkd3d3/ybk0Kp66K3b3Q9e8jFwrVp6Bn53d9+R5ANJfrOq3nvuAd19qrtPdPeJnZ09fZwfgEuwKODd/b31r2eSPJLkzm0OBcBmGwNeVT9aVTe8eTvJzyd5btuDAXBxS66B/1iSR6rqzeP/qrv/ZqtTAbDRxoB393eSvPsAZgFgD7yNEGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEWB7yqrquqf6qqR7c5EADL7OUM/IEkp7c1CAB7syjgVXVLkl9I8mfbHQeApY4sPO4Pkvx2khsudEBVnUxyMkl2d3cvezA4W9VDl3x894P7PQ5cFTaegVfVLyY5091PX+y47j7V3Se6+8TOzs6+DQjA+S25hHJ3kl+qqu8m+WySe6rqL7c6FQAbbQx4d/9ud9/S3ceT3Jfk77v7l7c+GQAX5X3gAEMtfREzSdLdTyR5YiuTALAnzsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYamPAq+rtVfX1qvpGVT1fVQ8dxGAAXNyRBcf8d5J7uvu1qjqa5KtV9Vh3f23LswFwERsD3t2d5LX1t0fXX73NoQDYbNE18Kq6rqqeTXImyePd/eRWpwJgoyWXUNLdbyR5T1XdmOSRqnpXdz939jFVdTLJySTZ3d3d7znhqnL2S0HdD17BSTjM9vQulO7+QZInktx7nvtOdfeJ7j6xs7OzP9MBcEFL3oWysz7zTlW9I8n7k3xry3MBsMGSSyg3JflUVV2XVfA/192PbncsADZZ8i6Ubya5/QBmAWAPfBITYCgBBxhKwAGGEnCAoQQcYCgBBxhKwAGGEnCAoQQcYCgBBxhKwAGGEnCAoQQcYCgBBxhKwAGGEnCAoQQcYCgBBxhKwAGGEnCAoQQcYCgBBxhKwAGGEnCAoQQcYCgBBxhKwAGGEnCAoQQcYCgBBxhKwAGGEnCAoQQcYKiNAa+qW6vqy1V1uqqer6oHDmIwAC7uyIJjXk/yW939TFXdkOTpqnq8u/9ly7MBcBEbz8C7+6XufmZ9+9Ukp5PcvO3BALi4JWfgb6mq40luT/Lkee47meRkkuzu7u7HbLDvqh5663b3gxuPOduFjocrZfGLmFV1fZLPJ/l4d79y7v3dfaq7T3T3iZ2dnf2cEYDzWBTwqjqaVbw/3d1f2O5IACyx5F0oleSTSU539ye2PxIA
Syw5A787yf1J7qmqZ9dfH9zyXABssPFFzO7+apI6gFkA2AOfxAQYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGCojQGvqoer6kxVPXcQAwGwzJIz8D9Pcu+W5wBgjzYGvLu/kuT7BzALAHtwZL8eqKpOJjmZJLu7u/v1sBwyVQ8d2GPu13Od/TjdD+7pmHNnuNDPw/ns24uY3X2qu09094mdnZ39elgALsC7UACGEnCAoZa8jfAzSf4hyTur6sWq+rXtjwXAJhtfxOzujxzEIADsjUsoAEMJOMBQAg4wlIADDCXgAEMJOMBQAg4wlIADDCXgAEMJOMBQAg4wlIADDCXgAEMJOMBQAg4wlIADDCXgAEMJOMBQAg4wlIADDCXgAEMJOMBQAg4wlIADDCXgAEMJOMBQAg4wlIADDCXgAEMJOMBQAg4wlIADDCXgAEMtCnhV3VtV366qF6rqd7Y9FACbbQx4VV2X5I+TfCDJbUk+UlW3bXswAC5uyRn4nUle6O7vdPf/JPlskg9tdywANqnuvvgBVR9Ocm93//r6+/uT/Ex3f+yc404mObn+9p1Jvp3kWJKX93vogazDinVYsQ4r1mHlzXX4ie7e2csPHllwTJ3n9/5f9bv7VJJT/+cHq57q7hN7GehaZB1WrMOKdVixDiuXsw5LLqG8mOTWs76/Jcn3LuXJANg/SwL+j0l+qqp+sqp+JMl9Sb643bEA2GTjJZTufr2qPpbkb5Ncl+Th7n5+4eOf2nzIoWAdVqzDinVYsQ4rl7wOG1/EBODq5JOYAEMJOMBQlx3wqnq4qs5U1XMXuL+q6g/XH8P/ZlXdcbnPeTVasA7vq6ofVtWz66/fP+gZD0JV3VpVX66q01X1fFU9cJ5jrvk9sXAdrvk9UVVvr6qvV9U31uvw0HmOOQz7Yck67H0/dPdlfSV5b5I7kjx3gfs/mOSxrN5PfleSJy/3Oa/GrwXr8L4kj17pOQ9gHW5Kcsf69g1J/jXJbYdtTyxch2t+T6z/jK9f3z6a5Mkkdx3C/bBkHfa8Hy77DLy7v5Lk+xc55ENJ/qJXvpbkxqq66XKf92qzYB0Ohe5+qbufWd9+NcnpJDefc9g1vycWrsM1b/1n/Nr626Prr3PfOXEY9sOSddizg7gGfnOSfz/r+xdzCDfy2s+u/wr1WFX99JUeZtuq6niS27M62zjbodoTF1mH5BDsiaq6rqqeTXImyePdfSj3w4J1SPa4Hw4i4Is+in8IPJPVv3Xw7iR/lOSvr+w421VV1yf5fJKPd/cr5959nh+5JvfEhnU4FHuiu9/o7vdk9SnuO6vqXecccij2w4J12PN+OIiA+yh+ku5+5c2/QnX3l5IcrapjV3israiqo1lF69Pd/YXzHHIo9sSmdThMeyJJuvsHSZ5Icu85dx2K/fCmC63DpeyHgwj4F5P8yvqV5ruS/LC7XzqA572qVNWPV1Wtb9+Z1dr/x5Wdav+t/xs/meR0d3/iAodd83tiyTochj1RVTtVdeP69juSvD/Jt8457DDsh43rcCn7Ycm/RrhpsM9k9erpsap6McmDWV2gT3f/SZIvZfUq8wtJ/jPJr17uc16NFqzDh5P8RlW9nuS/ktzX65eerzF3J7k/yT+vr/clye8l2U0O1Z5Ysg6HYU/clORTtfofw7wtyee6+9Gq+mhyqPbDknXY837wUXqAoXwSE2AoAQcYSsABhhJwgKEEHGAoAQcYSsABhvpfsvur3aL9XpsAAAAASUVORK5CYII=\n",
281 | "text/plain": [
282 | ""
283 | ]
284 | },
285 | "metadata": {
286 | "needs_background": "light"
287 | },
288 | "output_type": "display_data"
289 | }
290 | ],
291 | "source": [
292 | "# Same approach as above, though now limited to only those contact bins containing microcompartment anchors\n",
293 | "# Usage: Plot different datasets & regions by updating the filename, \"region = regionsRCMC[x]\", and \"anchorReg = x\" attributes\n",
294 | "\n",
295 | "saveDir = f'DIRECTORY_PATH'\n",
296 | "fileName = f'FILE_NAME.svg' # swap out .svg for the desired output file format\n",
297 | "\n",
298 | "# Specify the data resolution at which balanced rowsums will be calculated\n",
299 | "resolution = 250\n",
300 | "\n",
301 | "# Load in the desired dataset\n",
302 | "clr_Data = cooler.Cooler('FILE_PATH_TO_MCOOL::resolutions/'+str(resolution))\n",
303 | "\n",
304 | "# List the Capture regions for region-by-region calculations\n",
305 | "## For an apples-to-apples comparison, balanced rowsums are calculated at the RCMC regions for both the RCMC dataset as well as the genome-wide Micro-C (Hsieh 2020) and Hi-C (Bonev 2017) datasets\n",
306 | "regionsRCMC = ['chr3:33804149-35704149','chr5:31257344-32382344','chr6:122451959-122876959','chr8:84846629-85856629','chr18:58032072-59034072']\n",
307 | "\n",
308 | "# Specify which specific region you would like to calculate rowsums for\n",
309 | "region = regionsRCMC[1]\n",
310 | "\n",
311 | "# Load in the balanced contact matrix for the specified region\n",
312 | "regionMat = clr_Data.matrix(balance=True).fetch(region)\n",
313 | "\n",
314 | "# Specify the region of interest (either Klf1 or Ppm1g) for which to load in microcompartment anchors\n",
315 | "## The midpoints go into their own variable so that the anchors DataFrame is not overwritten and the earlier cells stay re-runnable\n",
316 | "anchorReg = 'Ppm1g'\n",
317 | "if anchorReg == 'Klf1':\n",
318 | "    regionAnchors = anchorListKlf1\n",
319 | "    offset = 84846629 # This offset is the start coordinate of the Klf1 region, allowing genomic coordinate conversion to bins\n",
320 | "elif anchorReg == 'Ppm1g':\n",
321 | "    regionAnchors = anchorListPpm1g\n",
322 | "    offset = 31257344 # This offset is the start coordinate of the Ppm1g region, allowing genomic coordinate conversion to bins\n",
323 | "else:\n",
324 | "    raise ValueError(f'Unknown anchorReg {anchorReg!r}; expected Klf1 or Ppm1g')\n",
325 | "\n",
326 | "# Create a list of bins (matrix row/column indices) which contain microcompartment anchors\n",
327 | "## fetch() returns whole bins, so matrix index 0 is the bin that contains the region start; the anchor and the region start are therefore binned separately before subtracting\n",
328 | "firstBin = offset // resolution\n",
329 | "bins = [midpoint // resolution - firstBin for midpoint in regionAnchors]\n",
330 | "if any(b < 0 or b >= len(regionMat) for b in bins):\n",
331 | "    raise ValueError('An anchor lies outside the fetched region; check that region and anchorReg refer to the same locus')\n",
332 | "anchorBins = set(bins) # Set for fast membership tests inside the double loop below\n",
333 | "\n",
334 | "# Create arrays of zeroes corresponding to the lengths (rows & columns) of the loaded in contact matrices\n",
335 | "## The code here calculates both the rowsums and columnsums; they should be the same distribution, so you only need 1 of the 2\n",
336 | "rowSum = np.zeros(len(regionMat))\n",
337 | "columnSum = np.zeros(len(regionMat[0]))\n",
338 | "\n",
339 | "# Iterate through the matrix bin-by-bin, summing up the balanced contact bin values for each row & column that contains a microcompartment anchor\n",
340 | "## Only need to iterate through one half of the contact matrix (i.e., the half above the diagonal or the half below it) to capture it all due to reflected symmetry\n",
341 | "for i in range(len(regionMat)):\n",
342 | "    for j in range(i, len(regionMat[0])): # Starting at i keeps the iteration to one side of the diagonal instead of spanning the whole matrix\n",
343 | "        if i in anchorBins or j in anchorBins: # The only difference from the whole-region calculation: contact bins are only summed when their row or column contains a microcompartment anchor\n",
344 | "            val = regionMat[i][j] # Extracts the balanced value of the contact bin\n",
345 | "            if np.isnan(val):\n",
346 | "                val = 0 # NaN values can cause calculation errors, so this sets them to 0\n",
347 | "            rowSum[i] += val # Adds the balanced value to the corresponding rowsum\n",
348 | "            columnSum[j] += val # Adds the balanced value to the corresponding columnsum\n",
349 | "\n",
350 | "            if j != i: # If j = i, then we're on the diagonal & there's no need to add in the reflected twin of the contact bin\n",
351 | "                rowSum[j] += val # Adds the balanced value of the reflected twin to the corresponding rowsum\n",
352 | "                columnSum[i] += val # Adds the balanced value of the reflected twin to the corresponding columnsum\n",
353 | "\n",
354 | "# Plotting the rowsums and columnsums yields the same distribution, so this just runs ahead with the rowsums\n",
355 | "## This selects for bins containing microcompartment anchors and filters out the rowsums that are 0\n",
356 | "## Note: Filtering for non-zero bins likely changes nothing since microcompartment anchor-containing bins are guaranteed not empty. Keeping it nonetheless for homogeneity with the whole-region calculation.\n",
357 | "rowSumFilt = []\n",
358 | "for x in bins:\n",
359 | "    microcompRowSum = rowSum[x] # Extract the rowsum value of a microcompartment anchor-containing bin\n",
360 | "    if microcompRowSum > 0:\n",
361 | "        rowSumFilt.append(microcompRowSum) # Add the rowsum to the list of microcompartment rowsums if it's greater than 0\n",
362 | "\n",
363 | "# Plot a histogram of the rowsums\n",
364 | "## Change the number of histogram bins as desired (100 used here)\n",
365 | "## Change the range of visualization to capture and visually center the entire distribution\n",
366 | "# plt.hist(rowSumFilt,100,range=[3.75,8.25],align='mid',color='navy') # Klf1\n",
367 | "plt.hist(rowSumFilt,100,range=[1.1,3.4],align='mid',color='navy') # Ppm1g\n",
368 | "\n",
369 | "# plt.savefig(saveDir + fileName, dpi=1200)"
372 | ]
373 | },
374 | {
375 | "cell_type": "code",
376 | "execution_count": null,
377 | "metadata": {},
378 | "outputs": [],
379 | "source": []
380 | }
381 | ],
382 | "metadata": {
383 | "kernelspec": {
384 | "display_name": "Python [conda env:coolToolsEnv]",
385 | "language": "python",
386 | "name": "conda-env-coolToolsEnv-py"
387 | },
388 | "language_info": {
389 | "codemirror_mode": {
390 | "name": "ipython",
391 | "version": 3
392 | },
393 | "file_extension": ".py",
394 | "mimetype": "text/x-python",
395 | "name": "python",
396 | "nbconvert_exporter": "python",
397 | "pygments_lexer": "ipython3",
398 | "version": "3.8.12"
399 | }
400 | },
401 | "nbformat": 4,
402 | "nbformat_minor": 4
403 | }
404 |
--------------------------------------------------------------------------------
/CalculatingFilledBinFractionByDistance.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Counting fraction of filled bins across contact distances"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 2,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# import core packages\n",
17 | "import warnings\n",
18 | "warnings.filterwarnings(\"ignore\")\n",
19 | "from itertools import combinations\n",
20 | "\n",
21 | "import matplotlib.pyplot as plt\n",
22 | "from matplotlib import colors\n",
23 | "%matplotlib inline\n",
24 | "plt.style.use('seaborn-poster')\n",
25 | "import numpy as np\n",
26 | "import pandas as pd\n",
27 | "\n",
28 | "# import open2c libraries\n",
29 | "import bioframe\n",
30 | "import cooler\n",
31 | "import cooltools\n",
32 | "\n",
33 | "from packaging import version\n",
34 | "if version.parse(cooltools.__version__) < version.parse('0.5.0'):\n",
35 | " raise AssertionError(\"tutorials rely on cooltools version 0.5.0 or higher,\"+\n",
36 | " \"please check your cooltools version and update to the latest\")\n",
37 | "\n",
38 | " \n",
39 | "resolution = 100 # input the desired data resolution for analysis\n",
40 | "distances = [1000,2500,5000,10000,25000,50000,100000,250000,500000,1000000] # input the desired contact distance diagonals in bp\n",
41 | "\n",
42 | "# Load in each of the datasets at the specified resolution\n",
43 | "clr_RCMC = cooler.Cooler('FILE_PATH_TO_MCOOL::resolutions/' + str(resolution))\n",
44 | "clr_TMCC = cooler.Cooler('FILE_PATH_TO_MCOOL::resolutions/' + str(resolution))\n",
45 | "clr_Hsieh = cooler.Cooler('FILE_PATH_TO_MCOOL::resolutions/' + str(resolution))\n",
46 | "clr_Bonev = cooler.Cooler('FILE_PATH_TO_MCOOL::resolutions/' + str(resolution))\n",
47 | "\n",
48 | "# List the Capture regions for RCMC & TMCC\n",
49 | "## To ensure an apples-to-apples comparison, the genome-wide Micro-C (Hsieh 2020) and Hi-C (Bonev 2017) datasets will be analyzed for the same regions as RCMC instead of across the entire genome\n",
50 | "regionsRCMC = ['chr3:33804149-35704149','chr5:31257344-32382344','chr6:122451959-122876959','chr8:84846629-85856629','chr18:58032072-59034072']\n",
51 | "regionsTMCC = ['chr3:34365200-35640000','chr6:122606805-122856450']\n",
52 | "\n",
53 | "# Variables for storing fill fraction data\n",
54 | "countsAcrossDistances = [] # Stores the fraction of all non-empty bins for each dataset (across all regions) for each of the specified contact distances\n",
55 | "nonZeroFracsByRegionAcrossDistances = [] # Stores the fraction of all non-empty bins for each dataset (region-separated) for each of the specified contact distances\n",
56 | "\n",
57 | "for distance in distances:\n",
58 | "    # Index of the matrix diagonal that is inspected for this contact distance (the same for every dataset & region)\n",
59 | "    ## NOTE(review): diagonal k pairs bins whose starts are k * resolution bp apart, i.e. (distance - resolution) bp here; the -1 is kept as in the original analysis - confirm it is intended\n",
60 | "    offset = distance // resolution - 1\n",
61 | "    if offset < 0:\n",
62 | "        raise ValueError(f'Contact distance {distance} bp is smaller than the resolution ({resolution} bp)')\n",
63 | "\n",
64 | "    compiledCounts = [] # Stores the total number of bins and number of non-empty bins for each region of each dataset at the given contact distance\n",
65 | "\n",
66 | "    coolers = [clr_RCMC,clr_TMCC,clr_Hsieh,clr_Bonev]\n",
67 | "    for coolerIt, clr in enumerate(coolers):\n",
68 | "        # The TMCC dataset (index 1) is analyzed at the TMCC regions; the RCMC, Hsieh, & Bonev datasets at the RCMC regions\n",
69 | "        regions = regionsTMCC if coolerIt == 1 else regionsRCMC\n",
70 | "\n",
71 | "        totBins = [] # Number of total contact bins per region at the given contact distance\n",
72 | "        nonZeroBins = [] # Number of contact bins per region that have at least one read in them at the given contact distance\n",
73 | "\n",
74 | "        for regionName in regions:\n",
75 | "            # Keep balancing off to count raw read counts\n",
76 | "            regionMat = clr.matrix(balance=False).fetch(regionName)\n",
77 | "\n",
78 | "            bins = len(regionMat) # Number of genomic bins across the matrix\n",
79 | "            # A region with fewer genomic bins than the offset has no contact bins at this distance; clamping at 0 stops a negative count from being subtracted from the totals of the other regions\n",
80 | "            binsAtDiag = max(bins - offset, 0)\n",
81 | "            totBins.append(binsAtDiag)\n",
82 | "\n",
83 | "            # Tally the non-empty bins along the diagonal (np.diagonal is empty when the offset lies beyond the matrix)\n",
84 | "            diagVals = np.diagonal(regionMat, offset=offset)\n",
85 | "            nonZeroBins.append(int(np.count_nonzero(diagVals > 0)))\n",
86 | "\n",
87 | "        binCounts = [totBins,nonZeroBins] # Per-region total bin & non-empty bin counts for the given dataset\n",
88 | "        compiledCounts.append(binCounts)\n",
89 | "\n",
90 | "    # print(compiledCounts) # Sanity check that the code is working\n",
91 | "\n",
92 | "    totNonZeroFrac = [] # Stores the across-regions fraction of non-empty bins for each dataset at a given contact distance\n",
93 | "    nonZeroFracsByRegion = [] # The region-separated version of totNonZeroFrac\n",
94 | "\n",
95 | "    # Iterate through compiledCounts to extract filled bin fractions\n",
96 | "    for datasetCounts in compiledCounts:\n",
97 | "        sumPossible = sum(datasetCounts[0]) # Total number of bins across all regions for a dataset\n",
98 | "        sumNonZero = sum(datasetCounts[1]) # Total number of non-empty bins across all regions for a dataset\n",
99 | "        # NaN marks a distance at which no bins exist, which is different from all bins being empty\n",
100 | "        totNonZeroFrac.append(sumNonZero / sumPossible if sumPossible > 0 else np.nan)\n",
101 | "\n",
102 | "        regionFracs = [nonZero / total if total > 0 else np.nan for total, nonZero in zip(datasetCounts[0], datasetCounts[1])]\n",
103 | "        nonZeroFracsByRegion.append(regionFracs) # Non-empty bin fractions by region for the given dataset\n",
104 | "\n",
105 | "    # print(totNonZeroFrac) # Double-check that totNonZeroFrac is calculating as expected\n",
106 | "    # print(nonZeroFracsByRegion) # Double-check that nonZeroFracsByRegion is calculating as expected\n",
107 | "\n",
108 | "    countsAcrossDistances.append(totNonZeroFrac) # One entry per contact distance, holding the non-zero bin fraction for each dataset\n",
109 | "    nonZeroFracsByRegionAcrossDistances.append(nonZeroFracsByRegion) # One entry per contact distance, holding the region-separated non-zero bin fractions for each dataset\n",
110 | "\n",
111 | "countsAcrossDistances # Print the output of countsAcrossDistances to confirm that it produces a logical result"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 3,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "# Separate the nested array of countsAcrossDistances by dataset for ease of calculation & plotting\n",
134 | "RCMCcounts = []\n",
135 | "TMCCcounts = []\n",
136 | "Hsiehcounts = []\n",
137 | "Bonevcounts = []\n",
138 | "\n",
139 | "for i in countsAcrossDistances:\n",
140 | " RCMCcounts.append(i[0])\n",
141 | " TMCCcounts.append(i[1])\n",
142 | " Hsiehcounts.append(i[2])\n",
143 | " Bonevcounts.append(i[3])"
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": 6,
149 | "metadata": {},
150 | "outputs": [],
151 | "source": [
152 | "# Separate out the nested array of nonZeroFracsByRegionAcrossDistances to extract the RCMC & TMCC region counts for ease of calculation & plotting\n",
153 | "RCMCRegionCounts = []\n",
154 | "TMCCRegionCounts = []\n",
155 | "\n",
156 | "for distanceSet in nonZeroFracsByRegionAcrossDistances:\n",
157 | " RCMCRegionCounts.append(distanceSet[0])\n",
158 | " TMCCRegionCounts.append(distanceSet[1])"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 7,
164 | "metadata": {},
165 | "outputs": [],
166 | "source": [
167 | "# Separate out the nested array of region counts by individual region for RCMC & TMCC for ease of plotting\n",
168 | "RCMC_Sox2 = []\n",
169 | "RCMC_Ppm1g = []\n",
170 | "RCMC_Nanog = []\n",
171 | "RCMC_Klf1 = []\n",
172 | "RCMC_Fbn2 = []\n",
173 | "TMCC_Sox2 = []\n",
174 | "TMCC_Nanog = []\n",
175 | "\n",
176 | "for i in RCMCRegionCounts:\n",
177 | " RCMC_Sox2.append(i[0])\n",
178 | " RCMC_Ppm1g.append(i[1])\n",
179 | " RCMC_Nanog.append(i[2])\n",
180 | " RCMC_Klf1.append(i[3])\n",
181 | " RCMC_Fbn2.append(i[4])\n",
182 | " \n",
183 | "for j in TMCCRegionCounts:\n",
184 | " TMCC_Sox2.append(j[0])\n",
185 | " TMCC_Nanog.append(j[1])"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": 21,
191 | "metadata": {},
192 | "outputs": [
193 | {
194 | "data": {
195 | "image/png": "\n",
196 | "text/plain": [
197 | ""
198 | ]
199 | },
200 | "metadata": {
201 | "needs_background": "light"
202 | },
203 | "output_type": "display_data"
204 | }
205 | ],
206 | "source": [
207 | "# Plot the fraction of filled bins across contact distances both by region and averaged across regions\n",
208 | "\n",
209 | "import matplotlib.pyplot as plt\n",
210 | "\n",
211 | "saveDir = f'DIRECTORY_PATH'\n",
212 | "fileName = f'FILE_NAME.svg' # swap out .svg for the desired output file format\n",
213 | "\n",
214 | "plt.scatter(distances, RCMCcounts, marker = 'D', c = 'blue', label='RCMC')\n",
215 | "plt.scatter(distances, RCMC_Sox2, marker = 'x', c = 'green', label='RCMC_Sox2')\n",
216 | "plt.scatter(distances, RCMC_Ppm1g, marker = 'x', c = 'skyblue', label='RCMC_Ppm1g')\n",
217 | "plt.scatter(distances, RCMC_Nanog, marker = 'x', c = 'teal', label='RCMC_Nanog')\n",
218 | "plt.scatter(distances, RCMC_Klf1, marker = 'x', c = 'black', label='RCMC_Klf1')\n",
219 | "plt.scatter(distances, RCMC_Fbn2, marker = 'x', c = 'cyan', label='RCMC_Fbn2')\n",
220 | "plt.scatter(distances, TMCCcounts, marker = 's', c = 'red', label='TMCC')\n",
221 | "plt.scatter(distances, TMCC_Sox2, marker = '+', c = 'purple', label='TMCC_Sox2')\n",
222 | "plt.scatter(distances, TMCC_Nanog, marker = '+', c = 'pink', label='TMCC_Nanog')\n",
223 | "plt.scatter(distances, Hsiehcounts, marker = 'v', c = 'orange', label='Hsieh')\n",
224 | "plt.scatter(distances, Bonevcounts, marker = 'o', c = 'gray', label='Bonev')\n",
225 | "plt.xscale(\"log\")\n",
226 | "plt.xlabel(\"Contact Distance (bp) at 100 bp Resolution\")\n",
227 | "plt.ylabel(\"Fraction of Nonzero Bins\")\n",
228 | "plt.legend()\n",
229 | "\n",
230 | "# plt.savefig(saveDir + fileName, dpi=1200)\n",
231 | "plt.show()"
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "execution_count": null,
237 | "metadata": {},
238 | "outputs": [],
239 | "source": []
240 | }
241 | ],
242 | "metadata": {
243 | "kernelspec": {
244 | "display_name": "Python [conda env:coolToolsEnv]",
245 | "language": "python",
246 | "name": "conda-env-coolToolsEnv-py"
247 | },
248 | "language_info": {
249 | "codemirror_mode": {
250 | "name": "ipython",
251 | "version": 3
252 | },
253 | "file_extension": ".py",
254 | "mimetype": "text/x-python",
255 | "name": "python",
256 | "nbconvert_exporter": "python",
257 | "pygments_lexer": "ipython3",
258 | "version": "3.8.12"
259 | }
260 | },
261 | "nbformat": 4,
262 | "nbformat_minor": 5
263 | }
264 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Hansen lab at MIT
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/LoopStrengthRCMC.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "abd813be",
6 | "metadata": {},
7 | "source": [
8 | "# Loop strength calculation"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "id": "dfcad04e",
14 | "metadata": {},
15 | "source": [
16 | "## Imports"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": null,
22 | "id": "73bb3370",
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "# import standard python libraries\n",
27 | "import matplotlib as mpl\n",
28 | "%matplotlib inline\n",
29 | "mpl.rcParams['figure.dpi'] = 96\n",
30 | "import numpy as np\n",
31 | "import matplotlib.pyplot as plt\n",
32 | "import pandas as pd\n",
33 | "import seaborn as sns\n",
34 | "\n",
35 | "# import libraries for biological data analysis\n",
36 | "from coolpuppy import coolpup\n",
37 | "from plotpuppy import plotpup\n",
38 | "import cooler\n",
39 | "import bioframe\n",
40 | "import cooltools\n",
41 | "from cooltools import expected_cis\n",
42 | "from cooltools.lib import plotting\n",
43 | "\n",
44 | "import bbi"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "id": "e71fbaa6",
50 | "metadata": {},
51 | "source": [
52 | "## Inputs"
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "id": "70c700df",
58 | "metadata": {},
59 | "source": [
60 | "First, get the loops and mcools to analyse, set up variables, etc"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": null,
66 | "id": "068a8eac",
67 | "metadata": {},
68 | "outputs": [],
69 | "source": [
70 | "#mcool resolution to read\n",
71 | "resolution = 250\n",
72 | "#List of mcool locations as strings\n",
73 | "conditions = [\"mcoollocation1\", \"mcoollocation2\", \"mcoollocation3\"]\n",
74 | "#List of loop types as strings\n",
75 | "loopTypesNames = [\"loop\", \"type\", \"names\"]\n",
76 | "#List of loop file locations (bedpe)\n",
77 | "loopFiles = [\"looplocation1\", \"looplocation2\", \"looplocation3\"]\n",
78 | "\n",
79 | "#Specify the RCMC regions of the mcools to look at (format: chromosome (string), start (number), end (number), name of region (string))\n",
80 | "regions = pd.DataFrame([['chrA',1,100,'regionname1'],['chrB',1,100,'regionname2'],['chrC',1,100,'regionname3']],\n",
81 | " columns=['chrom', 'start', 'end', 'name'])\n",
82 | "#Cis expected file locations from cooltools - .tsv file - one for each mcool\n",
83 | "expectedFiles = [\"expectedlocation1\", \"expectedlocation2\", \"expectedlocation3\"]\n",
84 | "#Set save directory\n",
85 | "saveDir = '/a/directory/on/your/system/'\n",
86 | "\n",
87 | "#Set the size of the area flanking the dot\n",
88 | "flankDist = 10000\n",
89 | "#dotWindow must be odd. It is the side length, in pixels, of the box measured around the dot\n",
90 | "#(and by extension the size of the boxes at the edges of the region too).\n",
91 | "#An odd value gives the box a whole number of pixels on each side of the centre pixel.\n",
92 | "dotWindow = 5\n"
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "id": "e7cf57de",
98 | "metadata": {},
99 | "source": [
100 | "Read in the loop files"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": null,
106 | "id": "d76f2cee",
107 | "metadata": {},
108 | "outputs": [],
109 | "source": [
110 | "#######Don't change this section#######\n",
111 | "#Create an empty list to store the imported loop locations\n",
112 | "loopTypes = []\n",
113 | "#List of column names to use for imported loops (this is constant - do not change)\n",
114 | "colNames = ['chrom1', 'start1', 'end1', 'chrom2', 'start2', 'end2']\n",
115 | "#Read in files, put them in loopTypes\n",
116 | "for file in loopFiles:\n",
117 | " temploops = pd.read_csv(file, sep='\\t', names=colNames, header=None)\n",
118 | " loopTypes.append(temploops)"
119 | ]
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "id": "3ba156f7",
124 | "metadata": {},
125 | "source": [
126 | "## Enrichment calculation"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": null,
132 | "id": "e4a3696a",
133 | "metadata": {},
134 | "outputs": [],
135 | "source": [
136 | "## Enrichment calculation\n",
137 | "\n",
138 | "#Viraat's new calculation\n",
139 | "#Modified 2022/10/04 by Miles to try to avoid NaN values and correct an issue with the background sum, \n",
140 | "#and generally make the code a little more streamlined\n",
141 | "def enrichmentCalc(mtx, dotWindow):\n",
142 | " #Dimension of array side (should be square)\n",
143 | " sideLength = len(mtx)\n",
144 | " #Middle of side length\n",
145 | " midPoint = (sideLength - 1) // 2\n",
146 | " #Half size of box around centre pixel (one pixel smaller if even-sized dot window - don't do this)\n",
147 | " buffer = (dotWindow - 1) // 2\n",
148 | " \n",
149 | " #Get sum of pixels around dot\n",
150 | " dotSum = np.nansum(mtx[midPoint-buffer:midPoint+buffer+1, midPoint-buffer:midPoint+buffer+1])\n",
151 | " \n",
152 | " #Sum the dotWindow-sized boxes at the top-left and bottom-right corners of the matrix, ignoring NaN values\n",
153 | " backgroundSum1 = np.nansum(mtx[0:dotWindow, 0:dotWindow])\n",
154 | " backgroundSum2 = np.nansum(mtx[sideLength-dotWindow:sideLength, sideLength-dotWindow:sideLength])\n",
155 | " \n",
156 | " #Calculate enrichment (NB this assumes all boxes are the same size.\n",
157 | " #If you set an even dotWindow value, they won't be)\n",
158 | " enrichment = dotSum / ((backgroundSum1 + backgroundSum2)/2)\n",
159 | " \n",
160 | " return enrichment"
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "id": "5e1e9fad",
166 | "metadata": {},
167 | "source": [
168 | "## Get the strengths"
169 | ]
170 | },
171 | {
172 | "cell_type": "markdown",
173 | "id": "666fd62a",
174 | "metadata": {},
175 | "source": [
176 | "Function for getting strength of each loop (uses the pileup function from cooltools to do observed/expected)"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": null,
182 | "id": "afd62abd",
183 | "metadata": {},
184 | "outputs": [],
185 | "source": [
186 | "def loopStrengthGet(loop, flankDist, clr, regions, expected, dotWindow):\n",
187 | " loopf = loop.to_frame().T\n",
188 | " loopf = loopf.astype({'start1':'int64','end1':'int64','start2':'int64','end2':'int64'})\n",
189 | " stack = cooltools.pileup(clr, loopf, view_df=regions, expected_df=expected, flank=flankDist)\n",
190 | " mtx = np.nanmean(stack, axis=2)\n",
191 | " enrichment = enrichmentCalc(mtx, dotWindow)\n",
192 | " \n",
193 | " return enrichment"
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "execution_count": null,
199 | "id": "01ca3113",
200 | "metadata": {
201 | "scrolled": true
202 | },
203 | "outputs": [],
204 | "source": [
205 | "#Zip the names and loop info into a dictionary for easier referencing\n",
206 | "loopDict = dict(zip(loopTypesNames, loopTypes))\n",
207 | "#Stop the code if you used an even value for dotWindow, since it won't work\n",
208 | "if dotWindow % 2 == 0:\n",
209 | " print(\"You need to use an odd number for dotWindow in the inputs section\")\n",
210 | "else:\n",
211 | " #Loop through the conditions\n",
212 | " for i, condition in enumerate(conditions):\n",
213 | " #Get the cooler data\n",
214 | " clr = cooler.Cooler(condition+'::/resolutions/'+str(resolution))\n",
215 | " #Get the corresponding expected data\n",
216 | " expected = pd.read_csv(expectedFiles[i], sep='\\t')\n",
217 | "\n",
218 | " #Loop through loopDict\n",
219 | " for loopsName in loopDict:\n",
220 | " #Read out the loops\n",
221 | " loops = loopDict[loopsName]\n",
222 | " #For each row (ie loop), do pileup, get enrichment, write to new column [condition]_strength\n",
223 | " loops[f'{condition}_strength'] = loops.apply(loopStrengthGet, axis = 1, flankDist = flankDist, clr = clr, regions = regions, expected = expected, dotWindow = dotWindow)\n",
224 | "\n",
225 | " loopDict[loopsName] = loops\n",
226 | " "
227 | ]
228 | },
229 | {
230 | "cell_type": "markdown",
231 | "id": "bdc3359f",
232 | "metadata": {},
233 | "source": [
234 | "## Output files - one for each loop type"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": null,
240 | "id": "50c922d5",
241 | "metadata": {},
242 | "outputs": [],
243 | "source": [
244 | "for name, df in loopDict.items():\n",
245 | " df.to_csv(saveDir + name + '.bedpe', sep = '\\t', index = False, header = True)"
246 | ]
247 | }
248 | ],
249 | "metadata": {
250 | "kernelspec": {
251 | "display_name": "Python [conda env:coolpuppy]",
252 | "language": "python",
253 | "name": "conda-env-coolpuppy-py"
254 | },
255 | "language_info": {
256 | "codemirror_mode": {
257 | "name": "ipython",
258 | "version": 3
259 | },
260 | "file_extension": ".py",
261 | "mimetype": "text/x-python",
262 | "name": "python",
263 | "nbconvert_exporter": "python",
264 | "pygments_lexer": "ipython3",
265 | "version": "3.7.12"
266 | }
267 | },
268 | "nbformat": 4,
269 | "nbformat_minor": 5
270 | }
271 |
--------------------------------------------------------------------------------
/MicrocompartmentLoops_PlusMin1kb.bedpe:
--------------------------------------------------------------------------------
1 | chr5 31273000 31275000 chr5 31336200 31338200
2 | chr5 31273000 31275000 chr5 31349200 31351200
3 | chr5 31273000 31275000 chr5 31358700 31360700
4 | chr5 31273000 31275000 chr5 31376700 31378700
5 | chr5 31273000 31275000 chr5 31397100 31399100
6 | chr5 31273000 31275000 chr5 31408000 31410000
7 | chr5 31273000 31275000 chr5 31447500 31449500
8 | chr5 31273000 31275000 chr5 31452200 31454200
9 | chr5 31273000 31275000 chr5 31608600 31610600
10 | chr5 31273000 31275000 chr5 31645300 31647300
11 | chr5 31273000 31275000 chr5 31651000 31653000
12 | chr5 31273000 31275000 chr5 31787500 31789500
13 | chr5 31273000 31275000 chr5 31791000 31793000
14 | chr5 31273000 31275000 chr5 31854000 31856000
15 | chr5 31273000 31275000 chr5 32015400 32017400
16 | chr5 31273000 31275000 chr5 32291000 32293000
17 | chr5 31295500 31297500 chr5 31336200 31338200
18 | chr5 31295500 31297500 chr5 31349200 31351200
19 | chr5 31295500 31297500 chr5 31358700 31360700
20 | chr5 31295500 31297500 chr5 31376700 31378700
21 | chr5 31295500 31297500 chr5 31397100 31399100
22 | chr5 31295500 31297500 chr5 31408000 31410000
23 | chr5 31295500 31297500 chr5 31447500 31449500
24 | chr5 31295500 31297500 chr5 31452200 31454200
25 | chr5 31295500 31297500 chr5 31483000 31485000
26 | chr5 31295500 31297500 chr5 31608600 31610600
27 | chr5 31295500 31297500 chr5 32291000 32293000
28 | chr5 31306700 31308700 chr5 31336200 31338200
29 | chr5 31306700 31308700 chr5 31349200 31351200
30 | chr5 31306700 31308700 chr5 31358700 31360700
31 | chr5 31306700 31308700 chr5 31376700 31378700
32 | chr5 31306700 31308700 chr5 31397100 31399100
33 | chr5 31306700 31308700 chr5 31408000 31410000
34 | chr5 31306700 31308700 chr5 31447500 31449500
35 | chr5 31306700 31308700 chr5 31452200 31454200
36 | chr5 31306700 31308700 chr5 31483000 31485000
37 | chr5 31306700 31308700 chr5 31608600 31610600
38 | chr5 31310500 31312500 chr5 31336200 31338200
39 | chr5 31310500 31312500 chr5 31349200 31351200
40 | chr5 31310500 31312500 chr5 31358700 31360700
41 | chr5 31310500 31312500 chr5 31376700 31378700
42 | chr5 31310500 31312500 chr5 31397100 31399100
43 | chr5 31310500 31312500 chr5 31408000 31410000
44 | chr5 31310500 31312500 chr5 31447500 31449500
45 | chr5 31310500 31312500 chr5 31452200 31454200
46 | chr5 31310500 31312500 chr5 31608600 31610600
47 | chr5 31310500 31312500 chr5 31683000 31685000
48 | chr5 31336200 31338200 chr5 31349200 31351200
49 | chr5 31336200 31338200 chr5 31358700 31360700
50 | chr5 31336200 31338200 chr5 31376700 31378700
51 | chr5 31336200 31338200 chr5 31397100 31399100
52 | chr5 31336200 31338200 chr5 31408000 31410000
53 | chr5 31336200 31338200 chr5 31447500 31449500
54 | chr5 31336200 31338200 chr5 31452200 31454200
55 | chr5 31336200 31338200 chr5 31608600 31610600
56 | chr5 31336200 31338200 chr5 31645300 31647300
57 | chr5 31336200 31338200 chr5 31651000 31653000
58 | chr5 31336200 31338200 chr5 31683000 31685000
59 | chr5 31336200 31338200 chr5 31854000 31856000
60 | chr5 31336200 31338200 chr5 32015400 32017400
61 | chr5 31336200 31338200 chr5 32291000 32293000
62 | chr5 31344650 31346650 chr5 31483000 31485000
63 | chr5 31349200 31351200 chr5 31358700 31360700
64 | chr5 31349200 31351200 chr5 31376700 31378700
65 | chr5 31349200 31351200 chr5 31397100 31399100
66 | chr5 31349200 31351200 chr5 31408000 31410000
67 | chr5 31349200 31351200 chr5 31447500 31449500
68 | chr5 31349200 31351200 chr5 31452200 31454200
69 | chr5 31349200 31351200 chr5 31608600 31610600
70 | chr5 31349200 31351200 chr5 31645300 31647300
71 | chr5 31349200 31351200 chr5 31651000 31653000
72 | chr5 31349200 31351200 chr5 31683000 31685000
73 | chr5 31349200 31351200 chr5 31854000 31856000
74 | chr5 31349200 31351200 chr5 32015400 32017400
75 | chr5 31349200 31351200 chr5 32291000 32293000
76 | chr5 31351450 31353450 chr5 31376700 31378700
77 | chr5 31351450 31353450 chr5 31397100 31399100
78 | chr5 31351450 31353450 chr5 31447500 31449500
79 | chr5 31358700 31360700 chr5 31376700 31378700
80 | chr5 31358700 31360700 chr5 31397100 31399100
81 | chr5 31358700 31360700 chr5 31408000 31410000
82 | chr5 31358700 31360700 chr5 31447500 31449500
83 | chr5 31358700 31360700 chr5 31452200 31454200
84 | chr5 31358700 31360700 chr5 31608600 31610600
85 | chr5 31358700 31360700 chr5 31645300 31647300
86 | chr5 31358700 31360700 chr5 31651000 31653000
87 | chr5 31358700 31360700 chr5 31683000 31685000
88 | chr5 31358700 31360700 chr5 31854000 31856000
89 | chr5 31358700 31360700 chr5 32291000 32293000
90 | chr5 31369650 31371650 chr5 31483000 31485000
91 | chr5 31376700 31378700 chr5 31397100 31399100
92 | chr5 31376700 31378700 chr5 31408000 31410000
93 | chr5 31376700 31378700 chr5 31447500 31449500
94 | chr5 31376700 31378700 chr5 31452200 31454200
95 | chr5 31376700 31378700 chr5 31483000 31485000
96 | chr5 31376700 31378700 chr5 31608600 31610600
97 | chr5 31376700 31378700 chr5 31645300 31647300
98 | chr5 31376700 31378700 chr5 31651000 31653000
99 | chr5 31376700 31378700 chr5 31683000 31685000
100 | chr5 31376700 31378700 chr5 31753200 31755200
101 | chr5 31376700 31378700 chr5 31770300 31772300
102 | chr5 31376700 31378700 chr5 31787500 31789500
103 | chr5 31376700 31378700 chr5 31791000 31793000
104 | chr5 31376700 31378700 chr5 31854000 31856000
105 | chr5 31376700 31378700 chr5 32015400 32017400
106 | chr5 31376700 31378700 chr5 32291000 32293000
107 | chr5 31397100 31399100 chr5 31408000 31410000
108 | chr5 31397100 31399100 chr5 31447500 31449500
109 | chr5 31397100 31399100 chr5 31452200 31454200
110 | chr5 31397100 31399100 chr5 31483000 31485000
111 | chr5 31397100 31399100 chr5 31608600 31610600
112 | chr5 31397100 31399100 chr5 31645300 31647300
113 | chr5 31397100 31399100 chr5 31651000 31653000
114 | chr5 31397100 31399100 chr5 31683000 31685000
115 | chr5 31397100 31399100 chr5 31753200 31755200
116 | chr5 31397100 31399100 chr5 31770300 31772300
117 | chr5 31397100 31399100 chr5 31787500 31789500
118 | chr5 31397100 31399100 chr5 31791000 31793000
119 | chr5 31397100 31399100 chr5 31854000 31856000
120 | chr5 31397100 31399100 chr5 32291000 32293000
121 | chr5 31397100 31399100 chr5 32339700 32341700
122 | chr5 31408000 31410000 chr5 31447500 31449500
123 | chr5 31408000 31410000 chr5 31452200 31454200
124 | chr5 31408000 31410000 chr5 31483000 31485000
125 | chr5 31408000 31410000 chr5 31608600 31610600
126 | chr5 31408000 31410000 chr5 31645300 31647300
127 | chr5 31408000 31410000 chr5 31651000 31653000
128 | chr5 31408000 31410000 chr5 31683000 31685000
129 | chr5 31408000 31410000 chr5 31854000 31856000
130 | chr5 31408000 31410000 chr5 32291000 32293000
131 | chr5 31408000 31410000 chr5 32339700 32341700
132 | chr5 31447500 31449500 chr5 31608600 31610600
133 | chr5 31447500 31449500 chr5 31645300 31647300
134 | chr5 31447500 31449500 chr5 31651000 31653000
135 | chr5 31447500 31449500 chr5 31683000 31685000
136 | chr5 31447500 31449500 chr5 31770300 31772300
137 | chr5 31447500 31449500 chr5 31791000 31793000
138 | chr5 31447500 31449500 chr5 31854000 31856000
139 | chr5 31447500 31449500 chr5 32015400 32017400
140 | chr5 31447500 31449500 chr5 32291000 32293000
141 | chr5 31447500 31449500 chr5 32339700 32341700
142 | chr5 31452200 31454200 chr5 31608600 31610600
143 | chr5 31452200 31454200 chr5 31645300 31647300
144 | chr5 31452200 31454200 chr5 31651000 31653000
145 | chr5 31452200 31454200 chr5 31753200 31755200
146 | chr5 31452200 31454200 chr5 31791000 31793000
147 | chr5 31452200 31454200 chr5 31854000 31856000
148 | chr5 31452200 31454200 chr5 32291000 32293000
149 | chr5 31452200 31454200 chr5 32339700 32341700
150 | chr5 31483000 31485000 chr5 31626700 31628700
151 | chr5 31608600 31610600 chr5 31645300 31647300
152 | chr5 31608600 31610600 chr5 31651000 31653000
153 | chr5 31608600 31610600 chr5 31683000 31685000
154 | chr5 31608600 31610600 chr5 31753200 31755200
155 | chr5 31608600 31610600 chr5 31770300 31772300
156 | chr5 31608600 31610600 chr5 31787500 31789500
157 | chr5 31608600 31610600 chr5 31791000 31793000
158 | chr5 31608600 31610600 chr5 31854000 31856000
159 | chr5 31608600 31610600 chr5 32015400 32017400
160 | chr5 31608600 31610600 chr5 32291000 32293000
161 | chr5 31608600 31610600 chr5 32339700 32341700
162 | chr5 31645300 31647300 chr5 31651000 31653000
163 | chr5 31645300 31647300 chr5 31683000 31685000
164 | chr5 31645300 31647300 chr5 31753200 31755200
165 | chr5 31645300 31647300 chr5 31791000 31793000
166 | chr5 31645300 31647300 chr5 31854000 31856000
167 | chr5 31645300 31647300 chr5 32015400 32017400
168 | chr5 31645300 31647300 chr5 32291000 32293000
169 | chr5 31645300 31647300 chr5 32339700 32341700
170 | chr5 31651000 31653000 chr5 31683000 31685000
171 | chr5 31651000 31653000 chr5 31753200 31755200
172 | chr5 31651000 31653000 chr5 31791000 31793000
173 | chr5 31651000 31653000 chr5 31854000 31856000
174 | chr5 31651000 31653000 chr5 32015400 32017400
175 | chr5 31651000 31653000 chr5 32291000 32293000
176 | chr5 31651000 31653000 chr5 32339700 32341700
177 | chr5 31683000 31685000 chr5 31753200 31755200
178 | chr5 31683000 31685000 chr5 31770300 31772300
179 | chr5 31683000 31685000 chr5 31787500 31789500
180 | chr5 31683000 31685000 chr5 31791000 31793000
181 | chr5 31683000 31685000 chr5 31854000 31856000
182 | chr5 31683000 31685000 chr5 32291000 32293000
183 | chr5 31683000 31685000 chr5 32339700 32341700
184 | chr5 31720000 31722000 chr5 31753200 31755200
185 | chr5 31720000 31722000 chr5 31770300 31772300
186 | chr5 31720000 31722000 chr5 31854000 31856000
187 | chr5 31720000 31722000 chr5 32083000 32085000
188 | chr5 31720000 31722000 chr5 32291000 32293000
189 | chr5 31720000 31722000 chr5 32339700 32341700
190 | chr5 31720000 31722000 chr5 32354200 32356200
191 | chr5 31720000 31722000 chr5 32372000 32374000
192 | chr5 31753200 31755200 chr5 31770300 31772300
193 | chr5 31753200 31755200 chr5 31787500 31789500
194 | chr5 31753200 31755200 chr5 31791000 31793000
195 | chr5 31753200 31755200 chr5 31854000 31856000
196 | chr5 31753200 31755200 chr5 32015400 32017400
197 | chr5 31753200 31755200 chr5 32083000 32085000
198 | chr5 31753200 31755200 chr5 32291000 32293000
199 | chr5 31753200 31755200 chr5 32339700 32341700
200 | chr5 31753200 31755200 chr5 32354200 32356200
201 | chr5 31753200 31755200 chr5 32372000 32374000
202 | chr5 31759000 31761000 chr5 31770300 31772300
203 | chr5 31759000 31761000 chr5 31787500 31789500
204 | chr5 31759000 31761000 chr5 31791000 31793000
205 | chr5 31759000 31761000 chr5 31854000 31856000
206 | chr5 31759000 31761000 chr5 32291000 32293000
207 | chr5 31759000 31761000 chr5 32339700 32341700
208 | chr5 31759000 31761000 chr5 32354200 32356200
209 | chr5 31759000 31761000 chr5 32372000 32374000
210 | chr5 31767000 31769000 chr5 31791000 31793000
211 | chr5 31767000 31769000 chr5 31854000 31856000
212 | chr5 31767000 31769000 chr5 32291000 32293000
213 | chr5 31767000 31769000 chr5 32339700 32341700
214 | chr5 31770300 31772300 chr5 31791000 31793000
215 | chr5 31770300 31772300 chr5 31854000 31856000
216 | chr5 31770300 31772300 chr5 32015400 32017400
217 | chr5 31770300 31772300 chr5 32291000 32293000
218 | chr5 31770300 31772300 chr5 32339700 32341700
219 | chr5 31770300 31772300 chr5 32354200 32356200
220 | chr5 31787500 31789500 chr5 31854000 31856000
221 | chr5 31787500 31789500 chr5 32015400 32017400
222 | chr5 31787500 31789500 chr5 32291000 32293000
223 | chr5 31787500 31789500 chr5 32339700 32341700
224 | chr5 31787500 31789500 chr5 32354200 32356200
225 | chr5 31791000 31793000 chr5 31854000 31856000
226 | chr5 31791000 31793000 chr5 32015400 32017400
227 | chr5 31791000 31793000 chr5 32291000 32293000
228 | chr5 31791000 31793000 chr5 32339700 32341700
229 | chr5 31791000 31793000 chr5 32354200 32356200
230 | chr5 31854000 31856000 chr5 32015400 32017400
231 | chr5 31854000 31856000 chr5 32291000 32293000
232 | chr5 31854000 31856000 chr5 32339700 32341700
233 | chr5 31854000 31856000 chr5 32354200 32356200
234 | chr5 31854000 31856000 chr5 32372000 32374000
235 | chr5 31865800 31867800 chr5 32296200 32298200
236 | chr5 31865800 31867800 chr5 32354200 32356200
237 | chr5 32015400 32017400 chr5 32291000 32293000
238 | chr5 32083000 32085000 chr5 32291000 32293000
239 | chr5 32083000 32085000 chr5 32296200 32298200
240 | chr5 32083000 32085000 chr5 32354200 32356200
241 | chr5 32174000 32176000 chr5 32291000 32293000
242 | chr5 32174000 32176000 chr5 32339700 32341700
243 | chr5 32291000 32293000 chr5 32339700 32341700
244 | chr5 32291000 32293000 chr5 32354200 32356200
245 | chr5 32291000 32293000 chr5 32372000 32374000
246 | chr5 32296200 32298200 chr5 32354200 32356200
247 | chr5 32339700 32341700 chr5 32354200 32356200
248 | chr5 32339700 32341700 chr5 32372000 32374000
249 | chr5 32354200 32356200 chr5 32372000 32374000
250 | chr8 84856500 84858500 chr8 84873500 84875500
251 | chr8 84873500 84875500 chr8 84906300 84908300
252 | chr8 84873500 84875500 chr8 84912250 84914250
253 | chr8 84873500 84875500 chr8 84917700 84919700
254 | chr8 84873500 84875500 chr8 84924200 84926200
255 | chr8 84873500 84875500 chr8 84936250 84938250
256 | chr8 84873500 84875500 chr8 84949150 84951150
257 | chr8 84873500 84875500 chr8 84962800 84964800
258 | chr8 84873500 84875500 chr8 84975300 84977300
259 | chr8 84873500 84875500 chr8 84983000 84985000
260 | chr8 84873500 84875500 chr8 84996000 84998000
261 | chr8 84873500 84875500 chr8 85011500 85013500
262 | chr8 84873500 84875500 chr8 85083500 85085500
263 | chr8 84873500 84875500 chr8 85141000 85143000
264 | chr8 84873500 84875500 chr8 85227800 85229800
265 | chr8 84873500 84875500 chr8 85285000 85287000
266 | chr8 84873500 84875500 chr8 85327000 85329000
267 | chr8 84873500 84875500 chr8 85359000 85361000
268 | chr8 84873500 84875500 chr8 85365000 85367000
269 | chr8 84873500 84875500 chr8 85373100 85375100
270 | chr8 84873500 84875500 chr8 85375800 85377800
271 | chr8 84873500 84875500 chr8 85378600 85380600
272 | chr8 84873500 84875500 chr8 85386000 85388000
273 | chr8 84873500 84875500 chr8 85388300 85390300
274 | chr8 84873500 84875500 chr8 85389700 85391700
275 | chr8 84873500 84875500 chr8 85412800 85414800
276 | chr8 84873500 84875500 chr8 85414700 85416700
277 | chr8 84873500 84875500 chr8 85448750 85450750
278 | chr8 84873500 84875500 chr8 85467800 85469800
279 | chr8 84873500 84875500 chr8 85526000 85528000
280 | chr8 84873500 84875500 chr8 85566300 85568300
281 | chr8 84873500 84875500 chr8 85572600 85574600
282 | chr8 84873500 84875500 chr8 85582500 85584500
283 | chr8 84873500 84875500 chr8 85620000 85622000
284 | chr8 84873500 84875500 chr8 85629000 85631000
285 | chr8 84873500 84875500 chr8 85641000 85643000
286 | chr8 84873500 84875500 chr8 85751500 85753500
287 | chr8 84873500 84875500 chr8 85762400 85764400
288 | chr8 84906300 84908300 chr8 84917700 84919700
289 | chr8 84906300 84908300 chr8 84924200 84926200
290 | chr8 84906300 84908300 chr8 84936250 84938250
291 | chr8 84906300 84908300 chr8 84949150 84951150
292 | chr8 84906300 84908300 chr8 84962800 84964800
293 | chr8 84906300 84908300 chr8 84983000 84985000
294 | chr8 84906300 84908300 chr8 84996000 84998000
295 | chr8 84906300 84908300 chr8 85083500 85085500
296 | chr8 84906300 84908300 chr8 85141000 85143000
297 | chr8 84906300 84908300 chr8 85227800 85229800
298 | chr8 84906300 84908300 chr8 85265000 85267000
299 | chr8 84906300 84908300 chr8 85285000 85287000
300 | chr8 84906300 84908300 chr8 85327000 85329000
301 | chr8 84906300 84908300 chr8 85365000 85367000
302 | chr8 84906300 84908300 chr8 85375800 85377800
303 | chr8 84906300 84908300 chr8 85378600 85380600
304 | chr8 84906300 84908300 chr8 85389700 85391700
305 | chr8 84906300 84908300 chr8 85412800 85414800
306 | chr8 84906300 84908300 chr8 85526000 85528000
307 | chr8 84906300 84908300 chr8 85572600 85574600
308 | chr8 84906300 84908300 chr8 85702000 85704000
309 | chr8 84912250 84914250 chr8 84936250 84938250
310 | chr8 84912250 84914250 chr8 84949150 84951150
311 | chr8 84912250 84914250 chr8 84956550 84958550
312 | chr8 84912250 84914250 chr8 84962800 84964800
313 | chr8 84912250 84914250 chr8 84983000 84985000
314 | chr8 84912250 84914250 chr8 84996000 84998000
315 | chr8 84912250 84914250 chr8 85011500 85013500
316 | chr8 84912250 84914250 chr8 85083500 85085500
317 | chr8 84912250 84914250 chr8 85122000 85124000
318 | chr8 84912250 84914250 chr8 85141000 85143000
319 | chr8 84912250 84914250 chr8 85227800 85229800
320 | chr8 84912250 84914250 chr8 85327000 85329000
321 | chr8 84912250 84914250 chr8 85365000 85367000
322 | chr8 84912250 84914250 chr8 85373100 85375100
323 | chr8 84912250 84914250 chr8 85375800 85377800
324 | chr8 84912250 84914250 chr8 85378600 85380600
325 | chr8 84912250 84914250 chr8 85389700 85391700
326 | chr8 84912250 84914250 chr8 85412800 85414800
327 | chr8 84912250 84914250 chr8 85526000 85528000
328 | chr8 84912250 84914250 chr8 85572600 85574600
329 | chr8 84912250 84914250 chr8 85702000 85704000
330 | chr8 84917700 84919700 chr8 84962800 84964800
331 | chr8 84917700 84919700 chr8 84983000 84985000
332 | chr8 84917700 84919700 chr8 84996000 84998000
333 | chr8 84917700 84919700 chr8 85011500 85013500
334 | chr8 84917700 84919700 chr8 85083500 85085500
335 | chr8 84917700 84919700 chr8 85122000 85124000
336 | chr8 84917700 84919700 chr8 85141000 85143000
337 | chr8 84917700 84919700 chr8 85227800 85229800
338 | chr8 84917700 84919700 chr8 85327000 85329000
339 | chr8 84917700 84919700 chr8 85365000 85367000
340 | chr8 84917700 84919700 chr8 85373100 85375100
341 | chr8 84917700 84919700 chr8 85375800 85377800
342 | chr8 84917700 84919700 chr8 85378600 85380600
343 | chr8 84917700 84919700 chr8 85386000 85388000
344 | chr8 84917700 84919700 chr8 85388300 85390300
345 | chr8 84917700 84919700 chr8 85389700 85391700
346 | chr8 84917700 84919700 chr8 85404000 85406000
347 | chr8 84917700 84919700 chr8 85412800 85414800
348 | chr8 84917700 84919700 chr8 85526000 85528000
349 | chr8 84917700 84919700 chr8 85536500 85538500
350 | chr8 84917700 84919700 chr8 85557500 85559500
351 | chr8 84917700 84919700 chr8 85572600 85574600
352 | chr8 84917700 84919700 chr8 85597000 85599000
353 | chr8 84917700 84919700 chr8 85629000 85631000
354 | chr8 84917700 84919700 chr8 85641000 85643000
355 | chr8 84917700 84919700 chr8 85702000 85704000
356 | chr8 84917700 84919700 chr8 85715600 85717600
357 | chr8 84917700 84919700 chr8 85751500 85753500
358 | chr8 84917700 84919700 chr8 85762400 85764400
359 | chr8 84917700 84919700 chr8 85797300 85799300
360 | chr8 84924200 84926200 chr8 84962800 84964800
361 | chr8 84924200 84926200 chr8 84983000 84985000
362 | chr8 84924200 84926200 chr8 84996000 84998000
363 | chr8 84924200 84926200 chr8 85083500 85085500
364 | chr8 84924200 84926200 chr8 85122000 85124000
365 | chr8 84924200 84926200 chr8 85141000 85143000
366 | chr8 84924200 84926200 chr8 85227800 85229800
367 | chr8 84924200 84926200 chr8 85365000 85367000
368 | chr8 84924200 84926200 chr8 85373100 85375100
369 | chr8 84924200 84926200 chr8 85375800 85377800
370 | chr8 84924200 84926200 chr8 85378600 85380600
371 | chr8 84924200 84926200 chr8 85386000 85388000
372 | chr8 84924200 84926200 chr8 85388300 85390300
373 | chr8 84924200 84926200 chr8 85389700 85391700
374 | chr8 84924200 84926200 chr8 85412800 85414800
375 | chr8 84924200 84926200 chr8 85526000 85528000
376 | chr8 84924200 84926200 chr8 85557500 85559500
377 | chr8 84924200 84926200 chr8 85572600 85574600
378 | chr8 84924200 84926200 chr8 85629000 85631000
379 | chr8 84924200 84926200 chr8 85641000 85643000
380 | chr8 84924200 84926200 chr8 85702000 85704000
381 | chr8 84924200 84926200 chr8 85715600 85717600
382 | chr8 84924200 84926200 chr8 85751500 85753500
383 | chr8 84936250 84938250 chr8 84962800 84964800
384 | chr8 84936250 84938250 chr8 84983000 84985000
385 | chr8 84936250 84938250 chr8 84996000 84998000
386 | chr8 84936250 84938250 chr8 85011500 85013500
387 | chr8 84936250 84938250 chr8 85141000 85143000
388 | chr8 84936250 84938250 chr8 85227800 85229800
389 | chr8 84949150 84951150 chr8 84962800 84964800
390 | chr8 84949150 84951150 chr8 84983000 84985000
391 | chr8 84949150 84951150 chr8 84996000 84998000
392 | chr8 84949150 84951150 chr8 85141000 85143000
393 | chr8 84949150 84951150 chr8 85227800 85229800
394 | chr8 84956550 84958550 chr8 84983000 84985000
395 | chr8 84962800 84964800 chr8 84975300 84977300
396 | chr8 84962800 84964800 chr8 84983000 84985000
397 | chr8 84962800 84964800 chr8 84996000 84998000
398 | chr8 84962800 84964800 chr8 85083500 85085500
399 | chr8 84962800 84964800 chr8 85141000 85143000
400 | chr8 84962800 84964800 chr8 85227800 85229800
401 | chr8 84962800 84964800 chr8 85285000 85287000
402 | chr8 84962800 84964800 chr8 85327000 85329000
403 | chr8 84962800 84964800 chr8 85365000 85367000
404 | chr8 84962800 84964800 chr8 85373100 85375100
405 | chr8 84962800 84964800 chr8 85378600 85380600
406 | chr8 84962800 84964800 chr8 85388300 85390300
407 | chr8 84962800 84964800 chr8 85412800 85414800
408 | chr8 84962800 84964800 chr8 85526000 85528000
409 | chr8 84962800 84964800 chr8 85536500 85538500
410 | chr8 84962800 84964800 chr8 85566300 85568300
411 | chr8 84962800 84964800 chr8 85572600 85574600
412 | chr8 84962800 84964800 chr8 85629000 85631000
413 | chr8 84962800 84964800 chr8 85633600 85635600
414 | chr8 84962800 84964800 chr8 85641000 85643000
415 | chr8 84975300 84977300 chr8 84983000 84985000
416 | chr8 84975300 84977300 chr8 84996000 84998000
417 | chr8 84975300 84977300 chr8 85083500 85085500
418 | chr8 84975300 84977300 chr8 85141000 85143000
419 | chr8 84975300 84977300 chr8 85227800 85229800
420 | chr8 84975300 84977300 chr8 85327000 85329000
421 | chr8 84975300 84977300 chr8 85365000 85367000
422 | chr8 84975300 84977300 chr8 85373100 85375100
423 | chr8 84975300 84977300 chr8 85375800 85377800
424 | chr8 84975300 84977300 chr8 85386000 85388000
425 | chr8 84975300 84977300 chr8 85388300 85390300
426 | chr8 84975300 84977300 chr8 85389700 85391700
427 | chr8 84975300 84977300 chr8 85412800 85414800
428 | chr8 84975300 84977300 chr8 85566300 85568300
429 | chr8 84975300 84977300 chr8 85572600 85574600
430 | chr8 84975300 84977300 chr8 85629000 85631000
431 | chr8 84975300 84977300 chr8 85641000 85643000
432 | chr8 84983000 84985000 chr8 84996000 84998000
433 | chr8 84983000 84985000 chr8 85083500 85085500
434 | chr8 84983000 84985000 chr8 85141000 85143000
435 | chr8 84983000 84985000 chr8 85227800 85229800
436 | chr8 84983000 84985000 chr8 85285000 85287000
437 | chr8 84983000 84985000 chr8 85296000 85298000
438 | chr8 84983000 84985000 chr8 85305400 85307400
439 | chr8 84983000 84985000 chr8 85317600 85319600
440 | chr8 84983000 84985000 chr8 85323000 85325000
441 | chr8 84983000 84985000 chr8 85327000 85329000
442 | chr8 84983000 84985000 chr8 85365000 85367000
443 | chr8 84983000 84985000 chr8 85373100 85375100
444 | chr8 84983000 84985000 chr8 85375800 85377800
445 | chr8 84983000 84985000 chr8 85378600 85380600
446 | chr8 84983000 84985000 chr8 85386000 85388000
447 | chr8 84983000 84985000 chr8 85388300 85390300
448 | chr8 84983000 84985000 chr8 85389700 85391700
449 | chr8 84983000 84985000 chr8 85412800 85414800
450 | chr8 84983000 84985000 chr8 85430200 85432200
451 | chr8 84983000 84985000 chr8 85433000 85435000
452 | chr8 84983000 84985000 chr8 85526000 85528000
453 | chr8 84983000 84985000 chr8 85566300 85568300
454 | chr8 84983000 84985000 chr8 85572600 85574600
455 | chr8 84983000 84985000 chr8 85597000 85599000
456 | chr8 84983000 84985000 chr8 85629000 85631000
457 | chr8 84983000 84985000 chr8 85633600 85635600
458 | chr8 84983000 84985000 chr8 85641000 85643000
459 | chr8 84996000 84998000 chr8 85083500 85085500
460 | chr8 84996000 84998000 chr8 85122000 85124000
461 | chr8 84996000 84998000 chr8 85141000 85143000
462 | chr8 84996000 84998000 chr8 85227800 85229800
463 | chr8 84996000 84998000 chr8 85285000 85287000
464 | chr8 84996000 84998000 chr8 85305400 85307400
465 | chr8 84996000 84998000 chr8 85323000 85325000
466 | chr8 84996000 84998000 chr8 85327000 85329000
467 | chr8 84996000 84998000 chr8 85365000 85367000
468 | chr8 84996000 84998000 chr8 85373100 85375100
469 | chr8 84996000 84998000 chr8 85375800 85377800
470 | chr8 84996000 84998000 chr8 85378600 85380600
471 | chr8 84996000 84998000 chr8 85386000 85388000
472 | chr8 84996000 84998000 chr8 85388300 85390300
473 | chr8 84996000 84998000 chr8 85389700 85391700
474 | chr8 84996000 84998000 chr8 85412800 85414800
475 | chr8 84996000 84998000 chr8 85430200 85432200
476 | chr8 84996000 84998000 chr8 85433000 85435000
477 | chr8 84996000 84998000 chr8 85448750 85450750
478 | chr8 84996000 84998000 chr8 85455750 85457750
479 | chr8 84996000 84998000 chr8 85526000 85528000
480 | chr8 84996000 84998000 chr8 85536500 85538500
481 | chr8 84996000 84998000 chr8 85566300 85568300
482 | chr8 84996000 84998000 chr8 85572600 85574600
483 | chr8 84996000 84998000 chr8 85597000 85599000
484 | chr8 84996000 84998000 chr8 85620000 85622000
485 | chr8 84996000 84998000 chr8 85629000 85631000
486 | chr8 84996000 84998000 chr8 85633600 85635600
487 | chr8 84996000 84998000 chr8 85641000 85643000
488 | chr8 84996000 84998000 chr8 85751500 85753500
489 | chr8 85011500 85013500 chr8 85083500 85085500
490 | chr8 85014650 85016650 chr8 85083500 85085500
491 | chr8 85017500 85019500 chr8 85083500 85085500
492 | chr8 85017500 85019500 chr8 85296000 85298000
493 | chr8 85017500 85019500 chr8 85320000 85322000
494 | chr8 85083500 85085500 chr8 85141000 85143000
495 | chr8 85083500 85085500 chr8 85227800 85229800
496 | chr8 85083500 85085500 chr8 85285000 85287000
497 | chr8 85083500 85085500 chr8 85296000 85298000
498 | chr8 85083500 85085500 chr8 85320000 85322000
499 | chr8 85083500 85085500 chr8 85365000 85367000
500 | chr8 85083500 85085500 chr8 85373100 85375100
501 | chr8 85083500 85085500 chr8 85375800 85377800
502 | chr8 85089500 85091500 chr8 85141000 85143000
503 | chr8 85089500 85091500 chr8 85171500 85173500
504 | chr8 85089500 85091500 chr8 85179500 85181500
505 | chr8 85089500 85091500 chr8 85207000 85209000
506 | chr8 85089500 85091500 chr8 85227800 85229800
507 | chr8 85089500 85091500 chr8 85235250 85237250
508 | chr8 85089500 85091500 chr8 85239700 85241700
509 | chr8 85089500 85091500 chr8 85262700 85264700
510 | chr8 85089500 85091500 chr8 85279800 85281800
511 | chr8 85089500 85091500 chr8 85293400 85295400
512 | chr8 85089500 85091500 chr8 85296000 85298000
513 | chr8 85089500 85091500 chr8 85321200 85323200
514 | chr8 85089500 85091500 chr8 85365000 85367000
515 | chr8 85089500 85091500 chr8 85373100 85375100
516 | chr8 85122000 85124000 chr8 85141000 85143000
517 | chr8 85122000 85124000 chr8 85227800 85229800
518 | chr8 85122000 85124000 chr8 85365000 85367000
519 | chr8 85122000 85124000 chr8 85373100 85375100
520 | chr8 85122000 85124000 chr8 85375800 85377800
521 | chr8 85122000 85124000 chr8 85378600 85380600
522 | chr8 85122000 85124000 chr8 85414700 85416700
523 | chr8 85141000 85143000 chr8 85227800 85229800
524 | chr8 85141000 85143000 chr8 85265000 85267000
525 | chr8 85141000 85143000 chr8 85285000 85287000
526 | chr8 85141000 85143000 chr8 85296000 85298000
527 | chr8 85141000 85143000 chr8 85305400 85307400
528 | chr8 85141000 85143000 chr8 85327000 85329000
529 | chr8 85141000 85143000 chr8 85365000 85367000
530 | chr8 85141000 85143000 chr8 85373100 85375100
531 | chr8 85141000 85143000 chr8 85375800 85377800
532 | chr8 85141000 85143000 chr8 85378600 85380600
533 | chr8 85141000 85143000 chr8 85386000 85388000
534 | chr8 85141000 85143000 chr8 85388300 85390300
535 | chr8 85141000 85143000 chr8 85389700 85391700
536 | chr8 85141000 85143000 chr8 85412800 85414800
537 | chr8 85141000 85143000 chr8 85526000 85528000
538 | chr8 85141000 85143000 chr8 85572600 85574600
539 | chr8 85141000 85143000 chr8 85597000 85599000
540 | chr8 85141000 85143000 chr8 85629000 85631000
541 | chr8 85141000 85143000 chr8 85641000 85643000
542 | chr8 85227800 85229800 chr8 85265000 85267000
543 | chr8 85227800 85229800 chr8 85285000 85287000
544 | chr8 85227800 85229800 chr8 85296000 85298000
545 | chr8 85227800 85229800 chr8 85327000 85329000
546 | chr8 85227800 85229800 chr8 85365000 85367000
547 | chr8 85227800 85229800 chr8 85373100 85375100
548 | chr8 85227800 85229800 chr8 85375800 85377800
549 | chr8 85227800 85229800 chr8 85378600 85380600
550 | chr8 85227800 85229800 chr8 85386000 85388000
551 | chr8 85227800 85229800 chr8 85388300 85390300
552 | chr8 85227800 85229800 chr8 85389700 85391700
553 | chr8 85227800 85229800 chr8 85412800 85414800
554 | chr8 85227800 85229800 chr8 85430200 85432200
555 | chr8 85227800 85229800 chr8 85448750 85450750
556 | chr8 85227800 85229800 chr8 85526000 85528000
557 | chr8 85227800 85229800 chr8 85536500 85538500
558 | chr8 85227800 85229800 chr8 85557500 85559500
559 | chr8 85227800 85229800 chr8 85566300 85568300
560 | chr8 85227800 85229800 chr8 85572600 85574600
561 | chr8 85227800 85229800 chr8 85597000 85599000
562 | chr8 85227800 85229800 chr8 85620000 85622000
563 | chr8 85227800 85229800 chr8 85629000 85631000
564 | chr8 85227800 85229800 chr8 85633600 85635600
565 | chr8 85227800 85229800 chr8 85641000 85643000
566 | chr8 85227800 85229800 chr8 85715600 85717600
567 | chr8 85227800 85229800 chr8 85751500 85753500
568 | chr8 85227800 85229800 chr8 85762400 85764400
569 | chr8 85227800 85229800 chr8 85797300 85799300
570 | chr8 85285000 85287000 chr8 85365000 85367000
571 | chr8 85285000 85287000 chr8 85526000 85528000
572 | chr8 85285000 85287000 chr8 85536500 85538500
573 | chr8 85285000 85287000 chr8 85557500 85559500
574 | chr8 85305400 85307400 chr8 85320000 85322000
575 | chr8 85305400 85307400 chr8 85365000 85367000
576 | chr8 85305400 85307400 chr8 85536500 85538500
577 | chr8 85305400 85307400 chr8 85538700 85540700
578 | chr8 85317600 85319600 chr8 85538700 85540700
579 | chr8 85323000 85325000 chr8 85538700 85540700
580 | chr8 85327000 85329000 chr8 85365000 85367000
581 | chr8 85327000 85329000 chr8 85373100 85375100
582 | chr8 85327000 85329000 chr8 85375800 85377800
583 | chr8 85327000 85329000 chr8 85378600 85380600
584 | chr8 85327000 85329000 chr8 85386000 85388000
585 | chr8 85327000 85329000 chr8 85389700 85391700
586 | chr8 85327000 85329000 chr8 85412800 85414800
587 | chr8 85327000 85329000 chr8 85526000 85528000
588 | chr8 85327000 85329000 chr8 85572600 85574600
589 | chr8 85327000 85329000 chr8 85629000 85631000
590 | chr8 85327000 85329000 chr8 85641000 85643000
591 | chr8 85365000 85367000 chr8 85412800 85414800
592 | chr8 85365000 85367000 chr8 85414700 85416700
593 | chr8 85365000 85367000 chr8 85430200 85432200
594 | chr8 85365000 85367000 chr8 85435000 85437000
595 | chr8 85365000 85367000 chr8 85448750 85450750
596 | chr8 85365000 85367000 chr8 85455750 85457750
597 | chr8 85365000 85367000 chr8 85463300 85465300
598 | chr8 85365000 85367000 chr8 85467800 85469800
599 | chr8 85365000 85367000 chr8 85526000 85528000
600 | chr8 85365000 85367000 chr8 85536500 85538500
601 | chr8 85365000 85367000 chr8 85538700 85540700
602 | chr8 85365000 85367000 chr8 85541200 85543200
603 | chr8 85365000 85367000 chr8 85557500 85559500
604 | chr8 85365000 85367000 chr8 85572600 85574600
605 | chr8 85365000 85367000 chr8 85597000 85599000
606 | chr8 85365000 85367000 chr8 85620000 85622000
607 | chr8 85365000 85367000 chr8 85629000 85631000
608 | chr8 85365000 85367000 chr8 85633600 85635600
609 | chr8 85365000 85367000 chr8 85641000 85643000
610 | chr8 85365000 85367000 chr8 85691000 85693000
611 | chr8 85365000 85367000 chr8 85695000 85697000
612 | chr8 85365000 85367000 chr8 85702000 85704000
613 | chr8 85365000 85367000 chr8 85711500 85713500
614 | chr8 85365000 85367000 chr8 85715600 85717600
615 | chr8 85365000 85367000 chr8 85751500 85753500
616 | chr8 85365000 85367000 chr8 85762400 85764400
617 | chr8 85365000 85367000 chr8 85797300 85799300
618 | chr8 85373100 85375100 chr8 85412800 85414800
619 | chr8 85373100 85375100 chr8 85414700 85416700
620 | chr8 85373100 85375100 chr8 85448750 85450750
621 | chr8 85373100 85375100 chr8 85455750 85457750
622 | chr8 85373100 85375100 chr8 85463300 85465300
623 | chr8 85373100 85375100 chr8 85467800 85469800
624 | chr8 85373100 85375100 chr8 85526000 85528000
625 | chr8 85373100 85375100 chr8 85557500 85559500
626 | chr8 85373100 85375100 chr8 85566300 85568300
627 | chr8 85373100 85375100 chr8 85572600 85574600
628 | chr8 85373100 85375100 chr8 85597000 85599000
629 | chr8 85373100 85375100 chr8 85620000 85622000
630 | chr8 85373100 85375100 chr8 85629000 85631000
631 | chr8 85373100 85375100 chr8 85633600 85635600
632 | chr8 85373100 85375100 chr8 85641000 85643000
633 | chr8 85373100 85375100 chr8 85691000 85693000
634 | chr8 85373100 85375100 chr8 85695000 85697000
635 | chr8 85373100 85375100 chr8 85702000 85704000
636 | chr8 85373100 85375100 chr8 85711500 85713500
637 | chr8 85373100 85375100 chr8 85715600 85717600
638 | chr8 85373100 85375100 chr8 85751500 85753500
639 | chr8 85373100 85375100 chr8 85762400 85764400
640 | chr8 85373100 85375100 chr8 85785750 85787750
641 | chr8 85373100 85375100 chr8 85797300 85799300
642 | chr8 85375800 85377800 chr8 85412800 85414800
643 | chr8 85375800 85377800 chr8 85414700 85416700
644 | chr8 85375800 85377800 chr8 85448750 85450750
645 | chr8 85375800 85377800 chr8 85455750 85457750
646 | chr8 85375800 85377800 chr8 85463300 85465300
647 | chr8 85375800 85377800 chr8 85467800 85469800
648 | chr8 85375800 85377800 chr8 85526000 85528000
649 | chr8 85375800 85377800 chr8 85557500 85559500
650 | chr8 85375800 85377800 chr8 85566300 85568300
651 | chr8 85375800 85377800 chr8 85572600 85574600
652 | chr8 85375800 85377800 chr8 85597000 85599000
653 | chr8 85375800 85377800 chr8 85629000 85631000
654 | chr8 85375800 85377800 chr8 85633600 85635600
655 | chr8 85375800 85377800 chr8 85641000 85643000
656 | chr8 85375800 85377800 chr8 85691000 85693000
657 | chr8 85375800 85377800 chr8 85702000 85704000
658 | chr8 85375800 85377800 chr8 85711500 85713500
659 | chr8 85375800 85377800 chr8 85715600 85717600
660 | chr8 85375800 85377800 chr8 85751500 85753500
661 | chr8 85375800 85377800 chr8 85762400 85764400
662 | chr8 85375800 85377800 chr8 85797300 85799300
663 | chr8 85378600 85380600 chr8 85412800 85414800
664 | chr8 85378600 85380600 chr8 85414700 85416700
665 | chr8 85378600 85380600 chr8 85430200 85432200
666 | chr8 85378600 85380600 chr8 85448750 85450750
667 | chr8 85378600 85380600 chr8 85455750 85457750
668 | chr8 85378600 85380600 chr8 85463300 85465300
669 | chr8 85378600 85380600 chr8 85467800 85469800
670 | chr8 85378600 85380600 chr8 85557500 85559500
671 | chr8 85378600 85380600 chr8 85566300 85568300
672 | chr8 85378600 85380600 chr8 85572600 85574600
673 | chr8 85378600 85380600 chr8 85597000 85599000
674 | chr8 85378600 85380600 chr8 85620000 85622000
675 | chr8 85378600 85380600 chr8 85629000 85631000
676 | chr8 85378600 85380600 chr8 85633600 85635600
677 | chr8 85378600 85380600 chr8 85641000 85643000
678 | chr8 85378600 85380600 chr8 85691000 85693000
679 | chr8 85378600 85380600 chr8 85695000 85697000
680 | chr8 85378600 85380600 chr8 85702000 85704000
681 | chr8 85378600 85380600 chr8 85711500 85713500
682 | chr8 85378600 85380600 chr8 85715600 85717600
683 | chr8 85378600 85380600 chr8 85751500 85753500
684 | chr8 85378600 85380600 chr8 85762400 85764400
685 | chr8 85378600 85380600 chr8 85797300 85799300
686 | chr8 85386000 85388000 chr8 85412800 85414800
687 | chr8 85386000 85388000 chr8 85430200 85432200
688 | chr8 85386000 85388000 chr8 85448750 85450750
689 | chr8 85386000 85388000 chr8 85455750 85457750
690 | chr8 85386000 85388000 chr8 85463300 85465300
691 | chr8 85386000 85388000 chr8 85467800 85469800
692 | chr8 85386000 85388000 chr8 85526000 85528000
693 | chr8 85386000 85388000 chr8 85557500 85559500
694 | chr8 85386000 85388000 chr8 85566300 85568300
695 | chr8 85386000 85388000 chr8 85572600 85574600
696 | chr8 85386000 85388000 chr8 85597000 85599000
697 | chr8 85386000 85388000 chr8 85629000 85631000
698 | chr8 85386000 85388000 chr8 85633600 85635600
699 | chr8 85386000 85388000 chr8 85641000 85643000
700 | chr8 85386000 85388000 chr8 85691000 85693000
701 | chr8 85386000 85388000 chr8 85695000 85697000
702 | chr8 85386000 85388000 chr8 85702000 85704000
703 | chr8 85386000 85388000 chr8 85711500 85713500
704 | chr8 85386000 85388000 chr8 85715600 85717600
705 | chr8 85386000 85388000 chr8 85751500 85753500
706 | chr8 85386000 85388000 chr8 85762400 85764400
707 | chr8 85386000 85388000 chr8 85797300 85799300
708 | chr8 85388300 85390300 chr8 85412800 85414800
709 | chr8 85388300 85390300 chr8 85448750 85450750
710 | chr8 85388300 85390300 chr8 85526000 85528000
711 | chr8 85388300 85390300 chr8 85557500 85559500
712 | chr8 85388300 85390300 chr8 85566300 85568300
713 | chr8 85388300 85390300 chr8 85572600 85574600
714 | chr8 85388300 85390300 chr8 85597000 85599000
715 | chr8 85388300 85390300 chr8 85629000 85631000
716 | chr8 85388300 85390300 chr8 85633600 85635600
717 | chr8 85388300 85390300 chr8 85641000 85643000
718 | chr8 85388300 85390300 chr8 85691000 85693000
719 | chr8 85388300 85390300 chr8 85695000 85697000
720 | chr8 85388300 85390300 chr8 85702000 85704000
721 | chr8 85388300 85390300 chr8 85711500 85713500
722 | chr8 85388300 85390300 chr8 85715600 85717600
723 | chr8 85388300 85390300 chr8 85751500 85753500
724 | chr8 85388300 85390300 chr8 85762400 85764400
725 | chr8 85388300 85390300 chr8 85797300 85799300
726 | chr8 85389700 85391700 chr8 85412800 85414800
727 | chr8 85389700 85391700 chr8 85414700 85416700
728 | chr8 85389700 85391700 chr8 85430200 85432200
729 | chr8 85389700 85391700 chr8 85435000 85437000
730 | chr8 85389700 85391700 chr8 85448750 85450750
731 | chr8 85389700 85391700 chr8 85455750 85457750
732 | chr8 85389700 85391700 chr8 85463300 85465300
733 | chr8 85389700 85391700 chr8 85467800 85469800
734 | chr8 85389700 85391700 chr8 85526000 85528000
735 | chr8 85389700 85391700 chr8 85549300 85551300
736 | chr8 85389700 85391700 chr8 85557500 85559500
737 | chr8 85389700 85391700 chr8 85566300 85568300
738 | chr8 85389700 85391700 chr8 85572600 85574600
739 | chr8 85389700 85391700 chr8 85597000 85599000
740 | chr8 85389700 85391700 chr8 85620000 85622000
741 | chr8 85389700 85391700 chr8 85629000 85631000
742 | chr8 85389700 85391700 chr8 85633600 85635600
743 | chr8 85389700 85391700 chr8 85641000 85643000
744 | chr8 85389700 85391700 chr8 85691000 85693000
745 | chr8 85389700 85391700 chr8 85695000 85697000
746 | chr8 85389700 85391700 chr8 85702000 85704000
747 | chr8 85389700 85391700 chr8 85711500 85713500
748 | chr8 85389700 85391700 chr8 85715600 85717600
749 | chr8 85389700 85391700 chr8 85751500 85753500
750 | chr8 85389700 85391700 chr8 85762400 85764400
751 | chr8 85389700 85391700 chr8 85785750 85787750
752 | chr8 85389700 85391700 chr8 85797300 85799300
753 | chr8 85412800 85414800 chr8 85448750 85450750
754 | chr8 85412800 85414800 chr8 85463300 85465300
755 | chr8 85412800 85414800 chr8 85467800 85469800
756 | chr8 85412800 85414800 chr8 85526000 85528000
757 | chr8 85412800 85414800 chr8 85557500 85559500
758 | chr8 85412800 85414800 chr8 85566300 85568300
759 | chr8 85412800 85414800 chr8 85572600 85574600
760 | chr8 85412800 85414800 chr8 85582500 85584500
761 | chr8 85412800 85414800 chr8 85597000 85599000
762 | chr8 85412800 85414800 chr8 85620000 85622000
763 | chr8 85412800 85414800 chr8 85629000 85631000
764 | chr8 85412800 85414800 chr8 85633600 85635600
765 | chr8 85412800 85414800 chr8 85639000 85641000
766 | chr8 85412800 85414800 chr8 85641000 85643000
767 | chr8 85412800 85414800 chr8 85663000 85665000
768 | chr8 85412800 85414800 chr8 85671400 85673400
769 | chr8 85412800 85414800 chr8 85691000 85693000
770 | chr8 85412800 85414800 chr8 85695000 85697000
771 | chr8 85412800 85414800 chr8 85702000 85704000
772 | chr8 85412800 85414800 chr8 85711500 85713500
773 | chr8 85412800 85414800 chr8 85715600 85717600
774 | chr8 85412800 85414800 chr8 85751500 85753500
775 | chr8 85412800 85414800 chr8 85762400 85764400
776 | chr8 85412800 85414800 chr8 85797300 85799300
777 | chr8 85414700 85416700 chr8 85448750 85450750
778 | chr8 85414700 85416700 chr8 85455750 85457750
779 | chr8 85414700 85416700 chr8 85463300 85465300
780 | chr8 85414700 85416700 chr8 85467800 85469800
781 | chr8 85414700 85416700 chr8 85526000 85528000
782 | chr8 85414700 85416700 chr8 85557500 85559500
783 | chr8 85414700 85416700 chr8 85566300 85568300
784 | chr8 85414700 85416700 chr8 85572600 85574600
785 | chr8 85414700 85416700 chr8 85597000 85599000
786 | chr8 85414700 85416700 chr8 85620000 85622000
787 | chr8 85414700 85416700 chr8 85629000 85631000
788 | chr8 85414700 85416700 chr8 85633600 85635600
789 | chr8 85414700 85416700 chr8 85639000 85641000
790 | chr8 85414700 85416700 chr8 85641000 85643000
791 | chr8 85414700 85416700 chr8 85715600 85717600
792 | chr8 85414700 85416700 chr8 85751500 85753500
793 | chr8 85414700 85416700 chr8 85762400 85764400
794 | chr8 85414700 85416700 chr8 85785750 85787750
795 | chr8 85414700 85416700 chr8 85797300 85799300
796 | chr8 85423500 85425500 chr8 85536500 85538500
797 | chr8 85423500 85425500 chr8 85538700 85540700
798 | chr8 85423500 85425500 chr8 85541200 85543200
799 | chr8 85425500 85427500 chr8 85536500 85538500
800 | chr8 85425500 85427500 chr8 85538700 85540700
801 | chr8 85425500 85427500 chr8 85541200 85543200
802 | chr8 85425500 85427500 chr8 85597000 85599000
803 | chr8 85430200 85432200 chr8 85526000 85528000
804 | chr8 85430200 85432200 chr8 85536500 85538500
805 | chr8 85430200 85432200 chr8 85541200 85543200
806 | chr8 85430200 85432200 chr8 85597000 85599000
807 | chr8 85430200 85432200 chr8 85629000 85631000
808 | chr8 85430200 85432200 chr8 85633600 85635600
809 | chr8 85430200 85432200 chr8 85641000 85643000
810 | chr8 85430200 85432200 chr8 85715600 85717600
811 | chr8 85433000 85435000 chr8 85526000 85528000
812 | chr8 85433000 85435000 chr8 85597000 85599000
813 | chr8 85433000 85435000 chr8 85629000 85631000
814 | chr8 85433000 85435000 chr8 85633600 85635600
815 | chr8 85433000 85435000 chr8 85641000 85643000
816 | chr8 85433000 85435000 chr8 85715600 85717600
817 | chr8 85435000 85437000 chr8 85463300 85465300
818 | chr8 85435000 85437000 chr8 85526000 85528000
819 | chr8 85435000 85437000 chr8 85536500 85538500
820 | chr8 85435000 85437000 chr8 85538700 85540700
821 | chr8 85435000 85437000 chr8 85541200 85543200
822 | chr8 85435000 85437000 chr8 85549300 85551300
823 | chr8 85435000 85437000 chr8 85597000 85599000
824 | chr8 85435000 85437000 chr8 85629000 85631000
825 | chr8 85435000 85437000 chr8 85633600 85635600
826 | chr8 85435000 85437000 chr8 85715600 85717600
827 | chr8 85448750 85450750 chr8 85526000 85528000
828 | chr8 85448750 85450750 chr8 85536500 85538500
829 | chr8 85448750 85450750 chr8 85557500 85559500
830 | chr8 85448750 85450750 chr8 85566300 85568300
831 | chr8 85448750 85450750 chr8 85572600 85574600
832 | chr8 85448750 85450750 chr8 85597000 85599000
833 | chr8 85448750 85450750 chr8 85629000 85631000
834 | chr8 85448750 85450750 chr8 85633600 85635600
835 | chr8 85448750 85450750 chr8 85639000 85641000
836 | chr8 85448750 85450750 chr8 85641000 85643000
837 | chr8 85455750 85457750 chr8 85526000 85528000
838 | chr8 85455750 85457750 chr8 85536500 85538500
839 | chr8 85455750 85457750 chr8 85557500 85559500
840 | chr8 85455750 85457750 chr8 85566300 85568300
841 | chr8 85455750 85457750 chr8 85572600 85574600
842 | chr8 85455750 85457750 chr8 85597000 85599000
843 | chr8 85455750 85457750 chr8 85629000 85631000
844 | chr8 85455750 85457750 chr8 85633600 85635600
845 | chr8 85455750 85457750 chr8 85639000 85641000
846 | chr8 85455750 85457750 chr8 85641000 85643000
847 | chr8 85463300 85465300 chr8 85526000 85528000
848 | chr8 85463300 85465300 chr8 85536500 85538500
849 | chr8 85463300 85465300 chr8 85538700 85540700
850 | chr8 85463300 85465300 chr8 85541200 85543200
851 | chr8 85463300 85465300 chr8 85549300 85551300
852 | chr8 85463300 85465300 chr8 85597000 85599000
853 | chr8 85463300 85465300 chr8 85620000 85622000
854 | chr8 85463300 85465300 chr8 85629000 85631000
855 | chr8 85463300 85465300 chr8 85633600 85635600
856 | chr8 85463300 85465300 chr8 85639000 85641000
857 | chr8 85463300 85465300 chr8 85641000 85643000
858 | chr8 85463300 85465300 chr8 85715600 85717600
859 | chr8 85467800 85469800 chr8 85526000 85528000
860 | chr8 85467800 85469800 chr8 85536500 85538500
861 | chr8 85467800 85469800 chr8 85538700 85540700
862 | chr8 85467800 85469800 chr8 85557500 85559500
863 | chr8 85467800 85469800 chr8 85566300 85568300
864 | chr8 85467800 85469800 chr8 85572600 85574600
865 | chr8 85467800 85469800 chr8 85597000 85599000
866 | chr8 85467800 85469800 chr8 85620000 85622000
867 | chr8 85467800 85469800 chr8 85629000 85631000
868 | chr8 85467800 85469800 chr8 85633600 85635600
869 | chr8 85467800 85469800 chr8 85639000 85641000
870 | chr8 85467800 85469800 chr8 85641000 85643000
871 | chr8 85467800 85469800 chr8 85715600 85717600
872 | chr8 85497500 85499500 chr8 85526000 85528000
873 | chr8 85499300 85501300 chr8 85526000 85528000
874 | chr8 85511000 85513000 chr8 85526000 85528000
875 | chr8 85511000 85513000 chr8 85597000 85599000
876 | chr8 85511000 85513000 chr8 85620000 85622000
877 | chr8 85511000 85513000 chr8 85629000 85631000
878 | chr8 85511000 85513000 chr8 85641000 85643000
879 | chr8 85526000 85528000 chr8 85536500 85538500
880 | chr8 85526000 85528000 chr8 85549300 85551300
881 | chr8 85526000 85528000 chr8 85557500 85559500
882 | chr8 85526000 85528000 chr8 85566300 85568300
883 | chr8 85526000 85528000 chr8 85572600 85574600
884 | chr8 85526000 85528000 chr8 85597000 85599000
885 | chr8 85526000 85528000 chr8 85620000 85622000
886 | chr8 85526000 85528000 chr8 85629000 85631000
887 | chr8 85526000 85528000 chr8 85633600 85635600
888 | chr8 85526000 85528000 chr8 85639000 85641000
889 | chr8 85526000 85528000 chr8 85641000 85643000
890 | chr8 85526000 85528000 chr8 85663000 85665000
891 | chr8 85526000 85528000 chr8 85671400 85673400
892 | chr8 85526000 85528000 chr8 85691000 85693000
893 | chr8 85526000 85528000 chr8 85695000 85697000
894 | chr8 85526000 85528000 chr8 85702000 85704000
895 | chr8 85526000 85528000 chr8 85705600 85707600
896 | chr8 85526000 85528000 chr8 85711500 85713500
897 | chr8 85526000 85528000 chr8 85715600 85717600
898 | chr8 85526000 85528000 chr8 85751500 85753500
899 | chr8 85526000 85528000 chr8 85762400 85764400
900 | chr8 85536500 85538500 chr8 85572600 85574600
901 | chr8 85536500 85538500 chr8 85597000 85599000
902 | chr8 85536500 85538500 chr8 85629000 85631000
903 | chr8 85536500 85538500 chr8 85633600 85635600
904 | chr8 85536500 85538500 chr8 85715600 85717600
905 | chr8 85536500 85538500 chr8 85751500 85753500
906 | chr8 85536500 85538500 chr8 85762400 85764400
907 | chr8 85538700 85540700 chr8 85597000 85599000
908 | chr8 85541200 85543200 chr8 85597000 85599000
909 | chr8 85549300 85551300 chr8 85597000 85599000
910 | chr8 85549300 85551300 chr8 85629000 85631000
911 | chr8 85549300 85551300 chr8 85797300 85799300
912 | chr8 85557500 85559500 chr8 85572600 85574600
913 | chr8 85557500 85559500 chr8 85582500 85584500
914 | chr8 85557500 85559500 chr8 85597000 85599000
915 | chr8 85557500 85559500 chr8 85620000 85622000
916 | chr8 85557500 85559500 chr8 85629000 85631000
917 | chr8 85557500 85559500 chr8 85633600 85635600
918 | chr8 85557500 85559500 chr8 85641000 85643000
919 | chr8 85557500 85559500 chr8 85691000 85693000
920 | chr8 85557500 85559500 chr8 85702000 85704000
921 | chr8 85557500 85559500 chr8 85705600 85707600
922 | chr8 85557500 85559500 chr8 85715600 85717600
923 | chr8 85557500 85559500 chr8 85751500 85753500
924 | chr8 85557500 85559500 chr8 85762400 85764400
925 | chr8 85557500 85559500 chr8 85785750 85787750
926 | chr8 85557500 85559500 chr8 85797300 85799300
927 | chr8 85566300 85568300 chr8 85572600 85574600
928 | chr8 85566300 85568300 chr8 85582500 85584500
929 | chr8 85566300 85568300 chr8 85597000 85599000
930 | chr8 85566300 85568300 chr8 85620000 85622000
931 | chr8 85566300 85568300 chr8 85629000 85631000
932 | chr8 85566300 85568300 chr8 85633600 85635600
933 | chr8 85566300 85568300 chr8 85641000 85643000
934 | chr8 85566300 85568300 chr8 85691000 85693000
935 | chr8 85566300 85568300 chr8 85702000 85704000
936 | chr8 85566300 85568300 chr8 85705600 85707600
937 | chr8 85566300 85568300 chr8 85715600 85717600
938 | chr8 85566300 85568300 chr8 85751500 85753500
939 | chr8 85566300 85568300 chr8 85762400 85764400
940 | chr8 85566300 85568300 chr8 85785750 85787750
941 | chr8 85566300 85568300 chr8 85797300 85799300
942 | chr8 85572600 85574600 chr8 85582500 85584500
943 | chr8 85572600 85574600 chr8 85597000 85599000
944 | chr8 85572600 85574600 chr8 85620000 85622000
945 | chr8 85572600 85574600 chr8 85629000 85631000
946 | chr8 85572600 85574600 chr8 85633600 85635600
947 | chr8 85572600 85574600 chr8 85641000 85643000
948 | chr8 85572600 85574600 chr8 85691000 85693000
949 | chr8 85572600 85574600 chr8 85695000 85697000
950 | chr8 85572600 85574600 chr8 85702000 85704000
951 | chr8 85572600 85574600 chr8 85705600 85707600
952 | chr8 85572600 85574600 chr8 85711500 85713500
953 | chr8 85572600 85574600 chr8 85715600 85717600
954 | chr8 85572600 85574600 chr8 85751500 85753500
955 | chr8 85572600 85574600 chr8 85762400 85764400
956 | chr8 85572600 85574600 chr8 85785750 85787750
957 | chr8 85572600 85574600 chr8 85797300 85799300
958 | chr8 85582500 85584500 chr8 85597000 85599000
959 | chr8 85582500 85584500 chr8 85620000 85622000
960 | chr8 85582500 85584500 chr8 85629000 85631000
961 | chr8 85582500 85584500 chr8 85633600 85635600
962 | chr8 85582500 85584500 chr8 85641000 85643000
963 | chr8 85582500 85584500 chr8 85691000 85693000
964 | chr8 85582500 85584500 chr8 85715600 85717600
965 | chr8 85582500 85584500 chr8 85751500 85753500
966 | chr8 85582500 85584500 chr8 85762400 85764400
967 | chr8 85582500 85584500 chr8 85797300 85799300
968 | chr8 85597000 85599000 chr8 85620000 85622000
969 | chr8 85597000 85599000 chr8 85629000 85631000
970 | chr8 85597000 85599000 chr8 85633600 85635600
971 | chr8 85597000 85599000 chr8 85641000 85643000
972 | chr8 85597000 85599000 chr8 85663000 85665000
973 | chr8 85597000 85599000 chr8 85671400 85673400
974 | chr8 85597000 85599000 chr8 85691000 85693000
975 | chr8 85597000 85599000 chr8 85695000 85697000
976 | chr8 85597000 85599000 chr8 85715600 85717600
977 | chr8 85597000 85599000 chr8 85751500 85753500
978 | chr8 85597000 85599000 chr8 85762400 85764400
979 | chr8 85597000 85599000 chr8 85785750 85787750
980 | chr8 85597000 85599000 chr8 85797300 85799300
981 | chr8 85620000 85622000 chr8 85629000 85631000
982 | chr8 85620000 85622000 chr8 85633600 85635600
983 | chr8 85620000 85622000 chr8 85641000 85643000
984 | chr8 85620000 85622000 chr8 85663000 85665000
985 | chr8 85620000 85622000 chr8 85671400 85673400
986 | chr8 85620000 85622000 chr8 85691000 85693000
987 | chr8 85620000 85622000 chr8 85695000 85697000
988 | chr8 85620000 85622000 chr8 85715600 85717600
989 | chr8 85620000 85622000 chr8 85751500 85753500
990 | chr8 85620000 85622000 chr8 85762400 85764400
991 | chr8 85620000 85622000 chr8 85797300 85799300
992 | chr8 85629000 85631000 chr8 85633600 85635600
993 | chr8 85629000 85631000 chr8 85641000 85643000
994 | chr8 85629000 85631000 chr8 85663000 85665000
995 | chr8 85629000 85631000 chr8 85671400 85673400
996 | chr8 85629000 85631000 chr8 85691000 85693000
997 | chr8 85629000 85631000 chr8 85695000 85697000
998 | chr8 85629000 85631000 chr8 85702000 85704000
999 | chr8 85629000 85631000 chr8 85711500 85713500
1000 | chr8 85629000 85631000 chr8 85715600 85717600
1001 | chr8 85629000 85631000 chr8 85751500 85753500
1002 | chr8 85629000 85631000 chr8 85762400 85764400
1003 | chr8 85629000 85631000 chr8 85797300 85799300
1004 | chr8 85633600 85635600 chr8 85641000 85643000
1005 | chr8 85633600 85635600 chr8 85663000 85665000
1006 | chr8 85633600 85635600 chr8 85671400 85673400
1007 | chr8 85633600 85635600 chr8 85691000 85693000
1008 | chr8 85633600 85635600 chr8 85695000 85697000
1009 | chr8 85633600 85635600 chr8 85702000 85704000
1010 | chr8 85633600 85635600 chr8 85711500 85713500
1011 | chr8 85633600 85635600 chr8 85715600 85717600
1012 | chr8 85633600 85635600 chr8 85751500 85753500
1013 | chr8 85633600 85635600 chr8 85762400 85764400
1014 | chr8 85633600 85635600 chr8 85797300 85799300
1015 | chr8 85641000 85643000 chr8 85663000 85665000
1016 | chr8 85641000 85643000 chr8 85671400 85673400
1017 | chr8 85641000 85643000 chr8 85691000 85693000
1018 | chr8 85641000 85643000 chr8 85695000 85697000
1019 | chr8 85641000 85643000 chr8 85702000 85704000
1020 | chr8 85641000 85643000 chr8 85711500 85713500
1021 | chr8 85641000 85643000 chr8 85715600 85717600
1022 | chr8 85641000 85643000 chr8 85751500 85753500
1023 | chr8 85641000 85643000 chr8 85762400 85764400
1024 | chr8 85641000 85643000 chr8 85797300 85799300
1025 | chr8 85654000 85656000 chr8 85682700 85684700
1026 | chr8 85654000 85656000 chr8 85715600 85717600
1027 | chr8 85663000 85665000 chr8 85682700 85684700
1028 | chr8 85663000 85665000 chr8 85691000 85693000
1029 | chr8 85663000 85665000 chr8 85715600 85717600
1030 | chr8 85663000 85665000 chr8 85751500 85753500
1031 | chr8 85663000 85665000 chr8 85762400 85764400
1032 | chr8 85663000 85665000 chr8 85797300 85799300
1033 | chr8 85671400 85673400 chr8 85691000 85693000
1034 | chr8 85671400 85673400 chr8 85715600 85717600
1035 | chr8 85671400 85673400 chr8 85751500 85753500
1036 | chr8 85671400 85673400 chr8 85762400 85764400
1037 | chr8 85671400 85673400 chr8 85797300 85799300
1038 | chr8 85682700 85684700 chr8 85715600 85717600
1039 | chr8 85691000 85693000 chr8 85702000 85704000
1040 | chr8 85691000 85693000 chr8 85711500 85713500
1041 | chr8 85691000 85693000 chr8 85715600 85717600
1042 | chr8 85691000 85693000 chr8 85751500 85753500
1043 | chr8 85691000 85693000 chr8 85762400 85764400
1044 | chr8 85691000 85693000 chr8 85785750 85787750
1045 | chr8 85691000 85693000 chr8 85797300 85799300
1046 | chr8 85691000 85693000 chr8 85806500 85808500
1047 | chr8 85691000 85693000 chr8 85809000 85811000
1048 | chr8 85695000 85697000 chr8 85702000 85704000
1049 | chr8 85695000 85697000 chr8 85711500 85713500
1050 | chr8 85695000 85697000 chr8 85715600 85717600
1051 | chr8 85695000 85697000 chr8 85751500 85753500
1052 | chr8 85695000 85697000 chr8 85762400 85764400
1053 | chr8 85695000 85697000 chr8 85785750 85787750
1054 | chr8 85695000 85697000 chr8 85797300 85799300
1055 | chr8 85695000 85697000 chr8 85806500 85808500
1056 | chr8 85695000 85697000 chr8 85809000 85811000
1057 | chr8 85702000 85704000 chr8 85711500 85713500
1058 | chr8 85702000 85704000 chr8 85715600 85717600
1059 | chr8 85702000 85704000 chr8 85723500 85725500
1060 | chr8 85702000 85704000 chr8 85751500 85753500
1061 | chr8 85702000 85704000 chr8 85762400 85764400
1062 | chr8 85702000 85704000 chr8 85785750 85787750
1063 | chr8 85702000 85704000 chr8 85797300 85799300
1064 | chr8 85702000 85704000 chr8 85802000 85804000
1065 | chr8 85702000 85704000 chr8 85806500 85808500
1066 | chr8 85702000 85704000 chr8 85809000 85811000
1067 | chr8 85705600 85707600 chr8 85751500 85753500
1068 | chr8 85705600 85707600 chr8 85762400 85764400
1069 | chr8 85705600 85707600 chr8 85797300 85799300
1070 | chr8 85711500 85713500 chr8 85751500 85753500
1071 | chr8 85711500 85713500 chr8 85762400 85764400
1072 | chr8 85711500 85713500 chr8 85797300 85799300
1073 | chr8 85715600 85717600 chr8 85751500 85753500
1074 | chr8 85715600 85717600 chr8 85762400 85764400
1075 | chr8 85715600 85717600 chr8 85785750 85787750
1076 | chr8 85715600 85717600 chr8 85797300 85799300
1077 | chr8 85715600 85717600 chr8 85806500 85808500
1078 | chr8 85715600 85717600 chr8 85809000 85811000
1079 | chr8 85751500 85753500 chr8 85762400 85764400
1080 | chr8 85751500 85753500 chr8 85785750 85787750
1081 | chr8 85751500 85753500 chr8 85797300 85799300
1082 | chr8 85751500 85753500 chr8 85806500 85808500
1083 | chr8 85751500 85753500 chr8 85809000 85811000
1084 | chr8 85762400 85764400 chr8 85785750 85787750
1085 | chr8 85762400 85764400 chr8 85797300 85799300
1086 | chr8 85762400 85764400 chr8 85806500 85808500
1087 | chr8 85762400 85764400 chr8 85809000 85811000
1088 | chr8 85785750 85787750 chr8 85797300 85799300
1089 | chr8 85785750 85787750 chr8 85806500 85808500
1090 | chr8 85797300 85799300 chr8 85806500 85808500
1091 | chr8 85797300 85799300 chr8 85809000 85811000
1092 |
--------------------------------------------------------------------------------
/PileupsRCMC.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "a2f31b9f",
6 | "metadata": {},
7 | "source": [
8 | "# Plotting pileups"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "id": "717f5fcc",
14 | "metadata": {},
15 | "source": [
16 | "## Imports"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": null,
22 | "id": "da763af9",
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "# import standard python libraries\n",
27 | "import matplotlib as mpl\n",
28 | "%matplotlib inline\n",
29 | "mpl.rcParams['figure.dpi'] = 96\n",
30 | "import numpy as np\n",
31 | "import matplotlib.pyplot as plt\n",
32 | "import pandas as pd\n",
33 | "import seaborn as sns\n",
34 | "\n",
35 | "# import libraries for biological data analysis\n",
36 | "from coolpuppy import coolpup\n",
37 | "from plotpuppy import plotpup\n",
38 | "import cooler\n",
39 | "import bioframe\n",
40 | "import cooltools\n",
41 | "from cooltools import expected_cis\n",
42 | "from cooltools.lib import plotting\n",
43 | "\n",
44 | "import bbi"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "id": "98de4e75",
50 | "metadata": {},
51 | "source": [
52 | "## Enrichment calculation"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": null,
58 | "id": "3a79fc85",
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "#Viraat's new calculation\n",
63 | "#Modified 2022/10/04 by Miles to try to avoid NaN values and correct an issue with the background sum, \n",
64 | "#and generally make the code a little more streamlined\n",
65 | "def enrichmentCalc(mtx, dotWindow):\n",
66 | " #Dimension of array side (should be square)\n",
67 | " sideLength = len(mtx)\n",
68 | " #Middle of side length\n",
69 | " midPoint = (sideLength - 1) // 2\n",
70 | " #Half size of box around centre pixel (one pixel smaller if even-sized dot window - don't do this)\n",
71 | " buffer = (dotWindow - 1) // 2\n",
72 | " \n",
73 | " #Get sum of pixels around dot\n",
74 | " dotSum = np.nansum(mtx[midPoint-buffer:midPoint+buffer+1, midPoint-buffer:midPoint+buffer+1])\n",
75 | " \n",
76 | " #Sum the dotWindow-sized boxes at the top-left and bottom-right corners of the matrix as background, ignoring NaN values\n",
77 | " backgroundSum1 = np.nansum(mtx[0:dotWindow, 0:dotWindow])\n",
78 | " backgroundSum2 = np.nansum(mtx[sideLength-dotWindow:sideLength, sideLength-dotWindow:sideLength])\n",
79 | " \n",
80 | " #Calculate enrichment (NB this assumes all boxes are the same size.\n",
81 | " #If you set an even dotWindow value, they won't be)\n",
82 | " enrichment = dotSum / ((backgroundSum1 + backgroundSum2)/2)\n",
83 | " \n",
84 | " return enrichment"
85 | ]
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "id": "a19e8ae8",
90 | "metadata": {},
91 | "source": [
92 | "## Inputs"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": null,
98 | "id": "295f6354",
99 | "metadata": {},
100 | "outputs": [],
101 | "source": [
102 | "#mcool resolution to read\n",
103 | "resolution = 250\n",
104 | "#List of mcool locations as strings\n",
105 | "clrfiles = [\"mcoollocation1\", \"mcoollocation2\", \"mcoollocation3\"]\n",
106 | "#List of mcool conditions as strings\n",
107 | "conditions = [\"condition1\", \"condition2\", \"condition3\"]\n",
108 | "#List of loop types as strings\n",
109 | "loopTypesNames = [\"loop\", \"type\", \"names\"]\n",
110 | "#List of loop file locations (bedpe)\n",
111 | "loopFiles = [\"looplocation1\", \"looplocation2\", \"looplocation3\"]\n",
112 | "\n",
113 | "#Specify the RCMC regions of the mcools to look at (format: chromosome (string), start (number), end (number), name of region (string))\n",
114 | "regions = pd.DataFrame([['chrA',1,100,'regionname1'],['chrB',1,100,'regionname2'],['chrC',1,100,'regionname3']],\n",
115 | " columns=['chrom', 'start', 'end', 'name'])\n",
116 | "#Cis expected file locations from cooltools - .tsv file - one for each mcool\n",
117 | "expectedFiles = [\"expectedlocation1\", \"expectedlocation2\", \"expectedlocation3\"]\n",
118 | "#Set save directory\n",
119 | "saveDir = '/a/directory/on/your/system/'\n",
120 | "\n",
121 | "#Set the size of the area flanking the dot\n",
122 | "flankDist = 10000\n",
123 | "#Don't set this to be even... This is the size of the area to measure around the dot \n",
124 | "#(and by extension the size of the boxes at the edges of the region too)\n",
125 | "#For this reason, it needs to be odd to have integer box sizes on each side.\n",
126 | "dotWindow = 5\n",
127 | "\n"
128 | ]
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "id": "aaa6af6c",
133 | "metadata": {},
134 | "source": [
135 | "Read in the loops"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": null,
141 | "id": "e9f08a9b",
142 | "metadata": {},
143 | "outputs": [],
144 | "source": [
145 | "#######Don't change this section#######\n",
146 | "#Create an empty list to store the imported loop locations\n",
147 | "loopTypes = []\n",
148 | "#List of column names to use for imported loops (this is constant - do not change)\n",
149 | "colNames = ['chrom1', 'start1', 'end1', 'chrom2', 'start2', 'end2']\n",
150 | "#Read in files, put them in loopTypes\n",
151 | "for file in loopFiles:\n",
152 | " temploops = pd.read_csv(file, sep='\\t', names=colNames, header=None)\n",
153 | " loopTypes.append(temploops)"
154 | ]
155 | },
156 | {
157 | "cell_type": "markdown",
158 | "id": "c6de3c38",
159 | "metadata": {},
160 | "source": [
161 | "## Run the script"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": null,
167 | "id": "f27175dd",
168 | "metadata": {},
169 | "outputs": [],
170 | "source": [
171 | "#Loop through each cooler\n",
172 | "for i, clrfile in enumerate(clrfiles):\n",
173 | " #Get condition name\n",
174 | " condition = conditions[i]\n",
175 | " #Get expected file\n",
176 | " expected = pd.read_csv(expectedFiles[i], sep='\\t')\n",
177 | " #Read in cooler\n",
178 | " clr = cooler.Cooler(clrfile+'::/resolutions/'+str(resolution))\n",
179 | " #Loop through different loop types\n",
180 | " for j in range(len(loopTypes)):\n",
181 | " loops = loopTypes[j]\n",
182 | " loopsName = loopTypesNames[j]\n",
183 | " #Calculate pileups\n",
184 | " stack = cooltools.pileup(clr, loops, view_df=regions, expected_df=expected, flank=flankDist)\n",
185 | " #Flatten stack by calculating means\n",
186 | " mtx = np.nanmean(stack, axis=2)\n",
187 | " #Calculate enrichment\n",
188 | " enrichment = enrichmentCalc(mtx, dotWindow)\n",
189 | " #Plot figure\n",
190 | " plt.imshow(\n",
191 | " np.log2(mtx),\n",
192 | " vmax = 2.5,\n",
193 | " vmin = -2.5,\n",
194 | " cmap='coolwarm')\n",
195 | " \n",
196 | " plt.colorbar(label = 'log2 mean obs/exp')\n",
197 | " ticks_pixels = np.linspace(0, flankDist*2//resolution,5)\n",
198 | " ticks_kbp = ((ticks_pixels-ticks_pixels[-1]/2)*resolution//1000).astype(int)\n",
199 | " plt.xticks(ticks_pixels, ticks_kbp)\n",
200 | " plt.yticks(ticks_pixels, ticks_kbp)\n",
201 | " plt.xlabel('relative position, kbp')\n",
202 | " plt.ylabel('relative position, kbp')\n",
203 | " plt.text(1, 1, round(enrichment, 2))\n",
204 | " plt.savefig(saveDir+'LoopPileups_'+condition+'_'+loopsName+'_'+str(resolution)+'bp_'+str(flankDist)+'bp.pdf', dpi=1200)\n",
205 | " plt.clf()\n",
206 | "\n"
207 | ]
208 | }
209 | ],
210 | "metadata": {
211 | "kernelspec": {
212 | "display_name": "Python [conda env:coolpuppy]",
213 | "language": "python",
214 | "name": "conda-env-coolpuppy-py"
215 | },
216 | "language_info": {
217 | "codemirror_mode": {
218 | "name": "ipython",
219 | "version": 3
220 | },
221 | "file_extension": ".py",
222 | "mimetype": "text/x-python",
223 | "name": "python",
224 | "nbconvert_exporter": "python",
225 | "pygments_lexer": "ipython3",
226 | "version": "3.7.12"
227 | }
228 | },
229 | "nbformat": 4,
230 | "nbformat_minor": 5
231 | }
232 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RCMC Analysis Code
2 | This repository contains source code for the article [Region Capture Micro-C reveals coalescence of enhancers and promoters into nested microcompartments](https://www.nature.com/articles/s41588-023-01391-1), used in the analysis of RCMC data.
3 |
4 | Code is provided either in the form of Python/R scripts or as Jupyter notebooks to be run in conda environments containing the required packages. Additionally, genomic positions of microcompartments identified in the paper are included in bedpe format.
5 |
6 | ## Code summary
7 | ### Micro-C alignment (microcbowtie2.py)
8 | Required packages:
9 | - bowtie2
10 | - samtools
11 | - sambamba
12 | - pairtools
13 | - cooler
14 | - pairix
15 |
16 | Python script that aligns reads in .fastq format from paired-end sequencing of Micro-C experiments and produces as output .pairs, .cool and .mcool files compatible with downstream applications such as HiGlass.
17 |
18 | Example usage:
19 |
20 | ```
21 | python /path/to/script/microcbowtie2.py --file_1 pair1.fastq --file_2 pair2.fastq -g mm39 -t 36 -o exampleoutput
22 | ```
23 |
24 | ### ChIP-seq alignment (spikeinChIP_PE_alignment.py)
25 | Required packages:
26 | - bowtie2
27 | - samtools
28 | - sambamba
29 |
30 | Python script that aligns reads in .fastq format from paired-end sequencing of ChIP-seq experiments and produces aligned .bam files. The .fastq information is supplied as a .tsv file, with each line containing the path to the first pair .fastq, the path to the second pair .fastq, and the desired output name of the aligned file.
31 |
32 | Example usage:
33 |
34 | ```
35 | python /path/to/script/spikeinChIP_PE_alignment.py -f list_of_fastqs.tsv -g mm39 -t 36 -o alignmentcountsout
36 | ```
37 |
38 | ### Finding chromatin features overlapping microcompartment anchors (loopFeatureOverlap.R)
39 | Required packages:
40 | - plyr
41 | - dplyr
42 | - reshape2
43 | - purrr
44 | - grid
45 | - IRanges
46 | - GenomicRanges
47 | - arrangements
48 | - foreach
49 |
50 | R script used to classify microcompartment interactions by finding overlap between identified microcompartments (.bedpe) and chromatin features (.bed) such as promoters, enhancers, CTCF binding sites, etc. It outputs individual .bedpe files of interactions according to combinatorial classification of chromatin features (e.g. for enhancer (E) and promoter (P): P-P, E-E, E-P, E-null, P-null, null-null), including interactions which have no overlap (null category). Classification is inclusive by default, or mutually exclusive (E-P cannot also be P-P) when the `-e`/`--exclusive` flag is set.
51 |
52 | Example usage:
53 |
54 | ```
55 | Rscript /path/to/script/loopFeatureOverlap.R -l interactions.bedpe -b promoter.bed,enhancer.bed -i P,E -o outputdirectory/
56 | ```
57 |
58 | ### Calculating strength of individual interactions (LoopStrengthRCMC.ipynb) or aggregate pileup analysis (PileupsRCMC.ipynb)
59 | Required packages:
60 | - seaborn
61 | - coolpuppy
62 | - cooltools
63 | - cooler
64 |
65 | Jupyter notebooks used to calculate strengths of individual microcompartments (LoopStrengthRCMC.ipynb) or generate aggregate pileup analysis figures (PileupsRCMC.ipynb). Each one takes .mcool files of contacts from RCMC, a list of interactions to calculate for (.bedpe format), expected files generated by cooltools for each .mcool, and the captured region, and calculates background corrected observed/expected interaction strengths, either for each interaction individually (output as a .bedpe with additional columns for strengths for each .mcool) or as a pileup (output as a .pdf of the aggregate interaction annotated with the calculated strength).
66 |
67 | ### Visualization of contact maps and genomic tracks (ContactMapVisualizationExampleNotebook.ipynb)
68 | Required packages:
69 | - cooltools
70 | - cooler
71 | - coolbox
72 | - matplotlib
73 |
74 | Jupyter notebook used to generate visualizations of contact maps and genomic tracks for figures. Contact map visualization is accomplished using cooltools and requires a .mcool file of contacts from RCMC or a comparable method. Genomic track visualization is accomplished using coolbox and requires a .mcool file of contacts, gene annotations (.gtf format or similar), and ChIP-seq, RNA-seq, and ATAC-seq datasets (.bw format).
75 |
76 | ### Calculation of read-containing bin fraction by contact distance (CalculatingFilledBinFractionByDistance.ipynb)
77 | Required packages:
78 | - cooltools
79 | - cooler
80 | - matplotlib
81 |
82 | Jupyter notebook used to calculate the fraction of bins in .mcool-derived contact maps which contain at least one read pair at a given resolution and contact distance from the diagonal. The notebook takes an unbalanced .mcool of contacts from RCMC or a comparable method, tabulates the occupied contact bin fraction at specified contact distances, and generates a plot of occupied bin fraction by contact distance.
83 |
84 | ### Calculation of row sums in ICE-balanced .mcools (BalancedRowsumsCalculation.ipynb)
85 | Required packages:
86 | - cooltools
87 | - cooler
88 | - matplotlib
89 |
90 | Jupyter notebook used to confirm successful ICE balancing of .mcool files by calculating and plotting row sums. Two variations of the calculation are provided in the notebook – one for calculating row sums across an entire region, and one for calculating row sums only for bins containing contact anchor sites. Both variations take ICE-balanced .mcool files of contacts from RCMC or a comparable method, and the latter additionally takes a list of contact anchor sites (.bed format). The distributions of calculated row sums in either variation are plotted as histograms.
91 |
92 | ### List of manually-annotated microcompartment loops (MicrocompartmentLoops_PlusMin1kb.bedpe)
93 | BEDPE format file listing all 1091 manually-annotated microcompartment loops across the Ppm1g (chr5) and Klf1 (chr8) regions used in the microcompartment analysis scripts above. Coordinates are provided for the mm39 reference genome, and loop anchors are listed as plus-and-minus 1kb from each anchor’s point coordinate. Columns in the file are as follows: the first is the chromosome of the left loop anchor, the second is the coordinate of the left loop anchor minus 1 kb, the third is the coordinate of the left loop anchor plus 1 kb, and the remaining three columns are the same for the right loop anchor.
94 |
95 | ### Lists of probes used for capturing regions of interest (captureprobes_mm10.bed, captureprobes_mm39.bed)
96 | BED format files listing the genomic locations of all probes used for capturing the Sox2 (chr3), Ppm1g (chr5), Nanog (chr6), Klf1 (chr8), and Fbn2 (chr18) regions used in capture. Coordinates are provided for both the mm10 and mm39 reference genomes, and loop anchors are listed as plus-and-minus 1kb from each anchor’s point coordinate. Columns in each file are as follows: the first is the chromosome the region is located on, the second is the start coordinate of the probe, and the third is the end coordinate of the probe.
97 |
98 | ## How to cite
99 | This work is shared under an MIT license. If you make use of analysis scripts or data from this work, please cite as follows:
100 |
101 | Goel, V.Y., Huseyin, M.K. & Hansen, A.S. Region Capture Micro-C reveals coalescence of enhancers and promoters into nested microcompartments. *Nat Genet* (2023). https://doi.org/10.1038/s41588-023-01391-1
102 |
103 | Also refer to our deposited and citable code on Zenodo:
104 |
105 | Goel, Viraat Y, Huseyin, Miles K, & Hansen, Anders S. (2023). Code supporting Region Capture Micro-C reveals coalescence of enhancers and promoters into nested microcompartments (1.0). Zenodo. https://doi.org/10.5281/zenodo.7641852
106 |
--------------------------------------------------------------------------------
/loopFeatureOverlap.R:
--------------------------------------------------------------------------------
1 | #New version of loopFeatureOverlap.py, written in R using Granges instead of Python-based Pyranges version, which had an output issue
2 |
3 | #Imports
4 | library('plyr')
5 | library('dplyr')
6 | require('reshape2')
7 | library('purrr')
8 | library('grid')
9 | # library('ChIPpeakAnno')
10 | library('IRanges')
11 | library('GenomicRanges')
12 | library('arrangements')
13 | library('foreach')
14 |
15 | #Get args: parse the command line with optparse, aborting with an install hint if the package cannot be loaded
16 | if(!require(optparse)) {
17 | stop("Please install the optparse package and try again!", call. = FALSE)
18 | }
19 |
20 | library(optparse)
21 | #Every string option below defaults to the placeholder "na" when it is not supplied; --exclusive is a flag defaulting to FALSE
22 | parser <- OptionParser(add_help_option = TRUE)
23 | parser <- add_option(parser, c("-o", "--outdir"), action = "store", type = "character", help = "Directory to output bed files", default = "na")
24 | parser <- add_option(parser, c("-l", "--loops"), action = "store", type = "character", help = "Input bedpe or tsv file containing loops in bedpe format", default = "na")
25 | parser <- add_option(parser, c("-b", "--bed"), action = "store", type = "character", help = "Input bed file or files - if multiple, separate with commas", default = "na")
26 | parser <- add_option(parser, c("-i", "--id"), action = "store", type = "character", help = "Feature names to use for each bed file - will be used to name output files. If multiple, separate with commas", default = "na")
27 | parser <- add_option(parser, c("-e", "--exclusive"), action = "store_true", help = "When set, defines loop anchors with only a single feature - overlap with multiple features is not allowed", default = FALSE)
28 | args <- parse_args(parser)
29 |
30 | #########################################################################
31 | #Convert args to variables (args$exclusive is read directly where the loops are classified)
32 | path.loops <- args$loops
33 | outdir <- args$outdir
34 | features <- args$bed
35 | ids <- args$id
36 |
37 | #########################################################################
38 | #Read in loops: whitespace-delimited, no header row, first six columns named chr1/start1/end1/chr2/start2/end2
39 | loops <- read.delim(path.loops, header = FALSE, col.names = c("chr1", "start1", "end1", "chr2", "start2", "end2"), sep = "", dec = ".") #opens up the file
40 |
41 | #Read in features: split the comma-separated bed file paths and feature IDs into vectors
42 | featureslist <- unlist(strsplit(features, ","))
43 | idlist <- unlist(strsplit(ids, ","))
44 |
45 | #Check features and ids are same length (each bed file is paired with the ID at the same position)
46 | if (length(featureslist) != length(idlist)) {
47 | stop("Make sure the same numbers of bed files and IDs are provided")
48 | }
49 |
50 | #Import a BED-like feature file, keeping only its first three columns. filename: path to a whitespace-delimited file with no header row. Returns a data frame with columns chr (character), start and end (numeric); stops if the file has fewer than three columns.
51 | import_feature_data <- function(filename) {
52 |   #Count fields with the same separator/quote/comment rules read.delim() applies below, so the colClasses vector matches the columns that are actually parsed
53 |   num.cols.to.blank <- max(count.fields(filename, sep = "", quote = "\"", comment.char = ""), na.rm = TRUE) - 3
54 |   if (num.cols.to.blank < 0) stop("Feature file needs at least three columns (chr, start, end): ", filename, call. = FALSE)
55 |   df <- read.delim(filename, header = FALSE, sep = "", dec = ".", colClasses = c('character', rep('numeric', 2), rep("NULL", num.cols.to.blank)))
56 |   return(setNames(df, c("chr", "start", "end")))
57 | }
58 |
59 | features.data <- lapply(featureslist, import_feature_data)
60 |
61 | #Fail if no output directory was given. This must run before the trailing "/" is appended, otherwise the "na" default has already become "na/" and is never detected
62 | if (outdir == "na") {
63 |   stop("Please provide an option for --outdir", call. = FALSE)
64 | }
65 | #Ensure output directory ends with a /
66 | if (!endsWith(outdir, '/')) {
67 |   outdir <- paste0(outdir, '/')
68 | }
69 |
70 | #########################################################################
71 | #Make Granges objects
72 | #For features: one GRanges per bed file, built from the chr/start/end columns only
73 | features.data.granges <- lapply(features.data, makeGRangesFromDataFrame, keep.extra.columns = FALSE)
74 | #For loops
75 | #First make separate dfs for each anchor
76 | #Split a loops table into its two anchors. loops: data frame with columns chr1/start1/end1/chr2/start2/end2. Returns list(first anchors, second anchors), each a data frame with columns chr, start, end and a shared loop_id.
77 | make_anchors_separate <- function(loops) {
78 |   #Add loop_id column (the row number) for merging the anchors back together; seq_len(), unlike seq.int(), also copes with an empty table
79 |   loops$loop_id <- seq_len(nrow(loops))
80 |   #Split loops into two bed-like data frames, one per anchor
81 |   loops.1 <- data.frame(chr = loops$chr1, start = loops$start1, end = loops$end1, loop_id = loops$loop_id)
82 |   loops.2 <- data.frame(chr = loops$chr2, start = loops$start2, end = loops$end2, loop_id = loops$loop_id)
83 |   #Return both anchor tables; no merge happens here, the caller re-joins them on loop_id
84 |   return(list(loops.1, loops.2))
85 | }
86 | #Then make Granges
87 | loops.anchors.list <- make_anchors_separate(loops)
88 | #Make separate dfs (maybe don't need to do this?) - the per-feature overlap count columns are added to these below
89 | loops.anchors.1 <- loops.anchors.list[[1]]
90 | loops.anchors.2 <- loops.anchors.list[[2]]
91 | #Make granges from both anchor tables, dropping the loop_id column (keep.extra.columns = FALSE)
92 | loops.anchors.list.granges <- lapply(loops.anchors.list, makeGRangesFromDataFrame, keep.extra.columns = FALSE)
93 |
94 | #########################################################################
95 | #Compare Granges objects
96 | #Start counter (used to look up the ID that belongs to each feature set)
97 | feature.count <- 1
98 |
99 | #Loop through the features and count overlaps: each anchor table gains one column per feature ID holding the overlap count for every anchor
100 | for (feature in features.data.granges) {
101 | loops.anchors.1[[idlist[feature.count]]] <- countOverlaps(loops.anchors.list.granges[[1]], feature)
102 | loops.anchors.2[[idlist[feature.count]]] <- countOverlaps(loops.anchors.list.granges[[2]], feature)
103 | feature.count <- feature.count + 1
104 | }
105 |
106 |
107 | #Change column names for loops.anchors.2 so they don't match: every column gets a '2' suffix (loop_id becomes loop_id2)
108 | colnames(loops.anchors.2) <- paste0(colnames(loops.anchors.2), '2')
109 |
110 | #Next, merge based on loop_id(2) columns to get one row per loop carrying both anchors and their overlap counts
111 | loops.anchors.remerge <- merge(loops.anchors.1, loops.anchors.2, by.x = "loop_id", by.y = "loop_id2")
112 |
113 | ########################################################################
114 | #Determine loop classes
115 |
116 | #First need to generate loop classes based on ids
117 | #Add null to the class list (for anchors with no features)
118 | idlist.null <- append(idlist, "null")
119 |
120 | #Then make all combinations (this is combinations with replacement)
121 | position <- 1
122 | combination.object <- icombinations(idlist.null, k = 2, replace = TRUE)
123 | #Pre-allocate one slot per combination: length(collect()) counts both members of every pair, hence the division by 2
124 | id.combined.list <- vector("list", length(combination.object$collect())/2)
125 | #Iterate over combinations of positions in idlist.null rather than of the names themselves: the names would arrive as strings, and indexing the unnamed idlist.null by string returns NA (giving "NA-NA" classes)
126 | foreach(x = icombinations(length(idlist.null), k = 2, replace = TRUE), .combine = c) %do% {
127 |   id.combined.list[[position]] <- paste(idlist.null[x[1]], idlist.null[x[2]], sep = "-")
128 |   position <- position + 1
129 | }
130 |
#Classify loops. Which version is run depends on whether inclusive or exclusive loops are desired.
#
#Each row of loops.anchors.remerge is one loop; for every feature id there is a
#count column for anchor 1 (named <id>) and one for anchor 2 (named <id>2).
#A loop is put into class "A-B" when one anchor matches A and the other matches B:
#  - inclusive: an anchor matches A when its A count is > 0 (other features are ignored)
#  - exclusive: an anchor matches A when its A count is > 0 AND every other feature count is 0
#  - "null" (either mode): every feature count of that anchor is 0
#The result is output.list, a named list of data frames (one per loop class).

#Logical mask over the rows of df telling whether one anchor matches a class.
#  df        - merged loop table
#  ids       - all feature ids
#  target    - feature id the anchor must overlap, or NULL for a "null" anchor
#  suffix    - "" for anchor 1 columns, "2" for anchor 2 columns
#  exclusive - if TRUE, all non-target features must have a count of 0
anchor.matches <- function(df, ids, target, suffix, exclusive) {
  keep <- rep(TRUE, nrow(df))
  for (id in ids) {
    overlaps <- df[[paste0(id, suffix)]]
    if (!is.null(target) && id == target) {
      keep <- keep & (overlaps > 0)
    } else if (is.null(target) || exclusive) {
      keep <- keep & (overlaps == 0)
    }
  }
  keep
}

#Rows of df whose anchor 1 matches anchor1.id and whose anchor 2 matches anchor2.id.
#which() drops rows where the comparison is NA, as the original filtering did.
select.loops <- function(df, ids, anchor1.id, anchor2.id, exclusive) {
  df[which(anchor.matches(df, ids, anchor1.id, "", exclusive) &
           anchor.matches(df, ids, anchor2.id, "2", exclusive)), ]
}

#Evaluated through if() so an unusable args$exclusive still fails loudly here
exclusive.mode <- if (args$exclusive) TRUE else FALSE

#Make named list of dfs
output.list <- setNames(replicate(length(id.combined.list), data.frame()), id.combined.list)

#seq_along() (rather than 1:length()) so an empty idlist simply skips the loops
for (i1 in seq_along(idlist)) {
  id1 <- idlist[[i1]]
  for (i2 in seq_along(idlist)) {
    id2 <- idlist[[i2]]
    #Get the loop type from the indices - the label is always written in idlist order
    #so that E-P2 and P-E2 loops are both put into the same category
    if (i1 <= i2) {
      looptype <- paste(id1, id2, sep = '-')
    } else {
      looptype <- paste(id2, id1, sep = '-')
    }
    #Get the relevant loops
    temp.df <- select.loops(loops.anchors.remerge, idlist, id1, id2, exclusive.mode)
    output.list[[looptype]] <- rbind(output.list[[looptype]], temp.df)
  }

  looptype <- paste(id1, "null", sep = '-')
  #Generate X-null (feature on anchor 1, nothing on anchor 2)
  temp.df.1 <- select.loops(loops.anchors.remerge, idlist, id1, NULL, exclusive.mode)
  #Generate null-X (nothing on anchor 1, feature on anchor 2)
  temp.df.2 <- select.loops(loops.anchors.remerge, idlist, NULL, id1, exclusive.mode)
  #Combine them
  temp.df <- rbind(temp.df.1, temp.df.2)
  output.list[[looptype]] <- rbind(output.list[[looptype]], temp.df)
}

#Loops with no feature on either anchor
looptype <- "null-null"
temp.df <- select.loops(loops.anchors.remerge, idlist, NULL, NULL, exclusive.mode)
output.list[[looptype]] <- rbind(output.list[[looptype]], temp.df)
272 |
#Drop duplicated rows from every loop-class table (inclusive calling can produce many)
output.list.nodups <- lapply(output.list, distinct)

#Report the number of loops in each class
sapply(output.list.nodups, nrow)

#Write one headerless, tab-separated .bedpe file per loop class into outdir
for (class.index in 1:length(output.list.nodups)) {
  class.df <- output.list.nodups[[class.index]]
  #Keep only the coordinates of the two anchors
  bedpe.df <- data.frame(
    chr = class.df$chr,
    start = class.df$start,
    end = class.df$end,
    chr2 = class.df$chr2,
    start2 = class.df$start2,
    end2 = class.df$end2
  )
  bedpe.path <- paste0(outdir, names(output.list)[[class.index]], '.bedpe')
  write.table(
    bedpe.df,
    file = bedpe.path,
    row.names = FALSE,
    col.names = FALSE,
    quote = FALSE,
    sep = "\t"
  )
}
286 |
--------------------------------------------------------------------------------
/microcbowtie2.py:
--------------------------------------------------------------------------------
1 | #Aligning and processing a single fastq file through a pipeline similar to distiller but instead using bowtie2
2 |
3 | from sys import exit
4 | import subprocess as sp
5 | import argparse
6 | import multiprocessing
7 | import uuid
8 |
9 | parser = argparse.ArgumentParser(description = "run bowtie2 and pairtools on fastq files to produce pairsam files")
10 | bamopts = parser.add_mutually_exclusive_group()
11 | parser.add_argument("--file_1", "-1", help = "first demuxed fastq of paired end reads - required", nargs = "*")
12 | parser.add_argument("--file_2", "-2", help = "second demuxed fastq of paired end reads - required", nargs = "*")
13 | parser.add_argument("--genome", "-g", help = "genome to align to - mouse or human - required")
14 | parser.add_argument("--genometype", "-y", help = "genome type - use if your genome is a modified version of a standard genome - should be one of hg19, hg38, mm10, or mm39")
15 | parser.add_argument("--threads", "-t", help = "number of threads to use for bowtie2 - default is 1", default = "1")
16 | parser.add_argument("--resolutions", "-r", help = "list of resolutions to output in decreasing order - all resolutions must be a multiple of the smallest resolution - default: 10000000 5000000 2500000 1000000 500000 250000 100000 50000 25000 10000 5000 2000 1000", nargs = "*")
17 | parser.add_argument("--out", "-o", help = "name for output files - defaults to name of first file")
18 | parser.add_argument("--outdir", help = "a directory to store output files - default is current directory", default = "./")
19 | bamopts.add_argument("--bowtieonly", "-b", help = "only run bowtie2 and make bams - can be useful for post initial analysis QC steps", action = "store_true")
20 | bamopts.add_argument("--keepbams", "-k", help = "keep bam files while doing a normal full analysis", action = "store_true")
21 | args = parser.parse_args()
22 |
23 | file1 = args.file_1
24 | file2 = args.file_2
25 | genome = args.genome
26 | gentype = args.genometype
27 | threads = args.threads
28 | outname = args.out
29 | outdir = args.outdir
30 | reslist = args.resolutions
31 | bowtieonly = args.bowtieonly
32 | keepbams = args.keepbams
33 |
#Check requirements are fulfilled:
# The output of `conda list` is searched by plain substring for each required
# package name; which packages are required depends on the run mode.
try:
    condapacks = sp.run(["conda", "list"], capture_output=True)
except FileNotFoundError:
    # conda itself is not on PATH - report it instead of showing a traceback
    print("Could not run 'conda list' - please make sure conda is installed and on your PATH")
    exit(1)
condapacksstr = str(condapacks.stdout)

if bowtieonly:
    requiredpacks = ("bowtie2", "samtools", "sambamba")
    missingmsg = "Please make sure bowtie2, samtools and sambamba are installed in your current conda environment (check conda list)"
else:
    requiredpacks = ("bowtie2", "pairtools", "cooler", "pairix")
    missingmsg = "Please make sure bowtie2, pairtools, pairix and cooler are installed in your current conda environment (check with 'conda list')"

if any(pack not in condapacksstr for pack in requiredpacks):
    print(missingmsg)
    # Non-zero status so calling shells/pipelines can see that the run did not start
    exit(1)
45 |
#Check that outdir ends with a /, add one if it doesn't
# (outdir is later concatenated directly with file names)
if args.outdir is not None and not outdir.endswith("/"):
    outdir = outdir + "/"

#Check a genome was specified
if args.genome is None:
    print("Genome not specified - check help for formatting")
    parser.print_usage()
    # Non-zero status: a missing required option is an error
    exit(1)
55 |
#Check if file1 and file2 are single files or multiple
# Sets multifile (1/0) and the output names. In multi-file mode it also builds
# the per-file .pairs/.bam/.bai path lists and their space-joined forms, which
# are substituted into the merge commands. All error paths exit non-zero.
if file1 is None or file2 is None:
    print("Input files not specified - check help for formatting")
    parser.print_usage()
    exit(1)
elif len(file1) > 1 and len(file1) == len(file2):
    multifile = 1
    if args.out is None:
        # One output name per input, derived from the first-end fastq name
        outlist = [fname + "_" + genome for fname in file1]
        outname = outlist[0]
    else:
        # User-supplied name with a 1-based index appended per input pair
        outlist = [outname + "_" + str(i + 1) for i in range(len(file1))]
    #Input to pair merging step needs all of the outputs together
    pairnamelist = [outdir + oname + ".pairs" for oname in outlist]
    pairnamest = " ".join(pairnamelist)
    bamlist = [outdir + oname + ".sorted.bam" for oname in outlist]
    bamst = " ".join(bamlist)
    bailist = [outdir + oname + ".sorted.bam.bai" for oname in outlist]
    baist = " ".join(bailist)
elif len(file1) == 1 and len(file2) == 1:
    multifile = 0
    #If nargs = *, always makes a list, even if only one element
    file1 = file1[0]
    file2 = file2[0]
    if args.out is None:
        outname = file1 + "_" + genome
    # Note: this path already includes outdir
    pairnamest = outdir + outname + ".pairs"
else:
    print("Mismatch in number of input files, check arguments")
    exit(1)
88 |
#Check that a sensible number of threads has been requested - more protections here are possible - at the moment users are trusted to be sensible
# A request that is not a whole number, is below 1, or is >= the machine's CPU
# count falls back to a single thread. A valid request is left exactly as given.
cpucount = multiprocessing.cpu_count()
if args.threads is None:
    print("Defaulting to one thread")
    threads = 1
else:
    try:
        requestedthreads = int(args.threads)
    except ValueError:
        requestedthreads = None
    if requestedthreads is None or requestedthreads < 1:
        print("Invalid number of threads requested, resetting to default")
        threads = 1
    elif requestedthreads >= cpucount:
        print("Too many threads requested, resetting to default")
        threads = 1
97 |
#Check that the user has entered a valid genome to align to
# gentype is the base genome (-y/--genometype); it defaults to the genome name
# itself (-g/--genome) when no base genome was given.
if args.genometype is None:
    gentype = args.genome

if gentype in ("mm10", "mm39"):
    print("Aligning to mouse genome {}".format(genome))
elif gentype in ("hg19", "hg38"):
    print("Aligning to human genome {}".format(genome))
else:
    if gentype == genome:
        # No distinct base genome was supplied; point the user at -y, which is the base-genome option
        print("Genome option not recognised or not entered. Please use mm10/39 or hg19/38 or ask Miles to change the script to accommodate your new organism/genome. If you are using a modified version of base genome, use the -y option to indicate the base genome name.")
    else: #If they're using a modified genome, make sure the base genome exists so that the files are redirected properly
        print("Genome/base genome option not recognised. Please use mm10/39 or hg19/38 or ask Miles to change the script to accommodate your new organism/genome.")
    # Non-zero status: an unsupported genome is an error
    exit(1)
115 |
#Set up resolutions as needed
# Fall back to the default ladder both when -r was not given (None) and when it
# was given without any values (empty list), which would otherwise break reslist[-1].
if not args.resolutions:
    reslist = ["10000000", "5000000", "2500000", "1000000", "500000", "250000", "100000", "50000", "25000", "10000", "5000", "2000", "1000"]
# Comma-separated form used for `cooler zoomify --resolutions`
resst = ",".join(reslist)
#Extract minimum resolution (resolutions are expected in decreasing order, so it is the last entry)
minres = reslist[-1]
122 |
#Process ID (used to make unique sorttemp, so these are not overlapping for multiple processes in the same outdir)
uniqueid = str(uuid.uuid4())

# commands as strings
# Positional placeholders, as filled in by the .format() calls further down:
#   {0} outdir      {1} gentype     {2} genome     {3} threads
#   {4} first fastq {5} second fastq {6} output name
#   {7} pairnamest (path(s) of the .pairs file(s), ALREADY prefixed with outdir)
#   {8} minres      {9} resst       {10} uniqueid  {11} keepbamcmd
line1 = "mkdir {0}{10}sorttemp -p"
line2 = "bowtie2 -x /mnt/md0/DataRepository/genomes/{1}/{2} --threads {3} -1 {4} -2 {5} --reorder --local --very-sensitive-local {11}| pairtools parse --add-columns mapq --walks-policy mask -c /mnt/md0/DataRepository/chromsizes/{1}/{2}.sorted.chrom.sizes --assembly {2} --min-mapq 2 --drop-sam --drop-readid --nproc-in {3} | pairtools sort --tmpdir {0}{10}sorttemp --nproc {3} -o {0}{6}.pairs | cat"
line3 = "pairtools merge --tmpdir {0}{10}sorttemp --nproc {3} {7} | pairtools dedup --max-mismatch 1 --mark-dups --output {0}{6}.nodups.pairs.gz --output-unmapped {0}{6}.unmapped.pairs.gz --output-dups {0}{6}.dups.pairs.gz --output-stats {0}{6}.dedup.stats | cat"
# Single-file dedup: the input is {7} on its own. It must not be prefixed with
# {0} again because pairnamest already contains outdir.
line4 = "pairtools dedup --max-mismatch 1 --mark-dups --output {0}{6}.nodups.pairs.gz --output-unmapped {0}{6}.unmapped.pairs.gz --output-dups {0}{6}.dups.pairs.gz --output-stats {0}{6}.dedup.stats {7}"
line5 = "pairix {0}{6}.nodups.pairs.gz"
line6 = "bgzip -cd -@ 3 {0}{6}.nodups.pairs.gz | cooler cload pairs -c1 2 -p1 3 -c2 4 -p2 5 --assembly {2} /mnt/md0/DataRepository/chromsizes/{1}/{2}.sorted.chrom.sizes:{8} - {0}{6}.{8}.cool"
line7 = "cooler zoomify --nproc {3} --balance --out {0}{6}.{8}.mcool --resolutions {9} {0}{6}.{8}.cool"
line8 = "rmdir {0}{10}sorttemp"

#For running only bowtie2 and making bams
# In these templates {6} is the per-file output name and {7} is either the
# overall output name (bline3 temp dir) or the space-joined sorted bam list
# (bline4); {8} is the space-joined .bai list.
bline1 = "mkdir {0}{6}temp -p"
bline2 = "bowtie2 -x /mnt/md0/DataRepository/genomes/{1}/{2} --threads {3} -1 {4} -2 {5} --reorder --local --very-sensitive-local | samtools view -bS -o {0}{6}.bam"
bline3 = "sambamba sort -t {3} -m 6GB --tmpdir {0}{7}temp {0}{6}.bam {0}{6}.sorted.bam && rm {0}{6}.bam"
bline4 = "sambamba merge -t {3} {0}{6}.sorted.merged.bam {7} && rm {7} {8}"
bline5 = "sambamba markdup -t {3} --tmpdir {0}{6}temp --overflow-list-size 10000000 -r {0}{6}.sorted.merged.bam {0}{6}.nodups.sorted.merged.bam && rm {0}{6}.sorted.merged.ba*"
bline6 = "sambamba markdup -t {3} --tmpdir {0}{6}temp -r {0}{6}.sorted.bam {0}{6}.nodups.sorted.bam && rm {0}{6}.sorted.ba*"
bline7 = "rmdir {0}{6}temp"

# Full pipeline: multiple input pairs (merge then dedup) vs a single pair (dedup only)
multilines = [line1, line2, line3, line5, line6, line7, line8]
lines = [line1, line2, line4, line5, line6, line7, line8]

# bowtie2-only pipeline: multiple input pairs vs a single pair
multiblines = [bline1, bline2, bline3, bline4, bline5, bline7]
blines = [bline1, bline2, bline3, bline6, bline7]

# bam post-processing after a full run with --keepbams (no second alignment step)
truncmultiblines = [bline1, bline3, bline4, bline5, bline7]
truncblines = [bline1, bline3, bline6, bline7]
150 |
151 | truncmultiblines = [bline1, bline3, bline4, bline5, bline7]
152 | truncblines = [bline1, bline3, bline6, bline7]
153 |
#Process the files depending on the run mode
# Four modes: {full pipeline, bowtie2-only} x {single input pair, several pairs}.
# Every command template is filled in, echoed, then executed through the shell.
if not bowtieonly:
    if not multifile:
        # Optional tee of the bowtie2 SAM stream into a bam (needs bash process substitution)
        keepbamcmd = "| tee >(samtools view -bS > {0}{1}.bam) ".format(outdir, outname) if keepbams else ""
        for template in lines:
            tokenized_line = template.format(outdir, gentype, genome, threads, file1, file2, outname, pairnamest, minres, resst, uniqueid, keepbamcmd)
            print(tokenized_line)
            sp.run(tokenized_line, shell=True, executable="/bin/bash")
    else:
        for template in multilines:
            if template == line2:
                # The alignment step runs once per input pair
                for fastq_a, fastq_b, pairout in zip(file1, file2, outlist):
                    keepbamcmd = "| tee >(samtools view -bS > {0}{1}.bam) ".format(outdir, pairout) if keepbams else ""
                    tokenized_line = template.format(outdir, gentype, genome, threads, fastq_a, fastq_b, pairout, pairnamest, minres, resst, uniqueid, keepbamcmd)
                    print(tokenized_line)
                    sp.run(tokenized_line, shell=True, executable="/bin/bash")
            else:
                # Every other step runs once on the merged output
                tokenized_line = template.format(outdir, gentype, genome, threads, file1[1], file2[1], outname, pairnamest, minres, resst, uniqueid)
                print(tokenized_line)
                sp.run(tokenized_line, shell=True)
else:
    if multifile:
        for template in multiblines:
            if template == bline2 or template == bline3:
                # Align and sort each input pair separately
                for fastq_a, fastq_b, pairout in zip(file1, file2, outlist):
                    tokenized_line = template.format(outdir, gentype, genome, threads, fastq_a, fastq_b, pairout, outname)
                    print(tokenized_line)
                    sp.run(tokenized_line, shell=True)
            else:
                tokenized_line = template.format(outdir, gentype, genome, threads, file1[1], file2[1], outname, bamst, baist)
                print(tokenized_line)
                sp.run(tokenized_line, shell=True)
    else:
        for template in blines:
            tokenized_line = template.format(outdir, gentype, genome, threads, file1, file2, outname, outname)
            print(tokenized_line)
            sp.run(tokenized_line, shell=True)
203 |
#After everything finishes, merge and process bams as required if doing full analysis
# Only relevant for a full run with --keepbams: the bams written during
# alignment are sorted, (merged,) and duplicate-marked here.
if keepbams and not bowtieonly:
    if multifile:
        for template in truncmultiblines:
            if template == bline3:
                # Sort each per-pair bam on its own
                for fastq_a, fastq_b, pairout in zip(file1, file2, outlist):
                    tokenized_line = template.format(outdir, gentype, genome, threads, fastq_a, fastq_b, pairout, outname)
                    print(tokenized_line)
                    sp.run(tokenized_line, shell=True)
            else:
                tokenized_line = template.format(outdir, gentype, genome, threads, file1[1], file2[1], outname, bamst, baist)
                print(tokenized_line)
                sp.run(tokenized_line, shell=True)
    else:
        for template in truncblines:
            tokenized_line = template.format(outdir, gentype, genome, threads, file1, file2, outname, outname)
            print(tokenized_line)
            sp.run(tokenized_line, shell=True)
222 |
--------------------------------------------------------------------------------
/spikeinChIP_PE_alignment.py:
--------------------------------------------------------------------------------
1 | #Aligning and processing paired end fastq files for spike-in ChIP-seq using bowtie2
2 |
3 | from sys import exit
4 | import subprocess as sp
5 | import argparse
6 | import multiprocessing
7 | import uuid
8 | import pandas as pd
9 |
10 | parser = argparse.ArgumentParser(description = "run bowtie2 on paired end fastq files with spikein to produce aligned bam files")
11 | parser.add_argument("--filename", "-f", help = "a tab-separated file containing one each line the path to the fastq with the first ends of pairs, the path to the fastq with the second ends of pairs, and the desired output name for the aligned file")
12 | # parser.add_argument("--file_1", "-1", help = "first demuxed fastq of paired end reads - required", nargs = "*")
13 | # parser.add_argument("--file_2", "-2", help = "second demuxed fastq of paired end reads - required", nargs = "*")
14 | parser.add_argument("--genome", "-g", help = "genome build to align to - mouse or human - required")
15 | parser.add_argument("--spikegenome", "-s", help = "spikein genome build to align to - mouse or human - required")
16 | parser.add_argument("--threads", "-t", help = "number of threads to use for bowtie2 - default is 1", default = "1")
17 | parser.add_argument("--outname", "-o", help = "name for the table to store counts")
18 | parser.add_argument("--outdir", help = "a directory to store output files - default is current directory", default = "./")
19 | args = parser.parse_args()
20 |
21 | # file1 = args.file_1
22 | # file2 = args.file_2
23 | genome = args.genome
24 | spikegenome = args.spikegenome
25 | threads = args.threads
26 | outname = args.outname
27 | outdir = args.outdir
28 |
#Read in file to determine what to process
# The sample sheet is mandatory; check for it before handing it to pandas so a
# missing -f/--filename gives a usage message instead of a read_csv traceback.
if args.filename is None:
    print("Input files not specified - check help for formatting")
    parser.print_usage()
    exit(1)

# Headerless, tab-separated: first-end fastq, second-end fastq, output name
files = pd.read_csv(args.filename, sep='\t', header=None, names = ['end1', 'end2', 'name'])
file1 = files['end1'].tolist()
file2 = files['end2'].tolist()
outnames = files['name'].tolist()
34 |
#Check requirements are fulfilled:
# The output of `conda list` is searched by plain substring for each required package name.
try:
    condapacks = sp.run(["conda", "list"], capture_output=True)
except FileNotFoundError:
    # conda itself is not on PATH - report it instead of showing a traceback
    print("Could not run 'conda list' - please make sure conda is installed and on your PATH")
    exit(1)
condapacksstr = str(condapacks.stdout)

if any(pack not in condapacksstr for pack in ("bowtie2", "samtools", "sambamba")):
    print("Please make sure bowtie2, samtools and sambamba are installed in your current conda environment (check conda list)")
    # Non-zero status so calling shells/pipelines can see that the run did not start
    exit(1)
42 |
#Check that outdir ends with a /, add one if it doesn't
# (outdir is later concatenated directly with file names)
if args.outdir is not None and not outdir.endswith("/"):
    outdir = outdir + "/"

#Check a genome was specified
# Both the main genome and the spike-in genome are required
if args.genome is None or args.spikegenome is None:
    print("Genomes not specified - check help for formatting")
    parser.print_usage()
    # Non-zero status: a missing required option is an error
    exit(1)
52 |
#Check if file1, file2, and outnames are present and the same lengths
if file1 is None or file2 is None or outnames is None:
    print("Input files not specified - check help for formatting")
    parser.print_usage()
    exit(1)
elif isinstance(file1, list) and isinstance(file2, list) and isinstance(outnames, list):
    # The three lists come from columns of the same table, so a row with too few
    # fields shows up as a missing value rather than as a length difference -
    # check for both.
    lengths_differ = not (len(file1) == len(file2) == len(outnames))
    if lengths_differ or files.isnull().values.any():
        print("Mismatch in number of input files or output names, check arguments")
        exit(1)
62 |
#Check that a sensible number of threads has been requested - more protections here are possible - at the moment users are trusted to be sensible
# A request that is not a whole number, is below 1, or is >= the machine's CPU
# count falls back to a single thread. A valid request is left exactly as given.
cpucount = multiprocessing.cpu_count()
if args.threads is None:
    print("Defaulting to one thread")
    threads = 1
else:
    try:
        requestedthreads = int(args.threads)
    except ValueError:
        requestedthreads = None
    if requestedthreads is None or requestedthreads < 1:
        print("Invalid number of threads requested, resetting to default")
        threads = 1
    elif requestedthreads >= cpucount:
        print("Too many threads requested, resetting to default")
        threads = 1
    else:
        print(f"Running alignment with {threads} threads...")
73 |
#Check that the user has entered a valid genome to align to
# Only the main genome is checked here; the spike-in genome name is used as given.
if genome in ("mm10", "mm39"):
    print(f"Aligning to mouse genome {genome}")
elif genome in ("hg19", "hg38"):
    print(f"Aligning to human genome {genome}")
else:
    print("Genome option not recognised or not entered. Please use mm10/39 or hg19/38 or ask Miles to change the script to accommodate your new organism/genome")
    # Non-zero status: an unsupported genome is an error
    exit(1)
82 |
#Process ID (used to make unique sorttemp, so these are not overlapping for multiple processes in the same outdir)
uniqueid = str(uuid.uuid4())

#Create a place to store counts
allcountslist = []


def _bash(command, capture=False):
    """Run a command line through /bin/bash and return the CompletedProcess."""
    return sp.run(command, capture_output=capture, shell=True, executable="/bin/bash")


#Process the files
for fastq1, fastq2, name in zip(file1, file2, outnames):
    # Paths of the intermediate and final bams for this sample
    rawbam = f"{outdir}{name}_UniqMapped.bam"
    sortedbam = f"{outdir}{name}_UniqMapped_sorted.bam"
    rmdupbam = f"{outdir}{name}_UniqMapped_sorted_rmdup.bam"
    genomebam = f"{outdir}{name}_{genome}.UniqMapped_sorted_rmdup.bam"
    spikebam = f"{outdir}{name}_{spikegenome}.UniqMapped_sorted_rmdup.bam"

    print(f"Aligning {name} to {genome} and {spikegenome}")
    #Explanation: align to genome and spikein genome, then remove multiply aligned reads with grep (XS: indicates the score of the next best aligning read, if it exists), then make a bam of all mapped reads (-F4 removes reads with a SAM flag of 4, which means unmapped)
    _bash(f"bowtie2 -p {threads} --no-mixed --no-discordant -1 {fastq1} -2 {fastq2} -x /mnt/md0/DataRepository/genomes/{genome}.{spikegenome}/{genome}.{spikegenome} | grep -v XS: - | samtools view -bh -F4 - > {rawbam}")
    #Sort aligned reads
    _bash(f"sambamba sort --tmpdir {outdir}{uniqueid}/ -t {threads} -m 30G -o {sortedbam} {rawbam}")
    #Remove duplicates
    _bash(f"sambamba markdup --tmpdir {outdir}{uniqueid}/ -r -t {threads} {sortedbam} {rmdupbam}")

    #Next, need to separate out reads from each genome:
    print(f"Extracting reads aligning uniquely to {genome}")
    #Use samtools view to open the file, grep to remove those with the spikein genome name in them, then change the chromosomes with the genome name to just the chromosome numbers as normally used, then put the file back into bam.
    _bash(f"samtools view -h {rmdupbam} | grep -v {spikegenome} | sed s/{genome}_chr/chr/g | samtools view -bhS - > {genomebam}")
    #Do the same thing for the spikein
    print(f"Extracting reads aligning uniquely to {spikegenome}.")
    _bash(f"samtools view -h {rmdupbam} | grep -v {genome} | sed s/{spikegenome}_chr/chr/g | samtools view -bhS - > {spikebam}")

    #Index outputs
    for finalbam in (rmdupbam, genomebam, spikebam):
        _bash(f"sambamba index -t {threads} {finalbam}")

    #Now clean up the intermediate bams
    for intermediate in (rawbam, sortedbam):
        _bash(f"rm {intermediate}")

    #Finally, count reads in each file (combined, main genome, spike-in genome):
    countsList = []
    for finalbam in (rmdupbam, genomebam, spikebam):
        counted = _bash(f"sambamba view -c -t {threads} {finalbam}", capture=True)
        countsList.append(counted.stdout.decode('ascii').strip())
    allcountslist.append(countsList)
122 |
# Remove the temporary directory that was passed to sambamba as --tmpdir
sp.run(f"rm {outdir}{uniqueid}/ -r", shell=True, executable="/bin/bash")

# One row of counts per sample, appended column-wise to the sample sheet
countstable = pd.DataFrame(allcountslist, columns = ['allcounts', 'genomecounts', 'spikecounts'])
outtable = pd.concat([files, countstable], axis=1)

# --outname has no default; fall back to a fixed name so the counts from a
# finished run are still written instead of failing on None + str.
if outname is None:
    outname = "spikein_counts"
    print(f"No output table name given (--outname), writing counts to {outdir}{outname}.tsv")
outtable.to_csv(outdir + outname + '.tsv', sep = '\t', index = False, header = True)
127 |
--------------------------------------------------------------------------------