├── BalancedRowsumsCalculation.ipynb ├── CalculatingFilledBinFractionByDistance.ipynb ├── ContactMapVisualizationExampleNotebook.ipynb ├── LICENSE ├── LoopStrengthRCMC.ipynb ├── MicrocompartmentLoops_PlusMin1kb.bedpe ├── PileupsRCMC.ipynb ├── README.md ├── captureprobes_mm10.bed ├── captureprobes_mm39.bed ├── loopFeatureOverlap.R ├── microcbowtie2.py └── spikeinChIP_PE_alignment.py /BalancedRowsumsCalculation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# import libraries\n", 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "import pandas as pd\n", 13 | "import os\n", 14 | "import cooltools\n", 15 | "import cooler" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Load in microcompartment loop anchors" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 10, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/html": [ 33 | "
\n", 34 | "\n", 47 | "\n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | "
chrstartend
0chr53127300031275000
1chr53129550031297500
2chr53130670031308700
3chr53131050031312500
4chr53133620031338200
............
127chr88578575085787750
128chr88579730085799300
129chr88580200085804000
130chr88580650085808500
131chr88580900085811000
\n", 125 | "

132 rows × 3 columns

\n", 126 | "
" 127 | ], 128 | "text/plain": [ 129 | " chr start end\n", 130 | "0 chr5 31273000 31275000\n", 131 | "1 chr5 31295500 31297500\n", 132 | "2 chr5 31306700 31308700\n", 133 | "3 chr5 31310500 31312500\n", 134 | "4 chr5 31336200 31338200\n", 135 | ".. ... ... ...\n", 136 | "127 chr8 85785750 85787750\n", 137 | "128 chr8 85797300 85799300\n", 138 | "129 chr8 85802000 85804000\n", 139 | "130 chr8 85806500 85808500\n", 140 | "131 chr8 85809000 85811000\n", 141 | "\n", 142 | "[132 rows x 3 columns]" 143 | ] 144 | }, 145 | "execution_count": 10, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "# Load in the locations of all unique microcompartment anchors in BED format with chromosome, start (anchor coordinate minus 1 kb) and end (anchor coordinate plus 1 kb) columns\n", 152 | "anchors = pd.read_csv(r'FILE_PATH_TO_ANCHOR_LIST.bed', sep='\\t', header=None, names=['chr','start','end'])\n", 153 | "anchors" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 11, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "# Calculate the anchor coordinate by averaging the start & end coordinates, and create a DataFrame containing just the chromosome and anchor coordinate\n", 163 | "anchors['midpoint'] = (anchors['end'] + anchors['start']) // 2\n", 164 | "anchorList = anchors[['chr','midpoint']]\n", 165 | "\n", 166 | "# Subset the DataFrame of anchors to separate out the microcompartment anchors in Klf1 (on chr8) and in Ppm1g (on chr5)\n", 167 | "anchorDataframeKlf1 = anchorList[anchorList['chr'] == 'chr8']\n", 168 | "anchorDataframePpm1g = anchorList[anchorList['chr'] == 'chr5']\n", 169 | "\n", 170 | "# Convert the region-separated anchor lists from DataFrames to lists for ease of downstream use\n", 171 | "anchorListKlf1 = anchorDataframeKlf1['midpoint'].tolist()\n", 172 | "anchorListPpm1g = anchorDataframePpm1g['midpoint'].tolist()" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 
| "metadata": {}, 178 | "source": [ 179 | "## Calculate rowsums and plot their distribution\n", 180 | "### Whole-region balancing calculations" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 47, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "data": { 190 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAS30lEQVR4nO3dcZCc913f8fcncmxKIFjGJ6FKBplWJJEZbMJVhKalAZdKgYJMW08vbYkmVUdtx2WSmdJiM1MyHUYz7nSm01JqOpoQEFMajwpxLQKkaERppgWinBMlsWyrPqIi3Ui1LkkhTdIRSPn2j308Xkt3uud0u7q9n96vmZ3neX7P79n93t7dZ5/97fM8m6pCktSW16x1AZKk0TPcJalBhrskNchwl6QGGe6S1KDb1roAgLvvvru2b9++1mVI0rryzDPPfLaqphZbNxHhvn37dmZnZ9e6DElaV5L8wVLrHJaRpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBy4Z7kjckOTl0+0KS9yS5K8mxJC92041D2zyWZC7J6SS7x/sjSJKutmy4V9Xpqnqgqh4AvgP4MvAU8ChwvKp2AMe7ZZLsBGaA+4A9wBNJNoynfEnSYlY6LPMg8PtV9QfAXuBw134YeKib3ws8WVWXquoMMAfsGkGt0lglr9yk9W6l4T4DfKCb31xVFwC66aaufStwbmib+a7tVZIcSDKbZHZhYWGFZUiSrqd3uCe5Hfgh4D8t13WRtmu+y6+qDlXVdFVNT00tet0bSdINWsme+9uBj1fVS93yS0m2AHTTi137PHDP0HbbgPOrLVSS1N9Kwv0dvDIkA3AU2NfN7wOeHmqfSXJHknuBHcCJ1RYqSeqv1yV/k3w18H3A3x9qfhw4kmQ/cBZ4GKCqTiU5AjwHXAYeqaorI61aknRdvcK9qr4MfP1VbZ9jcPTMYv0PAgdXXZ00Zh4Zo1Z5hqokNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ3qdZy7dKsZPv69rrkykjT53HOXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqUK9wT3Jnkl9O8kKS55N8V5K7khxL8mI33TjU/7Ekc0lOJ9k9vvIlSYvpu+f+b4APV9UbgfuB54FHgeNVtQM43i2TZCcwA9wH7AGeSLJh1IVLkpa2bLgneT3w3cDPAVTVH1fVHwJ7gcNdt8PAQ938XuDJqrpUVWeAOWDXaMuWblzyyk1qVZ89928GFoCfT/KJJO9L8jpgc1VdAOimm7r+W4FzQ9vPd22vkuRAktkkswsLC6v6ISRJr9Yn3G8D3gz8bFV9O/AluiGYJSy2P3TNd9lU1aGqmq6q6ampqV7FSpL66RPu88B8VX20W/5lBmH/UpItAN304lD/e4a23wacH025kqQ+lg33qvrfwLkkb+iaHgSeA44C+7q2fcDT3fxRYCbJHUnuBXYAJ0ZatSTpuvp+QfaPAr+U5HbgM8C7GLwwHEmyHzgLPAxQVaeSHGHwAnAZeKSqroy8cukm8cuytR71CveqOglML7LqwSX6HwQO3nhZkqTV8AxVSWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JD
XIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1qFe4J/lfST6d5GSS2a7triTHkrzYTTcO9X8syVyS00l2j6t4SdLiVrLn/j1V9UBVTXfLjwLHq2oHcLxbJslOYAa4D9gDPJFkwwhrliQtYzXDMnuBw938YeChofYnq+pSVZ0B5oBdq3gcSdIK9Q33An4zyTNJDnRtm6vqAkA33dS1bwXODW0737W9SpIDSWaTzC4sLNxY9ZKkRd3Ws99bq+p8kk3AsSQvXKdvFmmraxqqDgGHAKanp69ZL0m6cb323KvqfDe9CDzFYJjlpSRbALrpxa77PHDP0ObbgPOjKliStLxlwz3J65J87cvzwF8BngWOAvu6bvuAp7v5o8BMkjuS3AvsAE6MunBpLSSv3KRJ1mdYZjPwVAZ/zbcB/7GqPpzkY8CRJPuBs8DDAFV1KskR4DngMvBIVV0ZS/WSpEUtG+5V9Rng/kXaPwc8uMQ2B4GDq65OknRDPENVkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoP6XvJXWte80JduNe65S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSg3qHe5INST6R5EPd8l1JjiV5sZtuHOr7WJK5JKeT7B5H4ZKkpa1kz/3dwPNDy48Cx6tqB3C8WybJTmAGuA/YAzyRZMNoypUk9dEr3JNsA34AeN9Q817gcDd/GHhoqP3JqrpUVWeAOWDXSKqVJPXSd8/9XwP/FPjKUNvmqroA0E03de1bgXND/ea7tldJciDJbJLZhYWFldYtSbqOZcM9yV8FLlbVMz3vc7Hr79U1DVWHqmq6qqanpqZ63rUkqY8+l/x9K/BDSb4f+Crg9Un+A/BSki1VdSHJFuBi138euGdo+23A+VEWLUm6vmX33KvqsaraVlXbGXxQ+ltV9XeAo8C+rts+4Olu/igwk+SOJPcCO4ATI69ckrSk1XxZx+PAkST7gbPAwwBVdSrJEeA54DLwSFVdWXWlkqTeUnXNcPhNNz09XbOzs2tdhho2jm9imoB/Hd3ikjxTVdOLrfMMVUlqkOEuSQ3yC7KlGzQ81OMQjSaNe+6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDvLaMmjSOS/xK64l77pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBy4Z7kq9KciLJJ5OcSvLPu/a7khxL8mI33Ti0zWNJ5pKcTrJ7nD+AJOlaffbcLwHfW1X3Aw8Ae5K8BXgUOF5VO4Dj3TJJdgIzwH3AHuCJJBvGULskaQnLhnsNfLFbfG13K2AvcLhrPww81M3vBZ6sqktVdQaYA3aNsmhJ0vX1GnNPsiHJSeAicKyqPgpsrqoLAN10U9d9K3BuaPP5ru3q+zyQZDbJ7MLCwip+BEnS1XqFe1VdqaoHgG3AriTfep3ui534XYvc56Gqmq6q6ampqV7FSpL6WdHRMlX1h8BvMxhLfynJFoBuerHrNg/cM7TZNuD8aguVJPXX52iZqSR3dvN/CvjLwAvAUWBf120f8HQ3fxSYSXJHknuBHcCJEdctSbqOPleF3AIc7o54eQ1wpKo+lOR3gSNJ9gNngYcBqupUkiPAc8Bl4JGqujKe8iVJi0nVNcPhN9309HTNzs6udRlqyM2+5O8E/BvpFpTkmaqaXmydZ6hKUoMMd0lqkN/EJI3A8DCQQzSaBO65S1KDDHdJapDhLkkNMtwlqUGGuyQ1yKNl1IybfeKSNMncc5ekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yJOYpBHz8r+aBO65S1KDDHdJatCy4Z7kniT/NcnzSU4leXfXfleSY0le7KYbh7Z5LMlcktNJdo/zB5AkXavPnvtl4B9X1ZuAtwCPJNkJPAocr6odwPFumW7dDH
AfsAd4IsmGcRQvSVrcsuFeVReq6uPd/P8Fnge2AnuBw123w8BD3fxe4MmqulRVZ4A5YNeI65YkXceKxtyTbAe+HfgosLmqLsDgBQDY1HXbCpwb2my+a7v6vg4kmU0yu7CwcAOlS5KW0jvck3wN8CvAe6rqC9frukjbNQeEVdWhqpququmpqam+ZUiSeugV7kleyyDYf6mqPtg1v5RkS7d+C3Cxa58H7hnafBtwfjTlSpL66HO0TICfA56vqn81tOoosK+b3wc8PdQ+k+SOJPcCO4AToytZkrScPmeovhX4EeDTSU52bT8BPA4cSbIfOAs8DFBVp5IcAZ5jcKTNI1V1ZdSFS5KWtmy4V9V/Z/FxdIAHl9jmIHBwFXVJklbBM1QlqUGGuyQ1yKtCal3LUgOG0i3OPXdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIA+F1Lrj4Y/S8txzl6QGGe6S1CDDXZIa5Ji7NEbDnw/UNd9HJo2Pe+6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQcuGe5L3J7mY5NmhtruSHEvyYjfdOLTusSRzSU4n2T2uwqX1JnnlJo1bnz33XwD2XNX2KHC8qnYAx7tlkuwEZoD7um2eSLJhZNVKknpZNtyr6iPA569q3gsc7uYPAw8NtT9ZVZeq6gwwB+waTamSpL5udMx9c1VdAOimm7r2rcC5oX7zXds1khxIMptkdmFh4QbL0K3CIQ1pZUb9gepi/3qLnnRdVYeqarqqpqempkZchiTd2m403F9KsgWgm17s2ueBe4b6bQPO33h5Upt8J6Jxu9FwPwrs6+b3AU8Ptc8kuSPJvcAO4MTqSpQkrdSyV4VM8gHgbcDdSeaB9wKPA0eS7AfOAg8DVNWpJEeA54DLwCNVdWVMtUuSlrBsuFfVO5ZY9eAS/Q8CB1dTlCRpdTxDVZIaZLhLUoMMd0lqkF+zJ60xv4pP4+CeuyQ1yHCXpAYZ7pLUIMfcpQni+LtGxT13SWqQe+7ShHIvXqvhnrskNchwl6QGGe6S1CDH3DWx/CKLVzj+rpUy3KV1xqBXHw7LSFKD3HOXGuEevYYZ7hqJpYLFwJHWhuGum8YXgNHzQ2ctxXDXDVtNsCy1rWE1Gn1eSK+3zhfZ9c8PVCWpQWML9yR7kpxOMpfk0XE9jiZP8spNa+9Gfh/D26zmprUzlnBPsgH4d8DbgZ3AO5LsHMdj6ebyH7ct4/59LnX/vhiM37j23HcBc1X1mar6Y+BJYO+YHktSI8bxzuBWffEY1weqW4FzQ8vzwHcOd0hyADjQLX4xyelVPN7dwGdXsf24WNfKWNfKrKu6xhGuK7zPu5P183z19E1LrRhXuC/2lL/q8/eqOgQcGsmDJbNVNT2K+xol61oZ61oZ61qZW62ucQ3LzAP3DC1vA86P6bEkSVcZV7h/DNiR5N4ktwMzwNExPZYk6SpjGZapqstJ/hHwX4ANwPur6tQ4HqszkuGdMbCulbGulbGulbml6kp5KpokNcczVCWpQYa7JDVo3YT7cpczSPK2JH+U5GR3+8lJqGuotpNJTiX5b5NQV5J/MvRcPZvkSpK7JqCur0vyq0k+2T1f7xp3TT3r2pjkqSSfSnIiybfepLren+RikmeXWJ8kP93V/akkb56Qut6Y5HeTXEryYzejpp51/e3uefpUkt9Jcv+E1LW3q+lkktkkf2HVD1pVE39j8KHs7wPfDNwOfBLYeVWftwEfmsC67gSeA76xW940CXVd1f8Hgd+ahLqAnwD+RTc/BXweuH0C6vqXwHu7+TcCx2/S39h3A28Gnl1i/fcDv8Hg3JK3AB+dkLo2AX8OOAj82M2oqWddfx7Y2M2/fYKer6/hlc9Avw14YbWPuV723Cf1cgZ96vpbwAer6ixAVV2ckLqGvQP4wITUVcDXJgmDP/jPA5cnoK6dwHGAqnoB2J5k85jroqo+wuA5WMpe4Bdr4PeAO5NsWeu6qupiVX0M+JNx13LV4y
5X1+9U1f/pFn+PwTk4k1DXF6tLduB1XHXS541YL+G+2OUMti7S77u6t/O/keS+CanrW4CNSX47yTNJ3jkhdQGQ5KuBPcCvTEhdPwO8icFJb58G3l1VX5mAuj4J/DWAJLsYnPZ9U4JhGb1/17rGfgbveiZCkh9O8gLwa8DfXe39rZdwX/ZyBsDHgW+qqvuBfwv853EXRb+6bgO+A/gBYDfwz5J8ywTU9bIfBP5HVV1v73BU+tS1GzgJ/GngAeBnkrx+vGX1qutxBi/SJ4EfBT7B+N9R9LGS37U6Sb6HQbj/+FrX8rKqeqqq3gg8BPzUau9vvYT7spczqKovVNUXu/lfB16b5O61rqvr8+Gq+lJVfRb4CDDuD3FWcvmHGW7OkAz0q+tdDIaxqqrmgDMMxrjXtK7u7+tdVfUA8E4GnwecGXNdfXipjxVK8m3A+4C9VfW5ta7nat0Qzp9ZbX6tl3Bf9nIGSb6hG6d9+W3za4Bx/+L6XGbhaeAvJrmtGwL5TuD5CaiLJF8H/KWuxpuhT11ngQe7+jYDbwA+s9Z1JbmzWwfw94CPVNUXxlxXH0eBd3ZHzbwF+KOqurDWRU2qJN8IfBD4kar6n2tdz8uS/Nmh/Hozgw/2V5Vf6+I7VGuJyxkk+Qfd+n8P/A3gHya5DPw/YGboA4o1q6uqnk/yYeBTwFeA91XVoodD3cy6uq4/DPxmVX1pnPWssK6fAn4hyacZDDn8ePeOZ63rehPwi0muMDj6af84a3pZkg8wOBLs7iTzwHuB1w7V9esMjpiZA77M4J3PmteV5BuAWeD1wFeSvIfBEUhjfUHs8Xz9JPD1wBNdll6um3ClyB51/XUGL9J/wiC//uZq88vLD0hSg9bLsIwkaQUMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSg/w/J7ivzeXkQkQAAAABJRU5ErkJggg==\n", 191 | "text/plain": [ 192 | "
" 193 | ] 194 | }, 195 | "metadata": { 196 | "needs_background": "light" 197 | }, 198 | "output_type": "display_data" 199 | } 200 | ], 201 | "source": [ 202 | "# Usage: Plot different datasets & regions by updating the filename and \"region = regionsRCMC[x]\" attributes, respectively \n", 203 | "\n", 204 | "saveDir = f'DIRECTORY_PATH'\n", 205 | "fileName = f'FILE_NAME.svg' # swap out .svg for the desired output file format\n", 206 | "\n", 207 | "# Specify the data resolution at which balanced rowsums will be calculated\n", 208 | "resolution = 250\n", 209 | "\n", 210 | "# Load in the desired dataset\n", 211 | "clr_Data = cooler.Cooler('FILE_PATH_TO_MCOOL::resolutions/'+str(resolution))\n", 212 | "\n", 213 | "# List the Capture regions for region-by-region calculations\n", 214 | "## For an apples-to-apples comparison, balanced rowsums are calculated at the RCMC regions for both the RCMC dataset as well as the genome-wide Micro-C (Hsieh 2020) and Hi-C (Bonev 2017) datasets\n", 215 | "regionsRCMC = ['chr3:33804149-35704149','chr5:31257344-32382344','chr6:122451959-122876959','chr8:84846629-85856629','chr18:58032072-59034072']\n", 216 | "\n", 217 | "# Specify which specific region you would like to calculate rowsums for\n", 218 | "region = regionsRCMC[3]\n", 219 | "\n", 220 | "# Load in the balanced contact matrix for the specified region\n", 221 | "regionMat = clr_Data.matrix(balance=True).fetch(region)\n", 222 | "\n", 223 | "# Create arrays of zeroes corresponding to the lengths (rows & columns) of the loaded in contact matrices\n", 224 | "## The code here calculates both the rowsums and columnsums; they should be the same distribution, so you only need 1 of the 2\n", 225 | "rowSum = np.zeros(len(regionMat))\n", 226 | "columnSum = np.zeros(len(regionMat[0]))\n", 227 | "\n", 228 | "\n", 229 | "# Iterate through the matrix bin-by-bin, summing up the balanced contact bin values for each row & column\n", 230 | "## Only need to iterate through one half of the contact 
matrix (i.e., the half above the diagonal or the half below it) to capture it all due to reflected symmetry\n", 231 | "i = 0\n", 232 | "while i < len(regionMat):\n", 233 | " j = i # Ensures that we keep our iteration to one side of the diagonal instead of spanning the whole matrix \n", 234 | " while j < len(regionMat[0]):\n", 235 | " val = regionMat[i][j] # Extracts the balanced value of the contact bin\n", 236 | " if np.isnan(val):\n", 237 | " val = 0 # NaN values can cause calculation errors, so this sets them to 0\n", 238 | " rowSum[i] += val # Adds the balanced value to the corresponding rowsum\n", 239 | " columnSum[j] += val # Adds the balanced value to the corresponding columnsum\n", 240 | " \n", 241 | " if j != i: # If j = i, then we're on the diagonal and there's no need to add in the reflected twin of the contact bin\n", 242 | " rowSum[j] += val # Adds the balanced value of the reflected twin to the corresponding rowsum\n", 243 | " columnSum[i] += val # Adds the balanced value of the reflected twin to the corresponding columnsum\n", 244 | " j += 1\n", 245 | " i += 1\n", 246 | "\n", 247 | "# Plotting the rowsums and columnsums yields the same distribution, so this just runs ahead with the rowsums\n", 248 | "## This filters out the rowsums that are 0 (i.e., those that correspond to empty stripes, either due to probe coverage or hard-to-map sites)\n", 249 | "rowSumFilt = []\n", 250 | "for i in rowSum:\n", 251 | " if i > 0:\n", 252 | " rowSumFilt.append(i)\n", 253 | "\n", 254 | "\n", 255 | "# Plot a histogram of the rowsums\n", 256 | "## Change the number of histogram bins as desired (100 used here)\n", 257 | "## Change the range of visualization to capture and visually center the entire distribution\n", 258 | "plt.hist(rowSumFilt,100,range=[3.75,8.25],align='mid',color='blue') # Klf1\n", 259 | "# plt.hist(rowSumFilt,100,range=[1.1,3.4],align='mid',color='blue') # Ppm1g\n", 260 | "# plt.hist(rowSumFilt,100,range=[0.3,1.0],align='mid',color='blue') # Fbn2\n", 
261 | "# plt.hist(rowSumFilt,100,range=[0.5,1.3],align='mid',color='blue') # Sox2\n", 262 | "\n", 263 | "# plt.savefig(saveDir + fileName, dpi=1200)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "### Microcompartment-containing bins balancing calculations" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 37, 276 | "metadata": {}, 277 | "outputs": [ 278 | { 279 | "data": { 280 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAKvklEQVR4nO3d0avk91nH8c/T7EqLCeRiDxqSHNcLKcRCm7DESKDUUCStYm96kYIRRFkqFlIQRL0w5B8oogi62GDF2lJoIyU0asCGUrCpSUxr4rYSSsXQwBJLmwRFSXi8mElY192d39k9c3afPa8XHHbOzu/MPPnulze//c3Mpro7AMzztis9AACXRsABhhJwgKEEHGAoAQcY6sg2HvTYsWN9/PjxbTw0wDXp6aeffrm7d/byM1sJ+PHjx/PUU09t46EBrklV9W97/RmXUACGEnCAoQQcYCgBBxhKwAGGEnCAoRa9jbCqvpvk1SRvJHm9u09scygANtvL+8B/rrtf3tokAOyJSygAQy09A+8kf1dVneRPu/vUuQdU1ckkJ5Nkd3d3/ybk0Kp66K3b3Q9e8jFwrVp6Bn53d9+R5ANJfrOq3nvuAd19qrtPdPeJnZ09fZwfgEuwKODd/b31r2eSPJLkzm0OBcBmGwNeVT9aVTe8eTvJzyd5btuDAXBxS66B/1iSR6rqzeP/qrv/ZqtTAbDRxoB393eSvPsAZgFgD7yNEGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEWB7yqrquqf6qqR7c5EADL7OUM/IEkp7c1CAB7syjgVXVLkl9I8mfbHQeApY4sPO4Pkvx2khsudEBVnUxyMkl2d3cvezA4W9VDl3x894P7PQ5cFTaegVfVLyY5091PX+y47j7V3Se6+8TOzs6+DQjA+S25hHJ3kl+qqu8m+WySe6rqL7c6FQAbbQx4d/9ud9/S3ceT3Jfk77v7l7c+GQAX5X3gAEMtfREzSdLdTyR5YiuTALAnzsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYamPAq+rtVfX1qvpGVT1fVQ8dxGAAXNyRBcf8d5J7uvu1qjqa5KtV9Vh3f23LswFwERsD3t2d5LX1t0fXX73NoQDYbNE18Kq6rqqeTXImyePd/eRWpwJgoyWXUNLdbyR5T1XdmOSRqnpXdz939jFVdTLJySTZ3d3d7znhqnL2S0HdD17BSTjM9vQulO7+QZInktx7nvtOdfeJ7j6xs7OzP9MBcEFL3oWysz7zTlW9I8n7
k3xry3MBsMGSSyg3JflUVV2XVfA/192PbncsADZZ8i6Ubya5/QBmAWAPfBITYCgBBxhKwAGGEnCAoQQcYCgBBxhKwAGGEnCAoQQcYCgBBxhKwAGGEnCAoQQcYCgBBxhKwAGGEnCAoQQcYCgBBxhKwAGGEnCAoQQcYCgBBxhKwAGGEnCAoQQcYCgBBxhKwAGGEnCAoQQcYCgBBxhKwAGGEnCAoQQcYKiNAa+qW6vqy1V1uqqer6oHDmIwAC7uyIJjXk/yW939TFXdkOTpqnq8u/9ly7MBcBEbz8C7+6XufmZ9+9Ukp5PcvO3BALi4JWfgb6mq40luT/Lkee47meRkkuzu7u7HbLDvqh5663b3gxuPOduFjocrZfGLmFV1fZLPJ/l4d79y7v3dfaq7T3T3iZ2dnf2cEYDzWBTwqjqaVbw/3d1f2O5IACyx5F0oleSTSU539ye2PxIASyw5A787yf1J7qmqZ9dfH9zyXABssPFFzO7+apI6gFkA2AOfxAQYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGAoAQcYSsABhhJwgKEEHGCojQGvqoer6kxVPXcQAwGwzJIz8D9Pcu+W5wBgjzYGvLu/kuT7BzALAHtwZL8eqKpOJjmZJLu7u/v1sBwyVQ8d2GPu13Od/TjdD+7pmHNnuNDPw/ns24uY3X2qu09094mdnZ39elgALsC7UACGEnCAoZa8jfAzSf4hyTur6sWq+rXtjwXAJhtfxOzujxzEIADsjUsoAEMJOMBQAg4wlIADDCXgAEMJOMBQAg4wlIADDCXgAEMJOMBQAg4wlIADDCXgAEMJOMBQAg4wlIADDCXgAEMJOMBQAg4wlIADDCXgAEMJOMBQAg4wlIADDCXgAEMJOMBQAg4wlIADDCXgAEMJOMBQAg4wlIADDCXgAEMtCnhV3VtV366qF6rqd7Y9FACbbQx4VV2X5I+TfCDJbUk+UlW3bXswAC5uyRn4nUle6O7vdPf/JPlskg9tdywANqnuvvgBVR9Ocm93//r6+/uT/Ex3f+yc404mObn+9p1Jvp3kWJKX93vogazDinVYsQ4r1mHlzXX4ie7e2csPHllwTJ3n9/5f9bv7VJJT/+cHq57q7hN7GehaZB1WrMOKdVixDiuXsw5LLqG8mOTWs76/Jcn3LuXJANg/SwL+j0l+qqp+sqp+JMl9Sb643bEA2GTjJZTufr2qPpbkb5Ncl+Th7n5+4eOf2nzIoWAdVqzDinVYsQ4rl7wOG1/EBODq5JOYAEMJOMBQlx3wqnq4qs5U1XMXuL+q6g/XH8P/ZlXdcbnPeTVasA7vq6ofVtWz66/fP+gZD0JV3VpVX66q01X1fFU9cJ5jrvk9sXAdrvk9UVVvr6qvV9U31uvw0HmOOQz7Yck67H0/dPdlfSV5b5I7kjx3gfs/mOSxrN5PfleSJy/3Oa/GrwXr8L4kj17pOQ9gHW5Kcsf69g1J/jXJbYdtTyxch2t+T6z/jK9f3z6a5Mkkdx3C/bBkHfa8Hy77DLy7v5Lk+xc55ENJ/qJXvpbkxqq66XKf92qzYB0Ohe5+qbufWd9+NcnpJDefc9g1vycWrsM1b/1n/Nr626Prr3PfOXEY9sOSddizg7gGfnOSfz/r+xdzCDfy2s+u/wr1WFX99JUeZtuq6niS27M62zjbodoTF1mH5BDsiaq6rqqeTXImyePdfSj3w4J1SPa4Hw4i4Is+in8IPJPVv3Xw7iR/lOSvr+w421VV1yf5fJKPd/cr5959nh+5JvfEhnU4FHuiu9/o7vdk9SnuO6vqXecccij2w4J12PN+OIiA+yh+ku5+5c2/QnX3l5IcrapjV3israiqo1lF69Pd/YXzHHIo9sSmdThMeyJJuvsHSZ5Icu85dx2K/fCm
C63DpeyHgwj4F5P8yvqV5ruS/LC7XzqA572qVNWPV1Wtb9+Z1dr/x5Wdav+t/xs/meR0d3/iAodd83tiyTochj1RVTtVdeP69juSvD/Jt8457DDsh43rcCn7Ycm/RrhpsM9k9erpsap6McmDWV2gT3f/SZIvZfUq8wtJ/jPJr17uc16NFqzDh5P8RlW9nuS/ktzX65eerzF3J7k/yT+vr/clye8l2U0O1Z5Ysg6HYU/clORTtfofw7wtyee6+9Gq+mhyqPbDknXY837wUXqAoXwSE2AoAQcYSsABhhJwgKEEHGAoAQcYSsABhvpfsvur3aL9XpsAAAAASUVORK5CYII=\n", 281 | "text/plain": [ 282 | "
" 283 | ] 284 | }, 285 | "metadata": { 286 | "needs_background": "light" 287 | }, 288 | "output_type": "display_data" 289 | } 290 | ], 291 | "source": [ 292 | "# Same approach as above, though now limited to only those contact bins containing microcompartment anchors\n", 293 | "# Usage: Plot different datasets & regions by updating the filename, \"region = regionsRCMC[x]\", and \"anchorReg = x\" attributes \n", 294 | "\n", 295 | "saveDir = f'DIRECTORY_PATH'\n", 296 | "fileName = f'FILE_NAME.svg' # swap out .svg for the desired output file format\n", 297 | "\n", 298 | "# Specify the data resolution at which balanced rowsums will be calculated\n", 299 | "resolution = 250\n", 300 | "\n", 301 | "# Load in the desired dataset\n", 302 | "clr_Data = cooler.Cooler('FILE_PATH_TO_MCOOL::resolutions/'+str(resolution))\n", 303 | "\n", 304 | "# List the Capture regions for region-by-region calculations\n", 305 | "## For an apples-to-apples comparison, balanced rowsums are calculated at the RCMC regions for both the RCMC dataset as well as the genome-wide Micro-C (Hsieh 2020) and Hi-C (Bonev 2017) datasets\n", 306 | "regionsRCMC = ['chr3:33804149-35704149','chr5:31257344-32382344','chr6:122451959-122876959','chr8:84846629-85856629','chr18:58032072-59034072']\n", 307 | "\n", 308 | "# Specify which specific region you would like to calculate rowsums for\n", 309 | "region = regionsRCMC[1]\n", 310 | "\n", 311 | "# Load in the balanced contact matrix for the specified region\n", 312 | "regionMat = clr_Data.matrix(balance=True).fetch(region)\n", 313 | "\n", 314 | "# Specify the region of interest (either Klf1 or Ppm1g) for which to load in microcompartment anchors\n", 315 | "anchorReg = 'Ppm1g'\n", 316 | "if anchorReg == 'Klf1':\n", 317 | " anchors = anchorListKlf1\n", 318 | " offset = 84846629 # This offset is the start coordinate of the Klf1 region, allowing genomic coordinate conversion to bins\n", 319 | "if anchorReg == 'Ppm1g':\n", 320 | " anchors = anchorListPpm1g\n", 321 | " 
offset = 31257344 # This offset is the start coordinate of the Ppm1g region, allowing genomic coordinate conversion to bins\n", 322 | "\n", 323 | "# Create a list of bins which contain microcompartment anchors\n", 324 | "bins = []\n", 325 | "for i in anchors:\n", 326 | " bins.append((i - offset) // resolution)\n", 327 | "\n", 328 | "# Create arrays of zeroes corresponding to the lengths (rows & columns) of the loaded in contact matrices\n", 329 | "## The code here calculates both the rowsums and columnsums; they should be the same distribution, so you only need 1 of the 2\n", 330 | "rowSum = np.zeros(len(regionMat))\n", 331 | "columnSum = np.zeros(len(regionMat[0]))\n", 332 | "\n", 333 | "\n", 334 | "# Iterate through the matrix bin-by-bin, summing up the balanced contact bin values for each row & column that contains a microcompartment anchor\n", 335 | "## Only need to iterate through one half of the contact matrix (i.e., the half above the diagonal or the half below it) to capture it all due to reflected symmetry\n", 336 | "i = 0\n", 337 | "while i < len(regionMat):\n", 338 | " j = i # Ensures that we keep our iteration to one side of the diagonal instead of spanning the whole matrix \n", 339 | " while j < len(regionMat[0]):\n", 340 | " if((i in bins) or (j in bins)): # The only difference from the whole-region calculation: this if statement ensures that rowsums and columnsums are only calculated for bins containing a microcompartment anchor \n", 341 | " val = regionMat[i][j] # Extracts the balanced value of the contact bin\n", 342 | " if np.isnan(val):\n", 343 | " val = 0 # NaN values can cause calculation errors, so this sets them to 0\n", 344 | " rowSum[i] += val # Adds the balanced value to the corresponding rowsum\n", 345 | " columnSum[j] += val # Adds the balanced value to the corresponding columnsum\n", 346 | "\n", 347 | " if j != i: # If j = i, then we're on the diagonal & there's no need to add in the reflected twin of the contact 
bin\n", 348 | " rowSum[j] += val # Adds the balanced value of the reflected twin to the corresponding rowsum\n", 349 | " columnSum[i] += val # Adds the balanced value of the reflected twin to the corresponding columnsum\n", 350 | " j += 1\n", 351 | " i += 1\n", 352 | "\n", 353 | "\n", 354 | "# Plotting the rowsums and columnsums yields the same distribution, so this just runs ahead with the rowsums\n", 355 | "## This selects for bins containing microcompartment anchors and filters out the rowsums that are 0\n", 356 | "## Note: Filtering for non-zero bins likely changes nothing since microcompartment anchor-containing bins are guaranteed not empty. Keeping it nonetheless for homogeneity with the whole-region calculation.\n", 357 | "rowSumFilt = []\n", 358 | "for x in bins:\n", 359 | " microcompRowSum = rowSum[x] # Extract the rowsum value of a microcompartment anchor-containing bin\n", 360 | " if microcompRowSum > 0:\n", 361 | " rowSumFilt.append(microcompRowSum) # Add the rowsum to the list of microcompartment rowsums if it's greater than 0\n", 362 | "\n", 363 | "\n", 364 | " \n", 365 | "# Plot a histogram of the rowsums\n", 366 | "## Change the number of histogram bins as desired (100 used here)\n", 367 | "## Change the range of visualization to capture and visually center the entire distribution\n", 368 | "# plt.hist(rowSumFilt,100,range=[3.75,8.25],align='mid',color='navy') # Klf1\n", 369 | "plt.hist(rowSumFilt,100,range=[1.1,3.4],align='mid',color='navy') # Ppm1g\n", 370 | "\n", 371 | "# plt.savefig(saveDir + fileName, dpi=1200)" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [] 380 | } 381 | ], 382 | "metadata": { 383 | "kernelspec": { 384 | "display_name": "Python [conda env:coolToolsEnv]", 385 | "language": "python", 386 | "name": "conda-env-coolToolsEnv-py" 387 | }, 388 | "language_info": { 389 | "codemirror_mode": { 390 | "name": "ipython", 391 
| "version": 3 392 | }, 393 | "file_extension": ".py", 394 | "mimetype": "text/x-python", 395 | "name": "python", 396 | "nbconvert_exporter": "python", 397 | "pygments_lexer": "ipython3", 398 | "version": "3.8.12" 399 | } 400 | }, 401 | "nbformat": 4, 402 | "nbformat_minor": 4 403 | } 404 | -------------------------------------------------------------------------------- /CalculatingFilledBinFractionByDistance.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Counting fraction of filled bins across contact distances" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# import core packages\n", 17 | "import warnings\n", 18 | "warnings.filterwarnings(\"ignore\")\n", 19 | "from itertools import combinations\n", 20 | "\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "from matplotlib import colors\n", 23 | "%matplotlib inline\n", 24 | "plt.style.use('seaborn-poster')\n", 25 | "import numpy as np\n", 26 | "import pandas as pd\n", 27 | "\n", 28 | "# import open2c libraries\n", 29 | "import bioframe\n", 30 | "import cooler\n", 31 | "import cooltools\n", 32 | "\n", 33 | "from packaging import version\n", 34 | "if version.parse(cooltools.__version__) < version.parse('0.5.0'):\n", 35 | " raise AssertionError(\"tutorials rely on cooltools version 0.5.0 or higher,\"+\n", 36 | " \"please check your cooltools version and update to the latest\")\n", 37 | "\n", 38 | " \n", 39 | "resolution = 100 # input the desired data resolution for analysis\n", 40 | "distances = [1000,2500,5000,10000,25000,50000,100000,250000,500000,1000000] # input the desired contact distance diagonals in bp\n", 41 | "\n", 42 | "# Load in each of the datasets at the specified resolution\n", 43 | "clr_RCMC = cooler.Cooler('FILE_PATH_TO_MCOOL::resolutions/' + str(resolution))\n", 44 | 
"clr_TMCC = cooler.Cooler('FILE_PATH_TO_MCOOL::resolutions/' + str(resolution))\n", 45 | "clr_Hsieh = cooler.Cooler('FILE_PATH_TO_MCOOL::resolutions/' + str(resolution))\n", 46 | "clr_Bonev = cooler.Cooler('FILE_PATH_TO_MCOOL::resolutions/' + str(resolution))\n", 47 | "\n", 48 | "# List the Capture regions for RCMC & TMCC\n", 49 | "## To ensure an apples-to-apples comparison, the genome-wide Micro-C (Hsieh 2020) and Hi-C (Bonev 2017) datasets will be analyzed for the same regions as RCMC instead of across the entire genome\n", 50 | "regionsRCMC = ['chr3:33804149-35704149','chr5:31257344-32382344','chr6:122451959-122876959','chr8:84846629-85856629','chr18:58032072-59034072']\n", 51 | "regionsTMCC = ['chr3:34365200-35640000','chr6:122606805-122856450']\n", 52 | "\n", 53 | "\n", 54 | "# Variables for storing fill fraction data\n", 55 | "countsAcrossDistances = [] # Stores the fraction of all non-empty bins for each dataset (across all regions) for each of the specified contact distances\n", 56 | "nonZeroFracsByRegionAcrossDistances = [] # Stores the fraction of all non-empty bins for each dataset (region-separated) for each of the specified contact distances\n", 57 | "\n", 58 | "for distance in distances:\n", 59 | " compiledCounts = [] # Stores the total number of bins and number of non-empty bins for each region of each dataset at the given contact distance\n", 60 | "\n", 61 | " coolers = [clr_RCMC,clr_TMCC,clr_Hsieh,clr_Bonev]\n", 62 | " coolerIt = 0 # An iterating variable for going through each of the datasets\n", 63 | " while coolerIt < len(coolers):\n", 64 | " if coolerIt == 1:\n", 65 | " regions = regionsTMCC # Use the TMCC regions for calculation for the TMCC dataset\n", 66 | " else:\n", 67 | " regions = regionsRCMC # Use the RCMC regions for calculation for the RCMC, Hsieh, & Bonev datasets\n", 68 | "\n", 69 | " totBins = [] # Counts the number of total contact bins for at the given contact distance\n", 70 | " nonZeroBins = [] # Counts the number of contact 
bins that have at least one read in them at the given contact distance\n", 71 | "\n", 72 | " for i in regions: \n", 73 | " # Iterating region by region for the given dataset, load in the contact matrix\n", 74 | " # Keep balancing off to count raw read counts\n", 75 | " regionMat = coolers[coolerIt].matrix(balance=False).fetch(i)\n", 76 | " \n", 77 | " bins = len(regionMat) # Get the number of genomic bins across the matrix\n", 78 | " offset = distance // resolution - 1 # Calculate how many fewer bins the diagonal at the given contact distance has than the main diagonal\n", 79 | " binsAtDiag = bins - offset # Calculate the total number of contact bins at the contact distance\n", 80 | " totBins.append(binsAtDiag)\n", 81 | "\n", 82 | " # Set the stage for iterating across the contact diagonal at the given distance to count the number of non-empty bins\n", 83 | " x = offset\n", 84 | " y = 0\n", 85 | " nonZeroCount = 0\n", 86 | " \n", 87 | " # Iterate through the contact diagonal tallying non-empty bins until the end of the diagonal\n", 88 | " while x < len(regionMat):\n", 89 | " val = regionMat[y][x]\n", 90 | " if val > 0:\n", 91 | " nonZeroCount += 1\n", 92 | " x += 1\n", 93 | " y += 1\n", 94 | " nonZeroBins.append(nonZeroCount)\n", 95 | "\n", 96 | " binCounts = [totBins,nonZeroBins] # Combine the total bin & non-empty bin counts into a two-element list\n", 97 | " compiledCounts.append(binCounts) # Add the combined list for the given dataset to compiledCounts\n", 98 | "\n", 99 | " coolerIt += 1 # Onwards to the next cooler in the dataset list\n", 100 | "\n", 101 | " # print(compiledCounts) # Sanity check that the code is working \n", 102 | "\n", 103 | " totNonZeroFrac = [] # Stores the across-regions fraction of non-empty bins for each dataset at a given contact distance\n", 104 | " nonZeroFracsByRegion = [] # The region-separated version of totNonZeroFrac\n", 105 | "\n", 106 | " # Iterate through compiledCounts to extract filled bin fractions\n", 107 | " for i in compiledCounts:\n", 108 | " 
sumPossible = sum(i[0]) # Sums the total number of bins across all regions for a dataset\n", 109 | " sumNonZero = sum(i[1]) # Sums the total number of non-empty bins across all regions for a dataset\n", 110 | " totNonZeroFrac.append(sumNonZero / sumPossible) # Adds the non-empty bin fraction to totNonZeroFrac\n", 111 | "\n", 112 | " regionFracs = [] # An array to hold the non-empty bin fraction by region for a given dataset\n", 113 | " for j in range(len(i[0])): # Iterate across the regions\n", 114 | " fracNonZero = i[1][j] / i[0][j] # Calculate non-empty bin fraction for the given region\n", 115 | " regionFracs.append(fracNonZero) # Add the calculated fraction to regionFracs\n", 116 | " nonZeroFracsByRegion.append(regionFracs) # Add the array of non-empty bin fractions by region to nonZeroFracsByRegion\n", 117 | "\n", 118 | " # print(totNonZeroFrac) # Double-check that totNonZeroFrac is calculating as expected\n", 119 | " # print(nonZeroFracsByRegion) # Double-check that nonZeroFracsByRegion is calculating as expected\n", 120 | " \n", 121 | " countsAcrossDistances.append(totNonZeroFrac) # Add totNonZeroFrac, which contains the non-zero bin fraction for each dataset at the given contact distance, to countsAcrossDistances\n", 122 | " nonZeroFracsByRegionAcrossDistances.append(nonZeroFracsByRegion) # Add nonZeroFracsByRegion, which contains the region-separated non-zero bin fraction for each dataset at the given contact distance, to nonZeroFracsByRegionAcrossDistances\n", 123 | " \n", 124 | "countsAcrossDistances # Print the output of countsAcrossDistances to confirm that it produces a logical result" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 3, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "# Separate the nested array of countsAcrossDistance by dataset for ease of calculation & plotting\n", 134 | "RCMCcounts = []\n", 135 | "TMCCcounts = []\n", 136 | "Hsiehcounts = []\n", 137 | "Bonevcounts = []\n", 138 | "\n", 
139 | "for i in countsAcrossDistances:\n", 140 | " RCMCcounts.append(i[0])\n", 141 | " TMCCcounts.append(i[1])\n", 142 | " Hsiehcounts.append(i[2])\n", 143 | " Bonevcounts.append(i[3])" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 6, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# Separate out the nested array of nonZeroFracsByRegionAcrossDistances to extract the RCMC & TMCC region counts for ease of calculation & plotting\n", 153 | "RCMCRegionCounts = []\n", 154 | "TMCCRegionCounts = []\n", 155 | "\n", 156 | "for distanceSet in nonZeroFracsByRegionAcrossDistances:\n", 157 | " RCMCRegionCounts.append(distanceSet[0])\n", 158 | " TMCCRegionCounts.append(distanceSet[1])" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 7, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "# Separate out the nested array of region counts by individual region for RCMC & TMCC for ease of plotting\n", 168 | "RCMC_Sox2 = []\n", 169 | "RCMC_Ppm1g = []\n", 170 | "RCMC_Nanog = []\n", 171 | "RCMC_Klf1 = []\n", 172 | "RCMC_Fbn2 = []\n", 173 | "TMCC_Sox2 = []\n", 174 | "TMCC_Nanog = []\n", 175 | "\n", 176 | "for i in RCMCRegionCounts:\n", 177 | " RCMC_Sox2.append(i[0])\n", 178 | " RCMC_Ppm1g.append(i[1])\n", 179 | " RCMC_Nanog.append(i[2])\n", 180 | " RCMC_Klf1.append(i[3])\n", 181 | " RCMC_Fbn2.append(i[4])\n", 182 | " \n", 183 | "for j in TMCCRegionCounts:\n", 184 | " TMCC_Sox2.append(j[0])\n", 185 | " TMCC_Nanog.append(j[1])" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 21, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "image/png": "\n", 196 | "text/plain": [ 197 | "
" 198 | ] 199 | }, 200 | "metadata": { 201 | "needs_background": "light" 202 | }, 203 | "output_type": "display_data" 204 | } 205 | ], 206 | "source": [ 207 | "# Plot the fraction of filled bins across contact distances both by region and averaged across regions\n", 208 | "\n", 209 | "import matplotlib.pyplot as plt\n", 210 | "\n", 211 | "saveDir = f'DIRECTORY_PATH'\n", 212 | "fileName = f'FILE_NAME.svg' # swap out .svg for the desired output file format\n", 213 | "\n", 214 | "plt.scatter(distances, RCMCcounts, marker = 'D', c = 'blue', label='RCMC')\n", 215 | "plt.scatter(distances, RCMC_Sox2, marker = 'x', c = 'green', label='RCMC_Sox2')\n", 216 | "plt.scatter(distances, RCMC_Ppm1g, marker = 'x', c = 'skyblue', label='RCMC_Ppm1g')\n", 217 | "plt.scatter(distances, RCMC_Nanog, marker = 'x', c = 'teal', label='RCMC_Nanog')\n", 218 | "plt.scatter(distances, RCMC_Klf1, marker = 'x', c = 'black', label='RCMC_Klf1')\n", 219 | "plt.scatter(distances, RCMC_Fbn2, marker = 'x', c = 'cyan', label='RCMC_Fbn2')\n", 220 | "plt.scatter(distances, TMCCcounts, marker = 's', c = 'red', label='TMCC')\n", 221 | "plt.scatter(distances, TMCC_Sox2, marker = '+', c = 'purple', label='TMCC_Sox2')\n", 222 | "plt.scatter(distances, TMCC_Nanog, marker = '+', c = 'pink', label='TMCC_Nanog')\n", 223 | "plt.scatter(distances, Hsiehcounts, marker = 'v', c = 'orange', label='Hsieh')\n", 224 | "plt.scatter(distances, Bonevcounts, marker = 'o', c = 'gray', label='Bonev')\n", 225 | "plt.xscale(\"log\")\n", 226 | "plt.xlabel(\"Contact Distance (bp) at 100 bp Resolution\")\n", 227 | "plt.ylabel(\"Fraction of Nonzero Bins\")\n", 228 | "plt.legend()\n", 229 | "\n", 230 | "# plt.savefig(saveDir + fileName, dpi=1200)\n", 231 | "plt.show()" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [] 240 | } 241 | ], 242 | "metadata": { 243 | "kernelspec": { 244 | "display_name": "Python [conda env:coolToolsEnv]", 245 | 
"language": "python", 246 | "name": "conda-env-coolToolsEnv-py" 247 | }, 248 | "language_info": { 249 | "codemirror_mode": { 250 | "name": "ipython", 251 | "version": 3 252 | }, 253 | "file_extension": ".py", 254 | "mimetype": "text/x-python", 255 | "name": "python", 256 | "nbconvert_exporter": "python", 257 | "pygments_lexer": "ipython3", 258 | "version": "3.8.12" 259 | } 260 | }, 261 | "nbformat": 4, 262 | "nbformat_minor": 5 263 | } 264 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Hansen lab at MIT 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /LoopStrengthRCMC.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "abd813be", 6 | "metadata": {}, 7 | "source": [ 8 | "# Loop strength calculation" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "dfcad04e", 14 | "metadata": {}, 15 | "source": [ 16 | "## Imports" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "73bb3370", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "# import standard python libraries\n", 27 | "import matplotlib as mpl\n", 28 | "%matplotlib inline\n", 29 | "mpl.rcParams['figure.dpi'] = 96\n", 30 | "import numpy as np\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "import pandas as pd\n", 33 | "import seaborn as sns\n", 34 | "\n", 35 | "# import libraries for biological data analysis\n", 36 | "from coolpuppy import coolpup\n", 37 | "from plotpuppy import plotpup\n", 38 | "import cooler\n", 39 | "import bioframe\n", 40 | "import cooltools\n", 41 | "from cooltools import expected_cis\n", 42 | "from cooltools.lib import plotting\n", 43 | "\n", 44 | "import bbi" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "id": "e71fbaa6", 50 | "metadata": {}, 51 | "source": [ 52 | "## Inputs" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "id": "70c700df", 58 | "metadata": {}, 59 | "source": [ 60 | "First, get the loops and mcools to analyse, set up variables, etc" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "id": "068a8eac", 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "#mcool resolution to read\n", 71 | "resolution = 250\n", 72 | "#List of mcool locations as strings\n", 73 | "conditions = [\"mcoollocation1\", \"mcoollocation2\", \"mcoollocation3\"]\n", 74 | "#List of loop types as strings\n", 75 | "loopTypesNames = 
[\"loop\", \"type\", \"names\"]\n", 76 | "#List of loop file locations (bedpe)\n", 77 | "loopFiles = [\"looplocation1\", \"looplocation2\", \"looplocation3\"]\n", 78 | "\n", 79 | "#Specify the RCMC regions of the mcools to look at (format: chromosome (string), start (number), end (number), name of region (string))\n", 80 | "regions = pd.DataFrame([['chrA',1,100,'regionname1'],['chrB',1,100,'regionname2'],['chrC',1,100,'regionname3']],\n", 81 | " columns=['chrom', 'start', 'end', 'name'])\n", 82 | "#Cis expected file locations from cooltools - .tsv file - one for each mcool\n", 83 | "expectedFiles = [\"expectedlocation1\", \"expectedlocation2\", \"expectedlocation3\"]\n", 84 | "#Set save directory\n", 85 | "saveDir = '/a/directory/on/your/system/'\n", 86 | "\n", 87 | "#Set the size of the area flanking the dot\n", 88 | "flankDist = 10000\n", 89 | "#Don't set this to be even... This is the size of the area to measure around the dot \n", 90 | "#(and by extension the size of the boxes at the edges of the region too)\n", 91 | "#For this reason, it needs to be odd to have integer box sizes on each side.\n", 92 | "dotWindow = 5\n" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "id": "e7cf57de", 98 | "metadata": {}, 99 | "source": [ 100 | "Run the imports" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "id": "d76f2cee", 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "#######Don't change this section#######\n", 111 | "#Create an empty list to store the imported loop locations\n", 112 | "loopTypes = []\n", 113 | "#List of column names to use for imported loops (this is constant - do not change)\n", 114 | "colNames = ['chrom1', 'start1', 'end1', 'chrom2', 'start2', 'end2']\n", 115 | "#Read in files, put them in loopTypes\n", 116 | "for file in loopFiles:\n", 117 | " temploops = pd.read_csv(file, sep='\\t', names=colNames, header=None)\n", 118 | " loopTypes.append(temploops)" 119 | ] 120 | }, 121 | { 122 | 
"cell_type": "markdown", 123 | "id": "3ba156f7", 124 | "metadata": {}, 125 | "source": [ 126 | "## Enrichment calculation" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "id": "e4a3696a", 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "## Enrichment calculation\n", 137 | "\n", 138 | "#Viraat's new calculation\n", 139 | "#Modified 2022/10/04 by Miles to try to avoid NaN values and correct an issue with the background sum, \n", 140 | "#and generally make the code a little more streamlined\n", 141 | "def enrichmentCalc(mtx, dotWindow):\n", 142 | " #Dimension of array side (should be square)\n", 143 | " sideLength = len(mtx)\n", 144 | " #Middle of side length\n", 145 | " midPoint = (sideLength - 1) // 2\n", 146 | " #Half size of box around centre pixel (one pixel smaller if even-sized dot window - don't do this)\n", 147 | " buffer = (dotWindow - 1) // 2\n", 148 | " \n", 149 | " #Get sum of pixels around dot\n", 150 | " dotSum = np.nansum(mtx[midPoint-buffer:midPoint+buffer+1, midPoint-buffer:midPoint+buffer+1])\n", 151 | " \n", 152 | " #Subset the top-left and bottom-right corner boxes of the matrix and sum each one, ignoring NaN values\n", 153 | " backgroundSum1 = np.nansum(mtx[0:dotWindow, 0:dotWindow])\n", 154 | " backgroundSum2 = np.nansum(mtx[sideLength-dotWindow:sideLength, sideLength-dotWindow:sideLength])\n", 155 | " \n", 156 | " #Calculate enrichment (NB this assumes all boxes are the same size.\n", 157 | " #If you set an even dotWindow value, they won't be)\n", 158 | " enrichment = dotSum / ((backgroundSum1 + backgroundSum2)/2)\n", 159 | " \n", 160 | " return enrichment" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "id": "5e1e9fad", 166 | "metadata": {}, 167 | "source": [ 168 | "# Get the strengths" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "id": "666fd62a", 174 | "metadata": {}, 175 | "source": [ 176 | "Function for getting strength of each loop (uses the pileup function from cooltools to do 
observed/expected)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "id": "afd62abd", 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "def loopStrengthGet(loop, flankDist, clr, regions, expected, dotWindow):\n", 187 | " loopf = loop.to_frame().T\n", 188 | " loopf = loopf.astype({'start1':'int64','end1':'int64','start2':'int64','end2':'int64'})\n", 189 | " stack = cooltools.pileup(clr, loopf, view_df=regions, expected_df=expected, flank=flankDist)\n", 190 | " mtx = np.nanmean(stack, axis=2)\n", 191 | " enrichment = enrichmentCalc(mtx, dotWindow)\n", 192 | " \n", 193 | " return enrichment" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "id": "01ca3113", 200 | "metadata": { 201 | "scrolled": true 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "#Zip the names and loop info into a dictionary for easier referencing\n", 206 | "loopDict = dict(zip(loopTypesNames, loopTypes))\n", 207 | "#Stop the code if you used an even value for dotWindow, since it won't work\n", 208 | "if dotWindow % 2 == 0:\n", 209 | " print(\"You need to use an odd number for dotWindow in the inputs section\")\n", 210 | "else:\n", 211 | " #Loop through the conditions\n", 212 | " for i, condition in enumerate(conditions):\n", 213 | " #Get the cooler data\n", 214 | " clr = cooler.Cooler(condition+'::/resolutions/'+str(resolution))\n", 215 | " #Get the corresponding expected data\n", 216 | " expected = pd.read_csv(expectedFiles[i], sep='\\t')\n", 217 | "\n", 218 | " #Loop through loopDict\n", 219 | " for loopsName in loopDict:\n", 220 | " #Read out the loops\n", 221 | " loops = loopDict[loopsName]\n", 222 | " #For each row (ie loop), do pileup, get enrichment, write to new column [condition]_strength\n", 223 | " loops[f'{condition}_strength'] = loops.apply(loopStrengthGet, axis = 1, flankDist = flankDist, clr = clr, regions = regions, expected = expected, dotWindow = dotWindow)\n", 224 | "\n", 225 
| " loopDict[loopsName] = loops\n", 226 | " " 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "id": "bdc3359f", 232 | "metadata": {}, 233 | "source": [ 234 | "## Output files - one for each loop type" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "id": "50c922d5", 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "for name, df in loopDict.items():\n", 245 | " df.to_csv(saveDir + name + '.bedpe', sep = '\\t', index = False, header = True)" 246 | ] 247 | } 248 | ], 249 | "metadata": { 250 | "kernelspec": { 251 | "display_name": "Python [conda env:coolpuppy]", 252 | "language": "python", 253 | "name": "conda-env-coolpuppy-py" 254 | }, 255 | "language_info": { 256 | "codemirror_mode": { 257 | "name": "ipython", 258 | "version": 3 259 | }, 260 | "file_extension": ".py", 261 | "mimetype": "text/x-python", 262 | "name": "python", 263 | "nbconvert_exporter": "python", 264 | "pygments_lexer": "ipython3", 265 | "version": "3.7.12" 266 | } 267 | }, 268 | "nbformat": 4, 269 | "nbformat_minor": 5 270 | } 271 | -------------------------------------------------------------------------------- /MicrocompartmentLoops_PlusMin1kb.bedpe: -------------------------------------------------------------------------------- 1 | chr5 31273000 31275000 chr5 31336200 31338200 2 | chr5 31273000 31275000 chr5 31349200 31351200 3 | chr5 31273000 31275000 chr5 31358700 31360700 4 | chr5 31273000 31275000 chr5 31376700 31378700 5 | chr5 31273000 31275000 chr5 31397100 31399100 6 | chr5 31273000 31275000 chr5 31408000 31410000 7 | chr5 31273000 31275000 chr5 31447500 31449500 8 | chr5 31273000 31275000 chr5 31452200 31454200 9 | chr5 31273000 31275000 chr5 31608600 31610600 10 | chr5 31273000 31275000 chr5 31645300 31647300 11 | chr5 31273000 31275000 chr5 31651000 31653000 12 | chr5 31273000 31275000 chr5 31787500 31789500 13 | chr5 31273000 31275000 chr5 31791000 31793000 14 | chr5 31273000 31275000 chr5 31854000 31856000 15 | 
chr5 31273000 31275000 chr5 32015400 32017400 16 | chr5 31273000 31275000 chr5 32291000 32293000 17 | chr5 31295500 31297500 chr5 31336200 31338200 18 | chr5 31295500 31297500 chr5 31349200 31351200 19 | chr5 31295500 31297500 chr5 31358700 31360700 20 | chr5 31295500 31297500 chr5 31376700 31378700 21 | chr5 31295500 31297500 chr5 31397100 31399100 22 | chr5 31295500 31297500 chr5 31408000 31410000 23 | chr5 31295500 31297500 chr5 31447500 31449500 24 | chr5 31295500 31297500 chr5 31452200 31454200 25 | chr5 31295500 31297500 chr5 31483000 31485000 26 | chr5 31295500 31297500 chr5 31608600 31610600 27 | chr5 31295500 31297500 chr5 32291000 32293000 28 | chr5 31306700 31308700 chr5 31336200 31338200 29 | chr5 31306700 31308700 chr5 31349200 31351200 30 | chr5 31306700 31308700 chr5 31358700 31360700 31 | chr5 31306700 31308700 chr5 31376700 31378700 32 | chr5 31306700 31308700 chr5 31397100 31399100 33 | chr5 31306700 31308700 chr5 31408000 31410000 34 | chr5 31306700 31308700 chr5 31447500 31449500 35 | chr5 31306700 31308700 chr5 31452200 31454200 36 | chr5 31306700 31308700 chr5 31483000 31485000 37 | chr5 31306700 31308700 chr5 31608600 31610600 38 | chr5 31310500 31312500 chr5 31336200 31338200 39 | chr5 31310500 31312500 chr5 31349200 31351200 40 | chr5 31310500 31312500 chr5 31358700 31360700 41 | chr5 31310500 31312500 chr5 31376700 31378700 42 | chr5 31310500 31312500 chr5 31397100 31399100 43 | chr5 31310500 31312500 chr5 31408000 31410000 44 | chr5 31310500 31312500 chr5 31447500 31449500 45 | chr5 31310500 31312500 chr5 31452200 31454200 46 | chr5 31310500 31312500 chr5 31608600 31610600 47 | chr5 31310500 31312500 chr5 31683000 31685000 48 | chr5 31336200 31338200 chr5 31349200 31351200 49 | chr5 31336200 31338200 chr5 31358700 31360700 50 | chr5 31336200 31338200 chr5 31376700 31378700 51 | chr5 31336200 31338200 chr5 31397100 31399100 52 | chr5 31336200 31338200 chr5 31408000 31410000 53 | chr5 31336200 31338200 chr5 31447500 31449500 54 | chr5 
31336200 31338200 chr5 31452200 31454200 55 | chr5 31336200 31338200 chr5 31608600 31610600 56 | chr5 31336200 31338200 chr5 31645300 31647300 57 | chr5 31336200 31338200 chr5 31651000 31653000 58 | chr5 31336200 31338200 chr5 31683000 31685000 59 | chr5 31336200 31338200 chr5 31854000 31856000 60 | chr5 31336200 31338200 chr5 32015400 32017400 61 | chr5 31336200 31338200 chr5 32291000 32293000 62 | chr5 31344650 31346650 chr5 31483000 31485000 63 | chr5 31349200 31351200 chr5 31358700 31360700 64 | chr5 31349200 31351200 chr5 31376700 31378700 65 | chr5 31349200 31351200 chr5 31397100 31399100 66 | chr5 31349200 31351200 chr5 31408000 31410000 67 | chr5 31349200 31351200 chr5 31447500 31449500 68 | chr5 31349200 31351200 chr5 31452200 31454200 69 | chr5 31349200 31351200 chr5 31608600 31610600 70 | chr5 31349200 31351200 chr5 31645300 31647300 71 | chr5 31349200 31351200 chr5 31651000 31653000 72 | chr5 31349200 31351200 chr5 31683000 31685000 73 | chr5 31349200 31351200 chr5 31854000 31856000 74 | chr5 31349200 31351200 chr5 32015400 32017400 75 | chr5 31349200 31351200 chr5 32291000 32293000 76 | chr5 31351450 31353450 chr5 31376700 31378700 77 | chr5 31351450 31353450 chr5 31397100 31399100 78 | chr5 31351450 31353450 chr5 31447500 31449500 79 | chr5 31358700 31360700 chr5 31376700 31378700 80 | chr5 31358700 31360700 chr5 31397100 31399100 81 | chr5 31358700 31360700 chr5 31408000 31410000 82 | chr5 31358700 31360700 chr5 31447500 31449500 83 | chr5 31358700 31360700 chr5 31452200 31454200 84 | chr5 31358700 31360700 chr5 31608600 31610600 85 | chr5 31358700 31360700 chr5 31645300 31647300 86 | chr5 31358700 31360700 chr5 31651000 31653000 87 | chr5 31358700 31360700 chr5 31683000 31685000 88 | chr5 31358700 31360700 chr5 31854000 31856000 89 | chr5 31358700 31360700 chr5 32291000 32293000 90 | chr5 31369650 31371650 chr5 31483000 31485000 91 | chr5 31376700 31378700 chr5 31397100 31399100 92 | chr5 31376700 31378700 chr5 31408000 31410000 93 | chr5 31376700 
31378700 chr5 31447500 31449500 94 | chr5 31376700 31378700 chr5 31452200 31454200 95 | chr5 31376700 31378700 chr5 31483000 31485000 96 | chr5 31376700 31378700 chr5 31608600 31610600 97 | chr5 31376700 31378700 chr5 31645300 31647300 98 | chr5 31376700 31378700 chr5 31651000 31653000 99 | chr5 31376700 31378700 chr5 31683000 31685000 100 | chr5 31376700 31378700 chr5 31753200 31755200 101 | chr5 31376700 31378700 chr5 31770300 31772300 102 | chr5 31376700 31378700 chr5 31787500 31789500 103 | chr5 31376700 31378700 chr5 31791000 31793000 104 | chr5 31376700 31378700 chr5 31854000 31856000 105 | chr5 31376700 31378700 chr5 32015400 32017400 106 | chr5 31376700 31378700 chr5 32291000 32293000 107 | chr5 31397100 31399100 chr5 31408000 31410000 108 | chr5 31397100 31399100 chr5 31447500 31449500 109 | chr5 31397100 31399100 chr5 31452200 31454200 110 | chr5 31397100 31399100 chr5 31483000 31485000 111 | chr5 31397100 31399100 chr5 31608600 31610600 112 | chr5 31397100 31399100 chr5 31645300 31647300 113 | chr5 31397100 31399100 chr5 31651000 31653000 114 | chr5 31397100 31399100 chr5 31683000 31685000 115 | chr5 31397100 31399100 chr5 31753200 31755200 116 | chr5 31397100 31399100 chr5 31770300 31772300 117 | chr5 31397100 31399100 chr5 31787500 31789500 118 | chr5 31397100 31399100 chr5 31791000 31793000 119 | chr5 31397100 31399100 chr5 31854000 31856000 120 | chr5 31397100 31399100 chr5 32291000 32293000 121 | chr5 31397100 31399100 chr5 32339700 32341700 122 | chr5 31408000 31410000 chr5 31447500 31449500 123 | chr5 31408000 31410000 chr5 31452200 31454200 124 | chr5 31408000 31410000 chr5 31483000 31485000 125 | chr5 31408000 31410000 chr5 31608600 31610600 126 | chr5 31408000 31410000 chr5 31645300 31647300 127 | chr5 31408000 31410000 chr5 31651000 31653000 128 | chr5 31408000 31410000 chr5 31683000 31685000 129 | chr5 31408000 31410000 chr5 31854000 31856000 130 | chr5 31408000 31410000 chr5 32291000 32293000 131 | chr5 31408000 31410000 chr5 32339700 
32341700 132 | chr5 31447500 31449500 chr5 31608600 31610600 133 | chr5 31447500 31449500 chr5 31645300 31647300 134 | chr5 31447500 31449500 chr5 31651000 31653000 135 | chr5 31447500 31449500 chr5 31683000 31685000 136 | chr5 31447500 31449500 chr5 31770300 31772300 137 | chr5 31447500 31449500 chr5 31791000 31793000 138 | chr5 31447500 31449500 chr5 31854000 31856000 139 | chr5 31447500 31449500 chr5 32015400 32017400 140 | chr5 31447500 31449500 chr5 32291000 32293000 141 | chr5 31447500 31449500 chr5 32339700 32341700 142 | chr5 31452200 31454200 chr5 31608600 31610600 143 | chr5 31452200 31454200 chr5 31645300 31647300 144 | chr5 31452200 31454200 chr5 31651000 31653000 145 | chr5 31452200 31454200 chr5 31753200 31755200 146 | chr5 31452200 31454200 chr5 31791000 31793000 147 | chr5 31452200 31454200 chr5 31854000 31856000 148 | chr5 31452200 31454200 chr5 32291000 32293000 149 | chr5 31452200 31454200 chr5 32339700 32341700 150 | chr5 31483000 31485000 chr5 31626700 31628700 151 | chr5 31608600 31610600 chr5 31645300 31647300 152 | chr5 31608600 31610600 chr5 31651000 31653000 153 | chr5 31608600 31610600 chr5 31683000 31685000 154 | chr5 31608600 31610600 chr5 31753200 31755200 155 | chr5 31608600 31610600 chr5 31770300 31772300 156 | chr5 31608600 31610600 chr5 31787500 31789500 157 | chr5 31608600 31610600 chr5 31791000 31793000 158 | chr5 31608600 31610600 chr5 31854000 31856000 159 | chr5 31608600 31610600 chr5 32015400 32017400 160 | chr5 31608600 31610600 chr5 32291000 32293000 161 | chr5 31608600 31610600 chr5 32339700 32341700 162 | chr5 31645300 31647300 chr5 31651000 31653000 163 | chr5 31645300 31647300 chr5 31683000 31685000 164 | chr5 31645300 31647300 chr5 31753200 31755200 165 | chr5 31645300 31647300 chr5 31791000 31793000 166 | chr5 31645300 31647300 chr5 31854000 31856000 167 | chr5 31645300 31647300 chr5 32015400 32017400 168 | chr5 31645300 31647300 chr5 32291000 32293000 169 | chr5 31645300 31647300 chr5 32339700 32341700 170 | chr5 
31651000 31653000 chr5 31683000 31685000 171 | chr5 31651000 31653000 chr5 31753200 31755200 172 | chr5 31651000 31653000 chr5 31791000 31793000 173 | chr5 31651000 31653000 chr5 31854000 31856000 174 | chr5 31651000 31653000 chr5 32015400 32017400 175 | chr5 31651000 31653000 chr5 32291000 32293000 176 | chr5 31651000 31653000 chr5 32339700 32341700 177 | chr5 31683000 31685000 chr5 31753200 31755200 178 | chr5 31683000 31685000 chr5 31770300 31772300 179 | chr5 31683000 31685000 chr5 31787500 31789500 180 | chr5 31683000 31685000 chr5 31791000 31793000 181 | chr5 31683000 31685000 chr5 31854000 31856000 182 | chr5 31683000 31685000 chr5 32291000 32293000 183 | chr5 31683000 31685000 chr5 32339700 32341700 184 | chr5 31720000 31722000 chr5 31753200 31755200 185 | chr5 31720000 31722000 chr5 31770300 31772300 186 | chr5 31720000 31722000 chr5 31854000 31856000 187 | chr5 31720000 31722000 chr5 32083000 32085000 188 | chr5 31720000 31722000 chr5 32291000 32293000 189 | chr5 31720000 31722000 chr5 32339700 32341700 190 | chr5 31720000 31722000 chr5 32354200 32356200 191 | chr5 31720000 31722000 chr5 32372000 32374000 192 | chr5 31753200 31755200 chr5 31770300 31772300 193 | chr5 31753200 31755200 chr5 31787500 31789500 194 | chr5 31753200 31755200 chr5 31791000 31793000 195 | chr5 31753200 31755200 chr5 31854000 31856000 196 | chr5 31753200 31755200 chr5 32015400 32017400 197 | chr5 31753200 31755200 chr5 32083000 32085000 198 | chr5 31753200 31755200 chr5 32291000 32293000 199 | chr5 31753200 31755200 chr5 32339700 32341700 200 | chr5 31753200 31755200 chr5 32354200 32356200 201 | chr5 31753200 31755200 chr5 32372000 32374000 202 | chr5 31759000 31761000 chr5 31770300 31772300 203 | chr5 31759000 31761000 chr5 31787500 31789500 204 | chr5 31759000 31761000 chr5 31791000 31793000 205 | chr5 31759000 31761000 chr5 31854000 31856000 206 | chr5 31759000 31761000 chr5 32291000 32293000 207 | chr5 31759000 31761000 chr5 32339700 32341700 208 | chr5 31759000 31761000 chr5 
32354200 32356200 209 | chr5 31759000 31761000 chr5 32372000 32374000 210 | chr5 31767000 31769000 chr5 31791000 31793000 211 | chr5 31767000 31769000 chr5 31854000 31856000 212 | chr5 31767000 31769000 chr5 32291000 32293000 213 | chr5 31767000 31769000 chr5 32339700 32341700 214 | chr5 31770300 31772300 chr5 31791000 31793000 215 | chr5 31770300 31772300 chr5 31854000 31856000 216 | chr5 31770300 31772300 chr5 32015400 32017400 217 | chr5 31770300 31772300 chr5 32291000 32293000 218 | chr5 31770300 31772300 chr5 32339700 32341700 219 | chr5 31770300 31772300 chr5 32354200 32356200 220 | chr5 31787500 31789500 chr5 31854000 31856000 221 | chr5 31787500 31789500 chr5 32015400 32017400 222 | chr5 31787500 31789500 chr5 32291000 32293000 223 | chr5 31787500 31789500 chr5 32339700 32341700 224 | chr5 31787500 31789500 chr5 32354200 32356200 225 | chr5 31791000 31793000 chr5 31854000 31856000 226 | chr5 31791000 31793000 chr5 32015400 32017400 227 | chr5 31791000 31793000 chr5 32291000 32293000 228 | chr5 31791000 31793000 chr5 32339700 32341700 229 | chr5 31791000 31793000 chr5 32354200 32356200 230 | chr5 31854000 31856000 chr5 32015400 32017400 231 | chr5 31854000 31856000 chr5 32291000 32293000 232 | chr5 31854000 31856000 chr5 32339700 32341700 233 | chr5 31854000 31856000 chr5 32354200 32356200 234 | chr5 31854000 31856000 chr5 32372000 32374000 235 | chr5 31865800 31867800 chr5 32296200 32298200 236 | chr5 31865800 31867800 chr5 32354200 32356200 237 | chr5 32015400 32017400 chr5 32291000 32293000 238 | chr5 32083000 32085000 chr5 32291000 32293000 239 | chr5 32083000 32085000 chr5 32296200 32298200 240 | chr5 32083000 32085000 chr5 32354200 32356200 241 | chr5 32174000 32176000 chr5 32291000 32293000 242 | chr5 32174000 32176000 chr5 32339700 32341700 243 | chr5 32291000 32293000 chr5 32339700 32341700 244 | chr5 32291000 32293000 chr5 32354200 32356200 245 | chr5 32291000 32293000 chr5 32372000 32374000 246 | chr5 32296200 32298200 chr5 32354200 32356200 247 | 
chr5 32339700 32341700 chr5 32354200 32356200 248 | chr5 32339700 32341700 chr5 32372000 32374000 249 | chr5 32354200 32356200 chr5 32372000 32374000 250 | chr8 84856500 84858500 chr8 84873500 84875500 251 | chr8 84873500 84875500 chr8 84906300 84908300 252 | chr8 84873500 84875500 chr8 84912250 84914250 253 | chr8 84873500 84875500 chr8 84917700 84919700 254 | chr8 84873500 84875500 chr8 84924200 84926200 255 | chr8 84873500 84875500 chr8 84936250 84938250 256 | chr8 84873500 84875500 chr8 84949150 84951150 257 | chr8 84873500 84875500 chr8 84962800 84964800 258 | chr8 84873500 84875500 chr8 84975300 84977300 259 | chr8 84873500 84875500 chr8 84983000 84985000 260 | chr8 84873500 84875500 chr8 84996000 84998000 261 | chr8 84873500 84875500 chr8 85011500 85013500 262 | chr8 84873500 84875500 chr8 85083500 85085500 263 | chr8 84873500 84875500 chr8 85141000 85143000 264 | chr8 84873500 84875500 chr8 85227800 85229800 265 | chr8 84873500 84875500 chr8 85285000 85287000 266 | chr8 84873500 84875500 chr8 85327000 85329000 267 | chr8 84873500 84875500 chr8 85359000 85361000 268 | chr8 84873500 84875500 chr8 85365000 85367000 269 | chr8 84873500 84875500 chr8 85373100 85375100 270 | chr8 84873500 84875500 chr8 85375800 85377800 271 | chr8 84873500 84875500 chr8 85378600 85380600 272 | chr8 84873500 84875500 chr8 85386000 85388000 273 | chr8 84873500 84875500 chr8 85388300 85390300 274 | chr8 84873500 84875500 chr8 85389700 85391700 275 | chr8 84873500 84875500 chr8 85412800 85414800 276 | chr8 84873500 84875500 chr8 85414700 85416700 277 | chr8 84873500 84875500 chr8 85448750 85450750 278 | chr8 84873500 84875500 chr8 85467800 85469800 279 | chr8 84873500 84875500 chr8 85526000 85528000 280 | chr8 84873500 84875500 chr8 85566300 85568300 281 | chr8 84873500 84875500 chr8 85572600 85574600 282 | chr8 84873500 84875500 chr8 85582500 85584500 283 | chr8 84873500 84875500 chr8 85620000 85622000 284 | chr8 84873500 84875500 chr8 85629000 85631000 285 | chr8 84873500 84875500 
chr8 85641000 85643000 286 | chr8 84873500 84875500 chr8 85751500 85753500 287 | chr8 84873500 84875500 chr8 85762400 85764400 288 | chr8 84906300 84908300 chr8 84917700 84919700 289 | chr8 84906300 84908300 chr8 84924200 84926200 290 | chr8 84906300 84908300 chr8 84936250 84938250 291 | chr8 84906300 84908300 chr8 84949150 84951150 292 | chr8 84906300 84908300 chr8 84962800 84964800 293 | chr8 84906300 84908300 chr8 84983000 84985000 294 | chr8 84906300 84908300 chr8 84996000 84998000 295 | chr8 84906300 84908300 chr8 85083500 85085500 296 | chr8 84906300 84908300 chr8 85141000 85143000 297 | chr8 84906300 84908300 chr8 85227800 85229800 298 | chr8 84906300 84908300 chr8 85265000 85267000 299 | chr8 84906300 84908300 chr8 85285000 85287000 300 | chr8 84906300 84908300 chr8 85327000 85329000 301 | chr8 84906300 84908300 chr8 85365000 85367000 302 | chr8 84906300 84908300 chr8 85375800 85377800 303 | chr8 84906300 84908300 chr8 85378600 85380600 304 | chr8 84906300 84908300 chr8 85389700 85391700 305 | chr8 84906300 84908300 chr8 85412800 85414800 306 | chr8 84906300 84908300 chr8 85526000 85528000 307 | chr8 84906300 84908300 chr8 85572600 85574600 308 | chr8 84906300 84908300 chr8 85702000 85704000 309 | chr8 84912250 84914250 chr8 84936250 84938250 310 | chr8 84912250 84914250 chr8 84949150 84951150 311 | chr8 84912250 84914250 chr8 84956550 84958550 312 | chr8 84912250 84914250 chr8 84962800 84964800 313 | chr8 84912250 84914250 chr8 84983000 84985000 314 | chr8 84912250 84914250 chr8 84996000 84998000 315 | chr8 84912250 84914250 chr8 85011500 85013500 316 | chr8 84912250 84914250 chr8 85083500 85085500 317 | chr8 84912250 84914250 chr8 85122000 85124000 318 | chr8 84912250 84914250 chr8 85141000 85143000 319 | chr8 84912250 84914250 chr8 85227800 85229800 320 | chr8 84912250 84914250 chr8 85327000 85329000 321 | chr8 84912250 84914250 chr8 85365000 85367000 322 | chr8 84912250 84914250 chr8 85373100 85375100 323 | chr8 84912250 84914250 chr8 85375800 85377800 
324 | chr8 84912250 84914250 chr8 85378600 85380600 325 | chr8 84912250 84914250 chr8 85389700 85391700 326 | chr8 84912250 84914250 chr8 85412800 85414800 327 | chr8 84912250 84914250 chr8 85526000 85528000 328 | chr8 84912250 84914250 chr8 85572600 85574600 329 | chr8 84912250 84914250 chr8 85702000 85704000 330 | chr8 84917700 84919700 chr8 84962800 84964800 331 | chr8 84917700 84919700 chr8 84983000 84985000 332 | chr8 84917700 84919700 chr8 84996000 84998000 333 | chr8 84917700 84919700 chr8 85011500 85013500 334 | chr8 84917700 84919700 chr8 85083500 85085500 335 | chr8 84917700 84919700 chr8 85122000 85124000 336 | chr8 84917700 84919700 chr8 85141000 85143000 337 | chr8 84917700 84919700 chr8 85227800 85229800 338 | chr8 84917700 84919700 chr8 85327000 85329000 339 | chr8 84917700 84919700 chr8 85365000 85367000 340 | chr8 84917700 84919700 chr8 85373100 85375100 341 | chr8 84917700 84919700 chr8 85375800 85377800 342 | chr8 84917700 84919700 chr8 85378600 85380600 343 | chr8 84917700 84919700 chr8 85386000 85388000 344 | chr8 84917700 84919700 chr8 85388300 85390300 345 | chr8 84917700 84919700 chr8 85389700 85391700 346 | chr8 84917700 84919700 chr8 85404000 85406000 347 | chr8 84917700 84919700 chr8 85412800 85414800 348 | chr8 84917700 84919700 chr8 85526000 85528000 349 | chr8 84917700 84919700 chr8 85536500 85538500 350 | chr8 84917700 84919700 chr8 85557500 85559500 351 | chr8 84917700 84919700 chr8 85572600 85574600 352 | chr8 84917700 84919700 chr8 85597000 85599000 353 | chr8 84917700 84919700 chr8 85629000 85631000 354 | chr8 84917700 84919700 chr8 85641000 85643000 355 | chr8 84917700 84919700 chr8 85702000 85704000 356 | chr8 84917700 84919700 chr8 85715600 85717600 357 | chr8 84917700 84919700 chr8 85751500 85753500 358 | chr8 84917700 84919700 chr8 85762400 85764400 359 | chr8 84917700 84919700 chr8 85797300 85799300 360 | chr8 84924200 84926200 chr8 84962800 84964800 361 | chr8 84924200 84926200 chr8 84983000 84985000 362 | chr8 84924200 
84926200 chr8 84996000 84998000 363 | chr8 84924200 84926200 chr8 85083500 85085500 364 | chr8 84924200 84926200 chr8 85122000 85124000 365 | chr8 84924200 84926200 chr8 85141000 85143000 366 | chr8 84924200 84926200 chr8 85227800 85229800 367 | chr8 84924200 84926200 chr8 85365000 85367000 368 | chr8 84924200 84926200 chr8 85373100 85375100 369 | chr8 84924200 84926200 chr8 85375800 85377800 370 | chr8 84924200 84926200 chr8 85378600 85380600 371 | chr8 84924200 84926200 chr8 85386000 85388000 372 | chr8 84924200 84926200 chr8 85388300 85390300 373 | chr8 84924200 84926200 chr8 85389700 85391700 374 | chr8 84924200 84926200 chr8 85412800 85414800 375 | chr8 84924200 84926200 chr8 85526000 85528000 376 | chr8 84924200 84926200 chr8 85557500 85559500 377 | chr8 84924200 84926200 chr8 85572600 85574600 378 | chr8 84924200 84926200 chr8 85629000 85631000 379 | chr8 84924200 84926200 chr8 85641000 85643000 380 | chr8 84924200 84926200 chr8 85702000 85704000 381 | chr8 84924200 84926200 chr8 85715600 85717600 382 | chr8 84924200 84926200 chr8 85751500 85753500 383 | chr8 84936250 84938250 chr8 84962800 84964800 384 | chr8 84936250 84938250 chr8 84983000 84985000 385 | chr8 84936250 84938250 chr8 84996000 84998000 386 | chr8 84936250 84938250 chr8 85011500 85013500 387 | chr8 84936250 84938250 chr8 85141000 85143000 388 | chr8 84936250 84938250 chr8 85227800 85229800 389 | chr8 84949150 84951150 chr8 84962800 84964800 390 | chr8 84949150 84951150 chr8 84983000 84985000 391 | chr8 84949150 84951150 chr8 84996000 84998000 392 | chr8 84949150 84951150 chr8 85141000 85143000 393 | chr8 84949150 84951150 chr8 85227800 85229800 394 | chr8 84956550 84958550 chr8 84983000 84985000 395 | chr8 84962800 84964800 chr8 84975300 84977300 396 | chr8 84962800 84964800 chr8 84983000 84985000 397 | chr8 84962800 84964800 chr8 84996000 84998000 398 | chr8 84962800 84964800 chr8 85083500 85085500 399 | chr8 84962800 84964800 chr8 85141000 85143000 400 | chr8 84962800 84964800 chr8 85227800 
85229800 401 | chr8 84962800 84964800 chr8 85285000 85287000 402 | chr8 84962800 84964800 chr8 85327000 85329000 403 | chr8 84962800 84964800 chr8 85365000 85367000 404 | chr8 84962800 84964800 chr8 85373100 85375100 405 | chr8 84962800 84964800 chr8 85378600 85380600 406 | chr8 84962800 84964800 chr8 85388300 85390300 407 | chr8 84962800 84964800 chr8 85412800 85414800 408 | chr8 84962800 84964800 chr8 85526000 85528000 409 | chr8 84962800 84964800 chr8 85536500 85538500 410 | chr8 84962800 84964800 chr8 85566300 85568300 411 | chr8 84962800 84964800 chr8 85572600 85574600 412 | chr8 84962800 84964800 chr8 85629000 85631000 413 | chr8 84962800 84964800 chr8 85633600 85635600 414 | chr8 84962800 84964800 chr8 85641000 85643000 415 | chr8 84975300 84977300 chr8 84983000 84985000 416 | chr8 84975300 84977300 chr8 84996000 84998000 417 | chr8 84975300 84977300 chr8 85083500 85085500 418 | chr8 84975300 84977300 chr8 85141000 85143000 419 | chr8 84975300 84977300 chr8 85227800 85229800 420 | chr8 84975300 84977300 chr8 85327000 85329000 421 | chr8 84975300 84977300 chr8 85365000 85367000 422 | chr8 84975300 84977300 chr8 85373100 85375100 423 | chr8 84975300 84977300 chr8 85375800 85377800 424 | chr8 84975300 84977300 chr8 85386000 85388000 425 | chr8 84975300 84977300 chr8 85388300 85390300 426 | chr8 84975300 84977300 chr8 85389700 85391700 427 | chr8 84975300 84977300 chr8 85412800 85414800 428 | chr8 84975300 84977300 chr8 85566300 85568300 429 | chr8 84975300 84977300 chr8 85572600 85574600 430 | chr8 84975300 84977300 chr8 85629000 85631000 431 | chr8 84975300 84977300 chr8 85641000 85643000 432 | chr8 84983000 84985000 chr8 84996000 84998000 433 | chr8 84983000 84985000 chr8 85083500 85085500 434 | chr8 84983000 84985000 chr8 85141000 85143000 435 | chr8 84983000 84985000 chr8 85227800 85229800 436 | chr8 84983000 84985000 chr8 85285000 85287000 437 | chr8 84983000 84985000 chr8 85296000 85298000 438 | chr8 84983000 84985000 chr8 85305400 85307400 439 | chr8 
84983000 84985000 chr8 85317600 85319600 440 | chr8 84983000 84985000 chr8 85323000 85325000 441 | chr8 84983000 84985000 chr8 85327000 85329000 442 | chr8 84983000 84985000 chr8 85365000 85367000 443 | chr8 84983000 84985000 chr8 85373100 85375100 444 | chr8 84983000 84985000 chr8 85375800 85377800 445 | chr8 84983000 84985000 chr8 85378600 85380600 446 | chr8 84983000 84985000 chr8 85386000 85388000 447 | chr8 84983000 84985000 chr8 85388300 85390300 448 | chr8 84983000 84985000 chr8 85389700 85391700 449 | chr8 84983000 84985000 chr8 85412800 85414800 450 | chr8 84983000 84985000 chr8 85430200 85432200 451 | chr8 84983000 84985000 chr8 85433000 85435000 452 | chr8 84983000 84985000 chr8 85526000 85528000 453 | chr8 84983000 84985000 chr8 85566300 85568300 454 | chr8 84983000 84985000 chr8 85572600 85574600 455 | chr8 84983000 84985000 chr8 85597000 85599000 456 | chr8 84983000 84985000 chr8 85629000 85631000 457 | chr8 84983000 84985000 chr8 85633600 85635600 458 | chr8 84983000 84985000 chr8 85641000 85643000 459 | chr8 84996000 84998000 chr8 85083500 85085500 460 | chr8 84996000 84998000 chr8 85122000 85124000 461 | chr8 84996000 84998000 chr8 85141000 85143000 462 | chr8 84996000 84998000 chr8 85227800 85229800 463 | chr8 84996000 84998000 chr8 85285000 85287000 464 | chr8 84996000 84998000 chr8 85305400 85307400 465 | chr8 84996000 84998000 chr8 85323000 85325000 466 | chr8 84996000 84998000 chr8 85327000 85329000 467 | chr8 84996000 84998000 chr8 85365000 85367000 468 | chr8 84996000 84998000 chr8 85373100 85375100 469 | chr8 84996000 84998000 chr8 85375800 85377800 470 | chr8 84996000 84998000 chr8 85378600 85380600 471 | chr8 84996000 84998000 chr8 85386000 85388000 472 | chr8 84996000 84998000 chr8 85388300 85390300 473 | chr8 84996000 84998000 chr8 85389700 85391700 474 | chr8 84996000 84998000 chr8 85412800 85414800 475 | chr8 84996000 84998000 chr8 85430200 85432200 476 | chr8 84996000 84998000 chr8 85433000 85435000 477 | chr8 84996000 84998000 chr8 
85448750 85450750 478 | chr8 84996000 84998000 chr8 85455750 85457750 479 | chr8 84996000 84998000 chr8 85526000 85528000 480 | chr8 84996000 84998000 chr8 85536500 85538500 481 | chr8 84996000 84998000 chr8 85566300 85568300 482 | chr8 84996000 84998000 chr8 85572600 85574600 483 | chr8 84996000 84998000 chr8 85597000 85599000 484 | chr8 84996000 84998000 chr8 85620000 85622000 485 | chr8 84996000 84998000 chr8 85629000 85631000 486 | chr8 84996000 84998000 chr8 85633600 85635600 487 | chr8 84996000 84998000 chr8 85641000 85643000 488 | chr8 84996000 84998000 chr8 85751500 85753500 489 | chr8 85011500 85013500 chr8 85083500 85085500 490 | chr8 85014650 85016650 chr8 85083500 85085500 491 | chr8 85017500 85019500 chr8 85083500 85085500 492 | chr8 85017500 85019500 chr8 85296000 85298000 493 | chr8 85017500 85019500 chr8 85320000 85322000 494 | chr8 85083500 85085500 chr8 85141000 85143000 495 | chr8 85083500 85085500 chr8 85227800 85229800 496 | chr8 85083500 85085500 chr8 85285000 85287000 497 | chr8 85083500 85085500 chr8 85296000 85298000 498 | chr8 85083500 85085500 chr8 85320000 85322000 499 | chr8 85083500 85085500 chr8 85365000 85367000 500 | chr8 85083500 85085500 chr8 85373100 85375100 501 | chr8 85083500 85085500 chr8 85375800 85377800 502 | chr8 85089500 85091500 chr8 85141000 85143000 503 | chr8 85089500 85091500 chr8 85171500 85173500 504 | chr8 85089500 85091500 chr8 85179500 85181500 505 | chr8 85089500 85091500 chr8 85207000 85209000 506 | chr8 85089500 85091500 chr8 85227800 85229800 507 | chr8 85089500 85091500 chr8 85235250 85237250 508 | chr8 85089500 85091500 chr8 85239700 85241700 509 | chr8 85089500 85091500 chr8 85262700 85264700 510 | chr8 85089500 85091500 chr8 85279800 85281800 511 | chr8 85089500 85091500 chr8 85293400 85295400 512 | chr8 85089500 85091500 chr8 85296000 85298000 513 | chr8 85089500 85091500 chr8 85321200 85323200 514 | chr8 85089500 85091500 chr8 85365000 85367000 515 | chr8 85089500 85091500 chr8 85373100 85375100 516 | 
chr8 85122000 85124000 chr8 85141000 85143000 517 | chr8 85122000 85124000 chr8 85227800 85229800 518 | chr8 85122000 85124000 chr8 85365000 85367000 519 | chr8 85122000 85124000 chr8 85373100 85375100 520 | chr8 85122000 85124000 chr8 85375800 85377800 521 | chr8 85122000 85124000 chr8 85378600 85380600 522 | chr8 85122000 85124000 chr8 85414700 85416700 523 | chr8 85141000 85143000 chr8 85227800 85229800 524 | chr8 85141000 85143000 chr8 85265000 85267000 525 | chr8 85141000 85143000 chr8 85285000 85287000 526 | chr8 85141000 85143000 chr8 85296000 85298000 527 | chr8 85141000 85143000 chr8 85305400 85307400 528 | chr8 85141000 85143000 chr8 85327000 85329000 529 | chr8 85141000 85143000 chr8 85365000 85367000 530 | chr8 85141000 85143000 chr8 85373100 85375100 531 | chr8 85141000 85143000 chr8 85375800 85377800 532 | chr8 85141000 85143000 chr8 85378600 85380600 533 | chr8 85141000 85143000 chr8 85386000 85388000 534 | chr8 85141000 85143000 chr8 85388300 85390300 535 | chr8 85141000 85143000 chr8 85389700 85391700 536 | chr8 85141000 85143000 chr8 85412800 85414800 537 | chr8 85141000 85143000 chr8 85526000 85528000 538 | chr8 85141000 85143000 chr8 85572600 85574600 539 | chr8 85141000 85143000 chr8 85597000 85599000 540 | chr8 85141000 85143000 chr8 85629000 85631000 541 | chr8 85141000 85143000 chr8 85641000 85643000 542 | chr8 85227800 85229800 chr8 85265000 85267000 543 | chr8 85227800 85229800 chr8 85285000 85287000 544 | chr8 85227800 85229800 chr8 85296000 85298000 545 | chr8 85227800 85229800 chr8 85327000 85329000 546 | chr8 85227800 85229800 chr8 85365000 85367000 547 | chr8 85227800 85229800 chr8 85373100 85375100 548 | chr8 85227800 85229800 chr8 85375800 85377800 549 | chr8 85227800 85229800 chr8 85378600 85380600 550 | chr8 85227800 85229800 chr8 85386000 85388000 551 | chr8 85227800 85229800 chr8 85388300 85390300 552 | chr8 85227800 85229800 chr8 85389700 85391700 553 | chr8 85227800 85229800 chr8 85412800 85414800 554 | chr8 85227800 85229800 
chr8 85430200 85432200 555 | chr8 85227800 85229800 chr8 85448750 85450750 556 | chr8 85227800 85229800 chr8 85526000 85528000 557 | chr8 85227800 85229800 chr8 85536500 85538500 558 | chr8 85227800 85229800 chr8 85557500 85559500 559 | chr8 85227800 85229800 chr8 85566300 85568300 560 | chr8 85227800 85229800 chr8 85572600 85574600 561 | chr8 85227800 85229800 chr8 85597000 85599000 562 | chr8 85227800 85229800 chr8 85620000 85622000 563 | chr8 85227800 85229800 chr8 85629000 85631000 564 | chr8 85227800 85229800 chr8 85633600 85635600 565 | chr8 85227800 85229800 chr8 85641000 85643000 566 | chr8 85227800 85229800 chr8 85715600 85717600 567 | chr8 85227800 85229800 chr8 85751500 85753500 568 | chr8 85227800 85229800 chr8 85762400 85764400 569 | chr8 85227800 85229800 chr8 85797300 85799300 570 | chr8 85285000 85287000 chr8 85365000 85367000 571 | chr8 85285000 85287000 chr8 85526000 85528000 572 | chr8 85285000 85287000 chr8 85536500 85538500 573 | chr8 85285000 85287000 chr8 85557500 85559500 574 | chr8 85305400 85307400 chr8 85320000 85322000 575 | chr8 85305400 85307400 chr8 85365000 85367000 576 | chr8 85305400 85307400 chr8 85536500 85538500 577 | chr8 85305400 85307400 chr8 85538700 85540700 578 | chr8 85317600 85319600 chr8 85538700 85540700 579 | chr8 85323000 85325000 chr8 85538700 85540700 580 | chr8 85327000 85329000 chr8 85365000 85367000 581 | chr8 85327000 85329000 chr8 85373100 85375100 582 | chr8 85327000 85329000 chr8 85375800 85377800 583 | chr8 85327000 85329000 chr8 85378600 85380600 584 | chr8 85327000 85329000 chr8 85386000 85388000 585 | chr8 85327000 85329000 chr8 85389700 85391700 586 | chr8 85327000 85329000 chr8 85412800 85414800 587 | chr8 85327000 85329000 chr8 85526000 85528000 588 | chr8 85327000 85329000 chr8 85572600 85574600 589 | chr8 85327000 85329000 chr8 85629000 85631000 590 | chr8 85327000 85329000 chr8 85641000 85643000 591 | chr8 85365000 85367000 chr8 85412800 85414800 592 | chr8 85365000 85367000 chr8 85414700 85416700 
593 | chr8 85365000 85367000 chr8 85430200 85432200 594 | chr8 85365000 85367000 chr8 85435000 85437000 595 | chr8 85365000 85367000 chr8 85448750 85450750 596 | chr8 85365000 85367000 chr8 85455750 85457750 597 | chr8 85365000 85367000 chr8 85463300 85465300 598 | chr8 85365000 85367000 chr8 85467800 85469800 599 | chr8 85365000 85367000 chr8 85526000 85528000 600 | chr8 85365000 85367000 chr8 85536500 85538500 601 | chr8 85365000 85367000 chr8 85538700 85540700 602 | chr8 85365000 85367000 chr8 85541200 85543200 603 | chr8 85365000 85367000 chr8 85557500 85559500 604 | chr8 85365000 85367000 chr8 85572600 85574600 605 | chr8 85365000 85367000 chr8 85597000 85599000 606 | chr8 85365000 85367000 chr8 85620000 85622000 607 | chr8 85365000 85367000 chr8 85629000 85631000 608 | chr8 85365000 85367000 chr8 85633600 85635600 609 | chr8 85365000 85367000 chr8 85641000 85643000 610 | chr8 85365000 85367000 chr8 85691000 85693000 611 | chr8 85365000 85367000 chr8 85695000 85697000 612 | chr8 85365000 85367000 chr8 85702000 85704000 613 | chr8 85365000 85367000 chr8 85711500 85713500 614 | chr8 85365000 85367000 chr8 85715600 85717600 615 | chr8 85365000 85367000 chr8 85751500 85753500 616 | chr8 85365000 85367000 chr8 85762400 85764400 617 | chr8 85365000 85367000 chr8 85797300 85799300 618 | chr8 85373100 85375100 chr8 85412800 85414800 619 | chr8 85373100 85375100 chr8 85414700 85416700 620 | chr8 85373100 85375100 chr8 85448750 85450750 621 | chr8 85373100 85375100 chr8 85455750 85457750 622 | chr8 85373100 85375100 chr8 85463300 85465300 623 | chr8 85373100 85375100 chr8 85467800 85469800 624 | chr8 85373100 85375100 chr8 85526000 85528000 625 | chr8 85373100 85375100 chr8 85557500 85559500 626 | chr8 85373100 85375100 chr8 85566300 85568300 627 | chr8 85373100 85375100 chr8 85572600 85574600 628 | chr8 85373100 85375100 chr8 85597000 85599000 629 | chr8 85373100 85375100 chr8 85620000 85622000 630 | chr8 85373100 85375100 chr8 85629000 85631000 631 | chr8 85373100 
85375100 chr8 85633600 85635600 632 | chr8 85373100 85375100 chr8 85641000 85643000 633 | chr8 85373100 85375100 chr8 85691000 85693000 634 | chr8 85373100 85375100 chr8 85695000 85697000 635 | chr8 85373100 85375100 chr8 85702000 85704000 636 | chr8 85373100 85375100 chr8 85711500 85713500 637 | chr8 85373100 85375100 chr8 85715600 85717600 638 | chr8 85373100 85375100 chr8 85751500 85753500 639 | chr8 85373100 85375100 chr8 85762400 85764400 640 | chr8 85373100 85375100 chr8 85785750 85787750 641 | chr8 85373100 85375100 chr8 85797300 85799300 642 | chr8 85375800 85377800 chr8 85412800 85414800 643 | chr8 85375800 85377800 chr8 85414700 85416700 644 | chr8 85375800 85377800 chr8 85448750 85450750 645 | chr8 85375800 85377800 chr8 85455750 85457750 646 | chr8 85375800 85377800 chr8 85463300 85465300 647 | chr8 85375800 85377800 chr8 85467800 85469800 648 | chr8 85375800 85377800 chr8 85526000 85528000 649 | chr8 85375800 85377800 chr8 85557500 85559500 650 | chr8 85375800 85377800 chr8 85566300 85568300 651 | chr8 85375800 85377800 chr8 85572600 85574600 652 | chr8 85375800 85377800 chr8 85597000 85599000 653 | chr8 85375800 85377800 chr8 85629000 85631000 654 | chr8 85375800 85377800 chr8 85633600 85635600 655 | chr8 85375800 85377800 chr8 85641000 85643000 656 | chr8 85375800 85377800 chr8 85691000 85693000 657 | chr8 85375800 85377800 chr8 85702000 85704000 658 | chr8 85375800 85377800 chr8 85711500 85713500 659 | chr8 85375800 85377800 chr8 85715600 85717600 660 | chr8 85375800 85377800 chr8 85751500 85753500 661 | chr8 85375800 85377800 chr8 85762400 85764400 662 | chr8 85375800 85377800 chr8 85797300 85799300 663 | chr8 85378600 85380600 chr8 85412800 85414800 664 | chr8 85378600 85380600 chr8 85414700 85416700 665 | chr8 85378600 85380600 chr8 85430200 85432200 666 | chr8 85378600 85380600 chr8 85448750 85450750 667 | chr8 85378600 85380600 chr8 85455750 85457750 668 | chr8 85378600 85380600 chr8 85463300 85465300 669 | chr8 85378600 85380600 chr8 85467800 
85469800 670 | chr8 85378600 85380600 chr8 85557500 85559500 671 | chr8 85378600 85380600 chr8 85566300 85568300 672 | chr8 85378600 85380600 chr8 85572600 85574600 673 | chr8 85378600 85380600 chr8 85597000 85599000 674 | chr8 85378600 85380600 chr8 85620000 85622000 675 | chr8 85378600 85380600 chr8 85629000 85631000 676 | chr8 85378600 85380600 chr8 85633600 85635600 677 | chr8 85378600 85380600 chr8 85641000 85643000 678 | chr8 85378600 85380600 chr8 85691000 85693000 679 | chr8 85378600 85380600 chr8 85695000 85697000 680 | chr8 85378600 85380600 chr8 85702000 85704000 681 | chr8 85378600 85380600 chr8 85711500 85713500 682 | chr8 85378600 85380600 chr8 85715600 85717600 683 | chr8 85378600 85380600 chr8 85751500 85753500 684 | chr8 85378600 85380600 chr8 85762400 85764400 685 | chr8 85378600 85380600 chr8 85797300 85799300 686 | chr8 85386000 85388000 chr8 85412800 85414800 687 | chr8 85386000 85388000 chr8 85430200 85432200 688 | chr8 85386000 85388000 chr8 85448750 85450750 689 | chr8 85386000 85388000 chr8 85455750 85457750 690 | chr8 85386000 85388000 chr8 85463300 85465300 691 | chr8 85386000 85388000 chr8 85467800 85469800 692 | chr8 85386000 85388000 chr8 85526000 85528000 693 | chr8 85386000 85388000 chr8 85557500 85559500 694 | chr8 85386000 85388000 chr8 85566300 85568300 695 | chr8 85386000 85388000 chr8 85572600 85574600 696 | chr8 85386000 85388000 chr8 85597000 85599000 697 | chr8 85386000 85388000 chr8 85629000 85631000 698 | chr8 85386000 85388000 chr8 85633600 85635600 699 | chr8 85386000 85388000 chr8 85641000 85643000 700 | chr8 85386000 85388000 chr8 85691000 85693000 701 | chr8 85386000 85388000 chr8 85695000 85697000 702 | chr8 85386000 85388000 chr8 85702000 85704000 703 | chr8 85386000 85388000 chr8 85711500 85713500 704 | chr8 85386000 85388000 chr8 85715600 85717600 705 | chr8 85386000 85388000 chr8 85751500 85753500 706 | chr8 85386000 85388000 chr8 85762400 85764400 707 | chr8 85386000 85388000 chr8 85797300 85799300 708 | chr8 
85388300 85390300 chr8 85412800 85414800 709 | chr8 85388300 85390300 chr8 85448750 85450750 710 | chr8 85388300 85390300 chr8 85526000 85528000 711 | chr8 85388300 85390300 chr8 85557500 85559500 712 | chr8 85388300 85390300 chr8 85566300 85568300 713 | chr8 85388300 85390300 chr8 85572600 85574600 714 | chr8 85388300 85390300 chr8 85597000 85599000 715 | chr8 85388300 85390300 chr8 85629000 85631000 716 | chr8 85388300 85390300 chr8 85633600 85635600 717 | chr8 85388300 85390300 chr8 85641000 85643000 718 | chr8 85388300 85390300 chr8 85691000 85693000 719 | chr8 85388300 85390300 chr8 85695000 85697000 720 | chr8 85388300 85390300 chr8 85702000 85704000 721 | chr8 85388300 85390300 chr8 85711500 85713500 722 | chr8 85388300 85390300 chr8 85715600 85717600 723 | chr8 85388300 85390300 chr8 85751500 85753500 724 | chr8 85388300 85390300 chr8 85762400 85764400 725 | chr8 85388300 85390300 chr8 85797300 85799300 726 | chr8 85389700 85391700 chr8 85412800 85414800 727 | chr8 85389700 85391700 chr8 85414700 85416700 728 | chr8 85389700 85391700 chr8 85430200 85432200 729 | chr8 85389700 85391700 chr8 85435000 85437000 730 | chr8 85389700 85391700 chr8 85448750 85450750 731 | chr8 85389700 85391700 chr8 85455750 85457750 732 | chr8 85389700 85391700 chr8 85463300 85465300 733 | chr8 85389700 85391700 chr8 85467800 85469800 734 | chr8 85389700 85391700 chr8 85526000 85528000 735 | chr8 85389700 85391700 chr8 85549300 85551300 736 | chr8 85389700 85391700 chr8 85557500 85559500 737 | chr8 85389700 85391700 chr8 85566300 85568300 738 | chr8 85389700 85391700 chr8 85572600 85574600 739 | chr8 85389700 85391700 chr8 85597000 85599000 740 | chr8 85389700 85391700 chr8 85620000 85622000 741 | chr8 85389700 85391700 chr8 85629000 85631000 742 | chr8 85389700 85391700 chr8 85633600 85635600 743 | chr8 85389700 85391700 chr8 85641000 85643000 744 | chr8 85389700 85391700 chr8 85691000 85693000 745 | chr8 85389700 85391700 chr8 85695000 85697000 746 | chr8 85389700 85391700 chr8 
85702000 85704000 747 | chr8 85389700 85391700 chr8 85711500 85713500 748 | chr8 85389700 85391700 chr8 85715600 85717600 749 | chr8 85389700 85391700 chr8 85751500 85753500 750 | chr8 85389700 85391700 chr8 85762400 85764400 751 | chr8 85389700 85391700 chr8 85785750 85787750 752 | chr8 85389700 85391700 chr8 85797300 85799300 753 | chr8 85412800 85414800 chr8 85448750 85450750 754 | chr8 85412800 85414800 chr8 85463300 85465300 755 | chr8 85412800 85414800 chr8 85467800 85469800 756 | chr8 85412800 85414800 chr8 85526000 85528000 757 | chr8 85412800 85414800 chr8 85557500 85559500 758 | chr8 85412800 85414800 chr8 85566300 85568300 759 | chr8 85412800 85414800 chr8 85572600 85574600 760 | chr8 85412800 85414800 chr8 85582500 85584500 761 | chr8 85412800 85414800 chr8 85597000 85599000 762 | chr8 85412800 85414800 chr8 85620000 85622000 763 | chr8 85412800 85414800 chr8 85629000 85631000 764 | chr8 85412800 85414800 chr8 85633600 85635600 765 | chr8 85412800 85414800 chr8 85639000 85641000 766 | chr8 85412800 85414800 chr8 85641000 85643000 767 | chr8 85412800 85414800 chr8 85663000 85665000 768 | chr8 85412800 85414800 chr8 85671400 85673400 769 | chr8 85412800 85414800 chr8 85691000 85693000 770 | chr8 85412800 85414800 chr8 85695000 85697000 771 | chr8 85412800 85414800 chr8 85702000 85704000 772 | chr8 85412800 85414800 chr8 85711500 85713500 773 | chr8 85412800 85414800 chr8 85715600 85717600 774 | chr8 85412800 85414800 chr8 85751500 85753500 775 | chr8 85412800 85414800 chr8 85762400 85764400 776 | chr8 85412800 85414800 chr8 85797300 85799300 777 | chr8 85414700 85416700 chr8 85448750 85450750 778 | chr8 85414700 85416700 chr8 85455750 85457750 779 | chr8 85414700 85416700 chr8 85463300 85465300 780 | chr8 85414700 85416700 chr8 85467800 85469800 781 | chr8 85414700 85416700 chr8 85526000 85528000 782 | chr8 85414700 85416700 chr8 85557500 85559500 783 | chr8 85414700 85416700 chr8 85566300 85568300 784 | chr8 85414700 85416700 chr8 85572600 85574600 785 | 
chr8 85414700 85416700 chr8 85597000 85599000 786 | chr8 85414700 85416700 chr8 85620000 85622000 787 | chr8 85414700 85416700 chr8 85629000 85631000 788 | chr8 85414700 85416700 chr8 85633600 85635600 789 | chr8 85414700 85416700 chr8 85639000 85641000 790 | chr8 85414700 85416700 chr8 85641000 85643000 791 | chr8 85414700 85416700 chr8 85715600 85717600 792 | chr8 85414700 85416700 chr8 85751500 85753500 793 | chr8 85414700 85416700 chr8 85762400 85764400 794 | chr8 85414700 85416700 chr8 85785750 85787750 795 | chr8 85414700 85416700 chr8 85797300 85799300 796 | chr8 85423500 85425500 chr8 85536500 85538500 797 | chr8 85423500 85425500 chr8 85538700 85540700 798 | chr8 85423500 85425500 chr8 85541200 85543200 799 | chr8 85425500 85427500 chr8 85536500 85538500 800 | chr8 85425500 85427500 chr8 85538700 85540700 801 | chr8 85425500 85427500 chr8 85541200 85543200 802 | chr8 85425500 85427500 chr8 85597000 85599000 803 | chr8 85430200 85432200 chr8 85526000 85528000 804 | chr8 85430200 85432200 chr8 85536500 85538500 805 | chr8 85430200 85432200 chr8 85541200 85543200 806 | chr8 85430200 85432200 chr8 85597000 85599000 807 | chr8 85430200 85432200 chr8 85629000 85631000 808 | chr8 85430200 85432200 chr8 85633600 85635600 809 | chr8 85430200 85432200 chr8 85641000 85643000 810 | chr8 85430200 85432200 chr8 85715600 85717600 811 | chr8 85433000 85435000 chr8 85526000 85528000 812 | chr8 85433000 85435000 chr8 85597000 85599000 813 | chr8 85433000 85435000 chr8 85629000 85631000 814 | chr8 85433000 85435000 chr8 85633600 85635600 815 | chr8 85433000 85435000 chr8 85641000 85643000 816 | chr8 85433000 85435000 chr8 85715600 85717600 817 | chr8 85435000 85437000 chr8 85463300 85465300 818 | chr8 85435000 85437000 chr8 85526000 85528000 819 | chr8 85435000 85437000 chr8 85536500 85538500 820 | chr8 85435000 85437000 chr8 85538700 85540700 821 | chr8 85435000 85437000 chr8 85541200 85543200 822 | chr8 85435000 85437000 chr8 85549300 85551300 823 | chr8 85435000 85437000 
chr8 85597000 85599000 824 | chr8 85435000 85437000 chr8 85629000 85631000 825 | chr8 85435000 85437000 chr8 85633600 85635600 826 | chr8 85435000 85437000 chr8 85715600 85717600 827 | chr8 85448750 85450750 chr8 85526000 85528000 828 | chr8 85448750 85450750 chr8 85536500 85538500 829 | chr8 85448750 85450750 chr8 85557500 85559500 830 | chr8 85448750 85450750 chr8 85566300 85568300 831 | chr8 85448750 85450750 chr8 85572600 85574600 832 | chr8 85448750 85450750 chr8 85597000 85599000 833 | chr8 85448750 85450750 chr8 85629000 85631000 834 | chr8 85448750 85450750 chr8 85633600 85635600 835 | chr8 85448750 85450750 chr8 85639000 85641000 836 | chr8 85448750 85450750 chr8 85641000 85643000 837 | chr8 85455750 85457750 chr8 85526000 85528000 838 | chr8 85455750 85457750 chr8 85536500 85538500 839 | chr8 85455750 85457750 chr8 85557500 85559500 840 | chr8 85455750 85457750 chr8 85566300 85568300 841 | chr8 85455750 85457750 chr8 85572600 85574600 842 | chr8 85455750 85457750 chr8 85597000 85599000 843 | chr8 85455750 85457750 chr8 85629000 85631000 844 | chr8 85455750 85457750 chr8 85633600 85635600 845 | chr8 85455750 85457750 chr8 85639000 85641000 846 | chr8 85455750 85457750 chr8 85641000 85643000 847 | chr8 85463300 85465300 chr8 85526000 85528000 848 | chr8 85463300 85465300 chr8 85536500 85538500 849 | chr8 85463300 85465300 chr8 85538700 85540700 850 | chr8 85463300 85465300 chr8 85541200 85543200 851 | chr8 85463300 85465300 chr8 85549300 85551300 852 | chr8 85463300 85465300 chr8 85597000 85599000 853 | chr8 85463300 85465300 chr8 85620000 85622000 854 | chr8 85463300 85465300 chr8 85629000 85631000 855 | chr8 85463300 85465300 chr8 85633600 85635600 856 | chr8 85463300 85465300 chr8 85639000 85641000 857 | chr8 85463300 85465300 chr8 85641000 85643000 858 | chr8 85463300 85465300 chr8 85715600 85717600 859 | chr8 85467800 85469800 chr8 85526000 85528000 860 | chr8 85467800 85469800 chr8 85536500 85538500 861 | chr8 85467800 85469800 chr8 85538700 85540700 
862 | chr8 85467800 85469800 chr8 85557500 85559500 863 | chr8 85467800 85469800 chr8 85566300 85568300 864 | chr8 85467800 85469800 chr8 85572600 85574600 865 | chr8 85467800 85469800 chr8 85597000 85599000 866 | chr8 85467800 85469800 chr8 85620000 85622000 867 | chr8 85467800 85469800 chr8 85629000 85631000 868 | chr8 85467800 85469800 chr8 85633600 85635600 869 | chr8 85467800 85469800 chr8 85639000 85641000 870 | chr8 85467800 85469800 chr8 85641000 85643000 871 | chr8 85467800 85469800 chr8 85715600 85717600 872 | chr8 85497500 85499500 chr8 85526000 85528000 873 | chr8 85499300 85501300 chr8 85526000 85528000 874 | chr8 85511000 85513000 chr8 85526000 85528000 875 | chr8 85511000 85513000 chr8 85597000 85599000 876 | chr8 85511000 85513000 chr8 85620000 85622000 877 | chr8 85511000 85513000 chr8 85629000 85631000 878 | chr8 85511000 85513000 chr8 85641000 85643000 879 | chr8 85526000 85528000 chr8 85536500 85538500 880 | chr8 85526000 85528000 chr8 85549300 85551300 881 | chr8 85526000 85528000 chr8 85557500 85559500 882 | chr8 85526000 85528000 chr8 85566300 85568300 883 | chr8 85526000 85528000 chr8 85572600 85574600 884 | chr8 85526000 85528000 chr8 85597000 85599000 885 | chr8 85526000 85528000 chr8 85620000 85622000 886 | chr8 85526000 85528000 chr8 85629000 85631000 887 | chr8 85526000 85528000 chr8 85633600 85635600 888 | chr8 85526000 85528000 chr8 85639000 85641000 889 | chr8 85526000 85528000 chr8 85641000 85643000 890 | chr8 85526000 85528000 chr8 85663000 85665000 891 | chr8 85526000 85528000 chr8 85671400 85673400 892 | chr8 85526000 85528000 chr8 85691000 85693000 893 | chr8 85526000 85528000 chr8 85695000 85697000 894 | chr8 85526000 85528000 chr8 85702000 85704000 895 | chr8 85526000 85528000 chr8 85705600 85707600 896 | chr8 85526000 85528000 chr8 85711500 85713500 897 | chr8 85526000 85528000 chr8 85715600 85717600 898 | chr8 85526000 85528000 chr8 85751500 85753500 899 | chr8 85526000 85528000 chr8 85762400 85764400 900 | chr8 85536500 
85538500 chr8 85572600 85574600 901 | chr8 85536500 85538500 chr8 85597000 85599000 902 | chr8 85536500 85538500 chr8 85629000 85631000 903 | chr8 85536500 85538500 chr8 85633600 85635600 904 | chr8 85536500 85538500 chr8 85715600 85717600 905 | chr8 85536500 85538500 chr8 85751500 85753500 906 | chr8 85536500 85538500 chr8 85762400 85764400 907 | chr8 85538700 85540700 chr8 85597000 85599000 908 | chr8 85541200 85543200 chr8 85597000 85599000 909 | chr8 85549300 85551300 chr8 85597000 85599000 910 | chr8 85549300 85551300 chr8 85629000 85631000 911 | chr8 85549300 85551300 chr8 85797300 85799300 912 | chr8 85557500 85559500 chr8 85572600 85574600 913 | chr8 85557500 85559500 chr8 85582500 85584500 914 | chr8 85557500 85559500 chr8 85597000 85599000 915 | chr8 85557500 85559500 chr8 85620000 85622000 916 | chr8 85557500 85559500 chr8 85629000 85631000 917 | chr8 85557500 85559500 chr8 85633600 85635600 918 | chr8 85557500 85559500 chr8 85641000 85643000 919 | chr8 85557500 85559500 chr8 85691000 85693000 920 | chr8 85557500 85559500 chr8 85702000 85704000 921 | chr8 85557500 85559500 chr8 85705600 85707600 922 | chr8 85557500 85559500 chr8 85715600 85717600 923 | chr8 85557500 85559500 chr8 85751500 85753500 924 | chr8 85557500 85559500 chr8 85762400 85764400 925 | chr8 85557500 85559500 chr8 85785750 85787750 926 | chr8 85557500 85559500 chr8 85797300 85799300 927 | chr8 85566300 85568300 chr8 85572600 85574600 928 | chr8 85566300 85568300 chr8 85582500 85584500 929 | chr8 85566300 85568300 chr8 85597000 85599000 930 | chr8 85566300 85568300 chr8 85620000 85622000 931 | chr8 85566300 85568300 chr8 85629000 85631000 932 | chr8 85566300 85568300 chr8 85633600 85635600 933 | chr8 85566300 85568300 chr8 85641000 85643000 934 | chr8 85566300 85568300 chr8 85691000 85693000 935 | chr8 85566300 85568300 chr8 85702000 85704000 936 | chr8 85566300 85568300 chr8 85705600 85707600 937 | chr8 85566300 85568300 chr8 85715600 85717600 938 | chr8 85566300 85568300 chr8 85751500 
85753500 939 | chr8 85566300 85568300 chr8 85762400 85764400 940 | chr8 85566300 85568300 chr8 85785750 85787750 941 | chr8 85566300 85568300 chr8 85797300 85799300 942 | chr8 85572600 85574600 chr8 85582500 85584500 943 | chr8 85572600 85574600 chr8 85597000 85599000 944 | chr8 85572600 85574600 chr8 85620000 85622000 945 | chr8 85572600 85574600 chr8 85629000 85631000 946 | chr8 85572600 85574600 chr8 85633600 85635600 947 | chr8 85572600 85574600 chr8 85641000 85643000 948 | chr8 85572600 85574600 chr8 85691000 85693000 949 | chr8 85572600 85574600 chr8 85695000 85697000 950 | chr8 85572600 85574600 chr8 85702000 85704000 951 | chr8 85572600 85574600 chr8 85705600 85707600 952 | chr8 85572600 85574600 chr8 85711500 85713500 953 | chr8 85572600 85574600 chr8 85715600 85717600 954 | chr8 85572600 85574600 chr8 85751500 85753500 955 | chr8 85572600 85574600 chr8 85762400 85764400 956 | chr8 85572600 85574600 chr8 85785750 85787750 957 | chr8 85572600 85574600 chr8 85797300 85799300 958 | chr8 85582500 85584500 chr8 85597000 85599000 959 | chr8 85582500 85584500 chr8 85620000 85622000 960 | chr8 85582500 85584500 chr8 85629000 85631000 961 | chr8 85582500 85584500 chr8 85633600 85635600 962 | chr8 85582500 85584500 chr8 85641000 85643000 963 | chr8 85582500 85584500 chr8 85691000 85693000 964 | chr8 85582500 85584500 chr8 85715600 85717600 965 | chr8 85582500 85584500 chr8 85751500 85753500 966 | chr8 85582500 85584500 chr8 85762400 85764400 967 | chr8 85582500 85584500 chr8 85797300 85799300 968 | chr8 85597000 85599000 chr8 85620000 85622000 969 | chr8 85597000 85599000 chr8 85629000 85631000 970 | chr8 85597000 85599000 chr8 85633600 85635600 971 | chr8 85597000 85599000 chr8 85641000 85643000 972 | chr8 85597000 85599000 chr8 85663000 85665000 973 | chr8 85597000 85599000 chr8 85671400 85673400 974 | chr8 85597000 85599000 chr8 85691000 85693000 975 | chr8 85597000 85599000 chr8 85695000 85697000 976 | chr8 85597000 85599000 chr8 85715600 85717600 977 | chr8 
85597000 85599000 chr8 85751500 85753500 978 | chr8 85597000 85599000 chr8 85762400 85764400 979 | chr8 85597000 85599000 chr8 85785750 85787750 980 | chr8 85597000 85599000 chr8 85797300 85799300 981 | chr8 85620000 85622000 chr8 85629000 85631000 982 | chr8 85620000 85622000 chr8 85633600 85635600 983 | chr8 85620000 85622000 chr8 85641000 85643000 984 | chr8 85620000 85622000 chr8 85663000 85665000 985 | chr8 85620000 85622000 chr8 85671400 85673400 986 | chr8 85620000 85622000 chr8 85691000 85693000 987 | chr8 85620000 85622000 chr8 85695000 85697000 988 | chr8 85620000 85622000 chr8 85715600 85717600 989 | chr8 85620000 85622000 chr8 85751500 85753500 990 | chr8 85620000 85622000 chr8 85762400 85764400 991 | chr8 85620000 85622000 chr8 85797300 85799300 992 | chr8 85629000 85631000 chr8 85633600 85635600 993 | chr8 85629000 85631000 chr8 85641000 85643000 994 | chr8 85629000 85631000 chr8 85663000 85665000 995 | chr8 85629000 85631000 chr8 85671400 85673400 996 | chr8 85629000 85631000 chr8 85691000 85693000 997 | chr8 85629000 85631000 chr8 85695000 85697000 998 | chr8 85629000 85631000 chr8 85702000 85704000 999 | chr8 85629000 85631000 chr8 85711500 85713500 1000 | chr8 85629000 85631000 chr8 85715600 85717600 1001 | chr8 85629000 85631000 chr8 85751500 85753500 1002 | chr8 85629000 85631000 chr8 85762400 85764400 1003 | chr8 85629000 85631000 chr8 85797300 85799300 1004 | chr8 85633600 85635600 chr8 85641000 85643000 1005 | chr8 85633600 85635600 chr8 85663000 85665000 1006 | chr8 85633600 85635600 chr8 85671400 85673400 1007 | chr8 85633600 85635600 chr8 85691000 85693000 1008 | chr8 85633600 85635600 chr8 85695000 85697000 1009 | chr8 85633600 85635600 chr8 85702000 85704000 1010 | chr8 85633600 85635600 chr8 85711500 85713500 1011 | chr8 85633600 85635600 chr8 85715600 85717600 1012 | chr8 85633600 85635600 chr8 85751500 85753500 1013 | chr8 85633600 85635600 chr8 85762400 85764400 1014 | chr8 85633600 85635600 chr8 85797300 85799300 1015 | chr8 
85641000 85643000 chr8 85663000 85665000 1016 | chr8 85641000 85643000 chr8 85671400 85673400 1017 | chr8 85641000 85643000 chr8 85691000 85693000 1018 | chr8 85641000 85643000 chr8 85695000 85697000 1019 | chr8 85641000 85643000 chr8 85702000 85704000 1020 | chr8 85641000 85643000 chr8 85711500 85713500 1021 | chr8 85641000 85643000 chr8 85715600 85717600 1022 | chr8 85641000 85643000 chr8 85751500 85753500 1023 | chr8 85641000 85643000 chr8 85762400 85764400 1024 | chr8 85641000 85643000 chr8 85797300 85799300 1025 | chr8 85654000 85656000 chr8 85682700 85684700 1026 | chr8 85654000 85656000 chr8 85715600 85717600 1027 | chr8 85663000 85665000 chr8 85682700 85684700 1028 | chr8 85663000 85665000 chr8 85691000 85693000 1029 | chr8 85663000 85665000 chr8 85715600 85717600 1030 | chr8 85663000 85665000 chr8 85751500 85753500 1031 | chr8 85663000 85665000 chr8 85762400 85764400 1032 | chr8 85663000 85665000 chr8 85797300 85799300 1033 | chr8 85671400 85673400 chr8 85691000 85693000 1034 | chr8 85671400 85673400 chr8 85715600 85717600 1035 | chr8 85671400 85673400 chr8 85751500 85753500 1036 | chr8 85671400 85673400 chr8 85762400 85764400 1037 | chr8 85671400 85673400 chr8 85797300 85799300 1038 | chr8 85682700 85684700 chr8 85715600 85717600 1039 | chr8 85691000 85693000 chr8 85702000 85704000 1040 | chr8 85691000 85693000 chr8 85711500 85713500 1041 | chr8 85691000 85693000 chr8 85715600 85717600 1042 | chr8 85691000 85693000 chr8 85751500 85753500 1043 | chr8 85691000 85693000 chr8 85762400 85764400 1044 | chr8 85691000 85693000 chr8 85785750 85787750 1045 | chr8 85691000 85693000 chr8 85797300 85799300 1046 | chr8 85691000 85693000 chr8 85806500 85808500 1047 | chr8 85691000 85693000 chr8 85809000 85811000 1048 | chr8 85695000 85697000 chr8 85702000 85704000 1049 | chr8 85695000 85697000 chr8 85711500 85713500 1050 | chr8 85695000 85697000 chr8 85715600 85717600 1051 | chr8 85695000 85697000 chr8 85751500 85753500 1052 | chr8 85695000 85697000 chr8 85762400 
85764400 1053 | chr8 85695000 85697000 chr8 85785750 85787750 1054 | chr8 85695000 85697000 chr8 85797300 85799300 1055 | chr8 85695000 85697000 chr8 85806500 85808500 1056 | chr8 85695000 85697000 chr8 85809000 85811000 1057 | chr8 85702000 85704000 chr8 85711500 85713500 1058 | chr8 85702000 85704000 chr8 85715600 85717600 1059 | chr8 85702000 85704000 chr8 85723500 85725500 1060 | chr8 85702000 85704000 chr8 85751500 85753500 1061 | chr8 85702000 85704000 chr8 85762400 85764400 1062 | chr8 85702000 85704000 chr8 85785750 85787750 1063 | chr8 85702000 85704000 chr8 85797300 85799300 1064 | chr8 85702000 85704000 chr8 85802000 85804000 1065 | chr8 85702000 85704000 chr8 85806500 85808500 1066 | chr8 85702000 85704000 chr8 85809000 85811000 1067 | chr8 85705600 85707600 chr8 85751500 85753500 1068 | chr8 85705600 85707600 chr8 85762400 85764400 1069 | chr8 85705600 85707600 chr8 85797300 85799300 1070 | chr8 85711500 85713500 chr8 85751500 85753500 1071 | chr8 85711500 85713500 chr8 85762400 85764400 1072 | chr8 85711500 85713500 chr8 85797300 85799300 1073 | chr8 85715600 85717600 chr8 85751500 85753500 1074 | chr8 85715600 85717600 chr8 85762400 85764400 1075 | chr8 85715600 85717600 chr8 85785750 85787750 1076 | chr8 85715600 85717600 chr8 85797300 85799300 1077 | chr8 85715600 85717600 chr8 85806500 85808500 1078 | chr8 85715600 85717600 chr8 85809000 85811000 1079 | chr8 85751500 85753500 chr8 85762400 85764400 1080 | chr8 85751500 85753500 chr8 85785750 85787750 1081 | chr8 85751500 85753500 chr8 85797300 85799300 1082 | chr8 85751500 85753500 chr8 85806500 85808500 1083 | chr8 85751500 85753500 chr8 85809000 85811000 1084 | chr8 85762400 85764400 chr8 85785750 85787750 1085 | chr8 85762400 85764400 chr8 85797300 85799300 1086 | chr8 85762400 85764400 chr8 85806500 85808500 1087 | chr8 85762400 85764400 chr8 85809000 85811000 1088 | chr8 85785750 85787750 chr8 85797300 85799300 1089 | chr8 85785750 85787750 chr8 85806500 85808500 1090 | chr8 85797300 85799300 
#Viraat's new calculation
#Modified 2022/10/04 by Miles to try to avoid NaN values and correct an issue with the background sum,
#and generally make the code a little more streamlined
def enrichmentCalc(mtx, dotWindow):
    """Return the enrichment of the central dot over the two corner boxes.

    The "dot" is the box centred on the middle pixel of ``mtx``; the
    background is the average of the top-left and bottom-right corner boxes.
    NaN pixels are ignored by the sums (``np.nansum`` treats them as 0).

    Args:
        mtx: Square 2-D array-like (e.g. an averaged pileup snippet).
        dotWindow: Side length, in pixels, of the boxes to sum. Odd values
            give a dot box of exactly ``dotWindow`` pixels per side. Even
            values give a dot box one pixel narrower than the corner boxes;
            in that case the sums are normalised per pixel so the areas are
            comparable.

    Returns:
        Dot signal divided by mean background signal. Follows NumPy division
        semantics when the background is zero (inf or nan).
    """
    #Accept lists as well as arrays; ndarray subclasses pass through unchanged
    mtx = np.asanyarray(mtx)
    #Dimension of array side (should be square)
    sideLength = len(mtx)
    #Middle of side length
    midPoint = (sideLength - 1) // 2
    #Half size of box around centre pixel (one pixel smaller if even-sized dot window)
    buffer = (dotWindow - 1) // 2

    #Box around the dot and the two background boxes in opposite corners
    dotBox = mtx[midPoint-buffer:midPoint+buffer+1, midPoint-buffer:midPoint+buffer+1]
    backgroundBox1 = mtx[0:dotWindow, 0:dotWindow]
    backgroundBox2 = mtx[sideLength-dotWindow:sideLength, sideLength-dotWindow:sideLength]

    #Sums ignoring NaN values
    dotSum = np.nansum(dotBox)
    backgroundSum1 = np.nansum(backgroundBox1)
    backgroundSum2 = np.nansum(backgroundBox2)

    if dotBox.size == backgroundBox1.size == backgroundBox2.size:
        #All boxes cover the same area (odd dotWindow): plain ratio of sums
        return dotSum / ((backgroundSum1 + backgroundSum2)/2)

    #Boxes differ in area (even dotWindow): compare per-pixel signal instead,
    #otherwise the larger corner boxes would deflate the enrichment
    dotMean = dotSum / dotBox.size
    backgroundMean = (backgroundSum1 + backgroundSum2) / (backgroundBox1.size + backgroundBox2.size)
    return dotMean / backgroundMean
#######Don't change this section#######
#Column names used for every imported bedpe loop file (this is constant - do not change)
colNames = ['chrom1', 'start1', 'end1', 'chrom2', 'start2', 'end2']
#One DataFrame of loops per entry in loopFiles, kept in the same order
loopTypes = [
    pd.read_csv(loopFile, sep='\t', names=colNames, header=None)
    for loopFile in loopFiles
]

#For every cooler, pile up each loop set and save one annotated figure per combination
for clrIndex, clrfile in enumerate(clrfiles):
    #Condition name and cis-expected table that belong to this cooler
    condition = conditions[clrIndex]
    expected = pd.read_csv(expectedFiles[clrIndex], sep='\t')
    #Open the mcool at the requested resolution
    clr = cooler.Cooler(clrfile+'::/resolutions/'+str(resolution))

    for loopIndex, loops in enumerate(loopTypes):
        loopsName = loopTypesNames[loopIndex]

        #Stack of snippets around each loop, averaged into a single matrix
        stack = cooltools.pileup(clr, loops, view_df=regions, expected_df=expected, flank=flankDist)
        mtx = np.nanmean(stack, axis=2)
        #Strength of the central dot relative to the corners
        enrichment = enrichmentCalc(mtx, dotWindow)

        #Axis ticks: pixel positions and the matching offsets from the centre in kbp
        ticks_pixels = np.linspace(0, flankDist*2//resolution,5)
        ticks_kbp = ((ticks_pixels-ticks_pixels[-1]/2)*resolution//1000).astype(int)

        #Draw the log2 obs/exp heatmap
        plt.imshow(np.log2(mtx), vmax = 2.5, vmin = -2.5, cmap='coolwarm')
        plt.colorbar(label = 'log2 mean obs/exp')
        plt.xticks(ticks_pixels, ticks_kbp)
        plt.yticks(ticks_pixels, ticks_kbp)
        plt.xlabel('relative position, kbp')
        plt.ylabel('relative position, kbp')
        #Annotate with the enrichment value
        plt.text(1, 1, round(enrichment, 2))

        #Save, then clear the figure so the next pileup starts from a blank canvas
        outputPath = (saveDir+'LoopPileups_'+condition+'_'+loopsName+'_'
                      +str(resolution)+'bp_'+str(flankDist)+'bp.pdf')
        plt.savefig(outputPath, dpi=1200)
        plt.clf()
| "nbconvert_exporter": "python", 225 | "pygments_lexer": "ipython3", 226 | "version": "3.7.12" 227 | } 228 | }, 229 | "nbformat": 4, 230 | "nbformat_minor": 5 231 | } 232 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RCMC Analysis Code 2 | This repository contains source code for the article [Region Capture Micro-C reveals coalescence of enhancers and promoters into nested microcompartments](https://www.nature.com/articles/s41588-023-01391-1), used in the analysis of RCMC data. 3 | 4 | Code is provided either in the form of Python/R scripts or as Jupyter notebooks to be run in conda environments containing the required packages. Additionally, genomic positions of microcompartments identified in the paper are included in bedpe format. 5 | 6 | ## Code summary 7 | ### Micro-C alignment (microcbowtie2.py) 8 | Required packages: 9 | - bowtie2 10 | - samtools 11 | - sambamba 12 | - pairtools 13 | - cooler 14 | - pairix 15 | 16 | Python script that aligns reads in .fastq format from paired-end sequencing of Micro-C experiments and produces .pairs, .cool and .mcool files compatible with downstream applications such as HiGlass. 17 | 18 | Example usage: 19 | 20 | ``` 21 | python /path/to/script/microcbowtie2.py --file_1 pair1.fastq --file_2 pair2.fastq -g mm39 -t 36 -o exampleoutput 22 | ``` 23 | 24 | ### ChIP-seq alignment (spikeinChIP_PE_alignment.py) 25 | Required packages: 26 | - bowtie2 27 | - samtools 28 | - sambamba 29 | 30 | Python script that aligns reads in .fastq format from paired-end sequencing of ChIP-seq experiments and produces aligned .bam files. .fastq information is input as a .tsv with each line containing the path to the first pair .fastq, the path to the second pair .fastq, and the desired output name of the aligned file. 
31 | 32 | Example usage: 33 | 34 | ``` 35 | python /path/to/script/spikeinChIP_PE_alignment.py -f list_of_fastqs.tsv -g mm39 -t 36 -o alignmentcountsout 36 | ``` 37 | 38 | ### Finding chromatin features overlapping microcompartment anchors (loopFeatureOverlap.R) 39 | Required packages: 40 | - plyr 41 | - dplyr 42 | - reshape2 43 | - purrr 44 | - grid 45 | - IRanges 46 | - GenomicRanges 47 | - arrangements 48 | - foreach 49 | 50 | R script used to classify microcompartment interactions by finding overlap between identified microcompartments (.bedpe) and chromatin features (.bed) such as promoters, enhancers, CTCF binding sites, etc. It outputs individual .bedpe files of interactions according to combinatorial classification of chromatin features (e.g., for enhancer (E) and promoter (P): P-P, E-E, E-P, E-null, P-null, null-null), including interactions which have no overlap (null category). Classification is inclusive by default, or mutually exclusive (E-P cannot also be P-P) when the `-e`/`--exclusive` flag is set. 51 | 52 | Example usage: 53 | 54 | ``` 55 | Rscript /path/to/script/loopFeatureOverlap.R -l interactions.bedpe -b promoter.bed,enhancer.bed -i P,E -o outputdirectory/ 56 | ``` 57 | 58 | ### Calculating strength of individual interactions (LoopStrengthRCMC.ipynb) or aggregate pileup analysis (PileupsRCMC.ipynb) 59 | Required packages: 60 | - seaborn 61 | - coolpuppy 62 | - cooltools 63 | - cooler 64 | 65 | Jupyter notebooks used to calculate strengths of individual microcompartments (LoopStrengthRCMC.ipynb) or generate aggregate pileup analysis figures (PileupsRCMC.ipynb). 
Each one takes .mcool files of contacts from RCMC, a list of interactions to analyze (.bedpe format), expected files generated by cooltools for each .mcool, and the captured region, and calculates background-corrected observed/expected interaction strengths, either for each interaction individually (output as a .bedpe with additional columns for strengths for each .mcool) or as a pileup (output as a .pdf of the aggregate interaction annotated with the calculated strength). 66 | 67 | ### Visualization of contact maps and genomic tracks (ContactMapVisualizationExampleNotebook.ipynb) 68 | Required packages: 69 | - cooltools 70 | - cooler 71 | - coolbox 72 | - matplotlib 73 | 74 | Jupyter notebook used to generate visualizations of contact maps and genomic tracks for figures. Contact map visualization is accomplished using cooltools and requires a .mcool file of contacts from RCMC or a comparable method. Genomic track visualization is accomplished using coolbox and requires a .mcool file of contacts, gene annotations (.gtf format or similar), and ChIP-seq, RNA-seq, and ATAC-seq datasets (.bw format). 75 | 76 | ### Calculation of read-containing bin fraction by contact distance (CalculatingFilledBinFractionByDistance.ipynb) 77 | Required packages: 78 | - cooltools 79 | - cooler 80 | - matplotlib 81 | 82 | Jupyter notebook used to calculate the fraction of bins in .mcool-derived contact maps which contain at least one read pair at a given resolution and contact distance from the diagonal. The notebook takes an unbalanced .mcool of contacts from RCMC or a comparable method, tabulates the occupied contact bin fraction at specified contact distances, and generates a plot of occupied bin fraction by contact distance. 
83 | 84 | ### Calculation of row sums in ICE-balanced .mcools (BalancedRowsumsCalculation.ipynb) 85 | Required packages: 86 | - cooltools 87 | - cooler 88 | - matplotlib 89 | 90 | Jupyter notebook used to confirm successful ICE balancing of .mcool files by calculating and plotting row sums. Two variations of the calculation are provided in the notebook – one for calculating row sums across an entire region, and one for calculating row sums only for bins containing contact anchor sites. Both variations take ICE-balanced .mcool files of contacts from RCMC or a comparable method, and the latter additionally takes a list of contact anchor sites (.bed format). The distributions of calculated row sums in either variation are plotted as histograms. 91 | 92 | ### List of manually-annotated microcompartment loops (MicrocompartmentLoops_PlusMin1kb.bedpe) 93 | BEDPE format file listing all 1091 manually-annotated microcompartment loops across the Ppm1g (chr5) and Klf1 (chr8) regions used in the microcompartment analysis scripts above. Coordinates are provided for the mm39 reference genome, and loop anchors are listed as plus-and-minus 1kb from each anchor’s point coordinate. Columns in the file are as follows: the first is the chromosome of the left loop anchor, the second is the coordinate of the left loop anchor minus 1 kb, the third is the coordinate of the left loop anchor plus 1 kb, and the remaining three columns are the same for the right loop anchor. 94 | 95 | ### Lists of probes used for capturing regions of interest (captureprobes_mm10.bed, captureprobes_mm39.bed) 96 | BED format file listing the genomic locations of all probes used for capturing the Sox2 (chr3), Ppm1g (chr5), Nanog (chr6), Klf1 (chr8), and Fbn2 (chr18) regions used in capture. Coordinates are provided for both the mm10 and mm39 reference genomes, and loop anchors are listed as plus-and-minus 1kb from each anchor’s point coordinate. 
#New version of loopFeatureOverlap.py, written in R using Granges instead of Python-based Pyranges version, which had an output issue

#Imports
library('plyr')
library('dplyr')
require('reshape2')
library('purrr')
library('grid')
# library('ChIPpeakAnno')
library('IRanges')
library('GenomicRanges')
library('arrangements')
library('foreach')

#Get args
if(!require(optparse)) {
  stop("Please install the optparse package and try again!", call. = FALSE)
}

library(optparse)

parser <- OptionParser(add_help_option = TRUE)
parser <- add_option(parser, c("-o", "--outdir"), action = "store", type = "character", help = "Directory to output bed files", default = "na")
parser <- add_option(parser, c("-l", "--loops"), action = "store", type = "character", help = "Input bedpe or tsv file containing loops in bedpe format", default = "na")
parser <- add_option(parser, c("-b", "--bed"), action = "store", type = "character", help = "Input bed file or files - if multiple, separate with commas", default = "na")
parser <- add_option(parser, c("-i", "--id"), action = "store", type = "character", help = "Feature names to use for each bed file - will be used to name output files. If multiple, separate with commas", default = "na")
parser <- add_option(parser, c("-e", "--exclusive"), action = "store_true", help = "When set, defines loop anchors with only a single feature - overlap with multiple features is not allowed", default = FALSE)
args <- parse_args(parser)

#########################################################################
#Convert args to variables
path.loops <- args$loops
outdir <- args$outdir
features <- args$bed
ids <- args$id

#Validate the required options before touching any file. "na" is the default
#placeholder, so it means the option was not supplied. The outdir test has to
#happen BEFORE a trailing slash is appended, otherwise "na" becomes "na/" and
#the check can never fire.
if (outdir == "na") {
  stop("Please provide an option for --outdir", call. = FALSE)
}
if (path.loops == "na") {
  stop("Please provide an option for --loops", call. = FALSE)
}
if (features == "na") {
  stop("Please provide an option for --bed", call. = FALSE)
}

#Ensure output directory ends with a /
if(!endsWith(outdir, '/')) {
  outdir <- paste0(outdir, '/')
}

#########################################################################
#Read in loops (whitespace-separated, no header, six bedpe columns)
loops <- read.delim(path.loops, header = FALSE, col.names = c("chr1", "start1", "end1", "chr2", "start2", "end2"), sep = "", dec = ".") #opens up the file

#Read in features
featureslist <- unlist(strsplit(features, ","))
idlist <- unlist(strsplit(ids, ","))

#Check features and ids are same length
if (length(featureslist) != length(idlist)) {
  stop("Make sure the same numbers of bed files and IDs are provided")
}

#We only want and know the names of the first three columns, so write a little import function.
#Reads a bed-like file and returns a data frame with columns chr, start, end;
#every column after the third is dropped on import.
import_feature_data <- function(filename) {
  #Count fields with the same splitting rules read.delim uses below (any
  #whitespace, double-quote quoting, no comment character) so the number of
  #columns blanked out always matches what the reader actually sees
  num.fields <- max(count.fields(filename, sep = "", quote = "\"", comment.char = ""))
  num.cols.to.blank <- num.fields - 3
  if (is.na(num.cols.to.blank) || num.cols.to.blank < 0) {
    stop(paste0("Could not find at least three columns (chr, start, end) in ", filename), call. = FALSE)
  }
  #colClasses "NULL" tells read.delim to skip that column entirely
  df <- read.delim(filename, header = FALSE, sep = "", dec = ".", colClasses = c('character', rep('numeric', 2), rep("NULL", num.cols.to.blank)))
  colnames(df) <- c("chr", "start", "end")
  return(df)
}

features.data <- lapply(featureslist, import_feature_data)

#########################################################################
#Make Granges objects
#For features
features.data.granges <- lapply(features.data, makeGRangesFromDataFrame, keep.extra.columns = FALSE)
#For loops
#First make separate dfs for each anchor
#Splits a bedpe-style data frame into a list of two bed-like data frames
#(first anchors, second anchors) that share a loop_id column for re-merging.
make_anchors_separate <- function(loops) {
  #Add loop_id column for merging
  loops$loop_id <- seq.int(nrow(loops))
  #Split loops into two bed-like files
  loops.1 <- data.frame(chr = loops$chr1, start = loops$start1, end = loops$end1, loop_id = loops$loop_id)
  loops.2 <- data.frame(chr = loops$chr2, start = loops$start2, end = loops$end2, loop_id = loops$loop_id)
  return(list(loops.1, loops.2))
}
#Then make Granges
loops.anchors.list <- make_anchors_separate(loops)
#Per-anchor data frames; one overlap-count column per feature is added to each below
loops.anchors.1 <- loops.anchors.list[[1]]
loops.anchors.2 <- loops.anchors.list[[2]]
#GRanges version of both anchor sets (extra columns are not carried over)
loops.anchors.list.granges <- lapply(loops.anchors.list, makeGRangesFromDataFrame, keep.extra.columns = FALSE)

#########################################################################
#Compare Granges objects
#For every feature set, store the number of overlaps per anchor in a column named after its id
for (feature.count in seq_along(features.data.granges)) {
  feature <- features.data.granges[[feature.count]]
  feature.id <- idlist[feature.count]
  loops.anchors.1[[feature.id]] <- countOverlaps(loops.anchors.list.granges[[1]], feature)
  loops.anchors.2[[feature.id]] <- countOverlaps(loops.anchors.list.granges[[2]], feature)
}

#Suffix every column of the second anchor with '2' so the two sets don't clash
names(loops.anchors.2) <- paste0(names(loops.anchors.2), '2')

#Bring both anchors of each loop back onto one row, matched on loop_id / loop_id2
loops.anchors.remerge <- merge(x = loops.anchors.1, y = loops.anchors.2, by.x = "loop_id", by.y = "loop_id2")

########################################################################
#Determine loop classes

#Class names are built from the ids plus "null" (for anchors with no features)
idlist.null <- c(idlist, "null")

#All pairs of classes, with replacement
combination.object <- icombinations(idlist.null, k = 2, replace = TRUE)
#One slot per pair (collect() holds two values per pair, hence the division)
id.combined.list <- vector("list", length(combination.object$collect())/2)

#Fill the slots with "first-second" labels
#NOTE(review): the labels index idlist.null by whatever the iterator yields in x;
#confirm it yields positions rather than the element values themselves
position <- 1
foreach(x = icombinations(idlist.null, k = 2, replace = TRUE), .combine = c) %do% {
  id.combined.list[[position]] <- paste(idlist.null[x[1]], idlist.null[x[2]], sep = "-")
  position <- position + 1
}
#Classify loops. Which selector is used depends on whether inclusive or exclusive loops are desired.
#In both selectors, first.id / second.id name the feature required on the first /
#second anchor; NULL means that anchor must overlap no feature at all ("null").

#Exclusive selector: an anchor must overlap its requested feature and no other feature.
select_exclusive <- function(df, ids, first.id = NULL, second.id = NULL) {
  for (id in ids) {
    id2 <- paste0(id, '2')
    want.first <- !is.null(first.id) && id == first.id
    want.second <- !is.null(second.id) && id == second.id
    keep.first <- if (want.first) df[[id]] > 0 else df[[id]] == 0
    keep.second <- if (want.second) df[[id2]] > 0 else df[[id2]] == 0
    df <- df[which(keep.first & keep.second), ]
  }
  df
}

#Inclusive selector: an anchor only has to overlap its requested feature; other
#features on the same anchor are allowed. A "null" anchor still needs zero overlaps.
select_inclusive <- function(df, ids, first.id = NULL, second.id = NULL) {
  keep <- rep(TRUE, nrow(df))
  if (is.null(first.id)) {
    for (id in ids) {
      keep <- keep & df[[id]] == 0
    }
  } else {
    keep <- keep & df[[first.id]] > 0
  }
  if (is.null(second.id)) {
    for (id in ids) {
      keep <- keep & df[[paste0(id, '2')]] == 0
    }
  } else {
    keep <- keep & df[[paste0(second.id, '2')]] > 0
  }
  df[which(keep), ]
}

select_loops <- if (args$exclusive) select_exclusive else select_inclusive

#Make named list of dfs
output.list <- setNames(replicate(length(id.combined.list), data.frame()), id.combined.list)

#seq_along (not 1:length) so an empty id list simply skips the loops
for (i1 in seq_along(idlist)) {
  for (i2 in seq_along(idlist)) {
    #Name the class with the lower-index id first so that E-P2 and P-E2 loops
    #are both put into the same category
    looptype <- paste(idlist[[min(i1, i2)]], idlist[[max(i1, i2)]], sep = '-')
    #Loops with feature i1 on the first anchor and feature i2 on the second
    temp.df <- select_loops(loops.anchors.remerge, idlist, first.id = idlist[[i1]], second.id = idlist[[i2]])
    output.list[[looptype]] <- rbind(output.list[[looptype]], temp.df)
  }
  looptype <- paste(idlist[[i1]], "null", sep = '-')
  #Generate X-null
  temp.df.1 <- select_loops(loops.anchors.remerge, idlist, first.id = idlist[[i1]])
  #Generate null-X
  temp.df.2 <- select_loops(loops.anchors.remerge, idlist, second.id = idlist[[i1]])
  #Combine them
  temp.df <- rbind(temp.df.1, temp.df.2)
  output.list[[looptype]] <- rbind(output.list[[looptype]], temp.df)
}

#Loops with no feature on either anchor
looptype <- "null-null"
temp.df <- select_loops(loops.anchors.remerge, idlist)
output.list[[looptype]] <- rbind(output.list[[looptype]], temp.df)

#Remove any duplicates from the dfs (inclusive calling can result in many)
output.list.nodups <- lapply(output.list, distinct)

#Print the lengths of each df to give the result:
sapply(output.list.nodups, nrow)

#Output the dataframes, one .bedpe per loop class
for (i in seq_along(output.list.nodups)) {
  temp.df <- output.list.nodups[[i]]
  #Get only the relevant columns
  output.df <- data.frame(chr = temp.df$chr, start = temp.df$start, end = temp.df$end, chr2 = temp.df$chr2, start2 = temp.df$start2, end2 = temp.df$end2)
  write.table(output.df, file = paste0(outdir, names(output.list.nodups)[[i]], '.bedpe'), row.names = FALSE, col.names = FALSE, quote = FALSE, sep = "\t")
}
first file") 18 | parser.add_argument("--outdir", help = "a directory to store output files - default is current directory", default = "./") 19 | bamopts.add_argument("--bowtieonly", "-b", help = "only run bowtie2 and make bams - can be useful for post initial analysis QC steps", action = "store_true") 20 | bamopts.add_argument("--keepbams", "-k", help = "keep bam files while doing a normal full analysis", action = "store_true") 21 | args = parser.parse_args() 22 | 23 | file1 = args.file_1 24 | file2 = args.file_2 25 | genome = args.genome 26 | gentype = args.genometype 27 | threads = args.threads 28 | outname = args.out 29 | outdir = args.outdir 30 | reslist = args.resolutions 31 | bowtieonly = args.bowtieonly 32 | keepbams = args.keepbams 33 | 34 | #Check requirements are fulfilled: 35 | condapacks = sp.run("conda list".split(), capture_output=True) 36 | condapacksstr = str(condapacks.stdout) 37 | if bowtieonly: 38 | if "bowtie2" not in condapacksstr or "samtools" not in condapacksstr or "sambamba" not in condapacksstr: 39 | print("Please make sure bowtie2, samtools and sambamba are installed in your current conda environment (check conda list)") 40 | exit() 41 | elif not bowtieonly: 42 | if "bowtie2" not in condapacksstr or "pairtools" not in condapacksstr or "cooler" not in condapacksstr or "pairix" not in condapacksstr: 43 | print("Please make sure bowtie2, pairtools, pairix and cooler are installed in your current conda environment (check with 'conda list')") 44 | exit() 45 | 46 | #Check that outdir ends with a /, add one if it doesn't 47 | if args.outdir is not None and not outdir.endswith("/"): 48 | outdir = outdir + "/" 49 | 50 | #Check a genome was specified 51 | if args.genome is None: 52 | print("Genome not specified - check help for formatting") 53 | parser.print_usage() 54 | exit() 55 | 56 | #Check if file1 and file2 are single files or multiple 57 | if file1 is None or file2 is None: 58 | print("Input files not specified - check help for formatting") 
59 | parser.print_usage() 60 | exit() 61 | elif len(file1) > 1 and type(file1) == list and type(file2) == list and len(file1) == len(file2): 62 | multifile = 1 63 | if args.out is None: 64 | outlist = [fname + "_" + genome for fname in file1] 65 | outname = outlist[0] 66 | else: 67 | outlist = list() 68 | for i in range(len(file1)): 69 | outlist.append(outname + "_" + str(i + 1)) 70 | #Input to pair merging step needs all of the outputs together 71 | pairnamelist = [outdir + oname + ".pairs" for oname in outlist] 72 | pairnamest = " ".join(pairnamelist) 73 | bamlist = [outdir + oname + ".sorted.bam" for oname in outlist] 74 | bamst = " ".join(bamlist) 75 | bailist = [outdir + oname + ".sorted.bam.bai" for oname in outlist] 76 | baist = " ".join(bailist) 77 | elif len(file1) == 1 and len(file2) == 1: 78 | multifile = 0 79 | #If nargs = *, always makes a list, even if only one element 80 | file1 = "".join(file1) 81 | file2 = "".join(file2) 82 | if args.out is None: 83 | outname = file1 + "_" + genome 84 | pairnamest = outdir + outname + ".pairs" 85 | else: 86 | print("Mismatch in number of input files, check arguments") 87 | exit() 88 | 89 | #Check that a sensible number of threads has been requested - more protections here are possible - at the moment users are trusted to be sensible 90 | cpucount = multiprocessing.cpu_count() 91 | if args.threads is None: 92 | print("Defaulting to one thread") 93 | threads = 1 94 | elif int(args.threads) >= cpucount: 95 | print("Too many threads requested, resetting to default") 96 | threads = 1 97 | 98 | #Check that the user has entered a valid genome to align to 99 | if args.genometype is None: 100 | gentype = args.genome 101 | 102 | if gentype == "mm10" or gentype == "mm39": 103 | toprint = "Aligning to mouse genome {}".format(genome) 104 | print(toprint) 105 | elif gentype == "hg19" or gentype == "hg38": 106 | toprint = "Aligning to human genome {}".format(genome) 107 | print(toprint) 108 | else: 109 | if gentype == genome: 110 
| print("Genome option not recognised or not entered. Please use mm10/39 or hg19/38 or ask Miles to change the script to accommodate your new organism/genome. If you are using a modified version of base genome, use the -g option to indicate the base genome name.") 111 | exit() 112 | else: #If they're using a modified genome, make sure the base genome exists so that the files are redirected properly 113 | print("Genome/base genome option not recognised. Please use mm10/39 or hg19/38 or ask Miles to change the script to accommodate your new organism/genome.") 114 | exit() 115 | 116 | #Set up resolutions as needed 117 | if args.resolutions is None: 118 | reslist = ["10000000", "5000000", "2500000", "1000000", "500000", "250000", "100000", "50000", "25000", "10000", "5000", "2000", "1000"] 119 | resst = ",".join(reslist) 120 | #Extract minimum resolution 121 | minres = reslist[-1] 122 | 123 | #Process ID (used to make unique sorttemp, so these are not overlapping for multiple processes in the same outdir) 124 | uniqueid = str(uuid.uuid4()) 125 | 126 | # commands as strings 127 | line1 = "mkdir {0}{10}sorttemp -p" 128 | line2 = "bowtie2 -x /mnt/md0/DataRepository/genomes/{1}/{2} --threads {3} -1 {4} -2 {5} --reorder --local --very-sensitive-local {11}| pairtools parse --add-columns mapq --walks-policy mask -c /mnt/md0/DataRepository/chromsizes/{1}/{2}.sorted.chrom.sizes --assembly {2} --min-mapq 2 --drop-sam --drop-readid --nproc-in {3} | pairtools sort --tmpdir {0}{10}sorttemp --nproc {3} -o {0}{6}.pairs | cat" #Can add drop-sam and drop-readid options later 129 | line3 = "pairtools merge --tmpdir {0}{10}sorttemp --nproc {3} {7} | pairtools dedup --max-mismatch 1 --mark-dups --output {0}{6}.nodups.pairs.gz --output-unmapped {0}{6}.unmapped.pairs.gz --output-dups {0}{6}.dups.pairs.gz --output-stats {0}{6}.dedup.stats | cat" 130 | line4 = "pairtools dedup --max-mismatch 1 --mark-dups --output {0}{6}.nodups.pairs.gz --output-unmapped {0}{6}.unmapped.pairs.gz --output-dups 
{0}{6}.dups.pairs.gz --output-stats {0}{6}.dedup.stats {0}{7}" 131 | line5 = "pairix {0}{6}.nodups.pairs.gz" 132 | line6 = "bgzip -cd -@ 3 {0}{6}.nodups.pairs.gz | cooler cload pairs -c1 2 -p1 3 -c2 4 -p2 5 --assembly {2} /mnt/md0/DataRepository/chromsizes/{1}/{2}.sorted.chrom.sizes:{8} - {0}{6}.{8}.cool" 133 | line7 = "cooler zoomify --nproc {3} --balance --out {0}{6}.{8}.mcool --resolutions {9} {0}{6}.{8}.cool" 134 | line8 = "rmdir {0}{10}sorttemp" 135 | 136 | #For running only bowtie2 and making bams 137 | bline1 = "mkdir {0}{6}temp -p" 138 | bline2 = "bowtie2 -x /mnt/md0/DataRepository/genomes/{1}/{2} --threads {3} -1 {4} -2 {5} --reorder --local --very-sensitive-local | samtools view -bS -o {0}{6}.bam" 139 | bline3 = "sambamba sort -t {3} -m 6GB --tmpdir {0}{7}temp {0}{6}.bam {0}{6}.sorted.bam && rm {0}{6}.bam" 140 | bline4 = "sambamba merge -t {3} {0}{6}.sorted.merged.bam {7} && rm {7} {8}" 141 | bline5 = "sambamba markdup -t {3} --tmpdir {0}{6}temp --overflow-list-size 10000000 -r {0}{6}.sorted.merged.bam {0}{6}.nodups.sorted.merged.bam && rm {0}{6}.sorted.merged.ba*" 142 | bline6 = "sambamba markdup -t {3} --tmpdir {0}{6}temp -r {0}{6}.sorted.bam {0}{6}.nodups.sorted.bam && rm {0}{6}.sorted.ba*" 143 | bline7 = "rmdir {0}{6}temp" 144 | 145 | multilines = [line1, line2, line3, line5, line6, line7, line8] 146 | lines = [line1, line2, line4, line5, line6, line7, line8] 147 | 148 | multiblines = [bline1, bline2, bline3, bline4, bline5, bline7] 149 | blines = [bline1, bline2, bline3, bline6, bline7] 150 | 151 | truncmultiblines = [bline1, bline3, bline4, bline5, bline7] 152 | truncblines = [bline1, bline3, bline6, bline7] 153 | 154 | #Process the files depending on the run mode 155 | if not multifile and not bowtieonly: 156 | #Include command if bams are wanted 157 | if keepbams: 158 | keepbamcmd = "| tee >(samtools view -bS > {0}{1}.bam) ".format(outdir, outname) 159 | else: 160 | keepbamcmd = "" 161 | for line in lines: 162 | # add file name and split by 
whitespace 163 | tokenized_line = line.format(outdir, gentype, genome, threads, file1, file2, outname, pairnamest, minres, resst, uniqueid, keepbamcmd) 164 | print(tokenized_line) 165 | # run 166 | sp.run(tokenized_line, shell=True, executable="/bin/bash") 167 | elif multifile and not bowtieonly: 168 | for line in multilines: 169 | if line == line2: 170 | for x in range(len(file1)): 171 | #Include command if bams are wanted 172 | if keepbams: 173 | keepbamcmd = "| tee >(samtools view -bS > {0}{1}.bam) ".format(outdir, outlist[x]) 174 | else: 175 | keepbamcmd = "" 176 | # add file name and split by whitespace 177 | tokenized_line = line.format(outdir, gentype, genome, threads, file1[x], file2[x], outlist[x], pairnamest, minres, resst, uniqueid, keepbamcmd) 178 | print(tokenized_line) 179 | # run 180 | sp.run(tokenized_line, shell=True, executable="/bin/bash") 181 | else: 182 | # add file name and split by whitespace 183 | tokenized_line = line.format(outdir, gentype, genome, threads, file1[1], file2[1], outname, pairnamest, minres, resst, uniqueid) 184 | print(tokenized_line) 185 | # run 186 | sp.run(tokenized_line, shell=True) 187 | elif bowtieonly and multifile: 188 | for line in multiblines: 189 | if line == bline2 or line == bline3: 190 | for x in range(len(file1)): 191 | tokenized_line = line.format(outdir, gentype, genome, threads, file1[x], file2[x], outlist[x], outname) 192 | print(tokenized_line) 193 | sp.run(tokenized_line, shell=True) 194 | else: 195 | tokenized_line = line.format(outdir, gentype, genome, threads, file1[1], file2[1], outname, bamst, baist) 196 | print(tokenized_line) 197 | sp.run(tokenized_line, shell=True) 198 | elif bowtieonly and not multifile: 199 | for line in blines: 200 | tokenized_line = line.format(outdir, gentype, genome, threads, file1, file2, outname, outname) 201 | print(tokenized_line) 202 | sp.run(tokenized_line, shell=True) 203 | 204 | #After everything finishes, merge and process bams as required if doing full analysis 
205 | if keepbams and not bowtieonly: 206 | if not multifile: 207 | for line in truncblines: 208 | tokenized_line = line.format(outdir, gentype, genome, threads, file1, file2, outname, outname) 209 | print(tokenized_line) 210 | sp.run(tokenized_line, shell=True) 211 | elif multifile: 212 | for line in truncmultiblines: 213 | if line == bline3: 214 | for x in range(len(file1)): 215 | tokenized_line = line.format(outdir, gentype, genome, threads, file1[x], file2[x], outlist[x], outname) 216 | print(tokenized_line) 217 | sp.run(tokenized_line, shell=True) 218 | else: 219 | tokenized_line = line.format(outdir, gentype, genome, threads, file1[1], file2[1], outname, bamst, baist) 220 | print(tokenized_line) 221 | sp.run(tokenized_line, shell=True) 222 | -------------------------------------------------------------------------------- /spikeinChIP_PE_alignment.py: -------------------------------------------------------------------------------- 1 | #Aligning and processing paired end fastq files for spike-in ChIP-seq using bowtie2 2 | 3 | from sys import exit 4 | import subprocess as sp 5 | import argparse 6 | import multiprocessing 7 | import uuid 8 | import pandas as pd 9 | 10 | parser = argparse.ArgumentParser(description = "run bowtie2 on paired end fastq files with spikein to produce aligned bam files") 11 | parser.add_argument("--filename", "-f", help = "a tab-separated file containing one each line the path to the fastq with the first ends of pairs, the path to the fastq with the second ends of pairs, and the desired output name for the aligned file") 12 | # parser.add_argument("--file_1", "-1", help = "first demuxed fastq of paired end reads - required", nargs = "*") 13 | # parser.add_argument("--file_2", "-2", help = "second demuxed fastq of paired end reads - required", nargs = "*") 14 | parser.add_argument("--genome", "-g", help = "genome build to align to - mouse or human - required") 15 | parser.add_argument("--spikegenome", "-s", help = "spikein genome build 
to align to - mouse or human - required") 16 | parser.add_argument("--threads", "-t", help = "number of threads to use for bowtie2 - default is 1", default = "1") 17 | parser.add_argument("--outname", "-o", help = "name for the table to store counts") 18 | parser.add_argument("--outdir", help = "a directory to store output files - default is current directory", default = "./") 19 | args = parser.parse_args() 20 | 21 | # file1 = args.file_1 22 | # file2 = args.file_2 23 | genome = args.genome 24 | spikegenome = args.spikegenome 25 | threads = args.threads 26 | outname = args.outname 27 | outdir = args.outdir 28 | 29 | #Read in file to determine what to process 30 | files = pd.read_csv(args.filename, sep='\t', header=None, names = ['end1', 'end2', 'name']) 31 | file1 = files.end1.values.tolist() 32 | file2 = files.end2.values.tolist() 33 | outnames = files.name.values.tolist() 34 | 35 | #Check requirements are fulfilled: 36 | condapacks = sp.run("conda list".split(), capture_output=True) 37 | condapacksstr = str(condapacks.stdout) 38 | 39 | if "bowtie2" not in condapacksstr or "samtools" not in condapacksstr or "sambamba" not in condapacksstr: 40 | print("Please make sure bowtie2, samtools and sambamba are installed in your current conda environment (check conda list)") 41 | exit() 42 | 43 | #Check that outdir ends with a /, add one if it doesn't 44 | if args.outdir is not None and not outdir.endswith("/"): 45 | outdir = outdir + "/" 46 | 47 | #Check a genome was specified 48 | if args.genome is None or args.spikegenome is None: 49 | print("Genomes not specified - check help for formatting") 50 | parser.print_usage() 51 | exit() 52 | 53 | #Check if file1, file2, and outnames are present and the same lengths 54 | if file1 is None or file2 is None or outnames is None: 55 | print("Input files not specified - check help for formatting") 56 | parser.print_usage() 57 | exit() 58 | elif type(file1) == list and type(file1) == list and type(outnames) == list and len(file1) > 
1: 59 | if len(file1) != len(file2) or len(file1) != len(outnames) or len(file2) != len(outnames): 60 | print("Mismatch in number of input files or output names, check arguments") 61 | exit() 62 | 63 | #Check that a sensible number of threads has been requested - more protections here are possible - at the moment users are trusted to be sensible 64 | cpucount = multiprocessing.cpu_count() 65 | if args.threads is None: 66 | print("Defaulting to one thread") 67 | threads = 1 68 | elif int(args.threads) >= cpucount: 69 | print("Too many threads requested, resetting to default") 70 | threads = 1 71 | else: 72 | print(f"Running alignment with {threads} threads...") 73 | 74 | #Check that the user has entered a valid genome to align to 75 | if genome == "mm10" or genome == "mm39": 76 | print(f"Aligning to mouse genome {genome}") 77 | elif genome == "hg19" or genome == "hg38": 78 | print(f"Aligning to human genome {genome}") 79 | else: 80 | print("Genome option not recognised or not entered. Please use mm10/39 or hg19/38 or ask Miles to change the script to accommodate your new organism/genome") 81 | exit() 82 | 83 | #Process ID (used to make unique sorttemp, so these are not overlapping for multiple processes in the same outdir) 84 | uniqueid = str(uuid.uuid4()) 85 | 86 | #Create a place to store counts 87 | allcountslist = [] 88 | 89 | #Process the files 90 | for i, fastq1 in enumerate(file1): 91 | fastq2 = file2[i] 92 | name = outnames[i] 93 | print(f"Aligning {name} to {genome} and {spikegenome}") 94 | #Explanation: align to genome and spikein genome, then remove multiply aligned reads with grep (XS: indicates the score of the next best aligning read, if it exists), then make a bam of all mapped reads (-F4 removes reads with a SAM flag of 4, which means unmapped) 95 | sp.run(f"bowtie2 -p {threads} --no-mixed --no-discordant -1 {fastq1} -2 {fastq2} -x /mnt/md0/DataRepository/genomes/{genome}.{spikegenome}/{genome}.{spikegenome} | grep -v XS: - | samtools view -bh -F4 - 
> {outdir}{name}_UniqMapped.bam", shell=True, executable="/bin/bash") 96 | #Sort aligned reads 97 | sp.run(f"sambamba sort --tmpdir {outdir}{uniqueid}/ -t {threads} -m 30G -o {outdir}{name}_UniqMapped_sorted.bam {outdir}{name}_UniqMapped.bam", shell=True, executable="/bin/bash") 98 | #Remove duplicates 99 | sp.run(f"sambamba markdup --tmpdir {outdir}{uniqueid}/ -r -t {threads} {outdir}{name}_UniqMapped_sorted.bam {outdir}{name}_UniqMapped_sorted_rmdup.bam", shell=True, executable="/bin/bash") 100 | 101 | #Next, need to separate out reads from each genome: 102 | print(f"Extracting reads aligning uniquely to {genome}") 103 | #Use samtools view to open the file, grep to remove those with the spikein genome name in them, then change the chromosomes with the genome name to just the chromosome numbers as normally used, then put the file back into bam. 104 | sp.run(f"samtools view -h {outdir}{name}_UniqMapped_sorted_rmdup.bam | grep -v {spikegenome} | sed s/{genome}_chr/chr/g | samtools view -bhS - > {outdir}{name}_{genome}.UniqMapped_sorted_rmdup.bam", shell=True, executable="/bin/bash") 105 | #Do the same thing for the spikein 106 | print(f"Extracting reads aligning uniquely to {spikegenome}.") 107 | sp.run(f"samtools view -h {outdir}{name}_UniqMapped_sorted_rmdup.bam | grep -v {genome} | sed s/{spikegenome}_chr/chr/g | samtools view -bhS - > {outdir}{name}_{spikegenome}.UniqMapped_sorted_rmdup.bam", shell=True, executable="/bin/bash") 108 | #Index outputs 109 | sp.run(f"sambamba index -t {threads} {outdir}{name}_UniqMapped_sorted_rmdup.bam", shell=True, executable="/bin/bash") 110 | sp.run(f"sambamba index -t {threads} {outdir}{name}_{genome}.UniqMapped_sorted_rmdup.bam", shell=True, executable="/bin/bash") 111 | sp.run(f"sambamba index -t {threads} {outdir}{name}_{spikegenome}.UniqMapped_sorted_rmdup.bam", shell=True, executable="/bin/bash") 112 | #Now clean up 113 | sp.run(f"rm {outdir}{name}_UniqMapped.bam", shell=True, executable="/bin/bash") 114 | sp.run(f"rm 
{outdir}{name}_UniqMapped_sorted.bam", shell=True, executable="/bin/bash") 115 | 116 | #Finally, count reads in each file: 117 | totalCount = sp.run(f"sambamba view -c -t {threads} {outdir}{name}_UniqMapped_sorted_rmdup.bam", capture_output=True, shell=True, executable="/bin/bash") 118 | genomeCount = sp.run(f"sambamba view -c -t {threads} {outdir}{name}_{genome}.UniqMapped_sorted_rmdup.bam", capture_output=True, shell=True, executable="/bin/bash") 119 | spikegenomeCount = sp.run(f"sambamba view -c -t {threads} {outdir}{name}_{spikegenome}.UniqMapped_sorted_rmdup.bam", capture_output=True, shell=True, executable="/bin/bash") 120 | countsList = [totalCount.stdout.decode('ascii').strip(), genomeCount.stdout.decode('ascii').strip(), spikegenomeCount.stdout.decode('ascii').strip()] 121 | allcountslist.append(countsList) 122 | 123 | sp.run(f"rm {outdir}{uniqueid}/ -r", shell=True, executable="/bin/bash") 124 | countstable = pd.DataFrame(allcountslist, columns = ['allcounts', 'genomecounts', 'spikecounts']) 125 | outtable = pd.concat([files, countstable], axis=1) 126 | outtable.to_csv(outdir + outname + '.tsv', sep = '\t', index = False, header = True) 127 | --------------------------------------------------------------------------------