├── .gitignore ├── .gitmodules ├── PhISCS-B-configure ├── PhISCS-B ├── Makefile ├── __init__.py ├── __main__.py ├── csp_maxsat.cpp ├── csp_z3.py ├── helperFunctions.py └── solvers │ ├── aspino │ └── aspino-static │ ├── maxino │ ├── maxino-2015-k16-static │ └── maxino-2015-kdyn-static │ ├── mscg │ ├── mscg14-linux-x86-64 │ ├── mscg15a-linux-x86-64 │ └── mscg15b-linux-x86-64 │ ├── open-wbo │ ├── open-wbo_glucose4.1_static │ └── open-wbo_minisat2.2_static │ └── qmaxsat │ ├── qmaxsat14.04auto-glucose3_static │ ├── qmaxsat14.04pms-glucose2_static │ └── qmaxsat14.04wpms-glucose2_static ├── PhISCS-I ├── __init__.py ├── __main__.py ├── faridFunctions.py └── helperFunctions.py ├── README.md └── example ├── input.SC └── input.bulk /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | gurobi.log 3 | PhISCS-B/csp_maxsat 4 | *.pyc 5 | PhISCS-B/solvers/maxhs 6 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "PhISCS-B/solvers/z3"] 2 | path = PhISCS-B/solvers/z3 3 | url = https://github.com/Z3Prover/z3.git 4 | -------------------------------------------------------------------------------- /PhISCS-B-configure: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # get submodules 4 | git submodule update --init --recursive 5 | 6 | # make z3 7 | (cd PhISCS-B/solvers/z3; ./configure) 8 | (cd PhISCS-B/solvers/z3/build; make) 9 | 10 | # make maxhs 11 | # (cd PhISCS-B/solvers/maxhs; make) 12 | 13 | # make codes 14 | (cd PhISCS-B; make) 15 | -------------------------------------------------------------------------------- /PhISCS-B/Makefile: -------------------------------------------------------------------------------- 1 | CXX ?= g++ 2 | CXXFLAGS = -O3 3 | LDFLAGS = 4 | 5 | all: clean-exe csp_maxsat clean 6 | 7 | csp_maxsat: csp_maxsat.o 
8 | $(CXX) csp_maxsat.o -o $@ ${LDFLAGS} 9 | 10 | clean: 11 | @rm -f *.o 12 | 13 | clean-exe: 14 | @rm -f csp_maxsat -------------------------------------------------------------------------------- /PhISCS-B/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-compbio/PhISCS/50317fea0fd27b5a8ee9c9e050a79df8c3bdb4b4/PhISCS-B/__init__.py -------------------------------------------------------------------------------- /PhISCS-B/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # ============================================================================== 4 | # Written by : Salem Malikic 5 | # Modified by: Farid Rashidi 6 | # Last Update: Apr 25, 2019 7 | # ============================================================================== 8 | 9 | 10 | from helperFunctions import * 11 | import argparse 12 | import errno 13 | import subprocess 14 | import sys 15 | 16 | 17 | # COMMAND LINE ARGUMENTS PARSING 18 | parser = argparse.ArgumentParser(description='PhISCS-B by Z3, aspino, Maxino, Open-WBO, QMaxSAT and MSCG solvers', add_help=True) 19 | # Required arguments: 20 | parser.add_argument('-SCFile', '--SCFile', required=True, type=str, 21 | help='Path to single cell data matrix file') 22 | parser.add_argument('-fn', '--fnProbability', required=True, type=float, 23 | help='Probablity of false negative') 24 | parser.add_argument('-fp', '--fpProbability', required=True, type=float, 25 | help='Probablity of false positive') 26 | 27 | # Optional: 28 | parser.add_argument('-o', '--outDir', default='.', type=str, 29 | help='Output directory') 30 | parser.add_argument('-kmax', '--maxMutationsToEliminate', default=0, type=int, 31 | help='Max number of mutations to be eliminated [default value is 0]') 32 | parser.add_argument('-bulkFile', '--bulkFile', default=None, type=str, 33 | help='Path to bulk data 
file') 34 | parser.add_argument('-delta', '--delta', default=0.20, type=float, 35 | help='Delta parameter accounting for VAF variance [default value is 0.20]') 36 | parser.add_argument('-time', '--time', type=int, default=86400, 37 | help='Max time (in seconds) allowed for the computation (supported only for Z3 solver) [default value is 24 hours]') 38 | parser.add_argument('--drawTree', action='store_true', 39 | help='Draw output tree by Graphviz') 40 | 41 | parser.add_argument('-w', '--colEliminationWeight', default=0, type=float, 42 | help='Weight of column elimination [default value is 0]') 43 | parser.add_argument('-threads', '--threads', default=1, type=int, 44 | help='Number of threads [default value is 1]') 45 | parser.add_argument('--solver', type=str, default='Z3', 46 | help='Solver can be Z3, aspino, Maxino, Open-WBO, QMaxSAT, MSCG or other (provided the path to exe file of solver)') 47 | 48 | 49 | args = parser.parse_args() 50 | 51 | 52 | # assert os.path.exists(args.outDir) == False, "ERROR!!! There already exists file or folder with name " + args.outDir + ". Exiting." 
53 | try: 54 | os.makedirs(args.outDir) 55 | except OSError as exc: 56 | if exc.errno == errno.EEXIST and os.path.isdir(args.outDir): 57 | pass 58 | else: 59 | raise 60 | 61 | usingBulk = False 62 | if args.bulkFile: 63 | usingBulk = True 64 | 65 | filename = os.path.splitext(os.path.basename(args.SCFile))[0] 66 | outfile = os.path.join(args.outDir, filename) 67 | dirname = os.path.dirname(__file__) 68 | 69 | if args.solver.lower() == 'z3': 70 | if usingBulk: 71 | cmds = ['python', '{}/csp_z3.py'.format(dirname), '-f', args.SCFile, '-n', args.fnProbability, '-p', args.fpProbability, 72 | '-w', args.colEliminationWeight, '-o', args.outDir, '-t 1', '--timeout', args.time, '-e', args.delta, 73 | '-b', args.bulkFile, '-m', args.maxMutationsToEliminate] 74 | else: 75 | cmds = ['python', '{}/csp_z3.py'.format(dirname), '-f', args.SCFile, '-n', args.fnProbability, '-p', args.fpProbability, 76 | '-w', args.colEliminationWeight, '-o', args.outDir, '-t 1', '--timeout', args.time, '-m', args.maxMutationsToEliminate] 77 | else: 78 | cmds = ['{}/csp_maxsat'.format(dirname), '-f', args.SCFile, '-n', args.fnProbability, '-p', args.fpProbability, '-o', args.outDir, '-i'] 79 | 80 | cmd = ' '.join(str(v) for v in cmds) 81 | # print(cmd) 82 | subprocess.check_output(cmd, shell=True) 83 | 84 | if args.drawTree: 85 | draw_tree("{}.CFMatrix".format(outfile), usingBulk, args.bulkFile) 86 | -------------------------------------------------------------------------------- /PhISCS-B/csp_maxsat.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Author: Ehsan Haghshenas 3 | * Last update: Oct 19, 2017 4 | *******************************************************************************/ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | using 
namespace std; 20 | 21 | #define MAX_CELL 300 22 | #define MAX_MUT 200 23 | 24 | string par_inputFile = ""; 25 | string par_outDir = ""; 26 | double par_fnRate = -1; 27 | double par_fnWeight; 28 | double par_fnWeight_neg; 29 | double par_fpRate = -1; 30 | double par_fpWeight; 31 | double par_fpWeight_neg; 32 | double par_const = 1000000; 33 | double par_precisionFactor = 1000; 34 | int par_colWeight = -1; 35 | int par_maxColRemove = 0; 36 | string par_bulkFile = ""; 37 | double par_delta = 0.01; 38 | string par_maxSolver = "openwbo"; 39 | int par_threads = 1; 40 | bool par_isTrueVAF = false; 41 | bool IS_PWCNF = true; 42 | bool INT_WEIGHTS = false; 43 | string MAXSAT_EXE; 44 | 45 | int mat[MAX_CELL][MAX_MUT]; // the input matrix: entries are 0, 1, or 2 (2 = not available) 46 | vector cellId; 47 | vector mutId; 48 | int var_x[MAX_CELL][MAX_MUT]; // X variables for the maxSAT 49 | int var_y[MAX_CELL][MAX_MUT]; // Y variables for the maxSAT; if(Iij==0) Yij=Xij and if(Iij==1) Yij=~Xij 50 | int weight_x[MAX_CELL][MAX_MUT]; // weight of X variables 51 | int var_b[MAX_MUT][MAX_MUT][2][2]; // B variables, indexed [p][q][i][j] for the column pair (p,q); used by the conflict clauses 52 | int var_k[MAX_MUT]; // K variables, one per column; a true K marks the column as removed when the solver output is parsed 53 | int var_a[MAX_MUT][MAX_MUT]; // a(p,q) variables used by the VAF clauses 54 | double vaf[MAX_MUT]; 55 | int vafP[MAX_MUT][MAX_MUT]; 56 | int vafT[MAX_MUT][MAX_MUT][MAX_MUT]; 57 | pair map_y2ij[MAX_CELL * MAX_MUT + 10]; // maps Y variables to matrix position (row and column) 58 | // pair map_a2pq[10000]; // maps a variables to matrix position (row and column) 59 | vector clauseSoft; // the set of soft clauses for wcnf formulation 60 | vector clauseHard; // the set of hard clauses for wcnf formulation 61 | 62 | int numMut = 0; // actual number of mutations (columns) 63 | int numCell = 0; // actual number of cells (rows) 64 | int numVarY = 0; // number of Y variables 65 | int numVarX = 0; // number of X variables 66 | int numVarB = 0; // number of B variables 67 | int numVarK = 0; // number of K variables 68 | int numVarA = 0; // number of A variables 69 | int numVarW = 0; // number of W variables 70 | int numZero = 0; // number of zeros in the
input matrix 71 | int numOne = 0; // number of ones in the input matrix 72 | int numTwo = 0; // number of twos in the input matrix 73 | 74 | // #define startVarY (0) 75 | #define startVarX (numVarY) 76 | #define startVarB (numVarY + numVarX) 77 | #define startVarK (numVarY + numVarX + numVarB) 78 | #define startVarA (numVarY + numVarX + numVarB + numVarK) 79 | #define startVarW (numVarY + numVarX + numVarB + numVarK + numVarA) 80 | 81 | string int2str(int n) 82 | { 83 | ostringstream sout; 84 | sout<< n; 85 | return sout.str(); 86 | } 87 | string double2str(double n) 88 | { 89 | ostringstream sout; 90 | sout<< n; 91 | return sout.str(); 92 | } 93 | 94 | int str2int(string s) 95 | { 96 | int retVal; 97 | istringstream sin(s.c_str()); 98 | sin >> retVal; 99 | return retVal; 100 | } 101 | 102 | double str2double(string s) 103 | { 104 | double retVal; 105 | istringstream sin(s.c_str()); 106 | sin >> retVal; 107 | return retVal; 108 | } 109 | 110 | inline double log10(double n) 111 | { 112 | return log(n)/log(10); 113 | } 114 | 115 | // double getCpuTime() 116 | // { 117 | // struct rusage t; 118 | // getrusage(RUSAGE_SELF, &t); 119 | // return t.ru_utime.tv_sec + t.ru_utime.tv_usec / 1000000.0 + t.ru_stime.tv_sec + t.ru_stime.tv_usec / 1000000.0; 120 | // } 121 | 122 | double getRealTime() 123 | { 124 | struct timeval t; 125 | struct timezone tz; 126 | gettimeofday(&t, &tz); 127 | return t.tv_sec + t.tv_usec / 1000000.0; 128 | } 129 | 130 | string get_file_name(string path, bool removExtension = false) 131 | { 132 | string fileName; 133 | size_t pos; 134 | // extract file name 135 | pos = path.find_last_of("/"); 136 | if(pos != string::npos) 137 | fileName = path.substr(pos+1); 138 | else 139 | fileName = path; 140 | // remove extension 141 | if(removExtension) 142 | { 143 | pos = fileName.find_last_of("."); 144 | if(pos != string::npos) 145 | fileName = fileName.substr(0, pos); 146 | } 147 | return fileName; 148 | } 149 | 150 | string get_dir_path(string path) 151 | { 
152 | size_t pos = path.find_last_of("/"); 153 | if(pos != string::npos) 154 | { 155 | return path.substr(0, pos); 156 | } 157 | else 158 | { 159 | return ""; 160 | } 161 | } 162 | 163 | string get_exe_path() 164 | { 165 | char path[10000]; 166 | ssize_t count = readlink( "/proc/self/exe", path, 10000); 167 | return string(path, (count > 0) ? count : 0); 168 | } 169 | 170 | void print_usage() 171 | { 172 | cout<< endl 173 | << "usage: csp_maxsat [-h] -f FILE -n FNRATE -p FPRATE -o OUTDIR" << endl 174 | << " [-c CONST] [-s SOLVER]" << endl; 175 | // << " [-m MAXMUT] [-b BULK] [-e DELTA] [-v] [-t THREADS]" << endl; 176 | } 177 | 178 | void print_help() 179 | { 180 | cout<< endl 181 | << "Required arguments:" << endl 182 | << " -f, --file STR Input matrix file" << endl 183 | << " -n, --fnRate FLT The estimate for false negative rate" << endl 184 | << " -p, --fpRate FLT The estimate for false positive rate" << endl 185 | << " -o, --outDir STR Output directory" << endl 186 | << endl 187 | << "Optional arguments:" << endl 188 | << " -c, --const FLT The constant for weights of soft clauses[1000000]" << endl 189 | // << " -m, --maxMut INT Max number mutations to be eliminated [0]" << endl 190 | // << " -b, --bulk INT Bulk sequencing file [\"\"]" << endl 191 | // << " -e, --delta FLT Delta in VAF [0.01]" << endl 192 | // << " -v, --truevaf Use true VAFs instead of noisy VAFs [false]" << endl 193 | << " -s, --solver STR Name of MaxSAT solver. 
Available options are:" << endl 194 | << " qmaxsat/maxino/openwbo/aspino/mscg/maxhs [\"openwbo\"]" << endl 195 | << " Note: for another solver, pass the path to binary" << endl 196 | << " -i, --integer Round weights to their nearest integers [false]" << endl 197 | << " -z, --coefficient The coefficient for rounding weights [1000]" << endl 198 | // << " -t, --threads INT Number of threads [1]" << endl 199 | << endl 200 | << "Other arguments:" << endl 201 | << " -h, --help Show this help message and exit" << endl; 202 | } 203 | 204 | bool command_line_parser(int argc, char *argv[]) 205 | { 206 | int index; 207 | char c; 208 | static struct option longOptions[] = 209 | { 210 | // {"progress", no_argument, &progressRep, 1}, 211 | {"file", required_argument, 0, 'f'}, 212 | {"fnRate", required_argument, 0, 'n'}, 213 | {"fpRate", required_argument, 0, 'p'}, 214 | {"outDir", required_argument, 0, 'o'}, 215 | {"const", required_argument, 0, 'c'}, 216 | // {"maxMut", required_argument, 0, 'm'}, 217 | // {"bulk", required_argument, 0, 'b'}, 218 | // {"delta", required_argument, 0, 'e'}, 219 | // {"vafTrue", no_argument, 0, 'v'}, 220 | {"solver", required_argument, 0, 's'}, 221 | {"integer", required_argument, 0, 'i'}, 222 | {"coefficient", required_argument, 0, 'z'}, 223 | // {"threads", required_argument, 0, 't'}, 224 | {"help", no_argument, 0, 'h'}, 225 | {0,0,0,0} 226 | }; 227 | 228 | // while ( (c = getopt_long ( argc, argv, "f:n:p:o:m:b:e:s:t:vh", longOptions, &index))!= -1 ) 229 | while ( (c = getopt_long ( argc, argv, "f:n:p:o:c:s:z:ih", longOptions, &index))!= -1 ) 230 | { 231 | switch (c) 232 | { 233 | case 'f': 234 | par_inputFile = optarg; 235 | break; 236 | case 'n': 237 | par_fnRate = str2double(optarg); 238 | if(par_fnRate <= 0 || par_fnRate >= 1) 239 | { 240 | cerr<< "[ERROR] Estimate for false negative rate should be an double in range (0,1)" << endl; 241 | return false; 242 | } 243 | break; 244 | case 'p': 245 | par_fpRate = str2double(optarg); 246 | 
if(par_fpRate <= 0 || par_fpRate >= 1) 247 | { 248 | cerr<< "[ERROR] Estimate for false positive rate should be an double in range (0,1)" << endl; 249 | return false; 250 | } 251 | break; 252 | case 'o': 253 | par_outDir = optarg; 254 | break; 255 | case 'c': 256 | par_const = str2int(optarg); 257 | if(par_const <= 1) 258 | { 259 | cerr<< "[ERROR] Constant for weights should be an integer > 1" << endl; 260 | return false; 261 | } 262 | break; 263 | // case 'm': 264 | // par_maxColRemove = str2int(optarg); 265 | // if(par_maxColRemove < 0) 266 | // { 267 | // cerr<< "[ERROR] Maximum number of mutation removal should be an integer >= 0" << endl; 268 | // return false; 269 | // } 270 | // break; 271 | // case 'b': 272 | // par_bulkFile = optarg; 273 | // break; 274 | // case 'e': 275 | // par_delta = str2double(optarg); 276 | // if(par_delta <= 0) 277 | // { 278 | // cerr<< "[ERROR] Delta should be a floating point number > 0" << endl; 279 | // return false; 280 | // } 281 | // break; 282 | // case 'v': 283 | // par_isTrueVAF = true; 284 | // break; 285 | case 's': 286 | par_maxSolver = optarg; 287 | break; 288 | case 'i': 289 | INT_WEIGHTS = true; 290 | break; 291 | case 'z': 292 | par_precisionFactor = str2int(optarg); 293 | if(par_precisionFactor < 1) 294 | { 295 | cerr<< "[ERROR] Rounding coefficient should be an integer >= 1" << endl; 296 | return false; 297 | } 298 | break; 299 | // case 't': 300 | // par_threads = str2int(optarg); 301 | // if(par_threads != 1) 302 | // { 303 | // cerr<< "[ERROR] Only single thread is supported at the moment!" 
<< endl; 304 | // return false; 305 | // } 306 | // break; 307 | case 'h': 308 | print_usage(); 309 | print_help(); 310 | exit(EXIT_SUCCESS); 311 | } 312 | } 313 | 314 | if(par_inputFile == "") 315 | { 316 | cerr<< "[ERROR] option -f/--file is required" << endl; 317 | print_usage(); 318 | return false; 319 | } 320 | 321 | if(par_outDir == "") 322 | { 323 | cerr<< "[ERROR] option -o/--outDir is required" << endl; 324 | print_usage(); 325 | return false; 326 | } 327 | 328 | if(par_fnRate < 0) 329 | { 330 | cerr<< "[ERROR] option -n/--fnRate is required" << endl; 331 | print_usage(); 332 | return false; 333 | } 334 | 335 | if(par_fpRate < 0) 336 | { 337 | cerr<< "[ERROR] option -p/--fpRate is required" << endl; 338 | print_usage(); 339 | return false; 340 | } 341 | 342 | if(INT_WEIGHTS == false) 343 | { 344 | par_precisionFactor = 1; 345 | } 346 | 347 | string exeDir = get_dir_path(get_exe_path()); 348 | if(par_maxSolver == "qmaxsat") 349 | MAXSAT_EXE = exeDir + "/solvers/qmaxsat/qmaxsat14.04auto-glucose3_static"; 350 | else if(par_maxSolver == "openwbo") 351 | MAXSAT_EXE = exeDir + "/solvers/open-wbo/open-wbo_glucose4.1_static"; 352 | else if(par_maxSolver == "maxino") 353 | MAXSAT_EXE = exeDir + "/solvers/maxino/maxino-2015-k16-static"; 354 | else if(par_maxSolver == "aspino") 355 | MAXSAT_EXE = exeDir + "/solvers/aspino/aspino-static -mode=maxsat"; 356 | else if(par_maxSolver == "mscg") 357 | MAXSAT_EXE = exeDir + "/solvers/mscg/mscg15b-linux-x86-64"; 358 | else if(par_maxSolver == "maxhs") 359 | MAXSAT_EXE = exeDir + "/solvers/maxhs/maxhs"; 360 | // else // wrong solver name, use openwbo 361 | else // expect the full path to the binary of the solver 362 | { 363 | MAXSAT_EXE = par_maxSolver; 364 | // cerr<< "[WARNING] Wrong solver name! Using default solver (openwbo)..." 
<< endl; 365 | // MAXSAT_EXE = exeDir + "/solvers/open-wbo/open-wbo_glucose4.1_static"; 366 | } 367 | 368 | return true; 369 | } 370 | 371 | void get_input_data(string path) 372 | { 373 | int i, j; 374 | string tmpStr; 375 | string line; 376 | ifstream fin(path.c_str()); 377 | if(fin.is_open() == false) 378 | { 379 | cerr<< "Could not open file: " << path << endl; 380 | exit(EXIT_FAILURE); 381 | } 382 | // process the header 383 | getline(fin, line); 384 | istringstream sin1(line); 385 | while(sin1 >> tmpStr) 386 | { 387 | mutId.push_back(tmpStr); 388 | } 389 | numMut = mutId.size() - 1; 390 | // 391 | i = 0; 392 | while(getline(fin, line)) 393 | { 394 | istringstream sin(line); 395 | sin >> tmpStr; // cell name 396 | cellId.push_back(tmpStr); 397 | for(int j = 0; j < numMut; j++) 398 | { 399 | sin >> mat[i][j]; 400 | } 401 | i++; 402 | } 403 | numCell = i; 404 | fin.close(); 405 | // // artificial cell and mutation 406 | // mutId.push_back("mutX"); 407 | // cellId.push_back("cellX"); 408 | // for(j = 0; j < numMut; j++) 409 | // { 410 | // mat[numCell][j] = 0; 411 | // } 412 | // for(i = 0; i <= numCell; i++) 413 | // { 414 | // mat[i][numMut] = 1; 415 | // } 416 | } 417 | 418 | void set_y_variables() 419 | { 420 | int i, j; 421 | numVarY = 0; 422 | 423 | // for(i = 0; i <= numCell; i++) 424 | for(i = 0; i < numCell; i++) 425 | { 426 | // for(j = 0; j <= numMut; j++) 427 | for(j = 0; j < numMut; j++) 428 | { 429 | numVarY++; 430 | var_y[i][j] = numVarY; 431 | map_y2ij[numVarY] = make_pair(i, j); 432 | } 433 | } 434 | } 435 | 436 | void set_x_variables() 437 | { 438 | int i, j; 439 | numVarX = 0; 440 | 441 | // for(i = 0; i <= numCell; i++) 442 | for(i = 0; i < numCell; i++) 443 | { 444 | // for(j = 0; j <= numMut; j++) 445 | for(j = 0; j < numMut; j++) 446 | { 447 | numVarX++; 448 | var_x[i][j] = startVarX + numVarX; 449 | } 450 | } 451 | } 452 | 453 | void set_b_variables() 454 | { 455 | int i, j, p, q; 456 | numVarB = 0; 457 | 458 | // for(p = 0; p <= numMut; 
p++) 459 | for(p = 0; p < numMut; p++) 460 | { 461 | // for(q = 0; q <= numMut; q++) 462 | for(q = 0; q < numMut; q++) 463 | { 464 | for(i = 0; i < 2; i++) 465 | { 466 | for(j = 0; j < 2; j++) 467 | { 468 | numVarB++; 469 | var_b[p][q][i][j] = startVarB + numVarB; 470 | } 471 | } 472 | } 473 | } 474 | } 475 | 476 | void set_k_variables() 477 | { 478 | int p; 479 | numVarK = 0; 480 | 481 | for(p = 0; p <= numMut; p++) 482 | { 483 | numVarK++; 484 | var_k[p] = startVarK + numVarK; 485 | } 486 | } 487 | 488 | void set_a_variables() 489 | { 490 | int p, q; 491 | numVarA = 0; 492 | 493 | for(p = 0; p <= numMut; p++) 494 | { 495 | for(q = 0; q <= numMut; q++) 496 | { 497 | numVarA++; 498 | var_a[p][q] = startVarA + numVarA; 499 | // map_a2pq[startVarA + numVarA] = make_pair(p, q); 500 | } 501 | } 502 | } 503 | 504 | void add_variable_clauses() 505 | { 506 | int i, j; 507 | string str_fnWeight; 508 | string str_fnWeight_neg; 509 | string str_fpWeight; 510 | string str_fpWeight_neg; 511 | 512 | numZero = 0; 513 | numOne = 0; 514 | numTwo = 0; 515 | 516 | if(INT_WEIGHTS) 517 | { 518 | str_fnWeight = int2str(round(par_fnWeight)); 519 | str_fnWeight_neg = int2str(round(par_fnWeight_neg)); 520 | str_fpWeight = int2str(round(par_fpWeight)); 521 | str_fpWeight_neg = int2str(round(par_fpWeight_neg)); 522 | } 523 | else 524 | { 525 | str_fnWeight = double2str(par_fnWeight); 526 | str_fnWeight_neg = double2str(par_fnWeight_neg); 527 | str_fpWeight = double2str(par_fpWeight); 528 | str_fpWeight_neg = double2str(par_fpWeight_neg); 529 | } 530 | 531 | for(i = 0; i < numCell; i++) 532 | { 533 | for(j = 0; j < numMut; j++) 534 | { 535 | // fout<< weight_x[map_x2ij[i].first][map_x2ij[i].second] << " " << -1*i << " 0\n"; 536 | if(mat[i][j] == 0) 537 | { 538 | numZero++; 539 | clauseSoft.push_back(str_fnWeight + " " + int2str(var_x[i][j])); 540 | clauseSoft.push_back(str_fnWeight_neg + " " + int2str(-1*var_x[i][j])); 541 | clauseHard.push_back(int2str(-1*var_x[i][j]) + " " + 
int2str(var_y[i][j])); 542 | clauseHard.push_back(int2str(var_x[i][j]) + " " + int2str(-1*var_y[i][j])); 543 | } 544 | else if (mat[i][j] == 1) 545 | { 546 | numOne++; 547 | clauseSoft.push_back(str_fpWeight + " " + int2str(var_x[i][j])); 548 | clauseSoft.push_back(str_fpWeight_neg + " " + int2str(-1*var_x[i][j])); 549 | clauseHard.push_back(int2str(var_x[i][j]) + " " + int2str(var_y[i][j])); 550 | clauseHard.push_back(int2str(-1*var_x[i][j]) + " " + int2str(-1*var_y[i][j])); 551 | } 552 | else // mat[i][j] == 2 (not available) 553 | { 554 | numTwo++; 555 | clauseHard.push_back(int2str(-1*var_x[i][j]) + " " + int2str(var_y[i][j])); 556 | clauseHard.push_back(int2str(var_x[i][j]) + " " + int2str(-1*var_y[i][j])); 557 | } 558 | } 559 | } 560 | } 561 | 562 | void add_conflict_clauses() 563 | { 564 | int i; 565 | int p, q; 566 | for(i = 0; i < numCell; i++) 567 | { 568 | for(p = 0; p < numMut; p++) 569 | { 570 | for(q = p; q < numMut; q++) 571 | { 572 | // ~Yip v ~Yiq v Bpq11 573 | clauseHard.push_back(int2str(-1*var_y[i][p]) + " " + int2str(-1*var_y[i][q]) + " " + int2str(var_b[p][q][1][1])); 574 | // Yip v ~Yiq v Bpq01 575 | clauseHard.push_back(int2str(var_y[i][p]) + " " + int2str(-1*var_y[i][q]) + " " + int2str(var_b[p][q][0][1])); 576 | // ~Yip v Yiq v Bpq10 577 | clauseHard.push_back(int2str(-1*var_y[i][p]) + " " + int2str(var_y[i][q]) + " " + int2str(var_b[p][q][1][0])); 578 | // if(par_maxColRemove > 0) // column elimination enabled 579 | // { 580 | // // Kp v Kq v ~Bpq01 v ~Bpq10 v ~Bpq11 581 | // clauseHard.push_back(int2str(var_k[p]) + " " + int2str(var_k[q]) + " " + int2str(-1*var_b[p][q][0][1]) + " " + int2str(-1*var_b[p][q][1][0]) + " " + int2str(-1*var_b[p][q][1][1])); 582 | // } 583 | // else // column elimination disabled 584 | // { 585 | // ~Bpq01 v ~Bpq10 v ~Bpq11 586 | clauseHard.push_back(int2str(-1*var_b[p][q][0][1]) + " " + int2str(-1*var_b[p][q][1][0]) + " " + int2str(-1*var_b[p][q][1][1])); 587 | // } 588 | } 589 | } 590 | } 591 | } 592 | 593 | 
int next_comb(int comb[], int k, int n) 594 | { 595 | int i = k - 1; 596 | ++comb[i]; 597 | while ((i >= 0) && (comb[i] >= n - k + 1 + i)) 598 | { 599 | --i; 600 | ++comb[i]; 601 | } 602 | 603 | if (comb[0] > n - k) /* Combination (n-k, n-k+1, ..., n) reached */ 604 | return 0; /* No more combinations can be generated */ 605 | 606 | /* comb now looks like (..., x, n, n, n, ..., n). 607 | Turn it into (..., x, x + 1, x + 2, ...) */ 608 | for (i = i + 1; i < k; ++i) 609 | comb[i] = comb[i - 1] + 1; 610 | 611 | return 1; 612 | } 613 | 614 | void add_column_clauses() 615 | { 616 | int i; 617 | // code for C(n, k) 618 | // n choose k 619 | int n = numMut; 620 | int k = par_maxColRemove + 1; 621 | int comb[numMut + 10]; // comb[i] is the index of the i-th element in the combination 622 | for (i = 0; i < k; i++) 623 | comb[i] = i; 624 | 625 | do 626 | { 627 | string tmpClause = ""; 628 | for(i = 0; i < k; i++) 629 | tmpClause += int2str(-1*var_k[comb[i]]) + " "; 630 | clauseHard.push_back(tmpClause); 631 | }while(next_comb(comb, k, n)); 632 | } 633 | 634 | void add_column_clauses_weight() 635 | { 636 | int i; 637 | // int colWeight = numCell / 2; 638 | // int colWeight = 20; 639 | string str_colWeight = int2str(par_colWeight); 640 | for(i = 0; i < numMut; i++) 641 | { 642 | clauseSoft.push_back(str_colWeight + " " + int2str(-1*var_k[i])); 643 | } 644 | } 645 | 646 | void add_vaf_clauses() 647 | { 648 | int t, r; 649 | int p, q; 650 | 651 | // 1.(a) 652 | // ~K(numMut) 653 | if(par_maxColRemove > 0) 654 | { 655 | clauseHard.push_back(int2str(-1*var_k[numMut])); 656 | } 657 | 658 | // 1.(b) 659 | // for all rows t, Y(t, numMut) = 1 660 | for(t = 0; t <= numCell; t++) 661 | { 662 | clauseHard.push_back(int2str(var_y[t][numMut])); 663 | } 664 | 665 | // 1.(c) 666 | // for all columns p != numMut, Y(numCell, p) = 0 667 | for(p = 0; p < numMut; p++) 668 | { 669 | clauseHard.push_back(int2str(-1*var_y[numCell][p])); 670 | } 671 | 672 | // // 2.(old): ~a(p,q) v ~a(q,p) 673 | // 
for(p = 0; p < numMut; p++) 674 | // { 675 | // for(q = 0; q < numMut; q++) 676 | // { 677 | // // for all pairs of mutations p and q (including p=q) 678 | // clauseHard.push_back(int2str(-1*var_a[p][q]) + " " + int2str(-1*var_a[q][p])); 679 | // } 680 | // } 681 | 682 | // 2.(a) 683 | // (a(p,q) v a(q,p)) => (~K(p) ^ ~K(q)) 684 | // (~K(p) v ~a(p,q)) ^ (~K(p) v ~a(q,p)) ^ (~a(p,q) v ~K(q)) ^ (~a(q,p) v ~K(q)) 685 | if(par_maxColRemove > 0) // FIXME: should I have this condition or not? 686 | { 687 | for(p = 0; p < numMut; p++) 688 | { 689 | for(q = 0; q < numMut; q++) 690 | { 691 | clauseHard.push_back(int2str(-1*var_k[p]) + " " + int2str(-1*var_a[q][p])); 692 | clauseHard.push_back(int2str(-1*var_k[p]) + " " + int2str(-1*var_a[q][p])); 693 | clauseHard.push_back(int2str(-1*var_a[p][q]) + " " + int2str(-1*var_k[q])); 694 | clauseHard.push_back(int2str(-1*var_a[q][p]) + " " + int2str(-1*var_k[q])); 695 | } 696 | } 697 | } 698 | 699 | // 2.(b) 700 | // for a given mutation q != x, V_{for all p != q} a(p,q) 701 | for(q = 0; q < numMut; q++) 702 | { 703 | string tmpClause = ""; 704 | for(p = 0; p <= numMut; p++) 705 | { 706 | if(p != q) 707 | { 708 | tmpClause += int2str(var_a[p][q]) + " "; 709 | } 710 | } 711 | clauseHard.push_back(tmpClause); 712 | } 713 | 714 | // 2.(c) 715 | // (a(p,q) ^ Y(t, q)) => (a(p,q) ^ Y(t,p)) 716 | // ~a(p,q) v ~Y(t, q) v Y(t,p) 717 | for(t = 0; t <= numCell; t++) 718 | { 719 | for(p = 0; p <= numMut; p++) 720 | { 721 | for(q = 0; q <= numMut; q++) 722 | { 723 | clauseHard.push_back(int2str(-1*var_a[p][q]) + " " + int2str(-1*var_y[t][q]) + " " + int2str(var_y[t][p])); 724 | } 725 | } 726 | } 727 | 728 | // 2.(d) 729 | // a(p,q) => vafP(p,q) 730 | // ~a(p,q) v vafP(p,q) 731 | for(p = 0; p <= numMut; p++) 732 | { 733 | for(q = 0; q <= numMut; q++) 734 | { 735 | if(vafP[p][q] == 0) 736 | { 737 | clauseHard.push_back(int2str(-1*var_a[p][q])); 738 | } 739 | } 740 | } 741 | 742 | // 2.(e) 743 | // cout<< "from " << startVarW << endl; 744 | 
numVarW = 0; 745 | for(p = 0; p <= numMut; p++) 746 | { 747 | for(q = 0; q <= numMut; q++) 748 | { 749 | // if(p != q) // FIXME: double check 750 | { 751 | string tmpClause = ""; 752 | for(t = 0; t <= numCell; t++) 753 | { 754 | numVarW++; 755 | clauseHard.push_back(int2str(startVarW+numVarW) + " " + int2str(var_y[t][q])); 756 | clauseHard.push_back(int2str(startVarW+numVarW) + " " + int2str(-1*var_y[t][p])); 757 | tmpClause += int2str(-1*(startVarW+numVarW)) + " "; 758 | } 759 | tmpClause += int2str(var_a[p][q]); 760 | if(par_maxColRemove > 0) 761 | { 762 | tmpClause += " " + int2str(var_k[p]) + " " + int2str(var_k[q]); 763 | } 764 | clauseHard.push_back(tmpClause); 765 | // cout<< "new " << tmpClause << endl; 766 | } 767 | } 768 | } 769 | 770 | // 3. 771 | // (a(p,q) ^ a(p,r) ^ ~a(q,r) ^ ~a(r,q)) => vafT(p,q,r) 772 | // ~a(p,q) v ~a(p,r) v a(q,r) v a(r,q) v vafT(p,q,r) 773 | for(p = 0; p <= numMut; p++) 774 | { 775 | for(q = 0; q <= numMut; q++) 776 | { 777 | for(r = 0; r <= numMut; r++) 778 | { 779 | if(q < r) // FIXME: double check 780 | { 781 | if(vafT[p][q][r] == 0) 782 | { 783 | clauseHard.push_back(int2str(-1*var_a[p][q]) + " " + int2str(-1*var_a[p][r]) + " " + int2str(var_a[q][r]) + " " + int2str(var_a[r][q])); 784 | } 785 | } 786 | } 787 | } 788 | } 789 | } 790 | 791 | void write_maxsat_input(string path) 792 | { 793 | int i, j; 794 | 795 | int64_t hardWeight = numZero * (int64_t)ceil(par_fnWeight) 796 | + numZero * (int64_t)ceil(par_fnWeight_neg) 797 | + numOne * (int64_t)ceil(par_fpWeight) 798 | + numOne * (int64_t)ceil(par_fpWeight_neg); 799 | // + numMut * par_colWeight + 1; 800 | 801 | ofstream fout(path.c_str()); 802 | if(fout.is_open() == false) 803 | { 804 | cerr<< "Could not open file: " << path << endl; 805 | exit(EXIT_FAILURE); 806 | } 807 | // 808 | if(IS_PWCNF) 809 | { 810 | // fout<< "p wcnf " << numVarY + numVarX + numVarB + numVarK + numVarA + numVarW << " " << clauseSoft.size() + clauseHard.size() << " " << hardWeight << "\n"; 811 | 
fout<< "p wcnf " << numVarY + numVarX + numVarB << " " << clauseSoft.size() + clauseHard.size() << " " << hardWeight << "\n"; 812 | } 813 | else 814 | { 815 | // fout<< "p wcnf " << numVarY + numVarX + numVarB + numVarK + numVarA + numVarW << " " << clauseSoft.size() + clauseHard.size() << "\n"; 816 | fout<< "p wcnf " << numVarY + numVarX + numVarB << " " << clauseSoft.size() + clauseHard.size() << "\n"; 817 | } 818 | // soft clauses 819 | for(i = 0; i < clauseSoft.size(); i++) 820 | { 821 | fout<< clauseSoft[i] << " 0\n"; 822 | } 823 | // hard clauses 824 | for(i = 0; i < clauseHard.size(); i++) 825 | { 826 | fout<< hardWeight << " " << clauseHard[i] << " 0\n"; 827 | } 828 | 829 | fout.close(); 830 | } 831 | 832 | bool read_maxsat_output_columnElim(string path, int &numRemovedCol, set &removedCol) 833 | { 834 | numRemovedCol = 0; 835 | string line; 836 | bool oLine = false, sLine = false, vLine = false; 837 | ifstream fin(path.c_str()); 838 | if(fin.is_open() == false) 839 | { 840 | cerr<< "Could not open file: " << path << endl; 841 | exit(EXIT_FAILURE); 842 | } 843 | // parse 844 | while(getline(fin, line)) 845 | { 846 | if(line[0] == 'o') 847 | { 848 | oLine = true; 849 | } 850 | if(line[0] == 's') 851 | { 852 | sLine = true; 853 | } 854 | if(line[0] == 'v') 855 | { 856 | vLine = true; 857 | // update the input matrix 858 | int tmpVar, tmpVarAbs; 859 | istringstream sin(line.substr(1)); 860 | while(sin >> tmpVar) 861 | { 862 | tmpVarAbs = abs(tmpVar); 863 | if(startVarK < tmpVarAbs && tmpVarAbs <= startVarK + numVarK) // it is a k variable 864 | { 865 | if(tmpVar > 0) // column to be removed 866 | { 867 | numRemovedCol++; 868 | removedCol.insert(tmpVar - (numVarY + numVarX + numVarB) - 1); // 0-based index 869 | } 870 | } 871 | } 872 | } 873 | } 874 | fin.close(); 875 | return (oLine && sLine && vLine); 876 | } 877 | 878 | bool read_maxsat_output_bitFlips(string path, int &flip, int &flip01, int &flip10, int &flip20, int &flip21, set &removedCol) 879 | { 880 | 
flip = 0; 881 | flip01 = 0; 882 | flip10 = 0; 883 | flip20 = 0; 884 | flip21 = 0; 885 | string line; 886 | bool oLine = false, sLine = false, vLine = false; 887 | ifstream fin(path.c_str()); 888 | if(fin.is_open() == false) 889 | { 890 | cerr<< "Could not open file: " << path << endl; 891 | exit(EXIT_FAILURE); 892 | } 893 | // parse 894 | while(getline(fin, line)) 895 | { 896 | if(line[0] == 'o') 897 | { 898 | oLine = true; 899 | } 900 | if(line[0] == 's') 901 | { 902 | sLine = true; 903 | } 904 | if(line[0] == 'v') 905 | { 906 | vLine = true; 907 | // update the input matrix 908 | int tmpVar, tmpVarAbs, oldVal; 909 | istringstream sin(line.substr(1)); 910 | while(sin >> tmpVar) 911 | { 912 | tmpVarAbs = abs(tmpVar); 913 | // if(tmpVarAbs <= numVarY && removedCol.find(tmpVarAbs) == removedCol.end()) 914 | if(tmpVarAbs <= numVarY && removedCol.find(map_y2ij[tmpVarAbs].second) == removedCol.end()) 915 | { 916 | oldVal = mat[map_y2ij[tmpVarAbs].first][map_y2ij[tmpVarAbs].second]; 917 | 918 | if(oldVal == 0) 919 | { 920 | if(tmpVar > 0) 921 | { 922 | mat[map_y2ij[tmpVarAbs].first][map_y2ij[tmpVarAbs].second] = 1; 923 | if(map_y2ij[tmpVarAbs].first != numCell && map_y2ij[tmpVarAbs].second != numMut) 924 | { 925 | flip++; 926 | flip01++; 927 | } 928 | } 929 | } 930 | else if(oldVal == 1) 931 | { 932 | if(tmpVar < 0) 933 | { 934 | mat[map_y2ij[tmpVarAbs].first][map_y2ij[tmpVarAbs].second] = 0; 935 | if(map_y2ij[tmpVarAbs].first != numCell && map_y2ij[tmpVarAbs].second != numMut) 936 | { 937 | flip++; 938 | flip10++; 939 | } 940 | } 941 | } 942 | else // oldVal == 2 943 | { 944 | if(tmpVar < 0) 945 | { 946 | mat[map_y2ij[tmpVarAbs].first][map_y2ij[tmpVarAbs].second] = 0; 947 | if(map_y2ij[tmpVarAbs].first != numCell && map_y2ij[tmpVarAbs].second != numMut) 948 | { 949 | flip++; 950 | flip20++; 951 | } 952 | } 953 | else // tmpVar > 0 954 | { 955 | mat[map_y2ij[tmpVarAbs].first][map_y2ij[tmpVarAbs].second] = 1; 956 | if(map_y2ij[tmpVarAbs].first != numCell && 
map_y2ij[tmpVarAbs].second != numMut) 957 | { 958 | flip++; 959 | flip21++; 960 | } 961 | } 962 | } 963 | } 964 | // // output a variables 965 | // if(startVarA < tmpVarAbs && tmpVarAbs <= startVarA + numVarA) // it is a A variable 966 | // { 967 | // cout<< "a(" << map_a2pq[tmpVarAbs].first << "," << map_a2pq[tmpVarAbs].second << ") = " << (tmpVar > 0 ? 1 : 0) << endl; 968 | // } 969 | } 970 | } 971 | } 972 | fin.close(); 973 | return (oLine && sLine && vLine); 974 | } 975 | 976 | void write_output_matrix(string path, set &removedCol) 977 | { 978 | int i, j; 979 | ofstream fout(path.c_str()); 980 | // header 981 | for(i = 0; i < mutId.size(); i++) 982 | // for(i = 0; i < mutId.size() - 1; i++) 983 | { 984 | if(removedCol.find(i-1) == removedCol.end()) // column not removed 985 | fout<< mutId[i] << "\t"; 986 | } 987 | fout<< "\n"; 988 | //content 989 | // for(i = 0; i <= numCell; i++) 990 | for(i = 0; i < numCell; i++) 991 | { 992 | fout<< cellId[i] << "\t"; 993 | // for(j = 0; j <= numMut; j++) 994 | for(j = 0; j < numMut; j++) 995 | { 996 | if(removedCol.find(j) == removedCol.end()) // column not removed 997 | fout<< mat[i][j] << "\t"; 998 | } 999 | fout<< "\n"; 1000 | } 1001 | 1002 | fout.close(); 1003 | } 1004 | 1005 | void get_bulk_data(string path) 1006 | { 1007 | int p, q, r; 1008 | string tmpStr; 1009 | double tmpFlt; 1010 | int refCount, mutCount; 1011 | string info; 1012 | string line; 1013 | ifstream fin(path.c_str()); 1014 | if(fin.is_open() == false) 1015 | { 1016 | cerr<< "Could not open file: " << path << endl; 1017 | exit(EXIT_FAILURE); 1018 | } 1019 | // get header line 1020 | getline(fin, line); 1021 | // get VAF information 1022 | p = 0; 1023 | while(getline(fin, line)) 1024 | { 1025 | if(par_isTrueVAF == false) 1026 | { 1027 | istringstream sin(line); 1028 | sin >> tmpStr; 1029 | sin >> tmpStr; 1030 | sin >> tmpStr; 1031 | sin >> mutCount; 1032 | sin >> refCount; 1033 | vaf[p++] = (double)mutCount/(mutCount + refCount); 1034 | } 1035 | else 1036 
| { 1037 | int pos = line.find("trueVAF="); 1038 | vaf[p++] = str2double(line.substr(pos+8, line.find(';', pos) - (pos + 8))); 1039 | } 1040 | } 1041 | // artificial mutation; its VAF should be set to 1 1042 | vaf[numMut] = 5; 1043 | // calc vafP 1044 | for(p = 0; p <= numMut; p++) 1045 | { 1046 | for(q = 0; q <= numMut; q++) 1047 | { 1048 | if(vaf[p]*(1+par_delta) >= vaf[q]) 1049 | { 1050 | vafP[p][q] = 1; 1051 | } 1052 | else 1053 | { 1054 | vafP[p][q] = 0; 1055 | } 1056 | } 1057 | } 1058 | // calc vafT 1059 | for(p = 0; p <= numMut; p++) 1060 | { 1061 | for(q = 0; q <= numMut; q++) 1062 | { 1063 | for(r = 0; r <= numMut; r++) 1064 | { 1065 | if(vaf[p]*(1+par_delta) >= vaf[q] + vaf[r]) 1066 | { 1067 | vafT[p][q][r] = 1; 1068 | } 1069 | else 1070 | { 1071 | vafT[p][q][r] = 0; 1072 | } 1073 | } 1074 | } 1075 | } 1076 | } 1077 | 1078 | bool is_conflict_free() 1079 | { 1080 | for(int p = 0; p < numMut; p++) 1081 | { 1082 | for(int q = p + 1; q < numMut; q++) 1083 | { 1084 | bool seen11 = false; 1085 | bool seen01 = false; 1086 | bool seen10 = false; 1087 | for(int r = 0; r < numCell; r++) 1088 | { 1089 | if(mat[r][p] == 1 && mat[r][q] == 1) seen11 = true; 1090 | if(mat[r][p] == 0 && mat[r][q] == 1) seen01 = true; 1091 | if(mat[r][p] == 1 && mat[r][q] == 0) seen10 = true; 1092 | } 1093 | if(seen11 && seen01 && seen10) return false; 1094 | } 1095 | } 1096 | return true; 1097 | } 1098 | 1099 | int main(int argc, char *argv[]) 1100 | { 1101 | if(argc <= 1) 1102 | { 1103 | print_usage(); 1104 | exit(EXIT_FAILURE); 1105 | } 1106 | 1107 | if(command_line_parser(argc, argv) == false) 1108 | { 1109 | exit(EXIT_FAILURE); 1110 | } 1111 | 1112 | // // calculate integer weights; log in base 10 1113 | // par_fnWeight = round(-1 * par_fnRate + log10(par_const)); 1114 | // par_fnWeight_neg = round(log10(1 - pow(2, -1 * par_fnRate)) + log10(par_const)); 1115 | // par_fpWeight = round(-1 * par_fpRate + log10(par_const)); 1116 | // par_fpWeight_neg = round(log10(1 - pow(2, -1 * 
par_fpRate)) + log10(par_const)); 1117 | 1118 | // calculate integer weights; log in base 10 1119 | par_fnWeight = par_precisionFactor * log(par_const * par_fnRate); 1120 | par_fnWeight_neg = par_precisionFactor * log(par_const * (1 - par_fnRate)); 1121 | par_fpWeight = par_precisionFactor * log(par_const * par_fpRate); 1122 | par_fpWeight_neg = par_precisionFactor * log(par_const * (1 - par_fpRate)); 1123 | 1124 | cout<< "par_fnWeight\t" << par_fnWeight << endl; 1125 | cout<< "par_fnWeight_neg\t" << par_fnWeight_neg << endl; 1126 | cout<< "par_fpWeight\t" << par_fpWeight << endl; 1127 | cout<< "par_fpWeight_neg\t" << par_fpWeight_neg << endl; 1128 | 1129 | // return 0; 1130 | 1131 | // create working directory if does not exist 1132 | // FIXME: use a more portable mkdir... int mkdir(const char *path, mode_t mode); 1133 | string cmd = "mkdir -p " + par_outDir; 1134 | system(cmd.c_str()); 1135 | string fileName = par_outDir + "/" + get_file_name(par_inputFile, true); 1136 | 1137 | // // set weights according to the new formulation 1138 | // if(par_maxColRemove > 0) 1139 | // { 1140 | // // par_colWeight = 0; // for old column elimination formulation 1141 | // par_colWeight = par_maxColRemove; 1142 | // } 1143 | // else 1144 | // { 1145 | // par_colWeight = 0; 1146 | // } 1147 | 1148 | // double cpuTime = getCpuTime(); 1149 | double realTime = getRealTime(); 1150 | 1151 | get_input_data(par_inputFile); 1152 | 1153 | // if(par_bulkFile != "") 1154 | // { 1155 | // get_bulk_data(par_bulkFile); 1156 | // } 1157 | // set variables 1158 | set_y_variables(); 1159 | set_x_variables(); 1160 | set_b_variables(); 1161 | // if(par_maxColRemove > 0) // column elimination enabled 1162 | // { 1163 | // set_k_variables(); 1164 | // } 1165 | // if(par_bulkFile != "") 1166 | // { 1167 | // set_a_variables(); 1168 | // } 1169 | // add clauses 1170 | add_variable_clauses(); 1171 | add_conflict_clauses(); 1172 | // if(par_maxColRemove > 0) // column elimination enabled 1173 | // { 1174 
| // // add_column_clauses(); 1175 | // add_column_clauses_weight(); 1176 | // } 1177 | // if(par_bulkFile != "") 1178 | // { 1179 | // add_vaf_clauses(); 1180 | // } 1181 | // 1182 | write_maxsat_input(fileName + ".maxSAT.in"); 1183 | 1184 | // run Max-SAT solver 1185 | double maxsatTime = getRealTime(); 1186 | cmd = MAXSAT_EXE + " " + fileName + ".maxSAT.in" + " > " + fileName + ".maxSAT.out"; 1187 | system(cmd.c_str()); 1188 | maxsatTime = getRealTime() - maxsatTime; 1189 | 1190 | int numFlip = 0; 1191 | int numFlip01 = 0; 1192 | int numFlip10 = 0; 1193 | int numFlip20 = 0; 1194 | int numFlip21 = 0; 1195 | int numRemovedCol = 0; 1196 | set removedCol; 1197 | 1198 | // if(par_maxColRemove > 0) 1199 | // { 1200 | // if(read_maxsat_output_columnElim(fileName + ".maxSAT.out", numRemovedCol, removedCol) == false) 1201 | // { 1202 | // cerr<< "[ERROR] Max-SAT solver faild!"<< endl; 1203 | // exit(EXIT_FAILURE); 1204 | // } 1205 | // } 1206 | // 1207 | if(read_maxsat_output_bitFlips(fileName + ".maxSAT.out", numFlip, numFlip01, numFlip10, numFlip20, numFlip21, removedCol) == false) 1208 | { 1209 | cerr<< "[ERROR] Max-SAT solver faild!"<< endl; 1210 | exit(EXIT_FAILURE); 1211 | } 1212 | 1213 | // solution is found, save it! 1214 | // write_output_matrix(fileName + ".output", removedCol); 1215 | write_output_matrix(fileName + ".CFMatrix", removedCol); 1216 | // report the log file 1217 | ofstream fLog((fileName + ".log").c_str()); 1218 | if(fLog.is_open() == false) 1219 | { 1220 | cerr<< "Could not open file: " << fileName + ".log" << endl; 1221 | exit(EXIT_FAILURE); 1222 | } 1223 | // fLog.precision(6); 1224 | // fLog<< fixed; 1225 | fLog<< "FILE_NAME: " << get_file_name(par_inputFile) << "\n"; 1226 | fLog<< "NUM_CELLS(ROWS): " << numCell << "\n"; 1227 | fLog<< "NUM_MUTATIONS(COLUMNS): " << numMut << "\n"; 1228 | fLog<< "FN_RATE: " << par_fnRate << "\n"; 1229 | fLog<< "FN_WEIGHT: " << (INT_WEIGHTS ? 
(int)round(par_fnWeight) : par_fnWeight) << "\n"; 1230 | fLog<< "FN_WEIGHT_NEG: " << (INT_WEIGHTS ? (int)round(par_fnWeight_neg) : par_fnWeight_neg) << "\n"; 1231 | fLog<< "FP_RATE: " << par_fpRate << "\n"; 1232 | fLog<< "FP_WEIGHT: " << (INT_WEIGHTS ? (int)round(par_fpWeight) : par_fpWeight) << "\n"; 1233 | fLog<< "FP_WEIGHT_NEG: " << (INT_WEIGHTS ? (int)round(par_fpWeight_neg) : par_fpWeight_neg) << "\n"; 1234 | fLog<< "NUM_THREADS: " << par_threads << "\n"; 1235 | fLog<< "MODEL_SOLVING_TIME_SECONDS: " << maxsatTime << "\n"; 1236 | fLog<< "RUNNING_TIME_SECONDS: " << getRealTime() - realTime << "\n"; 1237 | fLog<< "IS_CONFLICT_FREE: " << (is_conflict_free() ? "YES" : "NO") << "\n"; // FIXME: write the function 1238 | fLog<< "TOTAL_FLIPS_REPORTED: " << numFlip01 + numFlip10 << "\n"; 1239 | fLog<< "0_1_FLIPS_REPORTED: " << numFlip01 << "\n"; 1240 | fLog<< "1_0_FLIPS_REPORTED: " << numFlip10 << "\n"; 1241 | fLog<< "2_0_FLIPS_REPORTED: " << numFlip20 << "\n"; 1242 | fLog<< "2_1_FLIPS_REPORTED: " << numFlip21 << "\n"; 1243 | fLog<< "MUTATIONS_REMOVED_UPPER_BOUND: " << par_maxColRemove << "\n"; 1244 | fLog<< "MUTATIONS_REMOVED_NUM: " << numRemovedCol << "\n"; 1245 | fLog<< "MUTATIONS_REMOVED_INDEX: "; 1246 | int ii; 1247 | set::iterator it; 1248 | for(ii = 1, it = removedCol.begin(); it != removedCol.end(); it++, ii++) 1249 | { 1250 | fLog<< (*it)+1 << (ii < removedCol.size() ? "," : ""); 1251 | } 1252 | fLog << "\n"; 1253 | fLog<< "MUTATIONS_REMOVED_NAME: "; 1254 | for(ii = 1, it = removedCol.begin(); it != removedCol.end(); it++, ii++) 1255 | { 1256 | fLog<< mutId[(*it)+1] << (ii < removedCol.size() ? 
def read_vafs(file, delta, allow_vaf):
    """Build the VAF comparison tables used by the bulk-data constraints.

    Args:
        file: Path to the tab-separated bulk file. For every row the sixth
            column is split on ';' and its second field, with the
            ``trueVAF=`` prefix removed, is parsed as a float.
        delta: Relative tolerance; the left-hand VAF is scaled by
            ``1 + delta`` before each comparison.
        allow_vaf: When falsy the file is not read at all.

    Returns:
        ``([], [])`` when ``allow_vaf`` is falsy. Otherwise ``(vafP, vafT)``,
        two integer arrays of shape ``(m, m)`` and ``(m, m, m)`` where ``m``
        is the number of rows in the file:

        * ``vafP[p][q]`` is 1 iff ``p != q`` and
          ``vaf[p] * (1 + delta) >= vaf[q]``;
        * ``vafT[p][q][t]`` is 1 iff ``p``, ``q``, ``t`` are pairwise
          distinct and ``vaf[p] * (1 + delta) >= vaf[q] + vaf[t]``.
    """
    if not allow_vaf:
        return [], []

    df = pd.read_table(file)
    info_column = df[df.columns[5]]
    vaf = np.array(
        [float(info.split(';')[1].replace('trueVAF=', '')) for info in info_column],
        dtype=float,
    )
    m = len(vaf)

    scaled = vaf * (1 + delta)
    idx = np.arange(m)
    # differs[a][b] is True iff a != b
    differs = idx[:, None] != idx[None, :]

    vafP = ((scaled[:, None] >= vaf[None, :]) & differs).astype(int)

    # The previous loop unpacked its indices into unused names and compared
    # the range objects themselves, so vafT was never populated. Axes below
    # are (p, q, t).
    pairwise_distinct = (
        differs[:, :, None]      # p != q
        & differs[:, None, :]    # p != t
        & differs[None, :, :]    # q != t
    )
    pair_sums = vaf[:, None] + vaf[None, :]  # indexed by (q, t)
    vafT = (
        (scaled[:, None, None] >= pair_sums[None, :, :]) & pairwise_distinct
    ).astype(int)

    return vafP, vafT
122 | 123 | def getZ(i,j): 124 | return 'Z_' + str(i) + '_' + str(j) 125 | 126 | def getY(i,j): 127 | return 'Y_' + str(i) + '_' + str(j) 128 | 129 | def getB(p,q,a,b): 130 | return 'B_' + str(p) + '_' + str(q) + '_' + str(a) + '_' + str(b) 131 | 132 | def getA(p,q): 133 | return 'A_' + str(p) + '_' + str(q) 134 | 135 | 136 | def produce_input(fstr, data, numCells, numMuts, allow_col_elim, fn_weight, fp_weight, w_weight, maxCol, allow_vaf, vafP, vafT): 137 | file = open(fstr, 'w') 138 | file.write('(check-sat-using sat)\n') 139 | for i in range(numCells): 140 | for j in range(numMuts): 141 | file.write('(declare-const Y_' + str(i) + '_' + str(j) + ' Bool)\n') 142 | 143 | for p in range(numMuts): 144 | for q in range(numMuts): 145 | file.write('(declare-const B_' + str(p) + '_' + str(q) + '_0_1 Bool)\n') 146 | file.write('(declare-const B_' + str(p) + '_' + str(q) + '_1_0 Bool)\n') 147 | file.write('(declare-const B_' + str(p) + '_' + str(q) + '_1_1 Bool)\n') 148 | 149 | if allow_col_elim: 150 | for i in range(numCells): 151 | for j in range(numMuts): 152 | file.write('(declare-const X_' + str(i) + '_' + str(j) + ' Bool)\n') 153 | for j in range(numMuts): 154 | file.write('(declare-const '+getK(j)+' Bool)\n') 155 | else: 156 | K = [] 157 | 158 | if allow_vaf: 159 | for p in range(numMuts): 160 | for q in range(numMuts): 161 | file.write('(declare-const '+getA(p,q)+' Bool)\n') 162 | 163 | # Objective 164 | for i in range(numCells): 165 | for j in range(numMuts): 166 | if data[i][j] == 0: 167 | file.write('(assert-soft '+getY(i,j)+' :weight '+str(np.log(fn_weight/(1-fp_weight)))+')\n') 168 | elif data[i][j] == 1: 169 | file.write('(assert-soft '+getY(i,j)+' :weight '+str(np.log((1-fn_weight)/fp_weight))+')\n') 170 | 171 | # Constraint for not allowing removed columns go further than maxCol 172 | if allow_col_elim: 173 | for combo in combinations(range(numMuts), maxCol+1): 174 | temp = '(assert (not (and' 175 | for i in combo: 176 | temp = temp + ' ' + getK(i) 177 | 
temp = temp + ')))\n' 178 | file.write(temp) 179 | 180 | for i in range(numCells): 181 | for j in range(numMuts): 182 | file.write('(assert (or (not '+getY(i,j)+') (not '+getK(j)+') '+getX(i,j)+' ))\n') 183 | file.write('(assert (or '+getY(i,j)+' (not '+getX(i,j)+') ))\n') 184 | file.write('(assert (or '+getK(j)+' (not '+getX(i,j)+') ))\n') 185 | if data[i][j] == 0: 186 | file.write('(assert-soft (not '+getK(j)+') :weight '+str(np.log(1-fp_weight))+')\n') 187 | file.write('(assert-soft '+getX(i,j)+' :weight '+str(-np.log(fn_weight/(1-fp_weight)))+')\n') 188 | elif data[i][j] == 1: 189 | file.write('(assert-soft (not '+getK(j)+') :weight '+str(np.log(fp_weight))+')\n') 190 | file.write('(assert-soft '+getX(i,j)+' :weight '+str(-np.log((1-fn_weight)/fp_weight))+')\n') 191 | 192 | # Constraint for VAFs 193 | if allow_vaf: 194 | for p in range(numMuts): 195 | for q in range(numMuts): 196 | if p==q: 197 | file.write('(assert (= '+getA(p,q)+' false))\n') 198 | else: 199 | file.write('(assert (or (not '+getA(p,q)+') (not '+getA(q,p)+')))\n') #1.a 200 | if allow_col_elim: 201 | file.write('(assert (or (not (or '+getA(p,q)+' '+getA(q,p)+')) (and (not ' 202 | +getK(p)+') (not '+getK(q)+'))))\n') #1.b 203 | if vafP[p][q] == 0: 204 | file.write('(assert (= '+getA(p,q)+' false))\n') #1.d 205 | for r in range(numMuts): 206 | if p != q and p != r and q != r: 207 | if vafT[p][q][r] == 0 and q < r: 208 | file.write('(assert (= (and '+getA(p,q)+' ' 209 | +getA(p,r)+' (not '+getA(q,r)+') (not '+getA(r,q)+')) false))\n') #2 210 | 211 | 212 | for t in range(numCells): 213 | for p in range(numMuts): 214 | for q in range(numMuts): 215 | file.write('(assert (or (not (and '+getA(p,q)+' '+getY(t,q)+')) (and ' 216 | +getA(p,q)+' '+getY(t,p)+')))\n') #1.c 217 | 218 | 219 | 220 | # Constraint for checking conflict 221 | for i in range(numCells): 222 | for p in range(numMuts): 223 | for q in range(numMuts): 224 | if p <= q: 225 | file.write('(assert (or (not '+getY(i,p)+') (not '+getY(i,q)+') 
def read_ouput(n, m, fstr, allow_col_elim):
    """Parse the solver result file into a matrix, removed columns and objective.

    The file is scanned line by line for three markers:

    * ``define-fun K`` (only when ``allow_col_elim`` is true): the index
      after the underscore in the fourth space-separated token is a column;
      if the following line contains ``true`` the 1-based column index is
      appended to the removed-column list.
    * ``define-fun Y``: the two indices in the fourth token select a cell;
      it is set to -1 when its column was removed, otherwise to 1 or 0
      depending on whether the following line contains ``true``.
    * ``objectives``: the fifth and sixth space-separated tokens of the
      following line are read as floats ``a`` and ``b``.

    Args:
        n: Number of rows of the result matrix.
        m: Number of columns of the result matrix.
        fstr: Path to the solver output file.
        allow_col_elim: Whether K variables should be interpreted.

    Returns:
        ``(outresult, col_el, -1*a/b)`` where ``outresult`` is an ``n x m``
        integer array initialised to -1, ``col_el`` the list of 1-based
        removed columns, and ``a``/``b`` default to 0 and 1 when the
        objective line is missing or cannot be parsed.

    Raises:
        IndexError: If a ``define-fun K``/``define-fun Y`` marker is the last
            line of the file (truncated model).
    """
    with open(fstr, 'r') as file:
        lines = file.readlines()

    a = 0
    b = 1
    outresult = -1*np.ones(shape=(n, m)).astype(int)
    col_el = []

    if allow_col_elim:
        for index, line in enumerate(lines):
            if 'define-fun K' in line:
                i = int(line.split(' ')[3].split('_')[1])
                if 'true' in lines[index+1]:
                    col_el.append(i+1)

    # set copy for O(1) membership tests; col_el itself is returned as a list
    removed = set(col_el)

    for index, line in enumerate(lines):
        if 'define-fun Y' in line:
            name_parts = line.split(' ')[3].split('_')
            i = int(name_parts[1])
            j = int(name_parts[2])
            next_line = lines[index+1]
            if j+1 in removed:
                outresult[i][j] = -1
            elif 'true' in next_line:
                outresult[i][j] = 1
            else:
                outresult[i][j] = 0
        if 'objectives' in line:
            # Best effort: a missing or malformed objective line keeps the
            # values parsed so far (defaults a=0, b=1).
            try:
                next_line = lines[index+1]
                a = float(next_line.split(' ')[4])
                b = float(next_line.split(' ')[5].replace(')))\n',''))
            except (IndexError, ValueError):
                pass

    return outresult, col_el, -1*a/b
timeOut = 0 344 | 345 | try: 346 | os.makedirs(outDir) 347 | except OSError as exc: 348 | if exc.errno == errno.EEXIST and os.path.isdir(outDir): 349 | pass 350 | else: 351 | raise 352 | 353 | allow_col_elim = False 354 | maxCol = args.maxMut 355 | if maxCol == 0: 356 | allow_col_elim = False 357 | maxCol = 0 358 | else: 359 | allow_col_elim = True 360 | 361 | allow_vaf = False 362 | vafFile = '' 363 | vafDelta = 0 364 | if args.bulk is not None: 365 | allow_vaf = True 366 | vafFile = args.bulk 367 | vafDelta = args.delta 368 | 369 | vafP, vafT = read_vafs(vafFile, vafDelta, allow_vaf) 370 | log = open(logFile, 'w') 371 | produce_input(os.path.splitext(logFile)[0] + '.temp1', noisy_data, row, col, allow_col_elim, 372 | fn_weight, fp_weight, w_weight, maxCol, allow_vaf, vafP, vafT) 373 | 374 | t1 = datetime.now() 375 | exe_command(os.path.splitext(logFile)[0] + '.temp1', timeOut) 376 | total_model = datetime.now()-t1 377 | 378 | output_data, col_el, obj = read_ouput(row, col, os.path.splitext(logFile)[0] + '.temp2', allow_col_elim) 379 | output_mat = write_output(output_data, os.path.splitext(logFile)[0] + '.CFMatrix', inFile, col_el) 380 | command = 'rm ' + os.path.splitext(logFile)[0]+'.temp1' 381 | os.system(command) 382 | command = 'rm ' + os.path.splitext(logFile)[0]+'.temp2' 383 | os.system(command) 384 | total_running = datetime.now()-t0 385 | 386 | log.write('COMMAND: "{0}"\n'.format(' '.join(sys.argv))) 387 | log.write('NUM_CELLS(ROWS): '+str(row)+'\n') 388 | log.write('NUM_MUTATIONS(COLUMNS): '+str(col)+'\n') 389 | log.write('FN_WEIGHT: '+str(fn_weight)+'\n') 390 | log.write('FP_WEIGHT: '+str(fp_weight)+'\n') 391 | log.write('COLUMN_ELIMINATION_WEIGHT: '+str(w_weight)+'\n') 392 | log.write('NUM_THREADS: '+str(1)+'\n') 393 | log.write('MODEL_SOLVING_TIME_SECONDS: '+str('{0:.3f}'.format(total_model.total_seconds()))+'\n') 394 | log.write('RUNNING_TIME_SECONDS: '+str('{0:.3f}'.format(total_running.total_seconds()))+'\n') 395 | log.write('IS_CONFLICT_FREE: 
def readMatrixIntoHash(pathInputFile):
    """Read a whitespace-separated matrix file into a nested dictionary.

    The first line is a header whose first entry is ignored and whose
    remaining entries are the column IDs. Every following non-blank line
    starts with a row ID followed by one value per column; the value ``?``
    is stored as 3, everything else is converted with ``int``.

    Args:
        pathInputFile: Path to the matrix file.

    Returns:
        ``D`` such that ``D[rowID][columnID]`` is the integer entry.

    Raises:
        AssertionError: If the file does not exist, is empty, has a header
            without column IDs, or repeats a row ID.
    """
    assert os.path.exists(pathInputFile), "There does not exist file " + pathInputFile
    with open(pathInputFile, "r") as inputFile:
        inputLines = inputFile.readlines()
    assert len(inputLines) > 0, "ERROR. Input file " + pathInputFile + " is empty."

    columnIDs = inputLines[0].strip().split()[1:]
    numColumns = len(columnIDs)
    # The message used to reference an undefined name, which turned this
    # assertion into a NameError.
    assert numColumns > 0, "ERROR. First (header) line in " + pathInputFile + " is empty. Exiting!!!"

    D = {}
    for line in inputLines[1:]:
        lineColumns = line.strip().split()
        if not lineColumns:
            # tolerate blank lines (e.g. a trailing newline at end of file)
            continue
        rowID = lineColumns[0].strip()
        assert rowID not in D, "ERROR in function readMatrixIntoHash. " + rowID + " is already in keys."
        row = {}
        for columnID, entry in zip(columnIDs, lineColumns[1:1 + numColumns]):
            row[columnID] = int(entry) if entry != '?' else 3
        # a short row is still an error, as before
        if len(row) != numColumns:
            raise IndexError("list index out of range")
        D[rowID] = row
    return D
penwidth=3.5) 78 | for child in node.children: 79 | graph.add_edge(node.name, child.name) 80 | print_tree(child) 81 | 82 | def contains(col1, col2): 83 | for i in range(len(col1)): 84 | if not col1[i] >= col2[i]: 85 | return False 86 | return True 87 | 88 | def write_tree(matrix, names): 89 | i = 0 90 | while i < matrix.shape[1]: 91 | j = i + 1 92 | while j < matrix.shape[1]: 93 | if np.array_equal(matrix[:,i], matrix[:,j]): 94 | matrix = np.delete(matrix, j, 1) 95 | x = names.pop(j) 96 | names[i] += '

' + x 97 | j -= 1 98 | j += 1 99 | names[i] = '<'+names[i]+'>' 100 | i += 1 101 | 102 | rows = len(matrix) 103 | cols = len(matrix[0]) 104 | dimensions = np.sum(matrix, axis=0) 105 | # ordered indeces 106 | indeces = np.argsort(dimensions) 107 | dimensions = np.sort(dimensions) 108 | mutations_name = [] 109 | for i in range(cols): 110 | mutations_name.append(names[indeces[i]]) 111 | 112 | root = Node(mutations_name[-1], None) 113 | mut_nod = {} 114 | mut_nod[mutations_name[cols-1]] = root 115 | 116 | i = cols - 2 117 | while i >=0: 118 | if dimensions[i] == 0: 119 | break 120 | attached = False 121 | for j in range(i+1, cols): 122 | if contains(matrix[:, indeces[j]], matrix[:, indeces[i]]): 123 | node = Node(mutations_name[i], mut_nod[mutations_name[j]]) 124 | mut_nod[mutations_name[i]] = node 125 | attached = True 126 | break 127 | if not attached: 128 | node = Node(mutations_name[i], root) 129 | mut_nod[mutations_name[i]] = node 130 | i -=1 131 | print_tree(root) 132 | 133 | if addBulk: 134 | vafs = {} 135 | bulkMutations = readMutationsFromBulkFile(bulkfile) 136 | sampleIDs = bulkMutations[0].getSampleIDs() 137 | for mut in bulkMutations: 138 | temp_vaf = [] 139 | for sample in sampleIDs: 140 | temp_vaf.append('' + str(mut.getVAF(sampleID=sample)) + '') 141 | vafs[mut.getID()] = '{} ({})'.format(mut.getID(), ','.join(temp_vaf)) 142 | 143 | inp = np.genfromtxt(filename, skip_header=1, delimiter='\t') 144 | with open(filename, 'r') as fin: 145 | if addBulk: 146 | mutation_names = [vafs[x] for x in fin.readline().strip().split('\t')[1:]] 147 | else: 148 | mutation_names = fin.readline().strip().split('\t')[1:] 149 | sol_matrix = np.delete(inp, 0, 1) 150 | write_tree(sol_matrix, mutation_names) 151 | graph.layout(prog='dot') 152 | outputpath = filename[:-len('.CFMatrix')] 153 | graph.draw('{}.png'.format(outputpath)) 154 | -------------------------------------------------------------------------------- /PhISCS-B/solvers/aspino/aspino-static: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-compbio/PhISCS/50317fea0fd27b5a8ee9c9e050a79df8c3bdb4b4/PhISCS-B/solvers/aspino/aspino-static -------------------------------------------------------------------------------- /PhISCS-B/solvers/maxino/maxino-2015-k16-static: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-compbio/PhISCS/50317fea0fd27b5a8ee9c9e050a79df8c3bdb4b4/PhISCS-B/solvers/maxino/maxino-2015-k16-static -------------------------------------------------------------------------------- /PhISCS-B/solvers/maxino/maxino-2015-kdyn-static: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-compbio/PhISCS/50317fea0fd27b5a8ee9c9e050a79df8c3bdb4b4/PhISCS-B/solvers/maxino/maxino-2015-kdyn-static -------------------------------------------------------------------------------- /PhISCS-B/solvers/mscg/mscg14-linux-x86-64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-compbio/PhISCS/50317fea0fd27b5a8ee9c9e050a79df8c3bdb4b4/PhISCS-B/solvers/mscg/mscg14-linux-x86-64 -------------------------------------------------------------------------------- /PhISCS-B/solvers/mscg/mscg15a-linux-x86-64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-compbio/PhISCS/50317fea0fd27b5a8ee9c9e050a79df8c3bdb4b4/PhISCS-B/solvers/mscg/mscg15a-linux-x86-64 -------------------------------------------------------------------------------- /PhISCS-B/solvers/mscg/mscg15b-linux-x86-64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-compbio/PhISCS/50317fea0fd27b5a8ee9c9e050a79df8c3bdb4b4/PhISCS-B/solvers/mscg/mscg15b-linux-x86-64 
-------------------------------------------------------------------------------- /PhISCS-B/solvers/open-wbo/open-wbo_glucose4.1_static: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-compbio/PhISCS/50317fea0fd27b5a8ee9c9e050a79df8c3bdb4b4/PhISCS-B/solvers/open-wbo/open-wbo_glucose4.1_static -------------------------------------------------------------------------------- /PhISCS-B/solvers/open-wbo/open-wbo_minisat2.2_static: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-compbio/PhISCS/50317fea0fd27b5a8ee9c9e050a79df8c3bdb4b4/PhISCS-B/solvers/open-wbo/open-wbo_minisat2.2_static -------------------------------------------------------------------------------- /PhISCS-B/solvers/qmaxsat/qmaxsat14.04auto-glucose3_static: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-compbio/PhISCS/50317fea0fd27b5a8ee9c9e050a79df8c3bdb4b4/PhISCS-B/solvers/qmaxsat/qmaxsat14.04auto-glucose3_static -------------------------------------------------------------------------------- /PhISCS-B/solvers/qmaxsat/qmaxsat14.04pms-glucose2_static: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-compbio/PhISCS/50317fea0fd27b5a8ee9c9e050a79df8c3bdb4b4/PhISCS-B/solvers/qmaxsat/qmaxsat14.04pms-glucose2_static -------------------------------------------------------------------------------- /PhISCS-B/solvers/qmaxsat/qmaxsat14.04wpms-glucose2_static: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-compbio/PhISCS/50317fea0fd27b5a8ee9c9e050a79df8c3bdb4b4/PhISCS-B/solvers/qmaxsat/qmaxsat14.04wpms-glucose2_static -------------------------------------------------------------------------------- /PhISCS-I/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfu-compbio/PhISCS/50317fea0fd27b5a8ee9c9e050a79df8c3bdb4b4/PhISCS-I/__init__.py -------------------------------------------------------------------------------- /PhISCS-I/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # ============================================================================== 4 | # Written by : Salem Malikic 5 | # Modified by: Farid Rashidi 6 | # Last Update: Apr 25, 2019 7 | # ============================================================================== 8 | 9 | 10 | from gurobipy import * 11 | from helperFunctions import * 12 | from datetime import datetime 13 | import argparse 14 | import errno 15 | import pandas as pd 16 | from faridFunctions import * 17 | 18 | 19 | # COMMAND LINE ARGUMENTS PARSING 20 | parser = argparse.ArgumentParser(description='PhISCS-I by Gurobi solver', add_help=True) 21 | # Required arguments: 22 | parser.add_argument('-SCFile', '--SCFile', required=True, type=str, 23 | help='Path to single cell data matrix file') 24 | parser.add_argument('-fn', '--fnProbability', required=True, type=float, 25 | help='Probablity of false negative') 26 | parser.add_argument('-fp', '--fpProbability', required=True, type=float, 27 | help='Probablity of false positive') 28 | 29 | # Optional: 30 | parser.add_argument('-o', '--outDir', default='.', type=str, 31 | help='Output directory') 32 | parser.add_argument('-kmax', '--maxMutationsToEliminate', default=0, type=int, 33 | help='Max number of mutations to be eliminated [default value is 0]') 34 | parser.add_argument('-bulkFile', '--bulkFile', default=None, type=str, 35 | help='Path to bulk data file') 36 | parser.add_argument('-delta', '--delta', default=0.20, type=float, 37 | help='Delta parameter accounting for VAF variance [default value is 0.20]') 38 | 
parser.add_argument('-time', '--time', type=int, default=86400, 39 | help='Max time (in seconds) allowed for the computation [default value is 24 hours]') 40 | parser.add_argument('--drawTree', action='store_true', 41 | help='Draw output tree by Graphviz') 42 | parser.add_argument('--drawFarid', action='store_true', 43 | help='Draw output tree by Graphviz') 44 | # https://stackoverflow.com/questions/15753701/argparse-option-for-passing-a-list-as-option 45 | # parser.add_argument('--candidateISAV', 46 | # help='', 47 | # type=str) 48 | 49 | parser.add_argument('-w', '--colEliminationWeight', default=0, type=float, 50 | help='Weight of column elimination [default value is 0]') 51 | parser.add_argument('-threads', '--threads', default=1, type=int, 52 | help='Number of threads [default value is 1]') 53 | 54 | 55 | args = parser.parse_args() 56 | 57 | 58 | # assert os.path.exists(args.outDir) == False, "ERROR!!! There already exists file or folder with name " + args.outDir + ". Exiting." 59 | try: 60 | os.makedirs(args.outDir) 61 | except OSError as exc: 62 | if exc.errno == errno.EEXIST and os.path.isdir(args.outDir): 63 | pass 64 | else: 65 | raise 66 | 67 | filename = os.path.splitext(os.path.basename(args.SCFile))[0] 68 | outfile = os.path.join(args.outDir, filename) 69 | gurobi_log = "{}.gurobi".format(outfile) 70 | start_model = datetime.now() 71 | verbose = False 72 | tree = False 73 | 74 | # ======= Reading SC data input (cell x mut matrix with headers) 75 | df = pd.read_csv(args.SCFile, sep='\t', index_col=0) 76 | df = df.replace('?', 3) 77 | df = df.astype(int) 78 | mutIDs = df.columns 79 | cellIDs = df.index 80 | I = df.values 81 | # SCFile = open(args.SCFile, "r") 82 | # mutIDs = SCFile.readline().rstrip().split()[1:] 83 | # cellIDs = [] 84 | # I = [] 85 | # for line in SCFile: 86 | # lineColumns = line.strip().split() 87 | # cellID = lineColumns[0] 88 | # cellIDs.append(cellID) 89 | # I.append([int(x) for x in lineColumns[1:]]) 90 | # SCFile.close() 91 | 92 
| 93 | numCells = len(cellIDs) 94 | numMutations = len(mutIDs) 95 | assert numMutations == len(set(mutIDs)), "ERROR!!! Some of the mutation IDs appear multiple times. Mutation IDs are " + str(mutIDs) 96 | assert numCells == len(set(cellIDs)), "ERROR!!! Some of the cell IDs appear multiple times. Cell IDs are " + str(cellIDs) 97 | 98 | 99 | beta = args.fnProbability 100 | alpha = args.fpProbability 101 | 102 | 103 | isTrueVAF = False # trueVAF was used for the internal purposes during the method development 104 | usingBulk = False 105 | delta = None 106 | bulkMutations = None 107 | if args.bulkFile: 108 | delta = args.delta 109 | usingBulk = True 110 | bulkMutations = readMutationsFromBulkFile(args.bulkFile) 111 | assert len(bulkMutations) == numMutations, "ERROR!!! Single-cell and bulk data do not have the same number of mutations" 112 | for i in range(numMutations): 113 | assert bulkMutations[i].ID == mutIDs[i], "Mutations must be sorted in the same order in single cell and bulk data" 114 | 115 | 116 | # =========== VARIABLES 117 | model = Model('PhISCS_ILP') 118 | # model.Params.LogFile = gurobi_log 119 | model.Params.LogFile = '' 120 | model.Params.Threads = args.threads 121 | model.setParam('TimeLimit', args.time) 122 | 123 | print('Generating variables...') 124 | 125 | 126 | # ===== Matrix Y is matrix of corrected (i.e. true) genotypes w.r.t. 
input SC matrix I 127 | Y = {} 128 | for c in range(numCells): 129 | for m in range(numMutations): 130 | Y[c, m] = model.addVar(vtype=GRB.BINARY, name='Y({0},{1})'.format(c, m)) 131 | 132 | 133 | 134 | # ===== Variables B control the existence of conflict between columns 135 | B = {} 136 | for p in range(numMutations+1): 137 | for q in range(numMutations+1): 138 | B[p, q, 1, 1] = model.addVar(vtype=GRB.BINARY, obj=0, 139 | name='B[{0},{1},1,1]'.format(p, q)) 140 | B[p, q, 1, 0] = model.addVar(vtype=GRB.BINARY, obj=0, 141 | name='B[{0},{1},1,0]'.format(p, q)) 142 | B[p, q, 0, 1] = model.addVar(vtype=GRB.BINARY, obj=0, 143 | name='B[{0},{1},0,1]'.format(p, q)) 144 | 145 | 146 | # ==== Variable K[j] is set to 1 if and only if mutation j is among eliminated mutations 147 | K = {} 148 | # candidateISAV = [int(item.replace('mut','')) for item in args.candidateISAV.split(',')] 149 | # f_i = args.SCFile 150 | # f_o = '/data/frashidi/PhISCS/_result/TP_FP/noBulk_k_0/' + filename + '.CFMatrix' 151 | # candidateISAV = give_me_muts_to_filter(f_i, f_o, args.maxMutationsToEliminate) 152 | for m in range(numMutations+1): 153 | ''' 154 | if m in list(set(range(numMutations)) - set(candidateISAV)): 155 | K[m] = model.addVar(vtype=GRB.BINARY, name='K[{0}]'.format(m), lb=0, ub=0) 156 | else: 157 | K[m] = model.addVar(vtype=GRB.BINARY, name='K[{0}]'.format(m)) 158 | ''' 159 | K[m] = model.addVar(vtype=GRB.BINARY, name='K[{0}]'.format(m)) 160 | model.addConstr(K[numMutations] == 0) # null mutation can not be eliminated 161 | 162 | 163 | # ==== A[p,q] = 1 if p is ancestor of q 164 | A = {} 165 | if usingBulk: 166 | for p in range(numMutations + 1): # mutation with index numMutation is null mutation 167 | for q in range(numMutations + 1): 168 | A[p,q] = model.addVar(vtype=GRB.BINARY, obj=0, name='A[{0},{1}]'.format(p,q)) 169 | 170 | 171 | model.update() 172 | 173 | # ====== CONSTRAINTS 174 | print('Generating constraints...') 175 | 176 | # --- number of eliminated columns is upper bounded 
by user provided constant 177 | model.addConstr(quicksum(K[m] for m in range(numMutations)) <= args.maxMutationsToEliminate) 178 | 179 | 180 | # --- Enforce three gametes rule 181 | for i in range(numCells): 182 | for p in range(numMutations): 183 | for q in range(numMutations): 184 | model.addConstr(Y[i,p] + Y[i,q] - B[p,q,1,1] <= 1) 185 | model.addConstr(-Y[i,p] + Y[i,q] - B[p,q,0,1] <= 0) 186 | model.addConstr(Y[i,p] - Y[i,q] - B[p,q,1,0] <= 0) 187 | 188 | # --- Null mutation present in each cell 189 | for p in range(numMutations+1): 190 | model.addConstr(B[p,numMutations, 1, 0] == 0) 191 | 192 | 193 | # --- Forbid conflict between columns (three gametes rule) 194 | for p in range(numMutations): 195 | for q in range(numMutations): 196 | model.addConstr(B[p,q,0,1] + B[p,q,1,0] + B[p,q,1,1] <= 2 + K[p] + K[q]) 197 | 198 | 199 | # === Constraints for integrating VAF obtained from bulk data into the model 200 | if usingBulk: 201 | validateSampleIDsConcordance(bulkMutations) 202 | sampleIDs = bulkMutations[0].getSampleIDs() 203 | bulkMutations.append(generateArtificialNullMutation(bulkMutations[0])) 204 | for p in range(numMutations): 205 | for q in range(p+1, numMutations): 206 | model.addConstr(A[p,q] + A[q,p] <= 1-K[p]) 207 | model.addConstr(A[p,q] + A[q,p] <= 1-K[q]) 208 | model.addConstr(A[p,q] + A[q,p] >= B[p,q,1,1] - K[p] - K[q]) 209 | for p in range(numMutations+1): 210 | model.addConstr(A[p,p] == 0) 211 | for q in range(numMutations+1): 212 | model.addConstr(A[p, q] <= 1 - K[p]) 213 | model.addConstr(A[p, q] <= 1 - K[q]) 214 | 215 | if p= A[p, q] * VAF_q) 229 | #''' 230 | for r in range(numMutations+1): 231 | if r == q: 232 | continue 233 | VAF_r = bulkMutations[r].getVAF(sampleID) 234 | if isTrueVAF: 235 | VAF_r = bulkMutations[r].getTrueVAF(sampleID) 236 | # Constraint 2 237 | model.addConstr( 238 | VAF_p * (1 + delta) >= 239 | VAF_q * (A[p, q] - A[r, q] - A[q, r]) + 240 | VAF_r * (A[p, r] - A[r, q] - A[q, r]) 241 | ) 242 | #''' 243 | for r in 
range(numMutations+1): 244 | if r == q: 245 | continue 246 | # Constraint 1.d 247 | model.addConstr(A[p, r] >= A[p, q] + A[q, r] - 1) 248 | 249 | 250 | candidateAncestors = [i for i in range(numMutations+1)] 251 | candidateAncestors.remove(p) 252 | 253 | if p < numMutations: 254 | model.addConstr(quicksum(A[s,p] for s in candidateAncestors) >= 1 - K[p]) 255 | elif p == numMutations: 256 | model.addConstr(quicksum(A[s,p] for s in candidateAncestors) == 0) 257 | else: 258 | print("p index out of range. Exiting") 259 | sys.exit(2) 260 | 261 | 262 | # --- Defining the objective function 263 | objective = 0 264 | 265 | for j in range(numMutations): 266 | numZeros = 0 267 | numOnes = 0 268 | for i in range(numCells): 269 | if I[i][j] == 0: 270 | numZeros += 1 271 | objective += np.log(beta/(1-alpha)) * Y[i,j] 272 | elif I[i][j] == 1: 273 | numOnes += 1 274 | objective += np.log((1-beta)/alpha) * Y[i,j] 275 | 276 | objective += numZeros * np.log(1-alpha) 277 | objective += numOnes * np.log(alpha) 278 | objective -= K[j] * (numZeros * np.log(1-alpha) + numOnes * (np.log(alpha) + np.log((1-beta)/alpha))) 279 | 280 | model.setObjective(objective, GRB.MAXIMIZE) 281 | time_to_model = datetime.now() - start_model 282 | 283 | # --- Optimize 284 | start_optimize = datetime.now() 285 | model.optimize() 286 | 287 | 288 | 289 | # ====== POST OPTIMIZATION 290 | if model.status == GRB.Status.INFEASIBLE: 291 | print('The model is unfeasible.') 292 | exit(0) 293 | 294 | time_to_opt = datetime.now() - start_optimize 295 | time_to_run = datetime.now() - start_model 296 | 297 | 298 | 299 | optimal_solution = model.ObjVal 300 | print('Optimal solution: %f' % optimal_solution) 301 | 302 | removedMutsIDs = [] 303 | sol_K = [] 304 | for j in range(numMutations): 305 | sol_K.append(nearestInt(float(K[j].X))) 306 | if sol_K[j] == 1: 307 | removedMutsIDs.append(mutIDs[j]) 308 | 309 | sol_Y = [] 310 | for i in range(numCells): 311 | sol_Y.append([nearestInt(float(Y[i,j].X)) for j in 
range(numMutations)]) 312 | 313 | conflictFreeMatrix = open("{}.CFMatrix".format(outfile), "w") 314 | conflictFreeMatrix.write("cellID/mutID") 315 | for j in range(numMutations): 316 | if sol_K[j] == 0: 317 | conflictFreeMatrix.write("\t" + mutIDs[j]) 318 | 319 | 320 | conflictFreeMatrix.write("\n") 321 | for i in range(numCells): 322 | conflictFreeMatrix.write(cellIDs[i]) 323 | for j in range(numMutations): 324 | if sol_K[j] == 0: 325 | conflictFreeMatrix.write("\t" + str(sol_Y[i][j])) 326 | conflictFreeMatrix.write("\n") 327 | conflictFreeMatrix.close() 328 | 329 | 330 | flips_0_1 = 0 331 | flips_1_0 = 0 332 | flips_3_0 = 0 333 | flips_3_1 = 0 334 | for i in range(numCells): 335 | for j in range(numMutations): 336 | if sol_K[j] == 0: 337 | if I[i][j] == 0 and sol_Y[i][j] == 1: 338 | flips_0_1 += 1 339 | elif I[i][j] == 1 and sol_Y[i][j] == 0: 340 | flips_1_0 += 1 341 | elif I[i][j] == 3 and sol_Y[i][j] == 0: 342 | flips_3_0 += 1 343 | elif I[i][j] == 3 and sol_Y[i][j] == 1: 344 | flips_3_1 += 1 345 | 346 | 347 | # check if matrix is conflict free 348 | conflictFree = True 349 | for p in range(numMutations): 350 | if sol_K[p] == 1: 351 | continue 352 | for q in range(p + 1, numMutations): 353 | if sol_K[q] == 1: 354 | continue 355 | oneone = False 356 | zeroone = False 357 | onezero = False 358 | 359 | for r in range(numCells): 360 | if sol_Y[r][p] == 1 and sol_Y[r][q] == 1: 361 | oneone = True 362 | if sol_Y[r][p] == 0 and sol_Y[r][q] == 1: 363 | zeroone = True 364 | if sol_Y[r][p] == 1 and sol_Y[r][q] == 0: 365 | onezero = True 366 | 367 | if oneone and zeroone and onezero: 368 | conflictFree = False 369 | print('ERROR!!! 
Conflict in output matrix in columns (%d, %d)' % (p, q)) 370 | 371 | 372 | log = open('{}.log'.format(outfile), 'w+') 373 | # --- Input info 374 | log.write('COMMAND: "{0}"\n'.format(' '.join(sys.argv))) 375 | log.write('NUM_CELLS(ROWS): {0}\n'.format(str(numCells))) 376 | log.write('NUM_MUTATIONS(COLUMNS): {0}\n'.format(str(numMutations))) 377 | log.write('FN_WEIGHT: {0}\n'.format(str(beta))) 378 | log.write('FP_WEIGHT: {0}\n'.format(str(alpha))) 379 | log.write('COLUMN_ELIMINATION_WEIGHT: {0}\n'.format(str(args.colEliminationWeight))) 380 | log.write('NUM_THREADS: {0}\n'.format(str(args.threads))) 381 | log.write('MODEL_SOLVING_TIME_SECONDS: {0:.3f}\n'.format(time_to_opt.total_seconds())) 382 | log.write('RUNNING_TIME_SECONDS: {0:.3f}\n'.format(time_to_run.total_seconds())) 383 | if conflictFree: 384 | conflictFree = 'YES' 385 | else: 386 | conflictFree = 'NO' 387 | log.write('IS_CONFLICT_FREE: {0}\n'.format(conflictFree)) 388 | log.write('LIKELIHOOD: {0}\n'.format(str(optimal_solution))) 389 | log.write('MIP_Gap_Value: %f\n' % model.MIPGap) 390 | log.write('TOTAL_FLIPS_REPORTED: {0}\n'.format(str(flips_0_1 + flips_1_0 + flips_3_0 + flips_3_1))) 391 | log.write('0_1_FLIPS_REPORTED: {0}\n'.format(str(flips_0_1))) 392 | log.write('1_0_FLIPS_REPORTED: {0}\n'.format(str(flips_1_0))) 393 | log.write('?_0_FLIPS_REPORTED: {0}\n'.format(str(flips_3_0))) 394 | log.write('?_1_FLIPS_REPORTED: {0}\n'.format(str(flips_3_1))) 395 | log.write('MUTATIONS_REMOVED_UPPER_BOUND: {0}\n'.format(str(args.maxMutationsToEliminate))) 396 | log.write('MUTATIONS_REMOVED_NUM: {0}\n'. 
def give_me_muts_to_filter(f_i, f_o, kmax):
    """Rank mutations that look like good candidates for elimination.

    Both files are tab-separated cell x mutation matrices whose first column
    is the row index.  ``f_i`` is the noisy input matrix and ``f_o`` is a
    previously computed conflict-free output matrix.

    For every mutation ``m`` of the output matrix, its "descendants" are the
    other output columns that ``m`` dominates row by row (``m >= other`` in
    every cell).  Two counts are then taken on the *input* matrix:

    * ``suspicious`` -- cells where ``m`` is 0 but at least one descendant is 1;
    * ``present``    -- cells where ``m`` is 1.

    Mutations with ``suspicious == 0`` are dropped.  The rest are ordered by
    the ratio ``suspicious / present`` (descending), ties broken by
    ``suspicious`` (descending).

    Parameters
    ----------
    f_i : str
        Path to the input single-cell matrix.
    f_o : str
        Path to the conflict-free matrix.
    kmax : int
        At most ``3 * kmax`` mutations are returned.

    Returns
    -------
    list of int
        Mutation indices obtained by stripping the ``'mut'`` prefix from the
        column names, best candidates first.

    Notes
    -----
    The ratio is compared numerically.  Sorting its formatted string form
    would rank ``'2.000'`` above ``'10.000'``.  A mutation that is never
    observed as 1 (``present == 0``) gets an infinite ratio instead of
    raising ``ZeroDivisionError``.
    """
    df_i = pd.read_csv(f_i, index_col=0, sep='\t')
    df_o = pd.read_csv(f_o, index_col=0, sep='\t')

    # Map each output mutation to the columns it dominates in every cell.
    out_muts = list(df_o.columns)
    descendants = {}
    for mut1 in out_muts:
        col1 = df_o[mut1]
        below = [mut2 for mut2 in out_muts
                 if mut2 != mut1 and bool((col1 >= df_o[mut2]).all())]
        if below:
            descendants[mut1] = below

    ranked = []
    for mut, below in descendants.items():
        present = int((df_i[mut] == 1).sum())
        absent = df_i[mut] == 0
        descendant_seen = (df_i[below] == 1).any(axis=1)
        suspicious = int((absent & descendant_seen).sum())
        if suspicious == 0:
            continue
        # float() keeps true division on Python 2 as well.
        ratio = float(suspicious) / present if present else float('inf')
        ranked.append((mut, suspicious, ratio))

    # list.sort is stable, so equal keys keep their discovery order.
    ranked.sort(key=lambda entry: (-entry[2], -entry[1]))
    return [int(mut.replace('mut', '')) for mut, _, _ in ranked[:kmax * 3]]
class Mutation:
    """A single mutation record from a bulk-sequencing file.

    Attributes:
        ID:          mutation identifier.
        chromosome:  chromosome label (``__lt__`` converts it with ``int``).
        position:    position on the chromosome.
        mutReads:    dict sampleID -> number of variant reads.
        refReads:    dict sampleID -> number of reference reads.
        INFO:        ``;``-separated ``key=value`` string.  Multi-sample
                     values are ``,``-separated and follow the order of the
                     ``sampleIDs`` entry.

    Methods that take ``sampleID=""`` fall back to the only sample when the
    mutation has exactly one; otherwise an ``AssertionError`` is raised.
    """

    def __init__(self, ID="NA", chromosome="none", position="0", mutReads=None, refReads=None, INFO=""):
        """Build a mutation; the read dictionaries are copied, not aliased.

        Raises:
            AssertionError: if ``mutReads`` and ``refReads`` cover different
                samples, or the ``sampleIDs`` entry of ``INFO`` disagrees
                with them.
        """
        # None instead of a shared mutable {} default.
        mutReads = {} if mutReads is None else mutReads
        refReads = {} if refReads is None else refReads
        self.ID = ID
        self.chromosome = chromosome
        self.position = position
        self.INFO = INFO
        assert set(mutReads.keys()) == set(refReads.keys()), "In Mutation constructor, set of samples different for variant and reference reads"
        self.mutReads = dict(mutReads)
        self.refReads = dict(refReads)
        if len(self.refReads) > 0:
            ERROR_MESSAGE = "In Mutation constructor, sampleIDs field in INFO does not agree with sampleIDs present in mutReads"
            assert set(self.getINFOEntryStringValue("sampleIDs").split(",")) == set(refReads.keys()), ERROR_MESSAGE
        else:
            ERROR_MESSAGE = "If no mut/ref reads INFO is provided then sampleIDs should be empty or absent in INFO field"
            assert ("sampleIDs" not in INFO) or len(self.getINFOEntryStringValue("sampleIDs")) == 0, ERROR_MESSAGE

    def _soleSampleID(self, reads, errorMessage):
        """Return the only key of ``reads``; assert that there is exactly one."""
        assert len(reads) == 1, errorMessage
        # next(iter(...)) works on Python 2 and 3; dict.keys()[0] fails on 3.
        return next(iter(reads))

    def getINFOEntryStringValue(self, entryID):
        """Return the value stored under ``entryID`` in ``INFO``.

        Everything after the first ``=`` is returned, so a value that itself
        contains ``=`` is not truncated.  If the entry is missing, a message
        is printed and the process exits with status 2.
        """
        wanted = str(entryID)
        for column in self.INFO.split(";"):
            key, _, value = column.strip().partition("=")
            if key == wanted:
                return value
        print("There does not exit " + wanted + " in " + str(self.ID) + ". EXITING !!!")
        sys.exit(2)

    def getINFOEntryStrValue(self, entryID):
        """Alias of :meth:`getINFOEntryStringValue`."""
        return self.getINFOEntryStringValue(entryID)

    def setMutReadsInSample(self, newReadCount, sampleID):
        """Overwrite the variant read count of an existing sample."""
        assert sampleID in self.mutReads.keys(), "ERROR in Mutation.py in setMutReadsInSample. Sample ID not present in hash."
        self.mutReads[sampleID] = newReadCount

    def setRefReadsInSample(self, newReadCount, sampleID):
        """Overwrite the reference read count of an existing sample."""
        assert sampleID in self.refReads.keys(), "ERROR in Mutation.py in setRefReadsInSample. Sample ID not present in hash."
        self.refReads[sampleID] = newReadCount

    def getVAF(self, sampleID=""):
        """Return ``2 * mut / (mut + ref)`` for the sample.

        Returns the string ``"NA"`` when the sample has no reads at all.
        """
        if sampleID == "":
            sampleID = self._soleSampleID(
                self.refReads,
                "ERROR. sampleID must be provided in getVAF of Mutation class if num samples is not 1")
        assert sampleID in self.mutReads.keys(), "ERROR. Incorrect sampleID " + str(sampleID) + " in class Mutation, function getVAF"
        totalReads = self.mutReads[sampleID] + self.refReads[sampleID]
        if totalReads == 0:
            return "NA"
        return float(2.0*self.mutReads[sampleID])/totalReads

    def getChromosome(self):
        """Return the chromosome label."""
        return self.chromosome

    def getPosition(self):
        """Return the position."""
        return self.position

    def getSampleIDs(self):
        """Return the sample IDs listed in INFO, in INFO order."""
        # we expect to have something like sampleIDs=S1,S2,S3;
        return self.getINFOEntryStringValue("sampleIDs").strip().split(",")

    def updateINFOEntryValue(self, entryID, newValue):
        """Replace the value of an existing INFO entry.

        Raises:
            AssertionError: if INFO has no entry called ``entryID``.
        """
        IDfound = False
        updatedPairs = []
        for pair in self.INFO.strip(";").split(";"):  # pair has the form "ID=value"
            pairID = pair.split("=")[0]
            if pairID == entryID:
                updatedPairs.append(pairID + "=" + str(newValue))
                IDfound = True
            else:
                updatedPairs.append(pair)
        assert IDfound == True, "ERROR in function updateINFOEntryValue in Mutation class. INFO does not contain entry with ID " + str(entryID)
        self.INFO = "".join(pair + ";" for pair in updatedPairs)

    def addINFOEntry(self, entryID, value):
        """Append a new ``entryID=value`` pair to INFO.

        ``value`` is converted with ``str``.  No leading ``;`` is produced
        when INFO is empty, because an empty first pair cannot be parsed
        back by the key/value readers of this module.

        Raises:
            AssertionError: if INFO already has an entry called ``entryID``.
        """
        existingIDs = [pair.split("=")[0] for pair in self.INFO.strip(";").split(";")]
        assert entryID not in existingIDs, "ERROR in function addINFOEntry in Mutation class. INFO already contains entry with ID " + str(entryID)
        prefix = self.INFO.rstrip(";")
        if prefix:
            prefix += ";"
        self.INFO = prefix + entryID + "=" + str(value) + ";"

    def updateID(self, newID):
        """Update the ``ID`` entry of INFO (``self.ID`` is left untouched)."""
        self.updateINFOEntryValue("ID", newID)

    def toString(self):
        """Return the tab-separated bulk-file line for this mutation."""
        sampleIDs = self.getSampleIDs()
        fields = [
            str(self.ID),
            str(self.chromosome),
            str(self.position),
            ';'.join([str(self.mutReads[sampleID]) for sampleID in sampleIDs]),
            ';'.join([str(self.refReads[sampleID]) for sampleID in sampleIDs]),
            self.INFO,
        ]
        return "\t".join(fields)

    def __lt__(self, other):
        """Order by integer chromosome, then by position."""
        if int(self.chromosome) < int(other.chromosome):
            return True
        if int(self.chromosome) == int(other.chromosome):
            return self.position < other.position
        return False

    def getID(self):
        """Return the mutation ID given to the constructor."""
        return self.ID

    def getGeneID(self):
        """Return the ``geneID`` entry of INFO, or ``ID`` if there is no ``geneID``.

        Prints an error and exits with status 2 when INFO has both entries
        or neither of them.
        """
        if (self.INFO.startswith("geneID=") or ";geneID=" in self.INFO) and (self.INFO.startswith("ID=") or ";ID=" in self.INFO):
            print("ERROR in function getGeneID in Mutation class")
            print("Mutation " + self.toString() + " has both geneID and ID in its INFO.")
            print("This causes issues in plotting functions (ID used in phiscs, geneID in BSCITE")
            sys.exit(2)
        if self.INFO.startswith("geneID") or ";geneID=" in self.INFO:
            return self.getINFOEntryStringValue("geneID")
        elif self.INFO.startswith("ID=") or ";ID=" in self.INFO:
            return self.getINFOEntryStringValue("ID")
        else:
            print("ERROR in function getGeneID in Mutation class. INFO entry does not contain geneID nor ID")
            print(self.toString())
            sys.exit(2)

    def getRefReads(self, sampleID=""):
        """Return the reference read count of the sample."""
        if sampleID == "":
            ERROR_MESSAGE = "ERROR. In getRefReads() function in Mutation class, sampleID argument must be provided, "
            ERROR_MESSAGE += "unless the number of samples is equal to 1."
            sampleID = self._soleSampleID(self.refReads, ERROR_MESSAGE)
        return self.refReads[sampleID]

    def getMutReads(self, sampleID=""):
        """Return the variant read count of the sample."""
        if sampleID == "":
            ERROR_MESSAGE = "ERROR. In getMutReads() function in Mutation class, sampleID argument must be provided, "
            ERROR_MESSAGE += "unless the number of samples is equal to 1."
            sampleID = self._soleSampleID(self.mutReads, ERROR_MESSAGE)
        return self.mutReads[sampleID]

    def getTotalReads(self, sampleID=""):
        """Return variant plus reference reads of the sample."""
        return self.getMutReads(sampleID) + self.getRefReads(sampleID)

    def getTrueVAF(self, sampleID=""):
        """Return the ``trueVAF`` INFO value recorded for the sample.

        Raises:
            AssertionError: if the sample is unknown or the ``trueVAF`` and
                ``sampleIDs`` entries have different lengths.
        """
        if sampleID == "":
            ERROR_MESSAGE = "ERROR. In getTrueVAF() function in Mutation class, sampleID argument must be provided, "
            ERROR_MESSAGE += "unless the number of samples is equal to 1."
            sampleID = self._soleSampleID(self.mutReads, ERROR_MESSAGE)
        sampleIDs = self.getSampleIDs()
        assert sampleID in sampleIDs, "ERROR in function getTrueVAF() in Mutation class. sampleID " + str(sampleID) + " not found!"
        trueVAFs = [float(x) for x in self.getINFOEntryStringValue("trueVAF").split(",")]
        assert len(trueVAFs) == len(sampleIDs), "ERROR in function getTrueVAF() in Mutation class. Lengths of trueVAFs and sampleIDs unequal"
        # Membership was asserted above, so index() cannot fail.
        return trueVAFs[sampleIDs.index(sampleID)]

    def updateMutRefReads(self, newCoverage, sampleID=""):
        """Resample the read counts of a sample at coverage ``newCoverage``.

        The variant count is drawn from ``Binomial(newCoverage, trueVAF/2)``
        with ``np.random``; the reference count is the remainder.
        Always returns ``True``.
        """
        if sampleID == "":
            ERROR_MESSAGE = "ERROR. In updateMutRefReads() function in Mutation class, sampleID argument must be provided, "
            ERROR_MESSAGE += "unless the number of samples is equal to 1."
            sampleID = self._soleSampleID(self.mutReads, ERROR_MESSAGE)

        VAF = self.getTrueVAF(sampleID)
        newTotalReads = newCoverage
        newMutReads = np.random.binomial(newTotalReads, VAF/2)
        self.mutReads[sampleID] = newMutReads
        self.refReads[sampleID] = newTotalReads - newMutReads
        return True

    def updateMutRefReadsAllSamples(self, newCoverage):
        """Resample the read counts of every sample at ``newCoverage``."""
        for sampleID in list(self.mutReads.keys()):
            self.updateMutRefReads(newCoverage, sampleID)

    def updateTrueVAFinSample(self, sampleID, newVAF):
        """Replace the ``trueVAF`` INFO value of one sample."""
        sampleIDs = self.getSampleIDs()
        assert sampleID in sampleIDs, "ERROR in function updateTrueVAFinSample. No sample with ID " + str(sampleID)
        updateSampleVAFindex = sampleIDs.index(sampleID)
        currentValues = self.getINFOEntryStringValue("trueVAF").strip().split(",")
        newValues = [
            str(newVAF) if i == updateSampleVAFindex else currentValue
            for i, currentValue in enumerate(currentValues)  # i is the sample index
        ]
        self.updateINFOEntryValue("trueVAF", ",".join(newValues))

    def addSample(self, sampleID, mutReads, refReads, trueVAF = ""):
        """Register a new sample with its read counts (and optional true VAF).

        Raises:
            AssertionError: if ``sampleID`` is already present.
        """
        ERROR_MESSAGE = "\nEROR in addSample in Mutation.py. SampleID " + sampleID + " is already present."
        ERROR_MESSAGE += "\nMutation ID is " + self.getID()
        assert sampleID not in self.getSampleIDs(), ERROR_MESSAGE
        if trueVAF != "":
            currentTrueVAFstring = self.getINFOEntryStringValue("trueVAF")
            self.updateINFOEntryValue("trueVAF", currentTrueVAFstring.rstrip(";") + "," + floatToString(trueVAF, 5))
        currentSampleIDsString = self.getINFOEntryStringValue("sampleIDs")
        self.updateINFOEntryValue("sampleIDs", currentSampleIDsString.rstrip(";") + "," + sampleID)
        self.mutReads[sampleID] = mutReads
        self.refReads[sampleID] = refReads

    def getAverageCoverageInAllSamples(self):
        """Return the mean total read count over all samples, truncated to int."""
        sampleIDs = self.getSampleIDs()
        assert set(self.mutReads.keys()) == set(sampleIDs), "ERROR in getAverageCoverageInAllSamples. Unequal sets of sample IDs."
        assert set(self.mutReads.keys()) == set(self.refReads.keys()), "ERROR in getAverageCoverageInAllSamples. Unequal sets of sample IDs."
        totalCoverage = sum(self.mutReads[sampleID] + self.refReads[sampleID] for sampleID in sampleIDs)
        return int(totalCoverage/len(sampleIDs))

    def reorderSampleIDs(self, desiredOrderOfSamples): # this function was used in parsing data from Andrew's paper
        """Rewrite INFO so that per-sample values follow ``desiredOrderOfSamples``.

        Entries ``refNuc``, ``altNuc`` and ``geneID`` hold a single value and
        are copied as they are; every other entry must hold one value per
        sample.

        Raises:
            AssertionError: if the sample sets differ, an entry is repeated,
                or an entry has an unexpected number of values.
        """
        sampleIDs = self.getINFOEntryStringValue("sampleIDs").strip().split(",")
        assert set(sampleIDs) == set(desiredOrderOfSamples), "ERROR in reorderSampleIDs. Different sets of sampleIDs."

        newINFOvalue = ""
        processedEntries = []

        for entryID in [x.strip().split("=")[0] for x in self.INFO.rstrip(";").split(";")]:
            assert entryID not in processedEntries, "ERROR. Entry " + entryID + " repeated in reorderSampleIDs. Mut ID is " + self.getID()
            valuesInSamples = self.getINFOEntryStringValue(entryID).strip().split(",")

            if entryID in ["refNuc", "altNuc", "geneID"]:
                assert len(valuesInSamples) == 1, "ERROR in reorderSampleIDs. Reference or altered nucleotide takes only one value."
                newINFOvalue += entryID + "=" + valuesInSamples[0] + ";"
            else:
                assert len(valuesInSamples) == len(sampleIDs), "ERROR in reorderSampleIDs. Different lengths of sample IDs."
                entryValueInSample = dict(zip(sampleIDs, valuesInSamples))
                newINFOvalue += entryID + "=" + ",".join([entryValueInSample[sampleID] for sampleID in desiredOrderOfSamples]) + ";"
            processedEntries.append(entryID)

        self.INFO = newINFOvalue
328 | assert len(stringColumns[3].strip().split(";")) == len(sampleIDs), ERROR_MESSAGE 329 | assert len(stringColumns[4].strip().split(";")) == len(sampleIDs), ERROR_MESSAGE 330 | 331 | for i in range(len(sampleIDs)): 332 | sampleID = sampleIDs[i] 333 | mutReads[sampleID] = int(stringColumns[3].split(";")[i]) 334 | refReads[sampleID] = int(stringColumns[4].split(";")[i]) 335 | 336 | return Mutation(ID, chromosome, position, mutReads, refReads, INFO) 337 | 338 | 339 | def readMutationsFromBulkFile(pathBulkFile): 340 | assert os.path.exists(pathBulkFile), "ERROR in function readMutationsFromBulkFile!!! There does not exist bulk file " + pathBulkFile 341 | bulkFile = open(pathBulkFile, "r") 342 | bulkFile.readline() 343 | bulkMutations = [] 344 | for line in bulkFile: 345 | bulkMutations.append(strToMutation(line)) 346 | bulkFile.close() 347 | return bulkMutations 348 | 349 | 350 | def validateSampleIDsConcordance(bulkMutations): 351 | sampleIDs = bulkMutations[0].getSampleIDs() 352 | numSamples = len(sampleIDs) 353 | assert numSamples > 0, "ERROR in validateSampleIDsConcordance. Number of samples zero!" 354 | for i in range(1, len(bulkMutations)): 355 | currentSampleIDs = bulkMutations[i].getSampleIDs() 356 | assert len(currentSampleIDs) == len(sampleIDs), "ERROR in function validateSampleIDsConcordance for mutations " + bulkMutations[0].getID() + " " + bulkMutations[i].getID() 357 | for sampleIndex in range(numSamples): 358 | assert sampleIDs[sampleIndex] == currentSampleIDs[sampleIndex], "ERROR in function validateSampleIDsConcordance. 
Sample IDs are not concordant" 359 | 360 | 361 | 362 | def generateArtificialNullMutation(existingBulkMutation): 363 | sampleIDs = existingBulkMutation.getSampleIDs() 364 | refReads = {} 365 | mutReads = {} 366 | for sampleID in sampleIDs: 367 | mutReads[sampleID] = existingBulkMutation.getTotalReads(sampleID) 368 | refReads[sampleID] = 0 369 | INFO = "" 370 | INFO += "trueVAF=" + ",".join(['1.0' for s in sampleIDs]) + ";" 371 | INFO += "sampleIDs=" + ",".join(sampleIDs) + ";" 372 | return Mutation("NULL Mutation abcdef", "NULLchromosome", 0, mutReads, refReads, INFO) 373 | 374 | 375 | def draw_tree(filename, addBulk, bulkfile): 376 | import pandas as pd 377 | import pygraphviz as pyg 378 | 379 | graph = pyg.AGraph(strict=False, directed=True, dpi=300) 380 | font_name = 'Avenir' 381 | 382 | class Node: 383 | def __init__(self, name, parent): 384 | self.name = name 385 | self.parent = parent 386 | self.children = [] 387 | if parent: 388 | parent.children.append(self) 389 | 390 | def print_tree(node): 391 | graph.add_node(node.name, label=node.name, fontname=font_name, color='black', penwidth=3.5) 392 | for child in node.children: 393 | graph.add_edge(node.name, child.name) 394 | print_tree(child) 395 | 396 | def contains(col1, col2): 397 | for i in range(len(col1)): 398 | if not col1[i] >= col2[i]: 399 | return False 400 | return True 401 | 402 | def write_tree(matrix, names): 403 | i = 0 404 | while i < matrix.shape[1]: 405 | j = i + 1 406 | while j < matrix.shape[1]: 407 | if np.array_equal(matrix[:,i], matrix[:,j]): 408 | matrix = np.delete(matrix, j, 1) 409 | x = names.pop(j) 410 | names[i] += '

' + x 411 | j -= 1 412 | j += 1 413 | names[i] = '<'+names[i]+'>' 414 | i += 1 415 | 416 | rows = len(matrix) 417 | cols = len(matrix[0]) 418 | dimensions = np.sum(matrix, axis=0) 419 | # ordered indeces 420 | indeces = np.argsort(dimensions) 421 | dimensions = np.sort(dimensions) 422 | mutations_name = [] 423 | for i in range(cols): 424 | mutations_name.append(names[indeces[i]]) 425 | 426 | root = Node(mutations_name[-1], None) 427 | mut_nod = {} 428 | mut_nod[mutations_name[cols-1]] = root 429 | 430 | i = cols - 2 431 | while i >=0: 432 | if dimensions[i] == 0: 433 | break 434 | attached = False 435 | for j in range(i+1, cols): 436 | if contains(matrix[:, indeces[j]], matrix[:, indeces[i]]): 437 | node = Node(mutations_name[i], mut_nod[mutations_name[j]]) 438 | mut_nod[mutations_name[i]] = node 439 | attached = True 440 | break 441 | if not attached: 442 | node = Node(mutations_name[i], root) 443 | mut_nod[mutations_name[i]] = node 444 | i -=1 445 | print_tree(root) 446 | 447 | if addBulk: 448 | vafs = {} 449 | bulkMutations = readMutationsFromBulkFile(bulkfile) 450 | sampleIDs = bulkMutations[0].getSampleIDs() 451 | for mut in bulkMutations: 452 | temp_vaf = [] 453 | for sample in sampleIDs: 454 | temp_vaf.append('' + str(mut.getVAF(sampleID=sample)) + '') 455 | vafs[mut.getID()] = '{} ({})'.format(mut.getID(), ','.join(temp_vaf)) 456 | 457 | inp = np.genfromtxt(filename, skip_header=1, delimiter='\t') 458 | with open(filename, 'r') as fin: 459 | if addBulk: 460 | mutation_names = [vafs[x] for x in fin.readline().strip().split('\t')[1:]] 461 | else: 462 | mutation_names = fin.readline().strip().split('\t')[1:] 463 | sol_matrix = np.delete(inp, 0, 1) 464 | write_tree(sol_matrix, mutation_names) 465 | graph.layout(prog='dot') 466 | outputpath = filename[:-len('.CFMatrix')] 467 | graph.draw('{}.png'.format(outputpath)) 468 | 469 | 470 | 471 | def draw_farid(filename, addBulk, bulkfile): 472 | add_cells=True 473 | 474 | import pandas as pd 475 | import pygraphviz as 
pyg 476 | import networkx as nx 477 | from networkx.drawing.nx_agraph import graphviz_layout, to_agraph 478 | 479 | def contains(col1, col2): 480 | for i in range(len(col1)): 481 | if not col1[i] >= col2[i]: 482 | return False 483 | return True 484 | 485 | df = pd.read_csv(filename, sep='\t', index_col=0) 486 | splitter_mut = '\n' 487 | matrix = df.values 488 | names_mut = list(df.columns) 489 | 490 | i = 0 491 | while i < matrix.shape[1]: 492 | j = i + 1 493 | while j < matrix.shape[1]: 494 | if np.array_equal(matrix[:,i], matrix[:,j]): 495 | matrix = np.delete(matrix, j, 1) 496 | x = names_mut.pop(j) 497 | names_mut[i] += splitter_mut + x 498 | j -= 1 499 | j += 1 500 | i += 1 501 | 502 | rows = matrix.shape[0] 503 | cols = matrix.shape[1] 504 | dimensions = np.sum(matrix, axis=0) 505 | indices = np.argsort(dimensions) 506 | dimensions = np.sort(dimensions) 507 | names_mut = [names_mut[indices[i]] for i in range(cols)] 508 | 509 | G = nx.DiGraph(dpi=300) 510 | G.add_node(cols) 511 | G.add_node(cols-1) 512 | G.add_edge(cols, cols-1, label=names_mut[cols-1]) 513 | node_mud = {} 514 | node_mud[names_mut[cols-1]] = cols-1 515 | 516 | i = cols - 2 517 | while i >= 0: 518 | if dimensions[i] == 0: 519 | break 520 | attached = False 521 | for j in range(i+1, cols): 522 | if contains(matrix[:, indices[j]], matrix[:, indices[i]]): 523 | G.add_node(i) 524 | G.add_edge(node_mud[names_mut[j]], i, label=names_mut[i]) 525 | node_mud[names_mut[i]] = i 526 | attached = True 527 | break 528 | if not attached: 529 | G.add_node(i) 530 | G.add_edge(cols, i, label=names_mut[i]) 531 | node_mud[names_mut[i]] = i 532 | i -=1 533 | 534 | clusters = {} 535 | for node in G: 536 | if node == cols: 537 | G._node[node]['label'] = '<germ
cells
>' 538 | G._node[node]['fontname'] = 'Helvetica' 539 | G._node[node]['width'] = 0.4 540 | G._node[node]['style'] = 'filled' 541 | G._node[node]['penwidth'] = 3 542 | G._node[node]['fillcolor'] = 'gray60' 543 | continue 544 | untilnow_mut = [] 545 | sp = nx.shortest_path(G, cols, node) 546 | for i in range(len(sp)-1): 547 | untilnow_mut += G.get_edge_data(sp[i], sp[i+1])['label'].split(splitter_mut) 548 | untilnow_cell = df.loc[(df[untilnow_mut] == 1).all(axis=1) & \ 549 | (df[[x for x in df.columns if x not in untilnow_mut]] == 0).all(axis=1)].index 550 | if len(untilnow_cell) > 0: 551 | clusters[node] = '\n'.join(untilnow_cell) 552 | else: 553 | clusters[node] = '-' 554 | 555 | if add_cells: 556 | G._node[node]['label'] = clusters[node] 557 | else: 558 | G._node[node]['label'] = '' 559 | G._node[node]['shape'] = 'circle' 560 | G._node[node]['fontname'] = 'Helvetica' 561 | G._node[node]['width'] = 0.4 562 | G._node[node]['style'] = 'filled' 563 | G._node[node]['penwidth'] = 2 564 | G._node[node]['fillcolor'] = 'gray90' 565 | i = 1 566 | for k, v in clusters.items(): 567 | if v == '-': 568 | clusters[k] = i*'-' 569 | i += 1 570 | 571 | header = '' 572 | if addBulk: 573 | vafs = {} 574 | bulkMutations = readMutationsFromBulkFile(bulkfile) 575 | sampleIDs = bulkMutations[0].getSampleIDs() 576 | for mut in bulkMutations: 577 | temp_vaf = [] 578 | for sample in sampleIDs: 579 | temp_vaf.append(str(mut.getVAF(sampleID=sample))) 580 | vafs[mut.getID()] = ''+','.join(temp_true)+'' 581 | for edge in G.edges(): 582 | temp = [] 583 | for mut in G.get_edge_data(edge[0],edge[1])['label'].split(splitter_mut): 584 | mut = '' + mut + '' + ': ' + vafs_true[mut] + '; ' + vafs_noisy[mut] 585 | temp.append(mut) 586 | temp = '<' + '
'.join(temp) + '>' 587 | G.get_edge_data(edge[0],edge[1])['label'] = temp 588 | 589 | for mut in bulkMutations: 590 | try: 591 | isatype = mut.getINFOEntryStringValue('ISAVtype') 592 | header += mut.getID() + ': ' + isatype + '
' 593 | except: 594 | pass 595 | 596 | temp = df.columns[(df==0).all(axis=0)] 597 | if len(temp) > 0: 598 | header += 'Became Germline: ' + ','.join(temp) + '
' 599 | 600 | with open(filename[:-len('.CFMatrix')]+'.log') as fin: 601 | i = 0 602 | for line in fin: 603 | i += 1 604 | if i > 10 and i < 18: 605 | header += line.rstrip() + '
' 606 | 607 | 608 | H = nx.relabel_nodes(G, clusters) 609 | html = '''<{}>'''.format(header) 610 | H.graph['graph'] = {'label':html, 'labelloc':'t', 'resolution':300, 'fontname':'Helvetica', 'fontsize':8} 611 | H.graph['node'] = {'fontname':'Helvetica', 'fontsize':8} 612 | H.graph['edge'] = {'fontname':'Helvetica', 'fontsize':8} 613 | 614 | mygraph = to_agraph(H) 615 | mygraph.layout(prog='dot') 616 | outputpath = filename[:-len('.CFMatrix')] 617 | mygraph.draw('{}.png'.format(outputpath)) 618 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PhISCS 2 | 3 | PhISCS is a tool for sub-perfect tumor phylogeny reconstruction via integrative use of single-cell and bulk sequencing data. If bulk sequencing data is used, we expect that mutations originate from diploid regions of the genome. Due to variance in VAF values, we recommend the use of bulk data in cases when sequencing depth is at least 1000x (haploid coverage). As output, PhISCS reports tree of tumor evolution together with a set of eliminated mutations, where eliminated mutations represent mutations violating Infinite Sites Assumption (due to deletion of variant allele or due to recurrent mutation) or mutations affected by copy number aberrations that were missed during the tumor copy number profiling (e.g. gain of non-variant allele). 4 | 5 | PhISCS has been published in **Genome Research** [(doi:10.1101/gr.234435.118)](https://doi.org/10.1101/gr.234435.118). If you find this code useful in your research, please consider citing. 
6 | ``` 7 | @article{malikic2019phiscs, 8 | doi = {10.1101/gr.234435.118}, 9 | url = {https://doi.org/10.1101/gr.234435.118}, 10 | year = 2019, 11 | month = oct, 12 | publisher = {Cold Spring Harbor Laboratory}, 13 | volume = {29}, 14 | number = {11}, 15 | pages = {1860--1877}, 16 | author = {Salem Malikic and Farid {Rashidi Mehrabadi} and Simone Ciccolella and Md. Khaledur Rahman and Camir Ricketts and Ehsan Haghshenas and Daniel Seidman and Faraz Hach and Iman Hajirasouliha and S. Cenk Sahinalp}, 17 | title = {{{PhISCS}: a combinatorial approach for subperfect tumor phylogeny reconstruction via integrative use of single-cell and bulk sequencing data}}, 18 | journal = {Genome Research} 19 | } 20 | ``` 21 | 22 | ## Contents 23 | 1. [Installation](#installation) 24 | * [PhISCS-I](#installationilp) 25 | * [Prerequisite: ILP solver](#prerequisiteilp) 26 | * [PhISCS-B](#installationcsp) 27 | * [Prerequisite: CSP solver](#prerequisitecsp) 28 | 2. [Running](#running) 29 | * [Input](#input) 30 | * [Single-cell Matrix](#singlecellmatrix) 31 | * [Bulk Data](#bulkdata) 32 | * [Output](#output) 33 | * [Log File](#logfile) 34 | * [Output Matrix File](#outputmatrixfile) 35 | * [Parameters](#parameters) 36 | 3. [Example](#example) 37 | 4. [Contact](#contact) 38 | 39 | 40 | ## Installation 41 | PhISCS is written in Python and C. It supports both Python 2.7 and 3. Currently it is intended to be run on POSIX-based systems (only Linux and macOS have been tested). 42 | 43 | > **RECOMMENDATION**: At the moment, in cases when both single-cell and bulk data are used as input, we recommend the use of PhISCS-I over PhISCS-B (due to the more thorough tests and software validation that we have performed for PhISCS-I). However, when single-cell data is the only input, we have extensively tested both implementations and, since PhISCS-B can have a running-time advantage in this case, we recommend its use over PhISCS-I.
44 | 45 | 46 | ### PhISCS-I 47 | ``` 48 | git clone --recursive https://github.com/sfu-compbio/PhISCS.git 49 | cd PhISCS 50 | python PhISCS-I --help 51 | ``` 52 | 53 | 54 | #### Prerequisite: ILP solver 55 | 56 | In order to run PhISCS-I, the main requirement is the installation of the Gurobi solver. [Gurobi](http://www.gurobi.com) is a commercial solver that is free for academic purposes. After installing it, the `gurobipy` package must also be installed before PhISCS-I can be run successfully (below we provide some examples of the input and commands used to run the tool). 57 | 58 | 59 | 60 | ### PhISCS-B 61 | 62 | ``` 63 | git clone --recursive https://github.com/sfu-compbio/PhISCS.git 64 | cd PhISCS 65 | ./PhISCS-B-configure 66 | python PhISCS-B --help 67 | ``` 68 | 69 | 70 | #### Prerequisite: CSP solver 71 | 72 | Some CSP solvers are already included in the PhISCS package. There is an option to add a new CSP solver to PhISCS-B by providing a path to the exe file of the desired CSP solver. 73 | 74 | 75 | 76 | ## Running 77 | 78 | 79 | ### Input 80 | 81 | 82 | #### 1. Single-cell Matrix 83 | Single-cell input is assumed to be represented in the form of a ternary, __tab-delimited__ matrix with rows corresponding to single cells and columns corresponding to mutations. We assume that this file contains headers and that the matrix is a ternary matrix with 0 denoting the absence and 1 denoting the presence of a mutation in a given cell, whereas ? represents the lack of information about presence/absence of a mutation in a given cell (i.e. missing entry). __In order to simplify parsing of the matrix, we also assume that the upper-left corner equals the string `cellID/mutID`__. 84 | 85 | Below is an example of a single-cell data matrix. Note that mutation and cell names are arbitrary strings not containing tabs or spaces; however, they must be unique. 86 | ``` 87 | cellID/mutID mut0 mut1 mut2 mut3 mut4 mut5 mut6 mut7 88 | cell0 0 0 ? 0 0 0 0 0 89 | cell1 0 ?
1 0 0 0 1 1 90 | cell2 0 0 1 0 0 0 1 1 91 | cell3 1 1 0 0 0 0 0 0 92 | cell4 0 0 1 0 0 0 0 0 93 | cell5 1 0 0 0 0 0 0 0 94 | cell6 0 0 1 0 0 0 1 1 95 | cell7 0 0 1 0 0 0 0 0 96 | cell8 ? 0 0 0 ? 0 ? 1 97 | cell9 0 1 0 0 0 0 0 0 98 | ``` 99 | 100 | 101 | #### 2. Bulk Data 102 | As bulk data input, we also expect __tab-delimited__ file with the following columns: 103 | 104 | **ID** which represents mutational ID (used in single-cell data matrix for the same mutation) 105 | **Chromosome** which represents chromosome of the mutation (any string not containing tabs or empty spaces) 106 | **Position** which represents position (on chromosome) of the mutation (any string/number not containing tabs or empty spaces) 107 | **MutantCount** is the number of mutant reads in the bulk data. If multiple bulk samples are used, values are semicolon-delimited and provided in the sorted order of samples (this order is expected to be same for all mutations, e.g. first number always representing read count in sample 1, second number in sample 2 etc.) 108 | **ReferenceCount** is the number of reference reads in the bulk data. If multiple bulk samples are used, values are semicolon-delimited and provided in the sorted order of samples (this order is expected to be same for all mutations, e.g. first number always representing read count in sample 1, second number in sample 2 etc.) 109 | **INFO** which contains additional information about the mutation and is semicolon-delimited. Entries in this column are of the form: entryID=values, where values are delimited by commas. An example of INFO column is: 110 | "sampleIDs=S0,S1;synonymous=false;exonic=true". The only obligatory information required now is information about sample origins (in cases of absence of them, arbitrary distinct strings can be used, e.g. 
sampleIDs=S0,S1,S2;) 111 | 112 | 113 | 114 | As an example: 115 | ``` 116 | ID Chromosome Position MutantCount ReferenceCount INFO 117 | mut0 1 0 766;511;688 4234;4489;4312 sampleIDs=primary,metastasis1,metastasis2 118 | mut1 1 1 719;479;719 4281;4521;4281 sampleIDs=primary,metastasis1,metastasis2 119 | mut2 1 2 1246;1094;859 3754;3906;4141 sampleIDs=primary,metastasis1,metastasis2 120 | mut3 1 3 298;226;272 4702;4774;4728 sampleIDs=primary,metastasis1,metastasis2 121 | mut4 1 4 353;227;255 4647;4773;4745 sampleIDs=primary,metastasis1,metastasis2 122 | mut5 1 5 306;232;279 4694;4768;4721 sampleIDs=primary,metastasis1,metastasis2 123 | mut6 1 6 725;449;492 4275;4551;4508 sampleIDs=primary,metastasis1,metastasis2 124 | mut7 1 7 703;417;507 4297;4583;4493 sampleIDs=primary,metastasis1,metastasis2 125 | 126 | ``` 127 | 128 | (in the example of bulk file shown above, we have that for mut0 number of mutant and reference reads in the first sample are respectively 766 and 4234, in the second sample 511 and 4489 and in the third sample 688 and 4312). 129 | 130 | 131 | ### Output 132 | The program will generate two files in **OUT_DIR** folder (which is set by argument -o or --outDir). This folder will be created automatically if it does not exist. 133 | 134 | 135 | #### 1. Output Matrix File 136 | The output matrix is also a tab-delimited file having the same format as the input matrix, except that eliminated mutations (columns) are excluded (so, in case when mutation elimination is allowed, this matrix typically contains less columns than the input matrix). Output matrix represents genotypes-corrected matrix (where false positives and false negatives from the input are corrected and each of the missing entries set to 0 or 1). Suppose the input file is **INPUT_MATRIX.ext**, the output matrix will be stored in file **OUT_DIR/INPUT_MATRIX.CFMatrix**. 
For example: 137 | ``` 138 | input file: data/ALL2.SC 139 | output file: OUT_DIR/ALL2.CFMatrix 140 | ``` 141 | 142 | 143 | #### 2. Log File 144 | The log file contains various information about the particular run of PhISCS (e.g. eliminated mutations or likelihood value). The interpretation of the relevant reported entries in this file is self-evident. Suppose the input file is **INPUT_MATRIX.ext**, the log will be stored in file **OUT_DIR/INPUT_MATRIX.log**. For example: 145 | ``` 146 | input file: data/ALL2.SC 147 | log file: OUT_DIR/ALL2.log 148 | ``` 149 | 150 | 151 | ### Parameters 152 | | Parameter | Description | Default | Mandatory | 153 | |------------|--------------------------------------------------------------------------------------------|----------|----------------| 154 | | -SCFile | Path to single-cell data matrix file | - | :radio_button: | 155 | | -fn | Probability of false negative | - | :radio_button: | 156 | | -fp | Probability of false positive | - | :radio_button: | 157 | | -o | Output directory | current | :white_circle: | 158 | | -kmax | Max number of mutations to be eliminated | 0 | :white_circle: | 159 | | -threads | Number of threads (supported by PhISCS-I) | 1 | :white_circle: | 160 | | -bulkFile | Path to bulk data file | - | :white_circle: | 161 | | -delta | Delta parameter accounting for VAF variance | 0.20 | :white_circle: | 162 | | -time | Max time (in seconds) allowed for the computation | 24 hours | :white_circle: | 163 | | --drawTree | Draw output tree with Graphviz | - | :white_circle: | 164 | 165 | 166 | ## Example 167 | 168 | For running PhISCS without VAFs information and without ISA violations: 169 | ``` 170 | python PhISCS-I -SCFile example/input.SC -fn 0.2 -fp 0.0001 -o result/ 171 | ``` 172 | 173 | For running PhISCS without VAFs information but with ISA violations: 174 | ``` 175 | python PhISCS-I -SCFile example/input.SC -fn 0.2 -fp 0.0001 -o result/ -kmax 1 176 | ``` 177 | 178 | For running PhISCS with both VAFs information
and ISA violations (with time limit of 24 hours): 179 | ``` 180 | python PhISCS-I -SCFile example/input.SC -fn 0.2 -fp 0.0001 -o result/ -kmax 1 -bulkFile example/input.bulk -time 86400 181 | ``` 182 | 183 | For running PhISCS with VAFs information but no ISA violations (with drawing the output tree): 184 | ``` 185 | python PhISCS-I -SCFile example/input.SC -fn 0.2 -fp 0.0001 -o result/ -bulkFile example/input.bulk --drawTree 186 | ``` 187 | 188 | 189 | ## Contact 190 | If you have any questions please e-mail us at smalikic@sfu.ca or frashidi@iu.edu. 191 | -------------------------------------------------------------------------------- /example/input.SC: -------------------------------------------------------------------------------- 1 | cellID/mutID mut0 mut1 mut2 mut3 mut4 mut5 mut6 mut7 2 | cell0 0 0 ? 0 0 0 0 0 3 | cell1 0 ? 1 0 0 0 1 1 4 | cell2 0 0 1 0 0 0 1 1 5 | cell3 1 1 0 0 0 0 0 0 6 | cell4 0 0 1 0 0 0 0 0 7 | cell5 1 0 0 0 0 0 0 0 8 | cell6 0 0 1 0 0 0 1 1 9 | cell7 0 0 1 0 0 0 0 0 10 | cell8 ? 0 0 0 ? 0 ? 
1 11 | cell9 0 1 0 0 0 0 0 0 -------------------------------------------------------------------------------- /example/input.bulk: -------------------------------------------------------------------------------- 1 | ID Chromosome Position MutantCount ReferenceCount INFO 2 | mut0 1 0 766;511;688 4234;4489;4312 sampleIDs=primary,metastasis1,metastasis2 3 | mut1 1 1 719;479;719 4281;4521;4281 sampleIDs=primary,metastasis1,metastasis2 4 | mut2 1 2 1246;1094;859 3754;3906;4141 sampleIDs=primary,metastasis1,metastasis2 5 | mut3 1 3 298;226;272 4702;4774;4728 sampleIDs=primary,metastasis1,metastasis2 6 | mut4 1 4 353;227;255 4647;4773;4745 sampleIDs=primary,metastasis1,metastasis2 7 | mut5 1 5 306;232;279 4694;4768;4721 sampleIDs=primary,metastasis1,metastasis2 8 | mut6 1 6 725;449;492 4275;4551;4508 sampleIDs=primary,metastasis1,metastasis2 9 | mut7 1 7 703;417;507 4297;4583;4493 sampleIDs=primary,metastasis1,metastasis2 --------------------------------------------------------------------------------