├── .gitattributes ├── .gitignore ├── ControlFileGenerator ├── ControlFileGenerator.jar ├── README.md ├── dist │ ├── README.TXT │ └── lib │ │ └── AbsoluteLayout.jar └── src │ └── CFG │ └── ControlFileGenerator │ ├── BSTNode.java │ ├── ControlFileGeneratorUI.form │ ├── ControlFileGeneratorUI.java │ ├── InputPanel.java │ ├── MigBandsInput.java │ ├── ModifyTreePanel.java │ ├── NewickTree.java │ ├── SpringUtilities.java │ └── Validate.java ├── GPhoCS_Manual.pdf ├── Makefile ├── README.md ├── bin └── README.md ├── doc └── GPhoCS_Manual.odt ├── obj └── README.md ├── sample-control-file.ctl ├── seqs-sample.txt └── src ├── AlignmentMain.c ├── AlignmentProcessor.c ├── AlignmentProcessor.h ├── GPhoCS.c ├── GPhoCS.h ├── GenericTree.c ├── GenericTree.h ├── LocusDataLikelihood.c ├── LocusDataLikelihood.h ├── MCMCcontrol.c ├── MCMCcontrol.h ├── MultiCoreUtils.h ├── PopulationTree.c ├── PopulationTree.h ├── README.md ├── omp_stub.c ├── patch.c ├── patch.h ├── readTrace.c ├── utils.c └── utils.h /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Windows image file caches 2 | Thumbs.db 3 | ehthumbs.db 4 | 5 | # Folder config file 6 | Desktop.ini 7 | 8 | # Recycle Bin used on file shares 9 | $RECYCLE.BIN/ 10 | 11 | # Windows Installer files 12 | *.cab 13 | *.msi 14 | *.msm 15 | *.msp 16 | 17 | # Windows shortcuts 18 | 
*.lnk 19 | 20 | # ========================= 21 | # Operating System Files 22 | # ========================= 23 | 24 | # OSX 25 | # ========================= 26 | 27 | .DS_Store 28 | .AppleDouble 29 | .LSOverride 30 | 31 | # Thumbnails 32 | ._* 33 | 34 | # Files that might appear on external disk 35 | .Spotlight-V100 36 | .Trashes 37 | 38 | # Directories potentially created on remote AFP share 39 | .AppleDB 40 | .AppleDesktop 41 | Network Trash Folder 42 | Temporary Items 43 | .apdisk 44 | 45 | 46 | # =========================== 47 | # Eclipse Project Files 48 | # =========================== 49 | 50 | .metadata 51 | bin/ 52 | tmp/ 53 | *.tmp 54 | *.bak 55 | *.swp 56 | *~.nib 57 | local.properties 58 | .settings/ 59 | .loadpath 60 | .recommenders 61 | 62 | # Eclipse Core 63 | .project 64 | 65 | # External tool builders 66 | .externalToolBuilders/ 67 | 68 | # Locally stored "Eclipse launch configurations" 69 | *.launch 70 | 71 | # CDT-specific (C/C++ Development Tooling) 72 | .cproject 73 | 74 | # Code Recommenders 75 | .recommenders/ 76 | -------------------------------------------------------------------------------- /ControlFileGenerator/ControlFileGenerator.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gphocs-dev/G-PhoCS/b807eb360b9d23012924808e8b36058faced070b/ControlFileGenerator/ControlFileGenerator.jar -------------------------------------------------------------------------------- /ControlFileGenerator/README.md: -------------------------------------------------------------------------------- 1 | The ControlFileGenerator is a graphical utility for generating control files for your G-PhoCS analysis. 2 | 3 | The GUI consists of the following tabs: (follow them one by one to generate your control file) 4 | 5 | 1) General - set the general setup features of your analysis (i/o, print factors, modeling features, and MCMC finetune values). 
You may use the given default values for quick generation of a setup file 6 | 7 | 2) Tree - specify the population phylogeny using extended Newick format, associate each current population with a set of samples and each ancestral population with an initial split time value for sampling 8 | 9 | 3) Mig-Bands - specify ordered pairs of migration bands (optional) 10 | 11 | 4) Save - save your control file locally 12 | 13 | *) Load - Load the settings of a previously made control file into the GUI 14 | 15 | The ControlFileGenerator is written in Java and runs under JRE version 7 and up 16 | If you don't have it installed on your computer, you may download it from https://java.com/en/download/ 17 | 18 | To run the ControlFileGenerator just run the following from the command line (from the main G-PhoCS directory): 19 | ==> java -jar ControlFileGenerator/ControlFileGenerator.jar 20 | 21 | or click on the jar file from your file system explorer 22 | 23 | 24 | 25 | The ControlFileGenerator was written by Tal Bigel @ IDC Herzliya 26 | For comments and questions, please contact Ilan Gronau (ilan.gronau@idc.ac.il) 27 | -------------------------------------------------------------------------------- /ControlFileGenerator/dist/README.TXT: -------------------------------------------------------------------------------- 1 | ======================== 2 | BUILD OUTPUT DESCRIPTION 3 | ======================== 4 | 5 | When you build a Java application project that has a main class, the IDE 6 | automatically copies all of the JAR 7 | files on the project's classpath to your project's dist/lib folder. The IDE 8 | also adds each of the JAR files to the Class-Path element in the application 9 | JAR file's manifest file (MANIFEST.MF).
10 | 11 | To run the project from the command line, go to the dist folder and 12 | type the following: 13 | 14 | java -jar "ControlFileGenerator.jar" 15 | 16 | To distribute this project, zip up the dist folder (including the lib folder) 17 | and distribute the ZIP file. 18 | 19 | Notes: 20 | 21 | * If two JAR files on the project classpath have the same name, only the first 22 | JAR file is copied to the lib folder. 23 | * Only JAR files are copied to the lib folder. 24 | If the classpath contains other types of files or folders, these files (folders) 25 | are not copied. 26 | * If a library on the project's classpath also has a Class-Path element 27 | specified in the manifest, the content of the Class-Path element has to be on 28 | the project's runtime path. 29 | * To set a main class in a standard Java project, right-click the project node 30 | in the Projects window and choose Properties. Then click Run and enter the 31 | class name in the Main Class field. Alternatively, you can manually type the 32 | class name in the manifest Main-Class element.
33 | -------------------------------------------------------------------------------- /ControlFileGenerator/dist/lib/AbsoluteLayout.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gphocs-dev/G-PhoCS/b807eb360b9d23012924808e8b36058faced070b/ControlFileGenerator/dist/lib/AbsoluteLayout.jar -------------------------------------------------------------------------------- /ControlFileGenerator/src/CFG/ControlFileGenerator/BSTNode.java: -------------------------------------------------------------------------------- 1 | package CFG.ControlFileGenerator; 2 | 3 | 4 | public class BSTNode 5 | { 6 | String data; 7 | BSTNode parent; 8 | BSTNode left; 9 | BSTNode right; 10 | 11 | public BSTNode(String data) 12 | { 13 | this.data = data; 14 | this.left = null; 15 | this.right = null; 16 | this.parent = null; 17 | } 18 | 19 | public BSTNode(String data, BSTNode parent) { 20 | this.data = data; 21 | this.left = null; 22 | this.right = null; 23 | this.parent = parent; 24 | } 25 | 26 | public BSTNode(String data, BSTNode left, BSTNode right) { 27 | this.data = data; 28 | this.left = left; 29 | this.right = right; 30 | this.parent = parent; 31 | } 32 | 33 | public BSTNode(String data, BSTNode left, BSTNode right, BSTNode parent) { 34 | this.data = data; 35 | this.left = left; 36 | this.right = right; 37 | this.parent = parent; 38 | } 39 | 40 | public BSTNode() 41 | { 42 | } 43 | 44 | public void setChildren(BSTNode newLeft, BSTNode newRight) { 45 | this.left = newLeft; 46 | this.right = newRight; 47 | } 48 | 49 | public void setParent(BSTNode newParent) { 50 | this.parent = newParent; 51 | } 52 | } -------------------------------------------------------------------------------- /ControlFileGenerator/src/CFG/ControlFileGenerator/InputPanel.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 
3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package CFG.ControlFileGenerator; 7 | 8 | import java.awt.Color; 9 | import javax.swing.ButtonGroup; 10 | import javax.swing.JLabel; 11 | import javax.swing.JOptionPane; 12 | import javax.swing.JPanel; 13 | import javax.swing.JRadioButton; 14 | import javax.swing.JTextField; 15 | import javax.swing.SpringLayout; 16 | 17 | /** 18 | * 19 | * @author Tal 20 | */ 21 | public class InputPanel { 22 | 23 | public static String[] POP_START = {"POP-START", "", "", "", ""}; 24 | public static String[] POP_END = {"POP-END", "", "", "", ""}; 25 | public static String NAME = "name"; 26 | public static String SAMPLE = "samples"; 27 | public static String CHILDREN = "children"; 28 | public static String TAU_INITIAL = "tau-initial"; 29 | public static String HAPLOID = "haploid"; 30 | public static String DIPLOID = "diploid"; 31 | 32 | public static boolean insertCurrentLabel(BSTNode[] nodeArray, String[] inputArray, String[] hd) { 33 | 34 | boolean operationSuccess = false; 35 | int numOfFields = nodeArray.length; 36 | JTextField[] nameTextFields = new JTextField[numOfFields]; 37 | JTextField[] labelTextFields = new JTextField[numOfFields]; 38 | ButtonGroup[] haploidDiploidGroup = new ButtonGroup[numOfFields]; 39 | JRadioButton[] haploid = new JRadioButton[numOfFields]; 40 | JRadioButton[] diploid = new JRadioButton[numOfFields]; 41 | 42 | for (int i = 0; i < numOfFields; i++) { 43 | nameTextFields[i] = new JTextField(5); 44 | nameTextFields[i].setEnabled(false); 45 | nameTextFields[i].setText(nodeArray[i].data); 46 | nameTextFields[i].setDisabledTextColor(Color.black); 47 | labelTextFields[i] = new JTextField(5); 48 | labelTextFields[i].setText(inputArray[i]); 49 | haploidDiploidGroup[i] = new ButtonGroup(); 50 | haploid[i] = new JRadioButton(); 51 | diploid[i] = new JRadioButton(); 52 | haploidDiploidGroup[i].add(haploid[i]); 53 | 
haploidDiploidGroup[i].add(diploid[i]); 54 | haploid[i].setText(HAPLOID); 55 | diploid[i].setText(DIPLOID); 56 | } 57 | 58 | JPanel inputPanel = new JPanel(new SpringLayout()); 59 | for (int i = 0; i < numOfFields; i++) { 60 | for (int j = 0; j < 5; j++) { 61 | JLabel label = new JLabel(POP_START[j], JLabel.TRAILING); 62 | inputPanel.add(label); 63 | } 64 | 65 | JLabel label = new JLabel("", JLabel.TRAILING); 66 | inputPanel.add(label); 67 | label = new JLabel(NAME, JLabel.TRAILING); 68 | inputPanel.add(label); 69 | label.setLabelFor(nameTextFields[i]); 70 | inputPanel.add(nameTextFields[i]); 71 | label = new JLabel("", JLabel.TRAILING); 72 | inputPanel.add(label); 73 | label = new JLabel("", JLabel.TRAILING); 74 | inputPanel.add(label); 75 | 76 | label = new JLabel("", JLabel.TRAILING); 77 | inputPanel.add(label); 78 | label = new JLabel(SAMPLE, JLabel.TRAILING); 79 | inputPanel.add(label); 80 | label.setLabelFor(labelTextFields[i]); 81 | inputPanel.add(labelTextFields[i]); 82 | label.setLabelFor(haploid[i]); 83 | inputPanel.add(haploid[i]); 84 | label.setLabelFor(diploid[i]); 85 | inputPanel.add(diploid[i]); 86 | if ("h".equals(hd[i])) { 87 | haploid[i].setSelected(true); 88 | } else { 89 | diploid[i].setSelected(true); 90 | } 91 | 92 | for (int j = 0; j < 5; j++) { 93 | label = new JLabel(POP_END[j], JLabel.TRAILING); 94 | inputPanel.add(label); 95 | } 96 | } 97 | 98 | SpringUtilities.makeCompactGrid(inputPanel, 99 | numOfFields * 4, 5, //rows, cols 100 | 6, 6, //initX, initY 101 | 6, 6); //xPad, yPad 102 | int result = JOptionPane.showConfirmDialog(null, inputPanel, "Insert the Samples of the Current-POP", JOptionPane.OK_CANCEL_OPTION); 103 | 104 | boolean noEmptySamples = true; 105 | for (int i = 0; i < labelTextFields.length; i++) { 106 | if("".equals(labelTextFields[i].getText())) 107 | { 108 | noEmptySamples = false; 109 | break; 110 | } 111 | } 112 | 113 | boolean noIdenticalSamples = true; 114 | for (int i = 0; i < labelTextFields.length; i++) { 115 | for 
(int j = 0; j < labelTextFields.length; j++) { 116 | if (!("".equals(labelTextFields[i].getText()))) { 117 | if (i != j && labelTextFields[i].getText().equals(labelTextFields[j].getText())) { 118 | noIdenticalSamples = false; 119 | break; 120 | } 121 | } 122 | } 123 | } 124 | if (result == JOptionPane.OK_OPTION) { 125 | ControlFileGeneratorUI.samplesAll.clear(); 126 | if (!noIdenticalSamples) { 127 | JOptionPane.showMessageDialog(null, "Can't have two different pops with identical samples", "message", JOptionPane.ERROR_MESSAGE); 128 | operationSuccess = insertCurrentLabelAfterError(nodeArray, inputArray, hd, nameTextFields, labelTextFields, haploidDiploidGroup, haploid, diploid); 129 | } else if (!noEmptySamples) { 130 | JOptionPane.showMessageDialog(null, "Can't have empty samples", "message", JOptionPane.ERROR_MESSAGE); 131 | operationSuccess = insertCurrentLabelAfterError(nodeArray, inputArray, hd, nameTextFields, labelTextFields, haploidDiploidGroup, haploid, diploid); 132 | } else { 133 | for (int i = 0; i < labelTextFields.length; i++) { 134 | inputArray[i] = labelTextFields[i].getText(); 135 | if (diploid[i].isSelected()) { 136 | hd[i] = "d"; 137 | } else { 138 | hd[i] = "h"; 139 | } 140 | } 141 | 142 | if(CheckAllSamples(inputArray)) 143 | operationSuccess = true; 144 | else { 145 | JOptionPane.showMessageDialog(null, "Can't have two different pops with identical samples", "message", JOptionPane.ERROR_MESSAGE); 146 | operationSuccess = insertCurrentLabelAfterError(nodeArray, inputArray, hd, nameTextFields, labelTextFields, haploidDiploidGroup, haploid, diploid); 147 | } 148 | 149 | } 150 | } 151 | return operationSuccess; 152 | } 153 | 154 | 155 | 156 | public static boolean insertCurrentLabelAfterError(BSTNode[] nodeArray, String[] inputArray, String[] hd, JTextField[] nameTextFields, JTextField[] labelTextFields, 157 | ButtonGroup[] haploidDiploidGroup, JRadioButton[] haploid, JRadioButton[] diploid) { 158 | 159 | boolean operationSuccess = false; 160 | 
int numOfFields = nodeArray.length; 161 | 162 | JPanel inputPanel = new JPanel(new SpringLayout()); 163 | for (int i = 0; i < numOfFields; i++) { 164 | for (int j = 0; j < 5; j++) { 165 | JLabel label = new JLabel(POP_START[j], JLabel.TRAILING); 166 | inputPanel.add(label); 167 | } 168 | 169 | JLabel label = new JLabel("", JLabel.TRAILING); 170 | inputPanel.add(label); 171 | label = new JLabel(NAME, JLabel.TRAILING); 172 | inputPanel.add(label); 173 | label.setLabelFor(nameTextFields[i]); 174 | inputPanel.add(nameTextFields[i]); 175 | label = new JLabel("", JLabel.TRAILING); 176 | inputPanel.add(label); 177 | label = new JLabel("", JLabel.TRAILING); 178 | inputPanel.add(label); 179 | 180 | label = new JLabel("", JLabel.TRAILING); 181 | inputPanel.add(label); 182 | label = new JLabel(SAMPLE, JLabel.TRAILING); 183 | inputPanel.add(label); 184 | label.setLabelFor(labelTextFields[i]); 185 | inputPanel.add(labelTextFields[i]); 186 | label.setLabelFor(haploid[i]); 187 | inputPanel.add(haploid[i]); 188 | label.setLabelFor(diploid[i]); 189 | inputPanel.add(diploid[i]); 190 | if ("h".equals(hd[i])) { 191 | haploid[i].setSelected(true); 192 | } else { 193 | diploid[i].setSelected(true); 194 | } 195 | 196 | for (int j = 0; j < 5; j++) { 197 | label = new JLabel(POP_END[j], JLabel.TRAILING); 198 | inputPanel.add(label); 199 | } 200 | } 201 | 202 | SpringUtilities.makeCompactGrid(inputPanel, 203 | numOfFields * 4, 5, //rows, cols 204 | 6, 6, //initX, initY 205 | 6, 6); //xPad, yPad 206 | int result = JOptionPane.showConfirmDialog(null, inputPanel, "Insert the Samples of the Current-POP", JOptionPane.OK_CANCEL_OPTION); 207 | 208 | boolean noEmptySamples = true; 209 | for (int i = 0; i < labelTextFields.length; i++) { 210 | if("".equals(labelTextFields[i].getText())) 211 | { 212 | noEmptySamples = false; 213 | break; 214 | } 215 | } 216 | 217 | boolean noIdenticalSamples = true; 218 | for (int i = 0; i < labelTextFields.length; i++) { 219 | for (int j = 0; j < 
labelTextFields.length; j++) { 220 | if (!("".equals(labelTextFields[i].getText()))) { 221 | if (i != j && labelTextFields[i].getText().equals(labelTextFields[j].getText())) { 222 | noIdenticalSamples = false; 223 | break; 224 | } 225 | } 226 | } 227 | } 228 | if (result == JOptionPane.OK_OPTION) { 229 | ControlFileGeneratorUI.samplesAll.clear(); 230 | if (!noIdenticalSamples) { 231 | JOptionPane.showMessageDialog(null, "Can't have two different pops with identical samples", "message", JOptionPane.ERROR_MESSAGE); 232 | operationSuccess = insertCurrentLabelAfterError(nodeArray, inputArray, hd, nameTextFields, labelTextFields, haploidDiploidGroup, haploid, diploid); 233 | } else if (!noEmptySamples) { 234 | JOptionPane.showMessageDialog(null, "Can't have empty samples", "message", JOptionPane.ERROR_MESSAGE); 235 | operationSuccess = insertCurrentLabelAfterError(nodeArray, inputArray, hd, nameTextFields, labelTextFields, haploidDiploidGroup, haploid, diploid); 236 | } else { 237 | for (int i = 0; i < labelTextFields.length; i++) { 238 | inputArray[i] = labelTextFields[i].getText(); 239 | if (diploid[i].isSelected()) { 240 | hd[i] = "d"; 241 | } else { 242 | hd[i] = "h"; 243 | } 244 | } 245 | if(CheckAllSamples(inputArray)) 246 | operationSuccess = true; 247 | else { 248 | JOptionPane.showMessageDialog(null, "Can't have two different pops with identical samples", "message", JOptionPane.ERROR_MESSAGE); 249 | operationSuccess = insertCurrentLabelAfterError(nodeArray, inputArray, hd, nameTextFields, labelTextFields, haploidDiploidGroup, haploid, diploid); 250 | } 251 | } 252 | } 253 | return operationSuccess; 254 | } 255 | 256 | public static boolean CheckAllSamples(String[] inputArray) 257 | { 258 | for(int i = 0; i < inputArray.length; i++) 259 | { 260 | String[] splited = inputArray[i].split("\\s+"); 261 | for(int j = 0; j < splited.length; j++) 262 | { 263 | ControlFileGeneratorUI.samplesAll.add(splited[j]); 264 | } 265 | } 266 | 267 | // return true; 268 | 269 | 
boolean noDuplicate = true; 270 | String curSample; 271 | String[] allSamples = ControlFileGeneratorUI.samplesAll.toArray(new String[ControlFileGeneratorUI.samplesAll.size()]); 272 | for(int i = 0; i < allSamples.length; i++) 273 | { 274 | curSample = allSamples[i]; 275 | for(int j = 0; j < allSamples.length; j++) 276 | { 277 | if(j != i && curSample.equals(allSamples[j])) 278 | noDuplicate = false; 279 | } 280 | } 281 | 282 | return noDuplicate; 283 | } 284 | 285 | 286 | 287 | public static boolean insertAncestralTauInitial(BSTNode[] nodeArray, String[] inputArray) { 288 | 289 | boolean operationSuccess = false; 290 | int numOfFields = nodeArray.length; 291 | JTextField[] nameTextFields = new JTextField[numOfFields]; 292 | JTextField[] leftChildTextFields = new JTextField[numOfFields]; 293 | JTextField[] rightChildTextFields = new JTextField[numOfFields]; 294 | JTextField[] tauTextFields = new JTextField[numOfFields]; 295 | for (int i = 0; i < numOfFields; i++) { 296 | nameTextFields[i] = new JTextField(5); 297 | nameTextFields[i].setEnabled(false); 298 | nameTextFields[i].setText(nodeArray[i].data); 299 | nameTextFields[i].setDisabledTextColor(Color.black); 300 | leftChildTextFields[i] = new JTextField(5); 301 | leftChildTextFields[i].setEnabled(false); 302 | leftChildTextFields[i].setText(nodeArray[i].left.data); 303 | leftChildTextFields[i].setDisabledTextColor(Color.black); 304 | rightChildTextFields[i] = new JTextField(5); 305 | rightChildTextFields[i].setEnabled(false); 306 | rightChildTextFields[i].setText(nodeArray[i].right.data); 307 | rightChildTextFields[i].setDisabledTextColor(Color.black); 308 | tauTextFields[i] = new JTextField(5); 309 | tauTextFields[i].setText(inputArray[i]); 310 | } 311 | 312 | JPanel inputPanel = new JPanel(new SpringLayout()); 313 | for (int i = 0; i < numOfFields; i++) { 314 | for (int j = 0; j < 4; j++) { 315 | JLabel label = new JLabel(POP_START[j], JLabel.TRAILING); 316 | inputPanel.add(label); 317 | } 318 | 319 | JLabel 
label = new JLabel("", JLabel.TRAILING); 320 | inputPanel.add(label); 321 | label = new JLabel(NAME, JLabel.TRAILING); 322 | inputPanel.add(label); 323 | label.setLabelFor(nameTextFields[i]); 324 | inputPanel.add(nameTextFields[i]); 325 | label = new JLabel("", JLabel.TRAILING); 326 | inputPanel.add(label); 327 | 328 | label = new JLabel("", JLabel.TRAILING); 329 | inputPanel.add(label); 330 | label = new JLabel(CHILDREN, JLabel.TRAILING); 331 | inputPanel.add(label); 332 | label.setLabelFor(leftChildTextFields[i]); 333 | inputPanel.add(leftChildTextFields[i]); 334 | label.setLabelFor(rightChildTextFields[i]); 335 | inputPanel.add(rightChildTextFields[i]); 336 | 337 | label = new JLabel("", JLabel.TRAILING); 338 | inputPanel.add(label); 339 | label = new JLabel(TAU_INITIAL, JLabel.TRAILING); 340 | inputPanel.add(label); 341 | label.setLabelFor(tauTextFields[i]); 342 | inputPanel.add(tauTextFields[i]); 343 | label = new JLabel("", JLabel.TRAILING); 344 | inputPanel.add(label); 345 | 346 | for (int j = 0; j < 4; j++) { 347 | label = new JLabel(POP_END[j], JLabel.TRAILING); 348 | inputPanel.add(label); 349 | } 350 | } 351 | 352 | SpringUtilities.makeCompactGrid(inputPanel, 353 | numOfFields * 5, 4, //rows, cols 354 | 6, 6, //initX, initY 355 | 6, 6); //xPad, yPad 356 | 357 | int result = JOptionPane.showConfirmDialog(null, inputPanel, "Insert the Tau-Initial of the Ancestral-POPs", JOptionPane.OK_CANCEL_OPTION); 358 | if (result == JOptionPane.OK_OPTION) { 359 | boolean allTauCanBeParsed = true; 360 | for (int i = 0; i < tauTextFields.length; i++) { 361 | if (!Validate.validateDouble(tauTextFields[i].getText())) { 362 | allTauCanBeParsed = false; 363 | break; 364 | } 365 | } 366 | if (!allTauCanBeParsed) { 367 | JOptionPane.showMessageDialog(null, "all tau-initial must be a number!", "message", JOptionPane.ERROR_MESSAGE); 368 | operationSuccess = insertAncestralTauInitialAfterError(nodeArray, inputArray, nameTextFields, leftChildTextFields, rightChildTextFields, 
tauTextFields); 369 | } else { 370 | boolean tauIsLegal = true; 371 | for (int i = 0; i < tauTextFields.length; i++) { 372 | if (!checkIfTauSmallerThanParent(nodeArray[i], nodeArray, tauTextFields[i], tauTextFields)) { 373 | tauIsLegal = false; 374 | break; 375 | } 376 | } 377 | if (tauIsLegal) { 378 | for (int i = 0; i < tauTextFields.length; i++) { 379 | inputArray[i] = tauTextFields[i].getText(); 380 | } 381 | operationSuccess = true; 382 | } else { 383 | JOptionPane.showMessageDialog(null, "The tau-initial of a daughter population can't be larger than its parent!", "message", JOptionPane.ERROR_MESSAGE); 384 | operationSuccess = insertAncestralTauInitialAfterError(nodeArray, inputArray, nameTextFields, leftChildTextFields, rightChildTextFields, tauTextFields); 385 | } 386 | } 387 | } 388 | return operationSuccess; 389 | } 390 | 391 | 392 | 393 | 394 | public static boolean insertAncestralTauInitialAfterError(BSTNode[] nodeArray, String[] inputArray, JTextField[] nameTextFields, JTextField[] leftChildTextFields, 395 | JTextField[] rightChildTextFields, JTextField[] tauTextFields) { 396 | 397 | boolean operationSuccess = false; 398 | int numOfFields = nodeArray.length; 399 | 400 | JPanel inputPanel = new JPanel(new SpringLayout()); 401 | for (int i = 0; i < numOfFields; i++) { 402 | for (int j = 0; j < 4; j++) { 403 | JLabel label = new JLabel(POP_START[j], JLabel.TRAILING); 404 | inputPanel.add(label); 405 | } 406 | 407 | JLabel label = new JLabel("", JLabel.TRAILING); 408 | inputPanel.add(label); 409 | label = new JLabel(NAME, JLabel.TRAILING); 410 | inputPanel.add(label); 411 | label.setLabelFor(nameTextFields[i]); 412 | inputPanel.add(nameTextFields[i]); 413 | label = new JLabel("", JLabel.TRAILING); 414 | inputPanel.add(label); 415 | 416 | label = new JLabel("", JLabel.TRAILING); 417 | inputPanel.add(label); 418 | label = new JLabel(CHILDREN, JLabel.TRAILING); 419 | inputPanel.add(label); 420 | label.setLabelFor(leftChildTextFields[i]); 421 | 
inputPanel.add(leftChildTextFields[i]); 422 | label.setLabelFor(rightChildTextFields[i]); 423 | inputPanel.add(rightChildTextFields[i]); 424 | 425 | label = new JLabel("", JLabel.TRAILING); 426 | inputPanel.add(label); 427 | label = new JLabel(TAU_INITIAL, JLabel.TRAILING); 428 | inputPanel.add(label); 429 | label.setLabelFor(tauTextFields[i]); 430 | inputPanel.add(tauTextFields[i]); 431 | label = new JLabel("", JLabel.TRAILING); 432 | inputPanel.add(label); 433 | 434 | for (int j = 0; j < 4; j++) { 435 | label = new JLabel(POP_END[j], JLabel.TRAILING); 436 | inputPanel.add(label); 437 | } 438 | } 439 | 440 | SpringUtilities.makeCompactGrid(inputPanel, 441 | numOfFields * 5, 4, //rows, cols 442 | 6, 6, //initX, initY 443 | 6, 6); //xPad, yPad 444 | 445 | int result = JOptionPane.showConfirmDialog(null, inputPanel, "Insert the Tau-Initial of the Ancestral-POPs", JOptionPane.OK_CANCEL_OPTION); 446 | if (result == JOptionPane.OK_OPTION) { 447 | boolean allTauCanBeParsed = true; 448 | for (int i = 0; i < tauTextFields.length; i++) { 449 | if (!Validate.validateDouble(tauTextFields[i].getText())) { 450 | allTauCanBeParsed = false; 451 | break; 452 | } 453 | } 454 | if (!allTauCanBeParsed) { 455 | JOptionPane.showMessageDialog(null, "all tau-initial must be a number!", "message", JOptionPane.ERROR_MESSAGE); 456 | operationSuccess = insertAncestralTauInitialAfterError(nodeArray, inputArray, nameTextFields, leftChildTextFields, rightChildTextFields, tauTextFields); 457 | } else { 458 | boolean tauIsLegal = true; 459 | for (int i = 0; i < tauTextFields.length; i++) { 460 | if (!checkIfTauSmallerThanParent(nodeArray[i], nodeArray, tauTextFields[i], tauTextFields)) { 461 | tauIsLegal = false; 462 | break; 463 | } 464 | } 465 | if (tauIsLegal) { 466 | for (int i = 0; i < tauTextFields.length; i++) { 467 | inputArray[i] = tauTextFields[i].getText(); 468 | } 469 | operationSuccess = true; 470 | } else { 471 | JOptionPane.showMessageDialog(null, "The tau-initial of a daughter 
population can't be larger than its parent!", "message", JOptionPane.ERROR_MESSAGE); 472 | operationSuccess = insertAncestralTauInitialAfterError(nodeArray, inputArray, nameTextFields, leftChildTextFields, rightChildTextFields, tauTextFields); 473 | } 474 | } 475 | } 476 | return operationSuccess; 477 | } 478 | 479 | private static boolean checkIfTauSmallerThanParent(BSTNode node, BSTNode[] nodeArray, JTextField currentTauField, JTextField[] tau) { 480 | if (node.parent != null && !("".equals(currentTauField.getText()))) { 481 | for (int i = 0; i < nodeArray.length; i++) { 482 | if (node.parent.equals(nodeArray[i])) { 483 | if (!("".equals(tau[i].getText()))) { 484 | double currentTau = Double.parseDouble(currentTauField.getText()); 485 | double parentTau = Double.parseDouble(tau[i].getText()); 486 | if (currentTau >= parentTau) { 487 | return false; 488 | } 489 | } 490 | } 491 | } 492 | } 493 | return true; 494 | } 495 | 496 | } 497 | -------------------------------------------------------------------------------- /ControlFileGenerator/src/CFG/ControlFileGenerator/MigBandsInput.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | package CFG.ControlFileGenerator; 7 | 8 | import java.awt.Color; 9 | import javax.swing.JCheckBox; 10 | import javax.swing.JLabel; 11 | import javax.swing.JOptionPane; 12 | import javax.swing.JPanel; 13 | import javax.swing.JTextField; 14 | import javax.swing.SpringLayout; 15 | 16 | /** 17 | * 18 | * @author Tal 19 | */ 20 | public class MigBandsInput { 21 | 22 | final static String SOURCE = "source"; 23 | final static String TARGET = "target"; 24 | final static int NUM_OF_FIELDS = 2; 25 | 26 | 27 | public static void AddNewMigBand(BSTNode[] currentPopArray, BSTNode[] ancestralPopArray, String[] migBandNewInput, String sourceMigBand, String targetMigBand) { 28 | 29 | if (sourceMigBand.equals(targetMigBand)) { 30 | JOptionPane.showMessageDialog(null, "source and target can't be the same population", "message", JOptionPane.ERROR_MESSAGE); 31 | } else if (sourceTargetPairingExists(sourceMigBand, targetMigBand)){ 32 | JOptionPane.showMessageDialog(null, "A migration band from " + sourceMigBand + " to " + targetMigBand + " already exists" , "message", JOptionPane.ERROR_MESSAGE); 33 | } else if (!popIsDaughterOfAncestral(sourceMigBand, targetMigBand, currentPopArray, ancestralPopArray)) { 34 | JOptionPane.showMessageDialog(null, "the chosen populations can't be a daughter and an ancestral", "message", JOptionPane.ERROR_MESSAGE); 35 | } else { 36 | migBandNewInput[0] = sourceMigBand; 37 | migBandNewInput[1] = targetMigBand; 38 | } 39 | } 40 | 41 | private static boolean sourceTargetPairingExists(String source, String target) { 42 | int size = ControlFileGeneratorUI.migBandsSourceList.size(); 43 | for(int i = 0; i < size; i++) { 44 | if(ControlFileGeneratorUI.migBandsSourceList.get(i).equals(source)) { 45 | if(ControlFileGeneratorUI.migBandsTargetList.get(i).equals(target)) 46 | return true; 47 | } 48 | } 49 | return false; 50 | } 51 | 52 | public static boolean popIsDaughterOfAncestral(String source, String target, BSTNode[] currentPopArray, BSTNode[] 
ancestralPopArray) { 53 | int indexCurSource = -1; 54 | int indexCurTarget = -1; 55 | int indexAncSource = -1; 56 | int indexAncTarget = -1; 57 | for(int i = 0; i < currentPopArray.length; i++) { 58 | if(source.equals(currentPopArray[i].data)) 59 | indexCurSource = i; 60 | if(target.equals(currentPopArray[i].data)) 61 | indexCurTarget = i; 62 | } 63 | for(int i = 0; i < ancestralPopArray.length; i++) { 64 | if(source.equals(ancestralPopArray[i].data)) 65 | indexAncSource = i; 66 | if(target.equals(ancestralPopArray[i].data)) 67 | indexAncTarget = i; 68 | } 69 | if(indexCurSource != -1 && indexCurTarget != -1) 70 | //both are current pop 71 | return true; 72 | else if(indexCurSource != -1 && indexAncTarget != -1) 73 | return checkRelations(indexCurSource, indexAncTarget, currentPopArray, ancestralPopArray); 74 | else if(indexAncSource != -1 && indexCurTarget != -1) 75 | return checkRelations(indexAncSource, indexCurTarget, ancestralPopArray, currentPopArray); 76 | else if(indexAncSource != -1 && indexAncTarget != -1) 77 | return checkRelations(indexAncSource, indexAncTarget, ancestralPopArray, ancestralPopArray); 78 | else 79 | return false; 80 | } 81 | 82 | private static boolean checkRelations(int indexSource, int indexTarget, BSTNode[] source, BSTNode[] target) { 83 | String sourceData = source[indexSource].data; 84 | String targetData = target[indexTarget].data; 85 | BSTNode sourceTraversal = source[indexSource]; 86 | BSTNode targetTraversal = target[indexTarget]; 87 | while(sourceTraversal != null) { 88 | if(targetData.equals(sourceTraversal.data)) 89 | return false; 90 | sourceTraversal = sourceTraversal.parent; 91 | } 92 | while(targetTraversal != null) { 93 | if(sourceData.equals(targetTraversal.data)) 94 | return false; 95 | targetTraversal = targetTraversal.parent; 96 | } 97 | return true; 98 | } 99 | 100 | public static void DeleteMigBands(int migBandsCounter) { 101 | JLabel label = new JLabel("", JLabel.TRAILING); 102 | JTextField[] sourceTextFields = 
new JTextField[migBandsCounter]; 103 | JTextField[] targetTextFields = new JTextField[migBandsCounter]; 104 | JCheckBox[] markToDeleteButtons = new JCheckBox[migBandsCounter]; 105 | String[] newMigBandsSource = new String[migBandsCounter]; 106 | String[] newMigBandsTarget = new String[migBandsCounter]; 107 | 108 | for (int i = 0; i < migBandsCounter; i++) { 109 | sourceTextFields[i] = new JTextField(5); 110 | sourceTextFields[i].setEnabled(false); 111 | sourceTextFields[i].setText(ControlFileGeneratorUI.migBandsSourceList.get(i)); 112 | sourceTextFields[i].setDisabledTextColor(Color.black); 113 | targetTextFields[i] = new JTextField(5); 114 | targetTextFields[i].setEnabled(false); 115 | targetTextFields[i].setText(ControlFileGeneratorUI.migBandsTargetList.get(i)); 116 | targetTextFields[i].setDisabledTextColor(Color.black); 117 | markToDeleteButtons[i] = new JCheckBox(); 118 | markToDeleteButtons[i].setSelected(false); 119 | 120 | newMigBandsSource[i] = ""; 121 | newMigBandsTarget[i] = ""; 122 | } 123 | 124 | JPanel modifyMigBandPanel = new JPanel(new SpringLayout()); 125 | 126 | for (int i = 0; i < migBandsCounter; i++) { 127 | label.setLabelFor(markToDeleteButtons[i]); 128 | modifyMigBandPanel.add(markToDeleteButtons[i]); 129 | label = new JLabel(SOURCE, JLabel.TRAILING); 130 | modifyMigBandPanel.add(label); 131 | label.setLabelFor(sourceTextFields[i]); 132 | modifyMigBandPanel.add(sourceTextFields[i]); 133 | label = new JLabel(TARGET, JLabel.TRAILING); 134 | modifyMigBandPanel.add(label); 135 | label.setLabelFor(targetTextFields[i]); 136 | modifyMigBandPanel.add(targetTextFields[i]); 137 | } 138 | 139 | SpringUtilities.makeCompactGrid(modifyMigBandPanel, 140 | migBandsCounter, 5, //rows, cols 141 | 6, 6, //initX, initY 142 | 6, 6); //xPad, yPad 143 | 144 | int result = JOptionPane.showConfirmDialog(null, modifyMigBandPanel, "Mark the Migration-Bands you wish to delete", JOptionPane.OK_CANCEL_OPTION); 145 | 146 | if (result == JOptionPane.OK_OPTION) { 147 | 
for(int i = 0; i < migBandsCounter; i++) { 148 | if(!markToDeleteButtons[i].isSelected()) { 149 | newMigBandsSource[i] = sourceTextFields[i].getText(); 150 | newMigBandsTarget[i] = targetTextFields[i].getText(); 151 | } 152 | } 153 | ControlFileGeneratorUI.migBandsSourceList.clear(); 154 | ControlFileGeneratorUI.migBandsTargetList.clear(); 155 | 156 | for(int i = 0; i < migBandsCounter; i++) { 157 | if(!"".equals(newMigBandsSource[i])) { 158 | ControlFileGeneratorUI.migBandsSourceList.add(newMigBandsSource[i]); 159 | ControlFileGeneratorUI.migBandsTargetList.add(newMigBandsTarget[i]); 160 | } 161 | } 162 | } 163 | } 164 | } -------------------------------------------------------------------------------- /ControlFileGenerator/src/CFG/ControlFileGenerator/ModifyTreePanel.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | package CFG.ControlFileGenerator; 7 | 8 | import javax.swing.JLabel; 9 | import javax.swing.JOptionPane; 10 | import javax.swing.JPanel; 11 | import javax.swing.JTextField; 12 | import javax.swing.SpringLayout; 13 | 14 | /** 15 | * 16 | * @author Tal 17 | */ 18 | public class ModifyTreePanel { 19 | 20 | public static void modifyCurrentLabel(BSTNode[] nodeArray, String[] inputArray) { 21 | 22 | int numOfFields = nodeArray.length; 23 | JTextField[] textFields = new JTextField[numOfFields]; 24 | String[] labels = new String[numOfFields]; 25 | for (int i = 0; i < numOfFields; i++) { 26 | textFields[i] = new JTextField(5); 27 | textFields[i].setText(inputArray[i]); 28 | labels[i] = "Insert the sample of Current POP " + nodeArray[i].data + ":"; 29 | } 30 | 31 | JPanel inputPanel = new JPanel(new SpringLayout()); 32 | for (int i = 0; i < numOfFields; i++) { 33 | JLabel label = new JLabel(labels[i], JLabel.TRAILING); 34 | inputPanel.add(label); 35 | label.setLabelFor(textFields[i]); 36 | inputPanel.add(textFields[i]); 37 | } 38 | 39 | SpringUtilities.makeCompactGrid(inputPanel, 40 | numOfFields, 2, //rows, cols 41 | 6, 6, //initX, initY 42 | 6, 6); //xPad, yPad 43 | int result = JOptionPane.showConfirmDialog(null, inputPanel,"Insert the Samples of the Current-POP", JOptionPane.OK_CANCEL_OPTION); 44 | if (result == JOptionPane.OK_OPTION) { 45 | for (int i = 0; i < textFields.length; i++) { 46 | inputArray[i] = textFields[i].getText(); 47 | } 48 | } 49 | 50 | } 51 | 52 | public static void modifyAncestralTauInitial(BSTNode[] nodeArray, String[] inputArray) { 53 | 54 | int numOfFields = nodeArray.length; 55 | JTextField[] textFields = new JTextField[numOfFields]; 56 | String[] labels = new String[numOfFields]; 57 | String[] childrenLabels = new String[numOfFields]; 58 | for (int i = 0; i < numOfFields; i++) { 59 | textFields[i] = new JTextField(5); 60 | textFields[i].setText(inputArray[i]); 61 | labels[i] = "Insert the Tau-Initial of Ancestral-POP " + 
nodeArray[i].data + ":"; 62 | // childrenLabels[i] = "Daughters of Ancestral-POP: " + nodeArray[i].left.data + " , " + nodeArray[i].right.data; 63 | } 64 | 65 | JPanel inputPanel = new JPanel(new SpringLayout()); 66 | for (int i = 0; i < numOfFields; i++) { 67 | JLabel label = new JLabel(labels[i], JLabel.TRAILING); 68 | inputPanel.add(label); 69 | label.setLabelFor(textFields[i]); 70 | inputPanel.add(textFields[i]); 71 | } 72 | 73 | SpringUtilities.makeCompactGrid(inputPanel, 74 | numOfFields, 2, //rows, cols 75 | 6, 6, //initX, initY 76 | 6, 6); //xPad, yPad 77 | int result = JOptionPane.showConfirmDialog(null, inputPanel,"Insert the Tau-Initial of the Ancestral-POPs", JOptionPane.OK_CANCEL_OPTION); 78 | if (result == JOptionPane.OK_OPTION) { 79 | for (int i = 0; i < textFields.length; i++) { 80 | inputArray[i] = textFields[i].getText(); 81 | } 82 | } 83 | 84 | } 85 | 86 | } 87 | -------------------------------------------------------------------------------- /ControlFileGenerator/src/CFG/ControlFileGenerator/NewickTree.java: -------------------------------------------------------------------------------- 1 | package CFG.ControlFileGenerator; 2 | 3 | import java.util.LinkedList; 4 | 5 | public class NewickTree { 6 | 7 | BSTNode[] childArray; 8 | BSTNode[] parentArray; 9 | int childArrayLen; 10 | int parentArrayLen; 11 | 12 | public NewickTree(BSTNode[] children, BSTNode[] parents) { 13 | childArrayLen = children.length; 14 | parentArrayLen = parents.length; 15 | childArray = new BSTNode[childArrayLen]; 16 | parentArray = new BSTNode[parentArrayLen]; 17 | for(int i = 0; i < childArray.length; i++) { 18 | childArray[i] = new BSTNode(children[i].data, children[i].parent); 19 | } 20 | for(int i = 0; i < parentArray.length; i++) { 21 | parentArray[i] = new BSTNode(parents[i].data, parents[i].left, parents[i].right); 22 | } 23 | 24 | for(int i = 0; i < parentArray.length; i++) { 25 | for(int j = 0; j < parentArray.length; j++) { 26 | 
if(parentArray[i].data.equals(parentArray[j].left.data)) { 27 | parentArray[i].setParent(parentArray[j]); 28 | } 29 | if(parentArray[i].data.equals(parentArray[j].left.data)) { 30 | parentArray[i].setParent(parentArray[j]); 31 | } 32 | } 33 | } 34 | } 35 | public NewickTree(String s) { 36 | if (s.length() == 0) { 37 | return; 38 | } 39 | BSTNode n = recurs(s); 40 | LinkedList childList = new LinkedList(); 41 | LinkedList parentList = new LinkedList(); 42 | addToList(n, childList, parentList); 43 | childArrayLen = childList.size(); 44 | parentArrayLen = parentList.size(); 45 | childArray = new BSTNode[childArrayLen]; 46 | parentArray = new BSTNode[parentArrayLen]; 47 | childList.toArray(childArray); 48 | parentList.toArray(parentArray); 49 | } 50 | 51 | 52 | //The start of the recurssion 53 | private static BSTNode recurs(String s) { 54 | int len = s.length(); 55 | BSTNode root = new BSTNode(getCurrentString(s)); 56 | recursion(s.substring(0, len - root.data.length()), root); 57 | return root; 58 | } 59 | 60 | //The recurssion method 61 | private static void recursion(String s, BSTNode n) { 62 | if (s.length() < 5) { 63 | return; 64 | } 65 | int len = s.length(); 66 | int counter = 0; 67 | //counts the '(' and ')' and takes the ',' which is only inside the outer brackets 68 | for (int i = 0; i < len; i++) { 69 | if (s.charAt(i) == '(') { 70 | counter++; 71 | } else if (s.charAt(i) == ')') { 72 | counter--; 73 | } else if (s.charAt(i) == ',' && counter == 1) { 74 | //taking the left substring to the ',' and returning the node with the last char 75 | BSTNode left = new BSTNode(createNode(s.substring(1, i))); 76 | if (left.data == null) 77 | break; 78 | //taking the right substring to the ',' and returning the node with the last char 79 | BSTNode right = new BSTNode(createNode(s.substring(i + 1, len - 1))); 80 | if (right.data == null) 81 | break; 82 | //arranging the nodes 83 | n.left = left; 84 | n.right = right; 85 | left.parent = n; 86 | right.parent = n; 87 | if ( 
i >= len - 2) 88 | continue; 89 | //recursion left and right 90 | recursion(s.substring(1, i - left.data.length()), left); 91 | recursion(s.substring(i + 1, len - right.data.length() - 1), right); 92 | } 93 | } 94 | } 95 | 96 | //A private function which recieves a string and returns the last char of it 97 | 98 | private static String createNode(String s) { 99 | if (s.length() == 0) 100 | return null; 101 | String value = getCurrentString(s); 102 | return value; 103 | } 104 | 105 | private static String getCurrentString(String s) { 106 | StringBuilder sb = new StringBuilder(); 107 | int len = s.length(); 108 | for(int i = len - 1; i >= 0; i--) { 109 | char cur = s.charAt(i); 110 | if (cur == '(' || cur == ')' || cur == ',') 111 | break; 112 | sb.append(s.charAt(i)); 113 | } 114 | return sb.reverse().toString(); 115 | } 116 | 117 | //receives the two lists and puts accordingly to the node (if it has children or not) in the appropriate list 118 | private static void addToList(BSTNode node, LinkedList childLst, LinkedList parentLst) { 119 | if (node != null) { 120 | addToList(node.left, childLst, parentLst); 121 | addToList(node.right, childLst, parentLst); 122 | if (node.left == null) { 123 | childLst.add(node); 124 | } else { 125 | parentLst.add(node); 126 | } 127 | } 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /ControlFileGenerator/src/CFG/ControlFileGenerator/SpringUtilities.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 1995, 2008, Oracle and/or its affiliates. All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 
10 | * 11 | * - Redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution. 14 | * 15 | * - Neither the name of Oracle or the names of its 16 | * contributors may be used to endorse or promote products derived 17 | * from this software without specific prior written permission. 18 | * 19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 20 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 21 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 23 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 26 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 27 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 28 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | 32 | package CFG.ControlFileGenerator; 33 | 34 | import javax.swing.*; 35 | import javax.swing.SpringLayout; 36 | import java.awt.*; 37 | 38 | /** 39 | * A 1.4 file that provides utility methods for 40 | * creating form- or grid-style layouts with SpringLayout. 41 | * These utilities are used by several programs, such as 42 | * SpringBox and SpringCompactGrid. 43 | */ 44 | public class SpringUtilities { 45 | /** 46 | * A debugging utility that prints to stdout the component's 47 | * minimum, preferred, and maximum sizes. 
48 | */ 49 | public static void printSizes(Component c) { 50 | System.out.println("minimumSize = " + c.getMinimumSize()); 51 | System.out.println("preferredSize = " + c.getPreferredSize()); 52 | System.out.println("maximumSize = " + c.getMaximumSize()); 53 | } 54 | 55 | /** 56 | * Aligns the first rows * cols 57 | * components of parent in 58 | * a grid. Each component is as big as the maximum 59 | * preferred width and height of the components. 60 | * The parent is made just big enough to fit them all. 61 | * 62 | * @param rows number of rows 63 | * @param cols number of columns 64 | * @param initialX x location to start the grid at 65 | * @param initialY y location to start the grid at 66 | * @param xPad x padding between cells 67 | * @param yPad y padding between cells 68 | */ 69 | public static void makeGrid(Container parent, 70 | int rows, int cols, 71 | int initialX, int initialY, 72 | int xPad, int yPad) { 73 | SpringLayout layout; 74 | try { 75 | layout = (SpringLayout)parent.getLayout(); 76 | } catch (ClassCastException exc) { 77 | System.err.println("The first argument to makeGrid must use SpringLayout."); 78 | return; 79 | } 80 | 81 | Spring xPadSpring = Spring.constant(xPad); 82 | Spring yPadSpring = Spring.constant(yPad); 83 | Spring initialXSpring = Spring.constant(initialX); 84 | Spring initialYSpring = Spring.constant(initialY); 85 | int max = rows * cols; 86 | 87 | //Calculate Springs that are the max of the width/height so that all 88 | //cells have the same size. 89 | Spring maxWidthSpring = layout.getConstraints(parent.getComponent(0)). 90 | getWidth(); 91 | Spring maxHeightSpring = layout.getConstraints(parent.getComponent(0)). 
92 | getHeight(); 93 | for (int i = 1; i < max; i++) { 94 | SpringLayout.Constraints cons = layout.getConstraints( 95 | parent.getComponent(i)); 96 | 97 | maxWidthSpring = Spring.max(maxWidthSpring, cons.getWidth()); 98 | maxHeightSpring = Spring.max(maxHeightSpring, cons.getHeight()); 99 | } 100 | 101 | //Apply the new width/height Spring. This forces all the 102 | //components to have the same size. 103 | for (int i = 0; i < max; i++) { 104 | SpringLayout.Constraints cons = layout.getConstraints( 105 | parent.getComponent(i)); 106 | 107 | cons.setWidth(maxWidthSpring); 108 | cons.setHeight(maxHeightSpring); 109 | } 110 | 111 | //Then adjust the x/y constraints of all the cells so that they 112 | //are aligned in a grid. 113 | SpringLayout.Constraints lastCons = null; 114 | SpringLayout.Constraints lastRowCons = null; 115 | for (int i = 0; i < max; i++) { 116 | SpringLayout.Constraints cons = layout.getConstraints( 117 | parent.getComponent(i)); 118 | if (i % cols == 0) { //start of new row 119 | lastRowCons = lastCons; 120 | cons.setX(initialXSpring); 121 | } else { //x position depends on previous component 122 | cons.setX(Spring.sum(lastCons.getConstraint(SpringLayout.EAST), 123 | xPadSpring)); 124 | } 125 | 126 | if (i / cols == 0) { //first row 127 | cons.setY(initialYSpring); 128 | } else { //y position depends on previous row 129 | cons.setY(Spring.sum(lastRowCons.getConstraint(SpringLayout.SOUTH), 130 | yPadSpring)); 131 | } 132 | lastCons = cons; 133 | } 134 | 135 | //Set the parent's size. 136 | SpringLayout.Constraints pCons = layout.getConstraints(parent); 137 | pCons.setConstraint(SpringLayout.SOUTH, 138 | Spring.sum( 139 | Spring.constant(yPad), 140 | lastCons.getConstraint(SpringLayout.SOUTH))); 141 | pCons.setConstraint(SpringLayout.EAST, 142 | Spring.sum( 143 | Spring.constant(xPad), 144 | lastCons.getConstraint(SpringLayout.EAST))); 145 | } 146 | 147 | /* Used by makeCompactGrid. 
*/ 148 | private static SpringLayout.Constraints getConstraintsForCell( 149 | int row, int col, 150 | Container parent, 151 | int cols) { 152 | SpringLayout layout = (SpringLayout) parent.getLayout(); 153 | Component c = parent.getComponent(row * cols + col); 154 | return layout.getConstraints(c); 155 | } 156 | 157 | /** 158 | * Aligns the first rows * cols 159 | * components of parent in 160 | * a grid. Each component in a column is as wide as the maximum 161 | * preferred width of the components in that column; 162 | * height is similarly determined for each row. 163 | * The parent is made just big enough to fit them all. 164 | * 165 | * @param rows number of rows 166 | * @param cols number of columns 167 | * @param initialX x location to start the grid at 168 | * @param initialY y location to start the grid at 169 | * @param xPad x padding between cells 170 | * @param yPad y padding between cells 171 | */ 172 | public static void makeCompactGrid(Container parent, 173 | int rows, int cols, 174 | int initialX, int initialY, 175 | int xPad, int yPad) { 176 | SpringLayout layout; 177 | try { 178 | layout = (SpringLayout)parent.getLayout(); 179 | } catch (ClassCastException exc) { 180 | System.err.println("The first argument to makeCompactGrid must use SpringLayout."); 181 | return; 182 | } 183 | 184 | //Align all cells in each column and make them the same width. 185 | Spring x = Spring.constant(initialX); 186 | for (int c = 0; c < cols; c++) { 187 | Spring width = Spring.constant(0); 188 | for (int r = 0; r < rows; r++) { 189 | width = Spring.max(width, 190 | getConstraintsForCell(r, c, parent, cols). 
191 | getWidth()); 192 | } 193 | for (int r = 0; r < rows; r++) { 194 | SpringLayout.Constraints constraints = 195 | getConstraintsForCell(r, c, parent, cols); 196 | constraints.setX(x); 197 | constraints.setWidth(width); 198 | } 199 | x = Spring.sum(x, Spring.sum(width, Spring.constant(xPad))); 200 | } 201 | 202 | //Align all cells in each row and make them the same height. 203 | Spring y = Spring.constant(initialY); 204 | for (int r = 0; r < rows; r++) { 205 | Spring height = Spring.constant(0); 206 | for (int c = 0; c < cols; c++) { 207 | height = Spring.max(height, 208 | getConstraintsForCell(r, c, parent, cols). 209 | getHeight()); 210 | } 211 | for (int c = 0; c < cols; c++) { 212 | SpringLayout.Constraints constraints = 213 | getConstraintsForCell(r, c, parent, cols); 214 | constraints.setY(y); 215 | constraints.setHeight(height); 216 | } 217 | y = Spring.sum(y, Spring.sum(height, Spring.constant(yPad))); 218 | } 219 | 220 | //Set the parent's size. 221 | SpringLayout.Constraints pCons = layout.getConstraints(parent); 222 | pCons.setConstraint(SpringLayout.SOUTH, y); 223 | pCons.setConstraint(SpringLayout.EAST, x); 224 | } 225 | } -------------------------------------------------------------------------------- /ControlFileGenerator/src/CFG/ControlFileGenerator/Validate.java: --------------------------------------------------------------------------------
1 | /*
2 |  * Input-validation helpers.
3 |  * NOTE: the return conventions differ between methods: validateInt and
4 |  * validateDouble return true for ACCEPTABLE input, whereas validateTreeInput
5 |  * and validateNewickTree return true when a PROBLEM was found.
6 |  */
7 | package CFG.ControlFileGenerator;
8 | 
9 | /**
10 |  *
11 |  * @author Tal
12 |  */
13 | public class Validate {
14 | 
15 |     // Returns true when input is empty or consists only of characters accepted
16 |     // by Character.isDigit; a sign, a dot or any other character yields false.
17 |     public static boolean validateInt(String input) {
18 |         if("".equals(input))
19 |             return true;
20 |         int len = input.length();
21 |         for (int i = 0; i < len; i++) {
22 |             if(!Character.isDigit(input.charAt(i))) {
23 |                 return false;
24 |             }
25 |         }
26 |         return true;
27 |     }
28 | 
29 |     // Returns true when input is empty or made of digits with at most one '.'
30 |     // (so "5." and ".5" pass); a lone "." or any other character yields false.
31 |     public static boolean validateDouble(String input) {
32 |         if("".equals(input))
33 |             return true;
34 |         if(".".equals(input))
35 |             return false;
36 |         int len = input.length();
37 |         int dotCounter = 0;
38 |         for (int i = 0; i < len; i++) {
39 |             if(!Character.isDigit(input.charAt(i))) {
40 |                 if (input.charAt(i) != '.') {
41 |                     return false;
42 |                 } else {
43 |                     // only the first dot is tolerated
44 |                     if (dotCounter == 0) {
45 |                         dotCounter++;
46 |                     }
47 |                     else {
48 |                         return false;
49 |                     }
50 |                 }
51 |             }
52 |         }
53 |         return true;
54 |     }
55 | 
56 |     // Structural check of the tree string. Returns TRUE when the input is
57 |     // INVALID and false when no problem was detected; an empty string also
58 |     // returns false.
59 |     public static boolean validateTreeInput(String treeInput) {
60 |         int len = treeInput.length();
61 |         if (len == 0) {
62 |             return false;
63 |         }
64 |         int counterOpen = 0;
65 |         int counterClose = 0;
66 |         int counterComma = 0;
67 |         char curSign;
68 |         char lastSign = treeInput.charAt(len - 1);
69 |         if (lastSign == '(' || lastSign == ')' || lastSign == ',') {
70 |             // Invalid: the string must end with a name, not a bracket or a comma
71 |             return true;
72 |         }
73 |         for (int i = 0; i < len; i++) {
74 |             curSign = treeInput.charAt(i);
75 |             if (curSign == '(') {
76 |                 counterOpen++;
77 |             } else if (curSign == ')') {
78 |                 counterClose++;
79 |             } else if (curSign == ',') {
80 |                 counterComma++;
81 |             }
82 | 
83 |             // Invalid: a closing bracket appears before its opening bracket
84 |             if (counterClose > counterOpen) {
85 |                 return true;
86 |             }
87 |         }
88 |         // Invalid: the numbers of opening and closing brackets differ
89 |         if (counterOpen != counterClose) {
90 |             return true;
91 |         // Invalid: the number of commas differs from the number of bracket pairs
92 |         } else if (counterOpen != counterComma) {
93 |             return true;
94 |         }
95 | 
96 |         return false;
97 |     }
98 | 
99 |     // Parses treeInput with NewickTree and checks the resulting node names.
100 |     // Returns TRUE when the input is INVALID: a name contains whitespace, two
101 |     // nodes share a name, or the comma count does not match the node count.
102 |     // The inputLen parameter is not read by this method.
103 |     public static boolean validateNewickTree(String treeInput, int inputLen) {
104 |         int commaCounter = 0;
105 |         NewickTree nwtValidate = new NewickTree(treeInput);
106 | 
107 |         // Invalid: whitespace inside a leaf name
108 |         for (int i = 0; i < nwtValidate.childArrayLen; i++) {
109 |             if (checkStringForWhiteSpace(nwtValidate.childArray[i].data))
110 |                 return true;
111 |         }
112 | 
113 |         // Invalid: whitespace inside an internal-node name
114 |         for (int i = 0; i < nwtValidate.parentArrayLen; i++) {
115 |             if (checkStringForWhiteSpace(nwtValidate.parentArray[i].data))
116 |                 return true;
117 |         }
118 | 
119 | 
120 |         // Invalid: a leaf name equals another leaf name or an internal-node name
121 |         for (int i = 0; i < nwtValidate.childArrayLen; i++) {
122 |             for (int j = 0; j < nwtValidate.childArrayLen; j++) {
123 |                 if (nwtValidate.childArray[i].data.compareTo(nwtValidate.childArray[j].data) == 0 && i != j)
124 |                     return true;
125 |             }
126 |             for (int j = 0; j < nwtValidate.parentArrayLen; j++) {
127 |                 if (nwtValidate.childArray[i].data.compareTo(nwtValidate.parentArray[j].data) == 0)
128 |                     return true;
129 |             }
130 |         }
131 | 
132 |         // Invalid: an internal-node name equals a leaf name or another internal-node name
133 |         for (int i = 0; i < nwtValidate.parentArrayLen; i++) {
134 |             for (int j = 0; j < nwtValidate.childArrayLen; j++) {
135 |                 if (nwtValidate.parentArray[i].data.compareTo(nwtValidate.childArray[j].data) == 0)
136 |                     return true;
137 |             }
138 |             for (int j = 0; j < nwtValidate.parentArrayLen; j++) {
139 |                 if (nwtValidate.parentArray[i].data.compareTo(nwtValidate.parentArray[j].data) == 0 && i != j)
140 |                     return true;
141 |             }
142 |         }
143 | 
144 |         for (int i = 0; i < treeInput.length(); i ++) {
145 |             char c = treeInput.charAt(i);
146 |             if(c == ',')
147 |                 commaCounter++;
148 |         }
149 | 
150 |         // Invalid: k commas must yield exactly 2k + 1 parsed nodes
151 |         if (commaCounter * 2 + 1 != nwtValidate.childArrayLen + nwtValidate.parentArrayLen)
152 |             return true;
153 | 
154 |         return false;
155 |     }
156 | 
157 |     // Returns true when s contains at least one whitespace character.
158 |     private static boolean checkStringForWhiteSpace(String s) {
159 |         for(int i = 0; i < s.length(); i++){
160 |             if(Character.isWhitespace(s.charAt(i))){
161 |                 return true;
162 |             }
163 |         }
164 |         return false;
165 |     }
166 | }
167 | -------------------------------------------------------------------------------- /GPhoCS_Manual.pdf: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/gphocs-dev/G-PhoCS/b807eb360b9d23012924808e8b36058faced070b/GPhoCS_Manual.pdf -------------------------------------------------------------------------------- /Makefile: --------------------------------------------------------------------------------
1 | # Makefile should be placed in a folder above the src/ , obj/ , and bin/ subfolders
2 | 
3 | # the compiler to use.
4 | CC=@gcc
5 | 
6 | # Multi threading enabling flag
7 | # ENABLE_OMP_THREADS = 1
8 | 
9 | #CC=/usr/bin/i586-mingw32msvc-gcc
10 | #AR=/usr/bin/i586-mingw32msvc-ar
11 | 
12 | # compiler options
13 | #Debugging
14 | #CFLAGS += -g -O0 -fstack-protector-all -Wall -DDEBUG -std=c99 -fopenmp -ggdb
15 | 
16 | #Production
17 | CFLAGS+= -fstack-protector-all -Wall -O3 -std=c99
18 | 
19 | ifeq ($(TARGETOS), Windows)
20 | CFLAGS += -DWINDOWS -liberty
21 | endif
22 | 
23 | ifdef ENABLE_OMP_THREADS
24 | CFLAGS += -fopenmp -DENABLE_OMP_THREADS
25 | BUILD_MSG = "Building with multithread support."
26 | else
27 | BUILD_MSG = "Building w/o multithread support."
28 | endif
29 | 
30 | # These targets do not name files; declaring them phony keeps make from
31 | # skipping them when a file called "all", "clean" or "print_message" exists.
32 | .PHONY: all print_message clean
33 | 
34 | all: print_message \
35 |      bin/G-PhoCS \
36 |      bin/readTrace
37 | 
38 | print_message:
39 | 	@echo ${BUILD_MSG}
40 | 	@echo "CFLAGS: "${CFLAGS}
41 | 
42 | bin/readTrace: obj/readTrace.o
43 | 	$(CC) $(CFLAGS) obj/readTrace.o -o bin/readTrace
44 | 
45 | # NOTE: $(CFLAGS) is repeated after the object files on the link lines below;
46 | # it is kept because CFLAGS may carry a library flag (-liberty on Windows)
47 | # that has to follow the objects.
48 | bin/G-PhoCS: obj/GPhoCS.o \
49 |              obj/MCMCcontrol.o \
50 |              obj/utils.o \
51 |              obj/GenericTree.o \
52 |              obj/PopulationTree.o \
53 |              obj/LocusDataLikelihood.o \
54 |              obj/AlignmentProcessor.o \
55 |              obj/omp_stub.o \
56 |              obj/patch.o
57 | 	$(CC) $(CFLAGS) obj/GPhoCS.o \
58 | 	                obj/MCMCcontrol.o \
59 | 	                obj/utils.o \
60 | 	                obj/GenericTree.o \
61 | 	                obj/PopulationTree.o \
62 | 	                obj/LocusDataLikelihood.o \
63 | 	                obj/AlignmentProcessor.o \
64 | 	                obj/omp_stub.o \
65 | 	                obj/patch.o \
66 | 	                $(CFLAGS) -lm -o bin/G-PhoCS
67 | 
68 | bin/AlignmentProcessor: obj/utils.o \
69 |                         obj/AlignmentProcessor.o \
70 |                         obj/AlignmentMain.o
71 | 	$(CC) $(CFLAGS) obj/utils.o \
72 | 	                obj/AlignmentProcessor.o \
73 | 	                obj/AlignmentMain.o \
74 | 	                $(CFLAGS) -lm -o bin/AlignmentProcessor
75 | 
76 | obj/readTrace.o: src/readTrace.c
77 | 	$(CC) $(CFLAGS) -c src/readTrace.c -o obj/readTrace.o
78 | 
79 | obj/GPhoCS.o: src/GPhoCS.c \
80 |               src/patch.c \
81 |               src/omp_stub.c \
82 |               src/MCMCcontrol.h \
83 |               src/LocusDataLikelihood.h \
84 |               src/utils.h \
85 |               src/GenericTree.h \
86 |               src/PopulationTree.h \
87 |               src/AlignmentProcessor.h \
88 |               src/MultiCoreUtils.h
89 | 	$(CC) $(CFLAGS) -c src/GPhoCS.c -o obj/GPhoCS.o
90 | 
91 | obj/omp_stub.o: src/omp_stub.c
92 | 	$(CC) $(CFLAGS) -c src/omp_stub.c -o obj/omp_stub.o
93 | 
94 | obj/utils.o: src/utils.c \
95 |              src/omp_stub.c \
96 |              src/utils.h \
97 |              src/MultiCoreUtils.h
98 | 	$(CC) $(CFLAGS) -c src/utils.c -o obj/utils.o
99 | 
100 | obj/GenericTree.o: src/GenericTree.c \
101 |                    src/GenericTree.h \
102 |                    src/utils.h
103 | 	$(CC) $(CFLAGS) -c src/GenericTree.c -o obj/GenericTree.o
104 | 
105 | obj/PopulationTree.o: src/PopulationTree.c \
106 |                       src/PopulationTree.h \
107 |                       src/utils.h
108 | 	$(CC) $(CFLAGS) -c src/PopulationTree.c -o obj/PopulationTree.o
109 | 
110 | obj/LocusDataLikelihood.o: src/LocusDataLikelihood.c \
111 |                            src/LocusDataLikelihood.h \
112 |                            src/utils.h \
113 |                            src/GenericTree.h
114 | 	$(CC) $(CFLAGS) -c src/LocusDataLikelihood.c -o obj/LocusDataLikelihood.o
115 | 
116 | obj/MCMCcontrol.o: src/MCMCcontrol.c \
117 |                    src/MCMCcontrol.h \
118 |                    src/PopulationTree.h \
119 |                    src/utils.h
120 | 	$(CC) $(CFLAGS) -c src/MCMCcontrol.c -o obj/MCMCcontrol.o
121 | 
122 | obj/AlignmentProcessor.o: src/AlignmentProcessor.c \
123 |                           src/AlignmentProcessor.h \
124 |                           src/utils.h
125 | 	$(CC) $(CFLAGS) -c src/AlignmentProcessor.c -o obj/AlignmentProcessor.o
126 | 
127 | obj/AlignmentMain.o: src/AlignmentMain.c \
128 |                      src/AlignmentProcessor.h
129 | 	$(CC) $(CFLAGS) -c src/AlignmentMain.c -o obj/AlignmentMain.o
130 | 
131 | obj/patch.o: src/patch.c \
132 |              src/patch.h
133 | 	$(CC) $(CFLAGS) -c src/patch.c -o obj/patch.o
134 | 
135 | # Removes every object file and every binary this Makefile can produce.
136 | # bin/G-PhoCS-1-2-3 is the binary name used by an older release and is still
137 | # removed so that stale copies do not linger.
138 | clean:
139 | 	@echo "Cleaning"
140 | 	@rm -rf obj/*.o bin/readTrace bin/G-PhoCS bin/AlignmentProcessor bin/G-PhoCS-1-2-3
141 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | G-PhoCS 2 | ======= 3 | 4 | G-PhoCS is a software package for inferring ancestral population sizes, population divergence times, and migration rates from individual genome sequences. G-PhoCS accepts as input a set of multiple sequence alignments from separate neutrally evolving loci along the genome. Parameter inference is done in a Bayesian manner, using a Markov Chain Monte Carlo (MCMC) to jointly sample model parameters and genealogies at the input loci. 5 | 6 | G-PhoCS is inspired by and derived from MCMCcoal (now [BP&P](http://abacus.gene.ucl.ac.uk/software/)), developed by Ziheng Yang. Two main conceptual differences separate G-PhoCS from MCMCcoal: 7 | 1. G-PhoCS models gene flow between populations along user-defined migration bands. 8 | 2.
G-PhoCS analyzes unphased diploid genotypes using a novel method for integrating over all possible phases. 9 | 10 | Additional adjustments were made to the C implementation of MCMCcoal in order to make it more efficient and reduce running time. 11 | 12 | More information on G-PhoCS can be found in Section 4 of the [supplement](https://static-content.springer.com/esm/art%3A10.1038%2Fng.937/MediaObjects/41588_2011_BFng937_MOESM24_ESM.pdf) to our [paper](http://www.nature.com/ng/journal/v43/n10/full/ng.937.html), and in the G-PhoCS user [manual](https://github.com/gphocs-dev/G-PhoCS/blob/master/G-PhoCS/GPhoCS_Manual.pdf). 13 | 14 | For more information: [http://compgen.cshl.edu/GPhoCS/](http://compgen.cshl.edu/GPhoCS/) 15 | 16 | 17 | Installation (Unix only for now) 18 | ------------ 19 | 20 | 1. Clone the G-PhoCS repository
21 | ==> git clone https://github.com/gphocs-dev/G-PhoCS.git 22 | 23 | 2. Move to the directory:
24 | ==> cd G-PhoCS/ 25 | 26 | 3. Compile G-PhoCS
==> make 28 | 29 | * The G-PhoCS binaries (G-PhoCS and readTrace) can now be found in the bin/ subdirectory. 30 | * The object files are placed in the obj/ subdirectory; those are: 31 | * AlignmentProcessor 32 | * GenericTree 33 | * GPhoCS 34 | * LocusDataLikelihood 35 | * MCMCcontrol 36 | * PopulationTree 37 | * readTrace 38 | * utils 39 | 40 | * It is highly recommended to perform a test run after installation using the supplied sample files. Type this in the command line while still in the G-PhoCS directory.
41 | ==> bin/G-PhoCS sample-control-file.ctl 42 | 43 | * To write control files more easily, you are encouraged to use the JAR located in the ControlFileGenerator folder. 44 | 45 | Latest updates 46 | -------------- 47 | The main updates in version 1.3 include: 48 | * Introduction of a multi-threaded implementation, which allows reducing running time on multi-core CPUs. 49 | * A control file generator Java applet for constructing setup files for G-PhoCS analysis. 50 | 51 | The main updates in version 1.2.3 include: 52 | * Enabling analysis of ancient DNA samples by associating a sample age parameter with each ancient sample. Use the 'age' attribute in CURRENT-POP. 53 | 54 | More details can be found in the user manual. 55 | 56 | User Guidelines 57 | --------------- 58 | 59 | When preparing your data for analysis by G-PhoCS, you will need to create a sequence file and a control file (see Sections 4 & 5 in the user manual). 60 | The sequence file contains your sequence data, and the control file contains the specification for the prior distribution over model parameters and instructions for the sampler. 61 | We provide sample sequence and control files for you to use for testing and initial experimentation. 62 | 63 | The main output of G-PhoCS is a trace file containing parameter values traced during the Markov chain (Section 3 in the user manual). 64 | A summary log containing information on the status of the MCMC is printed to the standard output. 65 | 66 | -------------------------------------------------------------------------------- /bin/README.md: -------------------------------------------------------------------------------- 1 | A bin directory that holds the results of running the sample control files as well as any binary executables.
2 | -------------------------------------------------------------------------------- /doc/GPhoCS_Manual.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gphocs-dev/G-PhoCS/b807eb360b9d23012924808e8b36058faced070b/doc/GPhoCS_Manual.odt -------------------------------------------------------------------------------- /obj/README.md: -------------------------------------------------------------------------------- 1 | An obj directory where the results from the make file will appear 2 | -------------------------------------------------------------------------------- /sample-control-file.ctl: -------------------------------------------------------------------------------- 1 | GENERAL-INFO-START 2 | 3 | seq-file seqs-sample.txt 4 | trace-file mcmc.log 5 | locus-mut-rate CONST 6 | 7 | mcmc-iterations 5000 8 | iterations-per-log 50 9 | logs-per-line 10 10 | 11 | 12 | find-finetunes FALSE 13 | finetune-coal-time 0.01 14 | finetune-mig-time 0.3 15 | finetune-theta 0.04 16 | finetune-mig-rate 0.02 17 | finetune-tau 0.0000008 18 | finetune-mixing 0.003 19 | # finetune-locus-rate 0.3 20 | 21 | tau-theta-print 10000.0 22 | tau-theta-alpha 1.0 # for STD/mean ratio of 100% 23 | tau-theta-beta 10000.0 # for mean of 1e-4 24 | 25 | mig-rate-print 0.001 26 | mig-rate-alpha 0.002 27 | mig-rate-beta 0.00001 28 | 29 | GENERAL-INFO-END 30 | 31 | CURRENT-POPS-START 32 | 33 | POP-START 34 | name A 35 | samples one d 36 | POP-END 37 | 38 | POP-START 39 | name B 40 | samples two d 41 | POP-END 42 | 43 | POP-START 44 | name C 45 | samples three d 46 | POP-END 47 | 48 | POP-START 49 | name D 50 | samples five d 51 | POP-END 52 | 53 | CURRENT-POPS-END 54 | 55 | ANCESTRAL-POPS-START 56 | 57 | POP-START 58 | name AB 59 | children A B 60 | tau-initial 0.000005 61 | tau-beta 20000.0 62 | finetune-tau 0.0000008 63 | POP-END 64 | 65 | POP-START 66 | name ABC 67 | children AB C 68 | tau-initial 0.00001 69 | tau-beta 20000.0 70 | 
finetune-tau 0.0000008 71 | POP-END 72 | 73 | POP-START 74 | name root 75 | children ABC D 76 | tau-initial 0.00005 77 | tau-beta 20000.0 78 | finetune-tau 0.00000286 79 | POP-END 80 | 81 | ANCESTRAL-POPS-END 82 | 83 | MIG-BANDS-START 84 | BAND-START 85 | source D 86 | target B 87 | mig-rate-print 0.1 88 | BAND-END 89 | 90 | MIG-BANDS-END 91 | -------------------------------------------------------------------------------- /src/AlignmentMain.c: -------------------------------------------------------------------------------- 1 | /** 2 | \file AlignmentMain.c 3 | Functions for performing 4 gamete test and analyzing patterns in samples. 4 | 5 | */ 6 | 7 | /******************************************************************************************************/ 8 | /****** INCLUDES ******/ 9 | /******************************************************************************************************/ 10 | 11 | #include "AlignmentProcessor.h" 12 | #include "utils.h" 13 | 14 | 15 | /***************************************************************************************************************/ 16 | /****** GLOBAL DATA STRUCTURES ******/ 17 | /***************************************************************************************************************/ 18 | 19 | 20 | 21 | /******************************************************************************************************/ 22 | /****** INTERNAL FUNCTION DECLARATION ******/ 23 | /******************************************************************************************************/ 24 | 25 | 26 | 27 | /******************************************************************************************************/ 28 | /****** M A I N ******/ 29 | /******************************************************************************************************/ 30 | 31 | /** Prints to stdout, usage text for programs Haploid2Diploid, AnalyzePatterns, 4GameteTest 32 | @param Name of the program (i.e. 
Haploid2Diploid) 33 | */ 34 | void usage(char* progName) { 35 | printf("Usage: %s [-l num-loci] \n",progName); 36 | printf(" - seq-file: sequence file to read\n"); 37 | printf(" - out-file: output file in which to write diploid alignments\n"); 38 | printf(" - num-samples: number of haploid samples in each alignment\n"); 39 | printf(" - diploid-list: list of haploid indices (1-based) indicating diploid pairs (i indicates a pair (i,i+1))\n"); 40 | printf(" - -l num-loci: max number of loci to transform (optional)\n"); 41 | 42 | return; 43 | } 44 | 45 | 46 | 47 | /** Main function for executable 'AnalyzePatterns ' 48 | @param argc Number of command line arguments 49 | @param argv Array of command line arguments 50 | @return return value of program 51 | @note This executable is not built as part of GPhoCS 52 | */ 53 | int main_analyze_patterns(int argc, char*argv[]) { 54 | 55 | char sampleFile[STRING_LENGTH]; 56 | char seqFile[STRING_LENGTH]; 57 | char** seqNames; 58 | int numSamples, sample, numInformative, numLoci = -1; 59 | int res; 60 | FILE *fsample; 61 | 62 | if(argc<= 2) { 63 | printf("Usage: AlignmentProcessor [num-loci]\n"); 64 | return 1; 65 | } 66 | 67 | strcpy(sampleFile,argv[1]); 68 | strcpy(seqFile,argv[2]); 69 | if(argc == 4) { 70 | res=sscanf(argv[3],"%d",&numLoci); 71 | if(res < 1) { 72 | fprintf(stderr, "Error: 3rd option (%s) cannot act as number of loci.\n",argv[3]); 73 | return 1; 74 | } 75 | } 76 | fsample=(FILE*)fopen(sampleFile,"r"); 77 | if(fsample == NULL) { 78 | fprintf(stderr, "Error: Could not open sample file %s.\n", sampleFile); 79 | return 1; 80 | } 81 | res = fscanf(fsample,"%d",&numSamples); 82 | seqNames = (char**)malloc(numSamples*sizeof(char*)); 83 | if(seqNames == NULL) { 84 | fprintf(stderr, "Error: Out Of Memory seqNames array in main().\n"); 85 | fclose(fsample); 86 | return 1; 87 | } 88 | seqNames[0] = (char*)malloc(NAME_LENGTH*numSamples*sizeof(char)); 89 | if(seqNames == NULL) { 90 | fprintf(stderr, "Error: Out Of Memory 
seqNames space in main().\n"); 91 | free(seqNames); 92 | fclose(fsample); 93 | return 1; 94 | } 95 | 96 | for(sample=0; sample [num-loci]\n"); 159 | return 1; 160 | } 161 | 162 | strcpy(sampleFile,argv[1]); 163 | strcpy(seqFile,argv[2]); 164 | if(argc == 4) { 165 | res=sscanf(argv[3],"%d",&numLoci); 166 | if(res < 1) { 167 | fprintf(stderr, "Error: 3rd option (%s) cannot act as number of loci.\n",argv[3]); 168 | return 1; 169 | } 170 | } 171 | fsample=(FILE*)fopen(sampleFile,"r"); 172 | if(fsample == NULL) { 173 | fprintf(stderr, "Error: Could not open sample file %s.\n", sampleFile); 174 | return 1; 175 | } 176 | res = fscanf(fsample,"%d",&numSamples); 177 | seqNames = (char**)malloc(numSamples*sizeof(char*)); 178 | if(seqNames == NULL) { 179 | fprintf(stderr, "Error: Out Of Memory seqNames array in main().\n"); 180 | fclose(fsample); 181 | return 1; 182 | } 183 | seqNames[0] = (char*)malloc(NAME_LENGTH*numSamples*sizeof(char)); 184 | if(seqNames == NULL) { 185 | fprintf(stderr, "Error: Out Of Memory seqNames space in main().\n"); 186 | free(seqNames); 187 | fclose(fsample); 188 | return 1; 189 | } 190 | 191 | for(sample=0; sample 9 | #include "utils.h" 10 | /******************************************************************************************************/ 11 | /****** CONSTANTS ******/ 12 | /******************************************************************************************************/ 13 | 14 | 15 | 16 | /***************************************************************************************************************/ 17 | /****** DATA TYPES ******/ 18 | /***************************************************************************************************************/ 19 | 20 | 21 | 22 | /** LocusProfile 23 | Holds info about site profiles observed in locus alignment 24 | */ 25 | typedef struct LOCUS_PROFILE_STRUCT{ 26 | int numPatterns; /**< number of distinct patterns observed in locus alignment */ 27 | char name[NAME_LENGTH]; /**< id of the genealogy 
used to model this locus */ 28 | int genealogyId; /**< genealogy id, for sampling representative genealogies */ 29 | int* patternIds; /**< list of patterns Ids */ 30 | int* patternCounts; /**< number of occurrences per observed pattern */ 31 | }LocusProfile; 32 | 33 | 34 | /***************************************************************************************************************/ 35 | /****** GLOBAL DATA STRUCTURES ******/ 36 | /***************************************************************************************************************/ 37 | 38 | 39 | 40 | /** AlignmentData 41 | Holds info about site patterns observed in a series of locus alignments 42 | */ 43 | struct ALIGNMENT_DATA_STRUCT{ 44 | int numSamples; /**< number of maximal samples to read per alignment */ 45 | char** sampleNames; /**< names of all samples */ 46 | unsigned short* isDiploid; /**< binary array indicating the diploid samples */ 47 | int numPatterns; /**< number of distinct site patterns observed */ 48 | char** patternArray; /**< list of all site patterns observed */ 49 | int numLoci; /**< number of loci analyzed (or to be analized) */ 50 | LocusProfile* locusProfiles; /**< a list of locus profiles */ 51 | }AlignmentData; 52 | 53 | 54 | 55 | /** AlignmentHets 56 | Holds info about site patterns observed in a series of locus alignments 57 | */ 58 | struct PHASED_PATTERNS_STRUCT{ 59 | int numHaploids; /**< number of haploids representing samples */ 60 | int numPhasedPatterns; /**< number of site patterns with het genotype */ 61 | int* numPhases; /**< number of phases per pattern (only indicated in first phase) */ 62 | char** patternArray; /**< list of all phased site patterns observed */ 63 | int numLoci; /**< number of loci analyzed (or to be analized) */ 64 | LocusProfile* locusProfiles; /**< a list of locus profiles per phased pattern */ 65 | }PhasedPatterns; 66 | 67 | 68 | 69 | /******************************************************************************************************/ 
70 | /****** FUNCTION DECLARATIONS ******/ 71 | /******************************************************************************************************/ 72 | 73 | 74 | 75 | /** initAlignmentData 76 | Initializes alignment data structures and initial space 77 | @param numLoci Number of Loci to analyze 78 | @param numSamples Total number of samples to analyze 79 | @param initSeqLength Tentative maximum sequence length of Alignment (treated dynamically) 80 | @param initNumPatterns Tentative maximum number of patterns in Alignment (treated dynamically) 81 | @param sampleNames Array of char strings containing names of all the samples defined in the control file 82 | @return 0 if successful (-1 if allocation problems) 83 | */ 84 | int initAlignmentData(int numLoci, int numSamples, int initSeqLength, int initNumPatterns, char** sampleNames); 85 | 86 | 87 | 88 | /** finalizeAlignmentData 89 | Finalizes data structure after all preprocessing is done (mostly compact memory and frees unnecessary memory usage) 90 | @return 0 91 | */ 92 | int finalizeAlignmentData(); 93 | 94 | 95 | 96 | /** freeAlignmentData 97 | Frees all memory allocated for alignement data 98 | @return 0 99 | */ 100 | int freeAlignmentData(); 101 | 102 | 103 | 104 | /** printPatterns 105 | Prints array containing patterns to stdout 106 | @param patternArray Array containing patterns to print 107 | @param patternCounts Array containing number of occurances for each pattern 108 | @param numPatterns Number of patterns containing in patternArray 109 | @param numSamples Number of samples per pattern 110 | */ 111 | void printSitePatterns(char** patternArray, int* patternCounts, int numPatterns, int numSamples); 112 | 113 | 114 | 115 | /** printLocusProfiles 116 | Prints profiles of all loci 117 | */ 118 | void printLocusProfiles(); 119 | 120 | 121 | /** printAlignmentError 122 | Prints content of error message encountered durring alignment 123 | */ 124 | void printAlignmentError(); 125 | 126 | 127 | /** 
readSeqFile 128 | Reads series of locus alignment from file, initializes Alignment data structures, performs initial processing of all alignments into site patterns 129 | @param seqFileName Path to file containing sequences to read in 130 | @param numSample Number of samples to read according to the control file 131 | @param sampleNames List of sample names from the control file, used to associate sequences with samples 132 | @param numLociToRead Maximum number of loci to read if defined in control file (ignored if -1) 133 | @warning has to be called before all other processing procedures can be called 134 | @note if numLociToRead is positive and smaller than number of loci in file, reads only the first numLociToRead loci 135 | @return 0 if successful (-1 otherwise) 136 | */ 137 | int readSeqFile(const char* seqFileName, int numSample, char** sampleNames, int numLociToRead); 138 | 139 | 140 | 141 | /** readSeqs 142 | Reads sequences of a single locus from file and writes them in preallocated internal space 143 | @param seqFile File descriptor containing sequences to read from 144 | @param numSeqs Number of sequences in this locus to read from file 145 | @param seqLength Length of each sequence to read from file 146 | @param seqArray Preallocated space used to save sequences read from file 147 | @param locus Number defining current locus that is being read form file 148 | @note Assumes each sequence is preceded by a name in a predefined list 149 | @note Orders sequences according to their names, using AlignmentData.sampleNames as a reference from names to indices 150 | @note Reads sequence, while ignoring any white spaces (except newline) 151 | @note Capitalizes all bases and checks to see if they are legitimate nucleotides or ambiguities:T,C,A,G ; U,Y,R,M,K,S,W,H,B,V,D ; N,?,- 152 | @return 0 if OK (-1 if file in bad format) 153 | */ 154 | int readSeqs(FILE* seqFile, int numSeqs, int seqLength, char** seqArray, int locus); 155 | 156 | 157 | /** 
processLocusAlignment 158 | Reads through alignment columns and identifies site patterns (according to JC symmetries) 159 | @param seqArray Single locus alignment, read in from file, these are the sequences to process 160 | @param seqLength Length of the alignment 161 | @param locusProfile allocated LocusProfile to populate with alignment data 162 | @note Adds new site patterns to patternArray 163 | @note Records locus pattern profile in locusProfile 164 | @return 0 if successful (-1 otherwise) 165 | */ 166 | int processLocusAlignment(char** seqArray, int seqLength, LocusProfile* locusProfile); 167 | 168 | 169 | 170 | /** processHetPatterns 171 | Creates a phased version of het patterns 172 | @param patternArray UnPhased Het patterns to process 173 | @param patternCounts For each pattern in patternArray, the number of times it occured 174 | @param numPatterns Number of patterns in patternArray 175 | @param breakSymmetries If == 1, takes into consideration symmetry breaking schemes for hets 176 | @param phasedPatternArray_ptr Phased version of hets saved here (array of length maxNumPhasedPatterns pre-allocated), more space allocated if needed and maxNumPhasedPatterns is updated accordingly 177 | @param numPhasesArray_ptr For each pattern the number of phases associated with it (pointer to array - may be reallocated by function) 178 | @param maxNumPhasedPatterns Maximum number of phased patterns (may be updated by function) 179 | @note Assumes first element in array (phasedPatternArray) points to entire space needed. 180 | @return Num phased patterns if successful (-1 otherwise) 181 | */ 182 | int processHetPatterns(char** patternArray, int* patternCounts, int numPatterns, unsigned short breakSymmetries, char*** phasedPatternArray_ptr, int** numPhasesArray_ptr, int* maxNumPhasedPatterns); 183 | 184 | 185 | 186 | /** countInformativePatterns 187 | Returns number of informative sites in subset of samples. 
188 | @param includeSample Binary array of size AlignmentData.numSamples, that selects subset of samples in which to look for informative sites, a 1 indicates to include the site, a 0 means not to include the site 189 | @return Number of informative sites in subset of samples 190 | */ 191 | int countInformativePatterns(unsigned short* includeSample); 192 | 193 | 194 | 195 | /** fourGameteTest 196 | Performs the 4-gamete test on all loci 197 | @note Prints out potential violations (some printed cases might not be actual violations - need to check by eye) 198 | @return 0 if all is OK, and -1 otherwise 199 | */ 200 | int fourGameteTest(); 201 | 202 | 203 | 204 | /** getPatternTypes 205 | Sorts patterns into types and prints how many columns are observed of each type 206 | @note A type consists of number of occurrences of each base (in decreasing order) 207 | @return 0 if all ok (-1 otherwise) 208 | */ 209 | int getPhasedPatternTypes(); 210 | 211 | 212 | 213 | /***************************************************************************************************************/ 214 | /****** END OF FILE ******/ 215 | /***************************************************************************************************************/ 216 | 217 | #endif 218 | -------------------------------------------------------------------------------- /src/GPhoCS.h: -------------------------------------------------------------------------------- 1 | /* 2 | * GPhoCS.h 3 | * 4 | * Created on: Feb 4, 2017 5 | * Author: ron 6 | */ 7 | 8 | #ifndef SRC_GPHOCS_H_ 9 | #define SRC_GPHOCS_H_ 10 | 11 | #include "LocusDataLikelihood.h" 12 | #include 13 | 14 | // --- CONSTANTS -------------------------------------------------------------- 15 | 16 | #define LOG_STEPS_NOT 17 | #define CHECKALL_NOT 18 | //#define CHECKALL 19 | 20 | #define NUM_TYPES 5 21 | #define TARGET_ACCEPTANCE_PERCENT 35 22 | #define TARGET_ACCEPTANCE_RANGE 5 23 | #define FINETUNE_RESOLUTION 0.0000001 24 | #define MAX_FINETUNE 10 25 | 
#define ACCEPTANCE_FUDGE 2 26 | 27 | #define GPHOCS_VERSION_NUM "1.3.2" 28 | #define GPHOCS_VERSION_DATE "Oct. 2017" 29 | 30 | int typeCount[NUM_TYPES]; 31 | 32 | // --- GLOBAL DATA STRUCTURES ------------------------------------------------- 33 | 34 | // Data setup. "Singleton" 35 | struct DATA_STATE { 36 | // average log-likelihood per genealogy of data given pop tree 37 | double logLikelihood; // (ln[P(X|Z)]+ln[P(Z|M)])/numLoci 38 | 39 | // log likelihood (not averaged) of data given all genealogies: 40 | double dataLogLikelihood; // ln[P(X|Z,M,T)] 41 | 42 | // log likelihood of all genealogies given model & parameters - 43 | double genealogyLogLikelihood; // ln[P(Z|M,T)] 44 | 45 | double rateVar; // the actual variance in locus-specific 46 | // mutation rate 47 | LocusData** lociData; // array of LocusData data structures 48 | // (of length numLoci). 49 | // (allocated in processAlignments) 50 | } dataState; 51 | 52 | // Miscellaneous statistics. "Singleton" 53 | struct MISC_STATS { 54 | int rubberband_mig_conflicts; // number of rubber band conflicts 55 | // with migration nodes 56 | 57 | int spr_zero_targets; // number of times an SPR event 58 | // encounters zero target edges 59 | 60 | int not_enough_migs; // number of times not enough pre- 61 | // allocated space for migration nodes 62 | 63 | // int small_interval; // very small interval for moving 64 | // coalescent event or migration event 65 | 66 | // double spr_lnld_disc; // the size of the smallest discrepancy 67 | //in log-likelihood computation 68 | } misc_stats; 69 | 70 | // --- FUNCTION DECLARATIONS -------------------------------------------------- 71 | 72 | void printUsage(char *programName); 73 | int processAlignments(); 74 | int readRateFile(const char* fileName); 75 | int initLociWithoutData(); 76 | void printParamVals(double paramVals[], int startParam, int endParam, FILE* o); 77 | int recordTypes(); 78 | int recordParamVals(double paramVals[]); 79 | int performMCMC(); 80 | void 
printGenealogyAndExit(int gen, int errStatus); 81 | int freeAllMemory(); 82 | 83 | // Sampling functions 84 | int UpdateGB_InternalNode(double finetune); // step 1: update coalescent times 85 | int UpdateGB_MigrationNode(double finetune); // step 2: update migration times 86 | int UpdateGB_MigSPR(); // step 3: update genealogy struct 87 | int UpdateTheta(double finetune); // step 4: No to MT 88 | int UpdateMigRates(double finetune); // step 5: No to MT, 89 | // update migration bands 90 | 91 | void UpdateTau(double *finetunes, // step 6: update tau 92 | int *accepted); // More difficult to MT, 93 | // most time consuming 94 | 95 | void UpdateSampleAge(double *finetunes, // similar to update Tau. 96 | int *accepted); // Modifies the time 97 | 98 | int UpdateLocusRate(double finetune); 99 | int UpdateAdmixCoeffs(double finetune); // Shall be skipped ("ledaleg") 100 | int mixing(double finetune); 101 | 102 | void allocateAllMemory() ; // TODO - tidy header file 103 | double getLogPrior() ; 104 | 105 | 106 | 107 | #endif /* SRC_GPHOCS_H_ */ 108 | -------------------------------------------------------------------------------- /src/GenericTree.c: -------------------------------------------------------------------------------- 1 | /** 2 | \file GenericTree.c 3 | Create, Read from file, Print Generic Trees; Get age from node/branch lengths 4 | 5 | A code file containing some procedures and data types for manipulating Tree strucutres 6 | */ 7 | 8 | 9 | #include "GenericTree.h" 10 | #include "utils.h" 11 | #include 12 | #include 13 | 14 | 15 | /***************************************************************************************************************/ 16 | /****** INTERNAL CONSTANTS ******/ 17 | /***************************************************************************************************************/ 18 | 19 | 20 | 21 | 22 | /******************************************************************************************************/ 23 | /****** FUNCTION DECLARATIONS 
******/ 24 | /******************************************************************************************************/ 25 | 26 | 27 | 28 | double getAgeOfNode(GenericBinaryTree* tree, int nodeId); 29 | 30 | 31 | 32 | /******************************************************************************************************/ 33 | /****** FUNCTION IMPLEMENTATION ******/ 34 | /******************************************************************************************************/ 35 | 36 | 37 | 38 | /*********************************************************************************** 39 | * createGenericTree 40 | * - allocates memory for generic tree 41 | * - returns pointer to the new generic tree 42 | ***********************************************************************************/ 43 | GenericBinaryTree* createGenericTree(int numLeaves) { 44 | int leaf, numNodes = 2*numLeaves-1; 45 | 46 | GenericBinaryTree* tree = (GenericBinaryTree*)malloc(sizeof(GenericBinaryTree)); 47 | if(tree == NULL) { 48 | fprintf(stderr, "\nError: Out Of Memory generic tree.\n"); 49 | return NULL; 50 | } 51 | 52 | tree->father = (int*)malloc(3*numNodes*sizeof(int)); 53 | if(tree->father == NULL) { 54 | fprintf(stderr, "\nError: Out Of Memory id arrays for generic tree.\n"); 55 | return NULL; 56 | } 57 | tree->leftSon = tree->father + numNodes; 58 | tree->rightSon = tree->father + 2*numNodes; 59 | 60 | tree->label1 = (double*)malloc(2*numNodes*sizeof(double)); 61 | if(tree->father == NULL) { 62 | fprintf(stderr, "\nError: Out Of Memory label arrays for generic tree.\n"); 63 | return NULL; 64 | } 65 | tree->label2 = tree->label1 + numNodes; 66 | 67 | tree->leafNames = (char**)malloc(numLeaves*sizeof(char*)); 68 | if(tree->leafNames == NULL) { 69 | fprintf(stderr, "\nError: Out Of Memory name array for generic tree.\n"); 70 | return NULL; 71 | } 72 | // leaf names are restricted to NAME_LENGTH chars 73 | tree->leafNames[0] = (char*)malloc(numLeaves*NAME_LENGTH*sizeof(char)); 74 | 
if(tree->leafNames[0] == NULL) { 75 | fprintf(stderr, "\nError: Out Of Memory name space for generic tree.\n"); 76 | return NULL; 77 | } 78 | for(leaf=0; leafleafNames[leaf] = tree->leafNames[0] + leaf*NAME_LENGTH; 80 | tree->leafNames[leaf][0] = '\0'; 81 | } 82 | 83 | tree->numLeaves = numLeaves; 84 | tree->rootId = -1; 85 | 86 | return tree; 87 | } 88 | /** end of createGenericTree **/ 89 | 90 | 91 | 92 | /*********************************************************************************** 93 | * freeGenericTree 94 | * - frees memory for generic tree 95 | * - returns 0 96 | ***********************************************************************************/ 97 | int freeGenericTree(GenericBinaryTree* tree) { 98 | 99 | free(tree->leafNames[0]); 100 | free(tree->leafNames); 101 | free(tree->label1); 102 | free(tree->father); 103 | free(tree); 104 | 105 | return 0; 106 | 107 | } 108 | /** end of freeGenericTree **/ 109 | 110 | 111 | 112 | /*********************************************************************************** 113 | * branchLengthIntoAge 114 | * - transforms label1 from branch length (of branch above node) to age of node 115 | * - calls a recursive procedure from root to leaves 116 | * - returns 0 if all is OK, and -1 otherwise (if tree is not ultrametric) 117 | ***********************************************************************************/ 118 | int branchLengthIntoAge(GenericBinaryTree* tree) { 119 | double rootAge; 120 | 121 | rootAge = getAgeOfNode(tree, tree->rootId); 122 | 123 | if(rootAge < 0) { 124 | return -1; 125 | } 126 | 127 | tree->label1[tree->rootId] = rootAge; 128 | return 0; 129 | } 130 | /** end of branchLengthIntoAge **/ 131 | 132 | 133 | 134 | /*********************************************************************************** 135 | * ageIntoBranchLength 136 | * - transforms label1 from age into branch length (of branch above node) 137 | * - performs a post-order traversal of nodes in a loop 138 | * - returns 0 if all is OK, and 
-1 otherwise (if tree is not ultrametric) 139 | ***********************************************************************************/ 140 | int ageIntoBranchLength(GenericBinaryTree* tree) { 141 | int numLeaves = tree->numLeaves; 142 | int numNodes = 2*numLeaves - 1; 143 | int node, fatherNode; 144 | 145 | unsigned short fromWhere; // 0 -arriving to node from father, 1- from left son, 2- from right son 146 | 147 | 148 | // start with root 149 | node = tree->rootId; 150 | fromWhere = 0; 151 | while(1) { 152 | 153 | if(fromWhere == 0 && node >= numLeaves) { 154 | // arrived to internal node from father: move to left son 155 | node = tree->leftSon[node]; 156 | fromWhere = 0; 157 | continue; 158 | } else if(fromWhere == 1){ 159 | // arrived to internal node from left son: move to right son 160 | node = tree->rightSon[node]; 161 | fromWhere = 0; 162 | continue; 163 | } 164 | 165 | // at this point node is either a leaf, or internal node with fromWhere=2 166 | fatherNode = tree->father[node]; 167 | if(node == tree->rootId) { 168 | // breaking at root 169 | tree->label1[node] = 0.0; 170 | break; 171 | } 172 | if( fatherNode < 0 || fatherNode > numNodes) { 173 | fprintf(stderr, "\nError: Illegal tree. Node %d has father %d.\n",node, fatherNode); 174 | return -1; 175 | } 176 | 177 | // label 1 is transformed from age to length of branch above node 178 | tree->label1[node] = tree->label1[fatherNode] - tree->label1[node]; 179 | 180 | // move back up 181 | if(node == tree->leftSon[fatherNode]) { 182 | fromWhere = 1; 183 | } else if(node == tree->rightSon[fatherNode]) { 184 | fromWhere = 2; 185 | } else { 186 | fprintf(stderr, "\nError: Illegal tree. 
Node %d has father %d with sons %d, %d.\n",node, fatherNode, tree->leftSon[fatherNode], tree->rightSon[fatherNode]); 187 | return -1; 188 | } 189 | 190 | node = fatherNode; 191 | 192 | } // end of while(node) 193 | 194 | return 0; 195 | 196 | } 197 | /** end of ageIntoBranchLength **/ 198 | 199 | 200 | 201 | /*********************************************************************************** 202 | * readGenericTree 203 | * - reads a generic binary tree from file (Newick format) 204 | * - assumes each node can be associated (possibly) with two labels, 205 | * the first indicated by ':', and the second indicated by '#' 206 | * - if readLeafIndices==1, determines the index of each leaf according 207 | * to its name (index = atoi(name)-1) 208 | * - the parsing algorithm assumes each subtree is represented by a string of the following form: 209 | * NODE [':' label1] ['#' label2] termination_char 210 | * note that order of labels is unimportant and termination_char is: 211 | * * ',' for left subtree 212 | * * ')' for right subtree 213 | * * ';' for the total tree 214 | * - a leaf is represented by NODE=name, where 'name' is a string which does not include any 215 | * white spaces or any of the Newick saved characters " (),:#; ". 
216 | * - a subtree rooted at an internal node is represented by 217 | * NODE = '(' left_subtree right_subtree 218 | * - returns 0 if all is OK, 1 if reached EOF before tree, and -1 otherwise 219 | ***********************************************************************************/ 220 | int readGenericTree(FILE* file, GenericBinaryTree* tree, unsigned short readLeafIndices) { 221 | char ch = '\0'; 222 | char newickSavedChars[]="(),:#;"; 223 | int node, nextAvailableNode, nextAvailableLeaf; 224 | int nameIndex; 225 | 226 | // read to first '(' 227 | while(ch != EOF && ch!='('){ 228 | ch=fgetc(file); 229 | } 230 | if(ch == EOF) { 231 | // fprintf(stderr, "\nError: Unexpected End of File when reading generic tree.\n"); 232 | return 1; 233 | } 234 | 235 | // first '(' corresponds to root of tree 236 | node = 2*tree->numLeaves - 2; 237 | tree->rootId = node; 238 | tree->father[node] = -1; 239 | tree->leftSon[node] = -1; 240 | tree->rightSon[node] = -1; 241 | nextAvailableNode = node-1; 242 | nextAvailableLeaf = 0; 243 | 244 | while(1) { 245 | ch = fgetc(file); 246 | 247 | if(ch == EOF) { 248 | fprintf(stderr, "\nError: Unexpected End of File when reading generic tree.\n"); 249 | return -1; 250 | } 251 | if(isspace(ch)) continue; 252 | if(ch == ';') break; 253 | 254 | // printf("%c",ch); 255 | // fflush(stdout); 256 | // label characters (':' and '#') 257 | if(ch == ':') { 258 | if(0 > fscanf(file,"%lf",&(tree->label1[node]))) 259 | return -1; 260 | } 261 | else if(ch == '#') { 262 | if(0 > fscanf(file,"%lf",&(tree->label2[node]))) 263 | return -1; 264 | } 265 | 266 | // move down to internal node 267 | else if(ch == '(') { 268 | if(nextAvailableNode < tree->numLeaves) { 269 | fprintf(stderr, "\nError: Too many internal nodes in Newick string (expecting %d leaves).\n", tree->numLeaves); 270 | return -1; 271 | } 272 | if(tree->leftSon[node] < 0) { 273 | tree->leftSon[node] = nextAvailableNode; 274 | } else if(tree->rightSon[node] < 0) { 275 | tree->rightSon[node] = 
nextAvailableNode; 276 | } else { 277 | fprintf(stderr, "\nError: More than 2 sons for node %d in Newick string.\n", node); 278 | return -1; 279 | } 280 | tree->father[nextAvailableNode] = node; 281 | tree->leftSon[nextAvailableNode] = -1; 282 | tree->rightSon[nextAvailableNode] = -1; 283 | tree->label1[nextAvailableNode] = -0.5; 284 | tree->label2[nextAvailableNode] = -0.5; 285 | node = nextAvailableNode--; 286 | } 287 | 288 | // move up from (left/right) son to father 289 | else if(ch == ',' || ch == ')') { 290 | node = tree->father[node]; 291 | if(node < 0) { 292 | fprintf(stderr, "\nError: Unbalanced parentheses in Newick string (too many ')'s).\n"); 293 | return -1; 294 | } 295 | if(ch == ',' && (tree->leftSon[node] < 0 || tree->rightSon[node] >= 0)) { 296 | fprintf(stderr, "\nError: Subtree terminates with ',' but is not left subtree.\n"); 297 | return -1; 298 | } 299 | if(ch == ')' && tree->rightSon[node] < 0) { 300 | fprintf(stderr, "\nError: Subtree terminates with ')' but is not right subtree.\n"); 301 | return -1; 302 | } 303 | } 304 | 305 | // move down to leaf node 306 | else { 307 | if(readLeafIndices) { 308 | // leaf index should be "name-1"; 309 | ungetc(ch,file); 310 | if(1 > fscanf(file, "%d", &nextAvailableLeaf) || nextAvailableLeaf < 1 || nextAvailableLeaf > tree->numLeaves) { 311 | fprintf(stderr, "\nError: Leaf should have integer name in the range 1..%d.\n", tree->numLeaves); 312 | return -1; 313 | } 314 | // printf("ind[%d]",nextAvailableLeaf); 315 | // fflush(stdout); 316 | nextAvailableLeaf--; 317 | } 318 | else if(nextAvailableLeaf >= tree->numLeaves) { 319 | fprintf(stderr, "\nError: Too many leaves in Newick string (expecting %d leaves).\n", tree->numLeaves); 320 | return -1; 321 | } 322 | 323 | if(tree->leftSon[node] < 0) { 324 | tree->leftSon[node] = nextAvailableLeaf; 325 | } else if(tree->rightSon[node] < 0) { 326 | tree->rightSon[node] = nextAvailableLeaf; 327 | } else { 328 | fprintf(stderr, "\nError: More than 2 sons for node %d 
in Newick string.\n", node); 329 | return -1; 330 | } 331 | tree->father[nextAvailableLeaf] = node; 332 | tree->leftSon[nextAvailableLeaf] = -1; 333 | tree->rightSon[nextAvailableLeaf] = -1; 334 | tree->label1[nextAvailableLeaf] = -1.0; 335 | tree->label2[nextAvailableLeaf] = -1.0; 336 | node = nextAvailableLeaf; 337 | 338 | if(!readLeafIndices) { 339 | // read leaf name 340 | nextAvailableLeaf++; 341 | for (nameIndex=0; nameIndexleafNames[node][nameIndex] = ch; 343 | ch = fgetc(file); 344 | if(ch == EOF) { 345 | fprintf(stderr, "\nUnexpected End of File when reading generic tree from file.\n"); 346 | return -1; 347 | } 348 | if(isspace(ch) || NULL != strchr(newickSavedChars,ch)) { 349 | tree->leafNames[node][nameIndex+1] = '\0'; 350 | ungetc(ch,file); 351 | break; 352 | } 353 | // printf("%c",ch); 354 | // fflush(stdout); 355 | } 356 | } 357 | }// end of if - char case 358 | 359 | } // end of while(ch) 360 | 361 | if(node != tree->rootId) { 362 | fprintf(stderr, "\nError: Unbalanced parentheses in Newick string (too many '('s).\n"); 363 | return -1; 364 | } 365 | 366 | // printf(" - end of tree.\n"); 367 | // for(node=0; nodenumLeaves; node++) { 368 | // printf("Leaf %d name %s.\n",node, tree->leafNames[node]); 369 | // } 370 | 371 | return 0; 372 | 373 | } 374 | /** end of readGenericTree **/ 375 | 376 | 377 | 378 | /*********************************************************************************** 379 | * printGenericTree 380 | * - prints a generic binary tree to file (in Newick format) 381 | * - if printLabel2 == 1, prints two labels, otherwise, just prints label1 382 | * - for each node writes: 383 | * NODE ':' label1 ['#' label2] termination_char 384 | * where termination_char is: 385 | * * ',' for left subtree 386 | * * ')' for right subtree 387 | * * ';' for the total tree 388 | * - for a leaf, NODE is just its name. 
389 | * - for a subtree rooted at an internal node, NODE = '(' left_subtree right_subtree 390 | * - implements recursion in a loop 391 | * - returns 0 if all is OK, and -1 otherwise 392 | ***********************************************************************************/ 393 | int printGenericTree(FILE* file, GenericBinaryTree* tree, unsigned short printLabel2) { 394 | int numLeaves = tree->numLeaves; 395 | int numNodes = 2*numLeaves - 1; 396 | int node, fatherNode; 397 | 398 | unsigned short fromWhere; // 0 -arriving to node from father, 1- from left son, 2- from right son 399 | 400 | 401 | // start with root 402 | node = tree->rootId; 403 | fromWhere = 0; 404 | while(1) { 405 | 406 | if(fromWhere == 0 && node >= numLeaves) { 407 | // arrived to internal node from father: print "(" and move to left son 408 | fprintf(file, "("); 409 | node = tree->leftSon[node]; 410 | fromWhere = 0; 411 | continue; 412 | } else if(fromWhere == 1){ 413 | // arrived to internal node from left son: print "," and move to right son 414 | fprintf(file, ","); 415 | node = tree->rightSon[node]; 416 | fromWhere = 0; 417 | continue; 418 | } 419 | 420 | // at this point node is either a leaf, or internal node with fromWhere=2 421 | if(node < numLeaves) { 422 | // leaf 423 | fprintf(file, "%s",tree->leafNames[node]); 424 | } else { 425 | // fromWhere=2 426 | fprintf(file, ")"); 427 | } 428 | 429 | if(node == tree->rootId) { 430 | // finish up and break 431 | fprintf(file, ";\n"); 432 | break; 433 | } 434 | 435 | // print labels 436 | fprintf(file, ":%lf ",tree->label1[node]); 437 | if(printLabel2) { 438 | fprintf(file, " #%lf",tree->label2[node]); 439 | } 440 | 441 | // move back up 442 | fatherNode = tree->father[node]; 443 | if(fatherNode < 0 || fatherNode > numNodes) { 444 | fprintf(stderr, "\nError: Illegal tree. 
Node %d has father %d.\n",node, fatherNode); 445 | return -1; 446 | } 447 | 448 | if(node == tree->leftSon[fatherNode]) { 449 | fromWhere = 1; 450 | } else if(node == tree->rightSon[fatherNode]) { 451 | fromWhere = 2; 452 | } else { 453 | fprintf(stderr, "\nError: Illegal tree. Node %d has father %d with sons %d, %d.\n",node, fatherNode, tree->leftSon[fatherNode], tree->rightSon[fatherNode]); 454 | return -1; 455 | } 456 | 457 | node = fatherNode; 458 | 459 | } // end of while(1) 460 | 461 | return 0; 462 | 463 | } 464 | /** end of printGenericTree **/ 465 | 466 | 467 | 468 | /***************************************************************************************************************/ 469 | /****** INTERNAL FUNCTION IMPLEMENTATION ******/ 470 | /***************************************************************************************************************/ 471 | 472 | 473 | 474 | /*********************************************************************************** 475 | * getAgeOfNode 476 | * - returns age of node assuming label1 corresponds to branch length 477 | * - performed using self-recursive calls, and replacing all label1's in subtree by ages 478 | * - does not replace label1 of node by age 479 | * - returns node age, if all is OK, and -1 if tree is not ultrametric 480 | ***********************************************************************************/ 481 | double getAgeOfNode(GenericBinaryTree* tree, int nodeId) { 482 | 483 | double leftAge, rightAge, age; 484 | 485 | if(nodeId < tree->numLeaves) { 486 | return 0.0; 487 | } 488 | 489 | leftAge = getAgeOfNode(tree, tree->leftSon[nodeId]); 490 | rightAge = getAgeOfNode(tree, tree->rightSon[nodeId]); 491 | age = leftAge + tree->label1[ tree->leftSon[nodeId] ]; 492 | 493 | if(leftAge < 0 || rightAge < 0) { 494 | 495 | return -1.0; 496 | } 497 | 498 | if(fabs(age - rightAge - tree->label1[ tree->rightSon[nodeId] ]) > 2e-8) { 499 | // printf("Inconsistent age found for node %d: right %lf, left %lf, diff 
%g).\n", 500 | // nodeId, rightAge + tree->label1[ tree->rightSon[nodeId] ], age, rightAge + tree->label1[ tree->rightSon[nodeId] ] - age); 501 | // return -1.0; 502 | } 503 | 504 | tree->label1[ tree->leftSon[nodeId] ] = leftAge; 505 | tree->label1[ tree->rightSon[nodeId] ] = rightAge; 506 | 507 | return age; 508 | } 509 | /** end of getAgeOfNode **/ 510 | 511 | /***************************************************************************************************************/ 512 | /****** END OF FILE ******/ 513 | /***************************************************************************************************************/ 514 | -------------------------------------------------------------------------------- /src/GenericTree.h: -------------------------------------------------------------------------------- 1 | #ifndef GENERIC_TREE_H 2 | #define GENERIC_TREE_H 3 | /** 4 | \file GenericTree.h 5 | Defines GenericTree struct 6 | 7 | */ 8 | 9 | #include 10 | 11 | /******************************************************************************************************/ 12 | /****** CONSTANTS ******/ 13 | /******************************************************************************************************/ 14 | 15 | 16 | 17 | /***************************************************************************************************************/ 18 | /****** DATA TYPES ******/ 19 | /***************************************************************************************************************/ 20 | 21 | 22 | /*********************************************************************************** 23 | * GenericTree 24 | * - leaf ids are 0..numLeaves-1 25 | * - all other nodes have 2 sons (left and right) 26 | * - all nodes have father (other than root, which has father = -1) 27 | * - label1 read after ':' sign and label2 read after '#' sign - typically correspond to age and additional info 28 | ***********************************************************************************/ 29 
| 30 | typedef struct GENERIC_BINARY_TREE{ 31 | int numLeaves; // number of leaves in tree (leaf ids are 0..numLeaves-1) 32 | int rootId; // index of root node 33 | char** leafNames; // array of strings for leaf names 34 | int* father; // array of indices of father for each node (-1 for root) 35 | int* leftSon; // array of indices of left sons for each node (-1 for leaf) 36 | int* rightSon; // array of indices of right sons for each node (-1 for leaf) 37 | double* label1; // array of first labels for each node (read after ':' sign) 38 | double* label2; // array of additional labels for each node (read after '#' sign) 39 | 40 | }GenericBinaryTree; 41 | 42 | 43 | 44 | /*********************************************************************************** 45 | * ?? 46 | ***********************************************************************************/ 47 | 48 | 49 | /***************************************************************************************************************/ 50 | /****** GLOBAL DATA STRUCTURES ******/ 51 | /***************************************************************************************************************/ 52 | 53 | 54 | 55 | /******************************************************************************************************/ 56 | /****** FUNCTION DECLARATIONS ******/ 57 | /******************************************************************************************************/ 58 | 59 | 60 | 61 | /*********************************************************************************** 62 | * createGenericTree 63 | * - allocates memory for generic tree 64 | * - returns pointer to the new generic tree 65 | ***********************************************************************************/ 66 | GenericBinaryTree* createGenericTree(int numLeaves); 67 | 68 | 69 | 70 | /*********************************************************************************** 71 | * freeGenericTree 72 | * - frees memory for generic tree 73 | * - returns 0 74 | ***********************************************************************************/ 75 | 
int freeGenericTree(GenericBinaryTree* tree); 76 | 77 | 78 | 79 | /*********************************************************************************** 80 | * readGenericTree 81 | * - reads a generic binary tree from file (Newick format) 82 | * - assumes each node can be associated (possibly) with two labels, 83 | * the first indicated by ':', and the second indicated by '#' 84 | * - if readLeafIndices==1, determines the index of each leaf according 85 | * to its name (index = atoi(name)-1) 86 | * - returns 0 if all is OK, and -1 otherwise 87 | ***********************************************************************************/ 88 | int readGenericTree(FILE* file, GenericBinaryTree* tree, unsigned short readLeafIndices); 89 | 90 | 91 | 92 | /*********************************************************************************** 93 | * printGenericTree 94 | * - prints a generic binary tree to file (in Newick format) 95 | * - if printLabel2 == 1, prints two labels, otherwise, just prints label1 96 | * - for each node writes: 97 | * NODE ':' label1 ['#' label2] termination_char 98 | * where termination_char is: 99 | * * ',' for left subtree 100 | * * ')' for right subtree 101 | * * ';' for the total tree 102 | * - for a leaf, NODE is just its name. 
103 | * - for a subtree rooted at an internal node, NODE = '(' left_subtree right_subtree 104 | * - implements recursion in a loop 105 | * - returns 0 if all is OK, and -1 otherwise 106 | ***********************************************************************************/ 107 | int printGenericTree(FILE* file, GenericBinaryTree* tree, unsigned short printLabel2); 108 | 109 | 110 | 111 | /*********************************************************************************** 112 | * branchLengthIntoAge 113 | * - transforms label1 from branch length (of branch above node) to age of node 114 | * - calls a recursive procedure from root to leaves 115 | * - returns 0 if all is OK, and -1 otherwise (if tree is not ultrametric) 116 | ***********************************************************************************/ 117 | int branchLengthIntoAge(GenericBinaryTree* tree); 118 | 119 | 120 | 121 | /*********************************************************************************** 122 | * ageIntoBranchLength 123 | * - transforms label1 from age into branch length (of branch above node) 124 | * - performs a post-order traversal of nodes in a loop 125 | * - returns 0 if all is OK, and -1 otherwise (if tree is not ultrametric) 126 | ***********************************************************************************/ 127 | int ageIntoBranchLength(GenericBinaryTree* tree); 128 | 129 | 130 | 131 | /***************************************************************************************************************/ 132 | /****** END OF FILE ******/ 133 | /***************************************************************************************************************/ 134 | 135 | #endif 136 | -------------------------------------------------------------------------------- /src/LocusDataLikelihood.h: -------------------------------------------------------------------------------- 1 | #ifndef LOCUS_DATA_LIKELIHOOD_H 2 | #define LOCUS_DATA_LIKELIHOOD_H 3 | /** 4 | \file LocusDataLikelihood.h 5 | 
Compute likelihood of data given locus-specific genealogy. 6 | 7 | This header file describes the interface functions for computing 8 | likelihood of data given locus-specific genealogy. 9 | When changes are made to genealogy, old version is being saved, in case changes 10 | are rejected. 11 | Procedures for reading sequence data and doing initial analysis are taken from 12 | PAML and original MCMCcoal (NEED TO CHANGE THIS !!) 13 | 14 | */ 15 | 16 | 17 | 18 | #include "GenericTree.h" 19 | 20 | #define OPT1 21 | #define OPT2_not 22 | 23 | /***************************************************************************************************************/ 24 | /****** DATA TYPES ******/ 25 | /***************************************************************************************************************/ 26 | 27 | 28 | 29 | /*********************************************************************************** 30 | * LocusData 31 | * - Data structure which holds likelihood of a locus and also saves 32 | * the relevant information for quick re-computation of this likelihood 33 | * given changes in the locus genealogy. 34 | ***********************************************************************************/ 35 | typedef struct LOCUS_LIKELIHOOD LocusData; 36 | 37 | 38 | /***************************************************************************************************************/ 39 | /****** EXTERNAL FUNCTION DECLARATION ******/ 40 | /***************************************************************************************************************/ 41 | 42 | 43 | 44 | /*********************************************************************************** 45 | * createLocusData 46 | * - creates a new LocusData structure and allocates memory for all structures other than data. 
47 | * - receives as input the number of samples in the locus (genealogy leaves) 48 | * and the mode in which to compute likelihood of het columns in alignment: 49 | * 0 indicates using only first phase for each heterozygote 50 | * 1 indicates using average likelihood of all phasings for each heterozygote 51 | * 2 indicates using phasing with maximum likelihood for each heterozygote 52 | * - returns a pointer to the structure. 53 | ***********************************************************************************/ 54 | LocusData* createLocusData (int numLeaves, unsigned short hetMode); 55 | 56 | 57 | 58 | /*********************************************************************************** 59 | * initializeLocusData 60 | * - initializes all data structures for locus data 61 | * - computes leaf likelihoods for all phased patterns 62 | * - if patternCounts != NULL, sets all pattern counts (otherwise, set them to 0). 63 | * - returns 0 if all OK, and -1 otherwise 64 | ***********************************************************************************/ 65 | int initializeLocusData(LocusData* locusData, char** patternArray, int numPatterns, int* numPhases, int* patternCounts); 66 | 67 | 68 | 69 | /*********************************************************************************** 70 | * freeLocusData 71 | * - frees all allocated memory for LocusData 72 | * - returns 0 73 | ***********************************************************************************/ 74 | int freeLocusData (LocusData* locusData); 75 | 76 | 77 | 78 | /*********************************************************************************** 79 | * attachLeaf - UNUSED 80 | * - attaches a leaf to existing sub-genealogy 81 | * - used when generating a starting genealogy 82 | * - leafId indicates the id of the leaf being attached 83 | * - target indicates the node above which leaf is attached (at a given age) 84 | * - id of attachment node is set to numLeaves-1 more than id of attached leaf 85 | * - returns 
the id of the attachment node 86 | ***********************************************************************************/ 87 | int attachLeaf_UNUSED (LocusData* locusData, int leafId, int target, double age); 88 | 89 | 90 | 91 | /*********************************************************************************** 92 | * setLocusMutationRate 93 | * - sets the mutation rate for locus to given value 94 | ***********************************************************************************/ 95 | void setLocusMutationRate (LocusData* locusData, double newRate); 96 | 97 | 98 | 99 | /*********************************************************************************** 100 | * getLocusMutationRate 101 | * - returns the mutation rate for locus 102 | ***********************************************************************************/ 103 | double getLocusMutationRate (LocusData* locusData); 104 | 105 | 106 | /*********************************************************************************** 107 | * computeAllConditionals 108 | * - computes all conditional likelihoods at all nodes of tree under all patterns 109 | * - calls recursive procedure computeConditionalJC to recompute conditional probabilities 110 | * and override all old versions. 111 | * - also initializes all pattern counts to zero 112 | * - returns 0 113 | ***********************************************************************************/ 114 | int computeAllConditionals (LocusData* locusData); 115 | 116 | 117 | 118 | /*********************************************************************************** 119 | * computeLocusDataLikelihood 120 | * - computes log-likelihood of data at a given locus, given its genealogy 121 | * - calls recursive procedure computeConditionalJC to recompute conditional probabilities 122 | * and makes sure to save old versions. 123 | * - if useOldConditionals == 1, uses previously computed conditionals, when possible. 
124 | * - otherwise, recomputes everything from scratch 125 | * - returns the log-likelihood 126 | ***********************************************************************************/ 127 | double computeLocusDataLikelihood (LocusData* locusData, unsigned short useOldConditionals); 128 | 129 | 130 | 131 | /*********************************************************************************** 132 | * computePatternLogLikelihood 133 | * - receives a list of patterns and their counts and computes their log likelihood 134 | * - uses pre-computed conditionals and doesn't modify anything in the data structure. 135 | * - returns the log likelihood 136 | ***********************************************************************************/ 137 | double computePatternLogLikelihood (LocusData* locusData, int numPatterns, int* patternIds, int* patternCounts); 138 | 139 | 140 | 141 | /*********************************************************************************** 142 | * !!!!! FOR DEBUGGING !!!!! 143 | ***********************************************************************************/ 144 | double computeLocusDataLikelihood_deb (LocusData* locusData, unsigned short useOldConditionals); 145 | 146 | 147 | /*********************************************************************************** 148 | * addSitePatterns 149 | * - adds a set of site patterns to live pattern set 150 | * - receives ids of patterns to add and their respective counts 151 | * - if revertToSaved == 1, then conditionals do not need to be computed (since they already exist), 152 | * and new likelihood is the saved one. 
153 | * - returns delta in log likelihood of this step 154 | ***********************************************************************************/ 155 | double addSitePatterns (LocusData* locusData, int numPatterns, int* patternIds, int* patternCounts, unsigned short revertToSaved); 156 | 157 | 158 | 159 | /*********************************************************************************** 160 | * reduceSitePatterns 161 | * - reduces the counts of a set of site patterns in likelihood computation 162 | * - receives ids of patterns to reduce and the respective counts 163 | * - if revertToSaved == 1, uses saved likelihood to compute new likelihood 164 | * - returns delta in log likelihood of this step 165 | ***********************************************************************************/ 166 | double reduceSitePatterns (LocusData* locusData, int numPatterns, int* patternIds, int* patternCounts, unsigned short revertToSaved); 167 | 168 | 169 | 170 | /*********************************************************************************** 171 | * checkLocusDataLikelihood 172 | * - re-computes log-likelihood of data at a given locus, given its genealogy 173 | * - if inconsistency is found in computed log likelihood, checks inconsistencies in 174 | * all recorded conditional probabilities 175 | * - returns 1 if all is OK, and 0 if inconsistencies were found 176 | ***********************************************************************************/ 177 | int checkLocusDataLikelihood (LocusData* locusData); 178 | 179 | 180 | 181 | /*********************************************************************************** 182 | * revertToSaved 183 | * - reverts locus data structure (genealogy and conditional likelihoods) to saved version 184 | * - returns 0 185 | ***********************************************************************************/ 186 | int revertToSaved(LocusData* locusData); 187 | 188 | 189 | 190 | 
/*********************************************************************************** 191 | * resetSaved 192 | * - resets saved data and maintains updates 193 | * - returns 0 194 | ***********************************************************************************/ 195 | int resetSaved(LocusData* locusData); 196 | 197 | 198 | 199 | /*********************************************************************************** 200 | * adjustGenNodeAge 201 | * - adjusts nodes age and saves old version 202 | * - nodeId is id of node and age is new age 203 | * - returns 0 204 | ***********************************************************************************/ 205 | int adjustGenNodeAge(LocusData* locusData, int nodeId, double age); 206 | 207 | 208 | 209 | /*********************************************************************************** 210 | * scaleAllNodeAges 211 | * - scales all node ages in genealogy with a given multiplicative factor 212 | * - returns the delta in log-likelihood of suggested step 213 | * - saves all original ages and conditionals 214 | ***********************************************************************************/ 215 | double scaleAllNodeAges(LocusData* locusData, double factor); 216 | 217 | 218 | 219 | /*********************************************************************************** 220 | * executeGenSPR 221 | * - executes an SPR operation on locus genealogy 222 | * - subtree_root is node id for root of pruned subtree,targetBranch is id of node below 223 | * branch where subtree should be regrafted, and age indicates time of regrafting. 
224 | * - returns 0 if root node remains the same 225 | * - otherwise, returns 1 if subtree is a child of the root AFTER regrafting 226 | * - otherwise, returns 2 if subtree was a child of the root BEFORE regrafting 227 | ***********************************************************************************/ 228 | int executeGenSPR(LocusData* locusData, int subtreeRoot, int targetBranch, double age); 229 | 230 | 231 | 232 | 233 | /*********************************************************************************** 234 | * copyGenericTreeToLocus 235 | * - copies a generic tree into likelihood tree 236 | * - assumes label1 holds age of node 237 | * - returns 0 238 | ***********************************************************************************/ 239 | int copyGenericTreeToLocus(LocusData* locusData, GenericBinaryTree* genericTree); 240 | 241 | 242 | 243 | 244 | /*********************************************************************************** 245 | * printLocusGenTree 246 | * - prints the genealogy tree (including supplied population and event id) 247 | * - prints each node in a separate line, according to id order 248 | * - if there are nodes which are considered for changes, print out their id's 249 | ***********************************************************************************/ 250 | void printLocusGenTree(LocusData* locusData, FILE* stream, int* nodePops, int* nodeEvents); 251 | 252 | 253 | 254 | 255 | /*********************************************************************************** 256 | * printLocusDataStats 257 | * - prints stats on alignment patterns 258 | * - stats are outputted in one line (with newline) to stdout in the following order: 259 | * - num hom patterns, num het patterns with 2 phases, , num het patterns with 4 phases... 
260 | * - for each of the above stats, prints consecutively the number of distinct patterns 261 | * and the number of columns corresponding to it in the alignment 262 | * - maxLogPhases is an upper bound on the number of phased hets per pattern 263 | ***********************************************************************************/ 264 | void printLocusDataStats(LocusData* locusData, int maxLogPhases); 265 | 266 | 267 | 268 | /*********************************************************************************** 269 | * printLocusDataPatterns 270 | * - prints the alignment of the locus, pattern by pattern to output file. 271 | * - patterns are printed in columns, according to leaf id. 272 | * - below each pattern is its multiplicity in the alignment 273 | * - het patterns are represented by all phasings 274 | * (mult is written below first phasing) 275 | ***********************************************************************************/ 276 | void printLocusDataPatterns(LocusData* locusData, FILE* outFile); 277 | 278 | 279 | 280 | /*********************************************************************************** 281 | * computePairwiseLCAs 282 | * - procedure for computing a 2D matrix with the ids of the LCAs (least 283 | * common ancestors) of all pairs of leaves 284 | * - returns 1 if successful, 0 otherwise 285 | ***********************************************************************************/ 286 | int computePairwiseLCAs (LocusData* locusData, int** lcaMatrix, int* leafArray_aux); 287 | 288 | 289 | 290 | /*********************************************************************************** 291 | * getSortedAges 292 | * - procedure for computing a sorted list of node ages (from most recent to root) 293 | * - assumes array given as input has 2x space for all internal nodes (also for auxiliary space for sorting) 294 | * - calls recursive procedure getSortedAges_rec on root 295 | * - returns 1 if successful, 0 otherwise 296 | 
***********************************************************************************/ 297 | int getSortedAges (LocusData* locusData, double* ageArray); 298 | 299 | 300 | 301 | /* a sequence of "get functions" for various attributes - AVOID USING !!! */ 302 | 303 | 304 | 305 | /*********************************************************************************** 306 | * getLocusDataLikelihood 307 | * - returns the log likelihood of the locus as last recorded (no new computations) 308 | ***********************************************************************************/ 309 | double getLocusDataLikelihood (LocusData* locusData); 310 | 311 | 312 | 313 | /*********************************************************************************** 314 | * getLocusRoot 315 | * - returns the root node id 316 | ***********************************************************************************/ 317 | int getLocusRoot (LocusData* locusData); 318 | 319 | 320 | 321 | /*********************************************************************************** 322 | * getNodeAge 323 | * - returns the age of a node 324 | ***********************************************************************************/ 325 | double getNodeAge (LocusData* locusData, int nodeId); 326 | 327 | 328 | 329 | /*********************************************************************************** 330 | * getNodeFather 331 | * - returns the id of the father of a node 332 | ***********************************************************************************/ 333 | int getNodeFather (LocusData* locusData, int nodeId); 334 | 335 | 336 | 337 | /*********************************************************************************** 338 | * getNodeSon 339 | * - returns the id of a son of a node (son = 0 -> left, son = 1 -> right) 340 | ***********************************************************************************/ 341 | int getNodeSon (LocusData* locusData, int nodeId, unsigned short son); 342 | 343 | 344 | 345 | 
/***************************************************************************************************************/ 346 | /****** END OF FILE ******/ 347 | /***************************************************************************************************************/ 348 | #endif 349 | -------------------------------------------------------------------------------- /src/MCMCcontrol.h: -------------------------------------------------------------------------------- 1 | #ifndef MCMC_CONTROL_FILE_H 2 | #define MCMC_CONTROL_FILE_H 3 | /** 4 | \file MCMCcontrol.h 5 | Read and processes and holds control information for MCMC 6 | 7 | This header file describes the interface functions for reading and 8 | processing a control file. 9 | 10 | */ 11 | 12 | # include "PopulationTree.h" 13 | 14 | /***************************************************************************************************************/ 15 | /****** DATA TYPES ******/ 16 | /***************************************************************************************************************/ 17 | 18 | 19 | 20 | 21 | /********* 22 | * UpdateStats used to hold information on MCMC update steps 23 | * (finetunes, acceptance rates, etc.) 
24 | *********/ 25 | typedef struct UPDATE_STAST{ 26 | double coalTime; 27 | double SPR; 28 | double migTime; 29 | double theta; 30 | double migRate; 31 | double *taus; 32 | double locusRate; 33 | double admix; 34 | double mixing; 35 | } UpdateStats; 36 | 37 | 38 | 39 | /***************************************************************************************************************/ 40 | /****** GLOBAL DATA STRUCTURES ******/ 41 | /***************************************************************************************************************/ 42 | 43 | 44 | 45 | /********* 46 | * i/o setup 47 | *********/ 48 | struct IO_SETUP { 49 | char seedFileName[NAME_LENGTH]; // name of random seed file 50 | char seqFileName[NAME_LENGTH]; // name of sequence file 51 | // char debugFileName[NAME_LENGTH]; // name of debug trace file 52 | char admixFileName[NAME_LENGTH]; // name of admixture trace file 53 | char rateFileName[NAME_LENGTH]; // name of locus-rate file 54 | char traceFileName[NAME_LENGTH]; // name of trace file (for MCMC trace output) 55 | char nodeStatsFileName[NAME_LENGTH]; // name of coalescent stats file (for MCMC model evaluation) 56 | char combStatsFileName[NAME_LENGTH]; // name of comb stats file (for MCMC model evaluation) 57 | int samplesPerLog; // number of samples for which to generate a log summary in stdout 58 | int logsPerLine; // number of sample logs per log line 59 | 60 | FILE* traceFile; // trace file 61 | FILE* debugFile; // debugging file 62 | FILE* admixFile; // admixture stats file 63 | FILE* coalStatsFile; // coalescent stats file 64 | FILE** nodeStatsFile; // coalescent stats files for nodes (one per pop) 65 | FILE* combStatsFile; // comb stats (coalescent & migration) file for all possible combs 66 | } ioSetup; // NOTE(review): this defines the object ioSetup inside a header, so every file including it gets its own (tentative) definition - confirm the build tolerates this (e.g. common symbols) or move to extern + one definition 67 | 68 | 69 | /********* 70 | * mcmc setup 71 | *********/ 72 | struct MCMC_SETUP { 73 | // general info 74 | int numParameters; // number of model parameters 75 | unsigned short useData; // flag which indicates whether to use 
sequence data or to sample from prior 76 | int randomSeed; // random seed used 77 | 78 | // sampling info 79 | int numSamples; // number of sampling iterations to perform 80 | int burnin; // number of iterations for burnin 81 | int sampleSkip; // number of samples to skip between each recorded sample 82 | int startMig; // number of generations to skip before starting to sample migrations 83 | int genetreeSamples; // number of gene tree updates per each population parameter update 84 | 85 | unsigned short allowAdmixture; // flag which is turned on when allowing admixed samples in model 86 | unsigned short mutRateMode; // flag which is turned on when constant mutation rates are assumed across loci 87 | double varRatesAlpha; // alpha for a Dirichlet distribution of variable rates across loci 88 | int genRateRef; // reference genealogy for updates in rate 89 | 90 | // finetune parameters 91 | UpdateStats finetunes; 92 | unsigned short doMixing; // flag which is turned on to allow usage of the mixing procedure (default is 1) 93 | int findFinetunes; //if == 1, dynamically search for finetunes 94 | int findFinetunesSamplesPerStep; //if using find-finetunes, this is the number of samples to take before adjusting finetune values 95 | int findFinetunesNumSteps; //if using find-finetunes, this is the number of steps before settling in 96 | 97 | double* printFactors; // array of factors in which to output parameters (allocated in readControlFile) 98 | // char traceFileTitle[500]; 99 | } mcmcSetup; 100 | 101 | 102 | 103 | /********* 104 | * data setup 105 | *********/ 106 | struct DATA_SETUP { 107 | int numLoci; // number of loci in data 108 | int numSamples; // number of total samples 109 | int maxSamples; // maximum number of samples for allocation purposes 110 | int numPopPartitions; // number of partitions to break each pop into for stats 111 | 112 | int* numSamplesPerPop; // number of samples per population 113 | char** sampleNames; // array of sample names - ordered according
to population order 114 | PopulationTree* popTree; // population tree 115 | } dataSetup; 116 | 117 | 118 | 119 | /********* 120 | * admixed samples - for admixture 121 | *********/ 122 | struct ADMIXED_SAMPLES { 123 | int number; // number of admixed samples 124 | int* samples; // list of admixed samples 125 | int** popPairs; // list of population pairs (one per sample) 126 | int* index; // index for each admixed sample (-1 for non-admixed) 127 | } admixed_samples; 128 | 129 | /***************************************************************************************************************/ 130 | /****** EXTERNAL FUNCTION DECLARATION ******/ 131 | /***************************************************************************************************************/ 132 | 133 | 134 | 135 | /*********************************************************************************** 136 | * initGeneralInfo 137 | * - initializes control and I/O settings to default settings 138 | * - returns 0 139 | ***********************************************************************************/ 140 | int initGeneralInfo(); 141 | 142 | 143 | 144 | /*********************************************************************************** 145 | * readControlFile 146 | * - reads control file and initializes control and I/O settings 147 | * - returns 0, if all OK, and -1 otherwise. 148 | ***********************************************************************************/ 149 | int readControlFile(char* controlFileName); 150 | 151 | 152 | 153 | /*********************************************************************************** 154 | * readSecondaryControlFile 155 | * - reads secondary control file with only general info and mig bands 156 | * - returns 0, if all OK, and -1 otherwise. 
157 | ***********************************************************************************/ 158 | int readSecondaryControlFile(char* controlFileName); 159 | 160 | 161 | 162 | /*********************************************************************************** 163 | * checkSettings 164 | * - tests validity and completeness of settings collected in control file(s) 165 | * - returns number of errors found 166 | ***********************************************************************************/ 167 | int checkSettings(); 168 | 169 | 170 | 171 | /*********************************************************************************** 172 | * printPriorSettings 173 | * - prints the prior settings onto standard output 174 | * - returns 0 175 | ***********************************************************************************/ 176 | int printPriorSettings(); 177 | 178 | 179 | 180 | /*********************************************************************************** 181 | * finalizeNumParameters 182 | * - determines number of parameters in the model, and finalizes printFactors array 183 | * - returns 0 184 | ***********************************************************************************/ 185 | int finalizeNumParameters(); 186 | 187 | 188 | 189 | /***************************************************************************************************************/ 190 | /****** END OF FILE ******/ 191 | /***************************************************************************************************************/ 192 | #endif 193 | -------------------------------------------------------------------------------- /src/MultiCoreUtils.h: -------------------------------------------------------------------------------- 1 | #ifndef MultiCoreUtils 2 | #define MultiCoreUtils 3 | 4 | #ifdef ENABLE_OMP_THREADS 5 | /* NOTE(review): the header name of the #include below was lost in extraction - presumably the OpenMP runtime header (omp.h); confirm against the repository */ 6 | #include 7 | 8 | #define THREAD_SCHEDULING_STRATEGY static //use static, dynamic or guided. see openmp help for difference.
you can also set: dynamic,100 if you wish to specify the chunk size. DEFAULT = static 9 | 10 | /* flags to disable or enable MT on specific methods 11 | * comment out a DEFINE to disable. 12 | */ 13 | 14 | #define THREAD_UpdateGB_InternalNode 15 | #define THREAD_UpdateGB_MigrationNode 16 | #define THREAD_UpdateGB_MigSPR 17 | #define THREAD_UpdateTau 18 | #define THREAD_UpdateMigRates 19 | #define THREAD_mixing 20 | //#define THREAD_UpdateTheta 21 | #define THREAD_UpdateSampleAge 22 | 23 | #else 24 | extern void omp_set_num_threads(int n); 25 | extern int omp_get_max_threads(); 26 | extern int omp_get_thread_num(); 27 | #endif 28 | 29 | 30 | //#define RECORD_METHOD_TIMES 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /src/PopulationTree.h: -------------------------------------------------------------------------------- 1 | #ifndef POPULATION_TREE_H 2 | #define POPULATION_TREE_H 3 | 4 | /** 5 | \file PopulationTree.h 6 | Data Structures to implement handling a population tree with migration bands 7 | 8 | Contains the relevant data structures and procedure implementations 9 | for handling a population tree with migration bands. 
10 | */ 11 | 12 | 13 | #include "utils.h" 14 | /***************************************************************************************************************/ 15 | /****** EXTERNAL CONSTANTS ******/ 16 | /***************************************************************************************************************/ 17 | 18 | 19 | 20 | /***************************************************************************************************************/ 21 | /****** DATA TYPES ******/ 22 | /***************************************************************************************************************/ 23 | 24 | 25 | 26 | /*********************************************************************************** 27 | * GammaPrior 28 | * - Holds parameters for gamma prior 29 | ***********************************************************************************/ 30 | typedef struct GAMMA_PRIOR { 31 | double sampleStart; // mean for sample start (sampling 10% below and above) 32 | double alpha; // alpha for gamma prior 33 | double beta; // beta for gamma prior 34 | } GammaPrior; 35 | 36 | 37 | 38 | /*********************************************************************************** 39 | * MigrationBandSet 40 | * - Holds relevant info for the set of migration bands active at a certain time interval 41 | * at their target population 42 | ***********************************************************************************/ 43 | typedef struct MIGRATION_BAND_SET MigrationBandSet; 44 | 45 | struct MIGRATION_BAND_SET { 46 | int numMigBands; // length of migBandIds[] array 47 | int* migBandIds; // an array of migration band id's 48 | double rate; // NOTE(review): undocumented - presumably the combined migration rate of the bands in this set; confirm 49 | double age; // start time of band set 50 | MigrationBandSet* next; // next migration band set (back in time) 51 | MigrationBandSet* prev; // previous migration band set (next in time) 52 | }; 53 | 54 | 55 | 56 | /*********************************************************************************** 57 | * MigrationBand 58 | * - Holds relevant info
for migration band 59 | ***********************************************************************************/ 60 | typedef struct MIGRATION_BAND { 61 | int sourcePop; // id of source population 62 | int targetPop; // id of target population 63 | double migRate; // migration rate for band 64 | // double upperBound; // upper bound for uniform prior 65 | GammaPrior migRatePrior; // parameters for gamma-prior of migration rate - NOT IN USE !! 66 | double startTime; // start time for migration band 67 | double endTime; // end time for migration band 68 | MigrationBandSet* firstSet; // set of migration bands right after this one starts 69 | MigrationBandSet* lastSet; // set of migration bands right before this one ends 70 | } MigrationBand; 71 | 72 | 73 | 74 | /*********************************************************************************** 75 | * Population 76 | * - Holds relevant info for population 77 | ***********************************************************************************/ 78 | typedef struct POPULATION Population; 79 | 80 | struct POPULATION { 81 | char name[STRING_LENGTH]; // name of population 82 | int id; // population identifier 83 | int numSamples; // number of samples for current population (leaf in tree). 
84 | double age; // start time for population (for ancestral population) 85 | double sampleAge; // age of samples in current population - used for extinct populations 86 | unsigned short updateSampleAge; // set to 1 iff algorithm is to update the sample age 87 | double theta; // theta parameter for population 88 | GammaPrior thetaPrior; // parameters for gamma-prior of theta 89 | GammaPrior agePrior; // parameters for gamma-prior of age (for ancestral population) 90 | Population* father; // father population 91 | Population* sons[2]; // two child populations (for ancestral population) 92 | unsigned short* isAncestralTo; // a boolean array indicating all descendant populations 93 | 94 | // migration bands 95 | int numInMigBands; // length of inMigBands[] array 96 | int* inMigBands; // array of in migration band id's 97 | int numOutMigBands; // length of outMigBands[] array 98 | int* outMigBands; // array of out migration band id's 99 | MigrationBandSet* migBandSequence; // pointer to first migration band set in the sequence 100 | // of (incoming) sets active along the population 101 | }; 102 | 103 | 104 | 105 | /*********************************************************************************** 106 | * PopulationTree 107 | * - Holds relevant info for population tree 108 | ***********************************************************************************/ 109 | typedef struct POPULATION_TREE { 110 | int numCurPops; // number of current populations 111 | int numPops; // number of populations in tree ( = 2*numCurPops-1 ) 112 | int numMigBands; // number of migration bands in tree 113 | int rootPop; // id of root population 114 | Population** pops; // an array of pointers to populations 115 | MigrationBand* migBands; // an array of migration bands 116 | MigrationBandSet* migBandSetStackTop; // pointer to top of MigrationBandSet stack 117 | 118 | Population* popArray; // pointer to allocated memory for all populations 119 | MigrationBandSet* migBandSetArray; // pointer to
allocated memory for MigrationBandSets 120 | unsigned short* isAncestralArray; // pointer to allocated memory for isAncestralTo[] arrays 121 | int* migBandIdArray; // pointer to allocated memory for in/out migband arrays for pops 122 | } PopulationTree; 123 | 124 | 125 | 126 | /***************************************************************************************************************/ 127 | /****** EXTERNAL FUNCTION DECLARATIONS ******/ 128 | /***************************************************************************************************************/ 129 | 130 | 131 | 132 | /*********************************************************************************** 133 | * createPopTree 134 | * - allocates basic memory for population tree (no migration bands yet) 135 | * - returns pointer to newly allocated population tree 136 | ***********************************************************************************/ 137 | PopulationTree* createPopTree(int numPops); 138 | 139 | 140 | 141 | /*********************************************************************************** 142 | * initMigrationBands 143 | * - initializes data structures for migration bands in population tree (including allocating some memory) 144 | * - sets start and end times 145 | * - for each population creates a timed-sequence of migration band sets 146 | * - returns 0 147 | ***********************************************************************************/ 148 | int initMigrationBands(PopulationTree* popTree); 149 | 150 | 151 | 152 | /*********************************************************************************** 153 | * freePopTree 154 | * - frees all memory allocated for population tree 155 | * - returns 0 156 | ***********************************************************************************/ 157 | int freePopTree(PopulationTree* popTree); 158 | 159 | 160 | 161 | /*********************************************************************************** 162 | * printPopulationTree 163 | * - 
prints population tree 164 | ***********************************************************************************/ 165 | void printPopulationTree(PopulationTree* popTree, FILE* stream, int printTauTheta); 166 | 167 | 168 | 169 | /*********************************************************************************** 170 | * getPopIdByName 171 | * - returns a population id of a population given its name (-1 if no match is found) 172 | * - used primarily to decode migration bands as specified in control file 173 | * (called by readControlFile). 174 | ***********************************************************************************/ 175 | int getPopIdByName(PopulationTree* popTree, const char* name); 176 | 177 | 178 | 179 | /*********************************************************************************** 180 | * samplePopParameters 181 | * - samples population parameters according to prior average (only thetas and taus) 182 | * - each parameter is sampled uniformly in the interval [0.9,1.1]*mean 183 | * (where mean is the prior mean for that parameter) 184 | * - makes sure a population's age does not exceed its father's 185 | * - initializes all migration rates to 0. 
186 | * - returns 0 187 | ***********************************************************************************/ 188 | int samplePopParameters(PopulationTree* popTree); 189 | 190 | 191 | 192 | /*********************************************************************************** 193 | * sampleMigRates 194 | * - samples migration rates for all mig bands 195 | * - each rate is sampled uniformly in the interval [0.9,1.1]*mean 196 | * (where mean is the prior mean for that parameter) 197 | * - returns 0 198 | ***********************************************************************************/ 199 | int sampleMigRates(PopulationTree* popTree); 200 | 201 | 202 | 203 | /*********************************************************************************** 204 | * updateMigrationBandTimes 205 | * - updates start and end times of given migration band according to ages of populations 206 | * - returns 0 if no change was made, and 1 otherwise 207 | ***********************************************************************************/ 208 | unsigned short updateMigrationBandTimes(PopulationTree* popTree, int migBand); 209 | 210 | 211 | 212 | /*********************************************************************************** 213 | * computeMigrationBandTimes 214 | * - traverses all migration bands and sets their start and end times according to 215 | * times of target and source populations. 
216 | * - returns the number of migration bands with zero span 217 | ***********************************************************************************/ 218 | int computeMigrationBandTimes(PopulationTree* popTree); 219 | 220 | 221 | 222 | /***************************************************************************************************************/ 223 | /****** END OF FILE ******/ 224 | /***************************************************************************************************************/ 225 | #endif 226 | -------------------------------------------------------------------------------- /src/README.md: -------------------------------------------------------------------------------- 1 | This directory contains the source code for G-PhoCS: 2 | * _GPhoCS_ - main file containing the root functions that implement the MCMC sampling algorithm. 3 | * _MCMCcontrol_ - module for reading and parsing a control file. 4 | * _AlignmentProcessor_ - module for reading and processing alignments from the sequence file. 5 | * _PopulationTree_ - module for the population tree data structure. 6 | * _LocusDataLikelihood_ - module for the data structure used to compute the probability of the data given a local genealogy - P(X|G). 7 | * _GenericTree_ - module for a generic binary tree data structure. 8 | * _patch_ - file containing functions that implement computations for the probability of the local genealogy given the parameterized population phylogeny - P(G|M). 9 | * _utils_ - a collection of mathematical utility functions. 10 | 11 | Additional Utility Files: 12 | * _readTrace.c_ - program for reading and processing the output trace of G-PhoCS. 13 | * _AlignmentMain.c_ - utility functions for computing various statistics on the input alignments.
14 | -------------------------------------------------------------------------------- /src/omp_stub.c: -------------------------------------------------------------------------------- 1 | #ifndef ENABLE_OMP_THREADS 2 | //_OPENMP -- single-threaded no-op stand-ins for the OpenMP runtime routines, compiled only when ENABLE_OMP_THREADS is not defined 3 | 4 | void omp_set_dynamic(int n){} 5 | void omp_set_num_threads(int n){} 6 | int omp_get_num_threads(){return 1;} 7 | int omp_get_thread_num(){return 0;} 8 | int omp_get_max_threads(){return 1;} 9 | 10 | #endif 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /src/patch.h: -------------------------------------------------------------------------------- 1 | /* 2 | * patch.h 3 | * 4 | * Created on: Feb 4, 2017 5 | * Author: ron 6 | */ 7 | 8 | #ifndef SRC_PATCH_H_ 9 | #define SRC_PATCH_H_ 10 | 11 | 12 | #ifndef NULL 13 | #define NULL ((void *) 0) 14 | #endif 15 | 16 | 17 | #define MAX_MIG_BANDS 100 // max migration bands in the population tree 18 | #define MAX_MIGS 10 // max migration events per genealogy 19 | #define NSPECIES 20 // max # of species 20 | #define NS 200 // max # of sequences 21 | #define OLDAGE 999 // upper bound on age (can be extended...)
22 | #define MAX_EVENTS (NS + 2*NSPECIES + 3*MAX_MIGS) 23 | #define NUM_DELTA_STATS_INSTANCES 2 24 | 25 | #define DEBUG_NODE_CHANGE_NOT 26 | #define DEBUG_RUBBERBAND_NOT 27 | /* END OF PATCH.C DEFS */ 28 | 29 | 30 | /***************************************************************************************************************/ 31 | /****** GLOBAL DATA STRUCTURES ******/ 32 | /***************************************************************************************************************/ 33 | 34 | 35 | 36 | 37 | typedef struct { 38 | int accepted; 39 | double datastate_dataLogLikelihood; 40 | double datastate_logLikelihood; 41 | double finetune; 42 | double genLogLikelihood; 43 | int gen; 44 | int id; // some debug parameters for multi thread debugging 45 | int result_id; // some debug parameters for multi thread debugging 46 | } UpdateGB_InternalNode_ReturnData; 47 | 48 | typedef struct { 49 | double coal_stats[2*NSPECIES-1], mig_stats[MAX_MIG_BANDS]; 50 | int num_coals[2*NSPECIES-1], num_migs[MAX_MIG_BANDS]; 51 | } GENETREE_STATS; 52 | 53 | /* genetree_stats_delta 54 | Holds difference in stats for populations affected by changes in a genetree. 55 | Not generally used for changes in theta, mig_rates. 56 | For easy and fast update if changes are accepted. 57 | changed_events holds the ids of all events in the interval between original and 58 | new position of event. All these intervals have a change in number of lineages. 
59 | */ 60 | typedef struct { 61 | int original_event; // id of event describing original placing of node 62 | int updated_event; // id of event describing updated (new) placing of node 63 | int num_lin_delta; // the difference in lineage number for all events affected (typically +1 or -1) 64 | int num_changed_events; // number of events affected by change 65 | int changed_events[MAX_EVENTS]; // an array of ids of events affected by change 66 | int num_pops_changed; // number of populations affected by change 67 | int pops_changed [2*NSPECIES-1]; // an array of populations affected by change 68 | int num_mig_bands_changed; // number of migration bands affected by change 69 | int mig_bands_changed[MAX_MIG_BANDS]; // an array of migration bands affected by change 70 | double coal_stats_delta[2*NSPECIES-1]; // difference in coalescence statistics per population affected 71 | double mig_stats_delta[MAX_MIG_BANDS]; // difference in migration statistics per migration band affected 72 | } GENETREE_STATS_DELTA; 73 | 74 | /* rubberband_migs 75 | structure for holding data on migration events out of rubber-banded populations 76 | which are affected by rubber-band operation (per gen). 77 | num_moved_events - total number of affected events. (in/out migrations and start/end of migration bands). 78 | orig_events - original copies of events. 79 | new_events - new copies of events. 80 | pops - population in which each event resides. 81 | new_ages - age of each new event. 82 | 83 | rubberband_migs is an array of size numLoci allocated in GetMem(). 84 | */ 85 | 86 | typedef struct { 87 | 88 | int num_moved_events, orig_events[MAX_MIGS+MAX_MIG_BANDS], new_events[MAX_MIGS+MAX_MIG_BANDS], pops[MAX_MIGS+MAX_MIG_BANDS]; 89 | double new_ages[MAX_MIGS+MAX_MIG_BANDS]; 90 | 91 | } RUBBERBAND_MIGS; 92 | 93 | /* mig_spr_stats 94 | holds statistics for the SPR sampling operation with migration. 95 | In use in UpdateGB_MigSPR and in traceLineage.
96 | */ 97 | typedef struct { 98 | int father_event_old, father_event_new; 99 | int father_pop_new; 100 | int target; 101 | int num_old_migs, num_new_migs; 102 | int old_migs[MAX_MIGS], new_migs_in[MAX_MIGS], new_migs_out[MAX_MIGS], new_migs_bands[MAX_MIGS]; 103 | double new_migs_ages[MAX_MIGS]; 104 | double genetree_delta_lnLd[2]; 105 | } MIG_SPR_STATS; 106 | 107 | 108 | typedef struct { 109 | GENETREE_STATS genetree_stats_check; 110 | GENETREE_STATS_DELTA genetree_stats_delta[NUM_DELTA_STATS_INSTANCES]; 111 | MIG_SPR_STATS mig_spr_stats; 112 | double genDeltaLogLikelihood; 113 | double genLogLikelihood; 114 | RUBBERBAND_MIGS rubberband_migs; 115 | int mig_conflict_log; 116 | } Locus_SuperStruct; 117 | Locus_SuperStruct *locus_data; 118 | 119 | //double averageMigTimes[MAX_MIG_BANDS]; //Unused 120 | 121 | /* node surrogates. 122 | */ 123 | int** nodePops; // a 2D array (numLoci X numNodes) for populations per genealogy node. 124 | int** nodeEvents; // a 2D array (numLoci X numNodes) for event ids per genealogy node. 125 | 126 | struct ADMIXTURE_STATUS { 127 | int numSampledLoci; // number of loci for which to show admixture status 128 | int* sampledLoci; // array of sampled loci 129 | double** sampleLocusAdmixRate; // array of admixture rates (up to the current point of sampling) for each locus and each admixed sample 130 | int* admixtureCounts; // number of loci in which sample is in alternative population 131 | double* admixtureCoefficients; // estimated coefficients for all samples 132 | } admixture_status; 133 | 134 | /* genetree_migs is a struct which contains information about 135 | migration events in a specific genealogy. 136 | Array of structs (of length numLoci) is allocated in GetMem().
137 | */ 138 | struct GENETREE_MIGS { 139 | int num_migs; // number of migration events in genetree 140 | int living_mignodes[MAX_MIGS]; // array of indices for mignodes for dynamic managing of migration nodes 141 | struct MIGNODE{ 142 | int gtree_branch; // id of gene tree node below the migration node 143 | int migration_band; // id of migration band relevant to this node 144 | int target_pop, source_pop; // target and source populations of migration event (backwards view) 145 | int target_event, source_event; // ids of events corresponding to this migration node. 146 | double age; // time of event 147 | } mignodes[MAX_MIGS]; 148 | }* genetree_migs; 149 | 150 | 151 | /* event chain 152 | Each event corresponds to a time band within a population where no events 153 | (coalescence/migration) take place. An event is attributed with one of the EventType values 154 | corresponding to the event taking place at the end of the interval. Events are 155 | sorted in a list according to chronology within a population. 156 | Actual array of events is allocated in GetMem() 157 | */ 158 | 159 | typedef enum event_type {COAL, IN_MIG, OUT_MIG, MIG_BAND_START, MIG_BAND_END, SAMPLES_START, END_CHAIN, DUMMY} EventType; 160 | typedef struct EVENT{ 161 | EventType type; 162 | int node_id, next, prev; 163 | double elapsed_time; // time from last event 164 | int num_lineages; // number of lineages before the event 165 | } Event; 166 | struct EVENT_CHAIN{ 167 | int total_events; // total number of events pre-allocated to this chain 168 | int first_event[2*NSPECIES-1]; // pointers to first event for every population 169 | int last_event[2*NSPECIES-1]; // pointers to last event for every population 170 | int free_events; // pointer to a chain of free events for use.
Always have at least one free event 171 | Event* events; 172 | } *event_chains; 173 | 174 | 175 | /* genetree stats 176 | holds relevant statistics for fast computation of probability of tree 177 | given model parameters (split times, pop sizes and migration rates). 178 | Holds statistics for every population in the species tree. 179 | -- NEED TO ADD DOCUMENTATION FOR THIS -- !!! 180 | genetree_stats[gen] holds relevant information for genetree of genealogy 'gen'. 181 | array allocated in GetMem(). 182 | genetree_stats_total holds sum of statistics for all loci. coal_stats here 183 | considers also all gen-specific heredity factors (but not thetas). 184 | */ 185 | 186 | GENETREE_STATS *genetree_stats, *genetree_stats_total_partitioned, genetree_stats_total , genetree_stats_total_check; 187 | 188 | /* genetree stats flat 189 | holds relevant statistics for fast computation of probability of tree 190 | given a null model (single population). 191 | array allocated in GetMem(). 192 | */ 193 | struct GENETREE_STATS_FLAT{ 194 | double coal_stats_flat, mig_stats_flat; 195 | int num_coals_total, num_migs_total; 196 | double* sortedAgesArray; 197 | } genetree_stats_flat; 198 | 199 | /* genetree node stats 200 | holds relevant statistics for coalescent distribution across populations 201 | array allocated in GetMem().
202 | */ 203 | struct GENETREE_NODE_STATS{ 204 | /*** memory allocation ***/ 205 | double* doubleArray; 206 | double** doublePtrArray; 207 | double*** doublePtrPtrArray; 208 | int* intArray; 209 | int** intPtrArray; 210 | 211 | /*** auxiliary matrices for computation ***/ 212 | int** lcaMatrix; // matrix of LCAs for all pairs of leaves 213 | int* leafArray; // array of leaves for LCA computation 214 | int* firstNodesInPop; // array with the first coal node in each pop 215 | double* firstNodesAges; // array with the age of the first coal node in each pop 216 | double* nodeAges; // array with the ages of all internal nodes 217 | 218 | /*** main stat matrices ***/ 219 | double*** probCoalMatrix; // Prob[sample pair coalesce in pop] 220 | double*** probFirstCoalMatrix; // Prob[sample pair is the first coalescence in pop] 221 | double*** coalTimeMatrix; // mean coal time for coalescence of pair (given they coalesce in pop) 222 | } genetree_node_stats; 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | /******************************************************************************************************/ 233 | /****** FUNCTION DECLARATIONS ******/ 234 | /******************************************************************************************************/ 235 | 236 | 237 | int GetMem (void); 238 | int GetRandomGtree(GenericBinaryTree* tree, int gen); 239 | int adjustRootEvents(); 240 | 241 | 242 | int analyzeGenetreeFile(char* genetree_file); 243 | int writeMScommandLine(char* outfile); 244 | int findLastMig(int gen, int node_id, double time); 245 | int findFirstMig(int gen, int node_id, double time); 246 | int Coalescence1Pop (PopulationTree* popTree, GenericBinaryTree* tree, int gen, int pop, int* livingLineages); 247 | int removeEvent(int gen, int event); 248 | 249 | // auxiliary functions 250 | int findInconsistency(int gen, int node); 251 | int getSptreeNodeByName(const char* name); 252 | int orderByAge(int subtree_root, int* ordered_nodes); 253 | int
getLineagesAtInterval(int gen, int start_event, int pop, int exc_node, int* out_array); 254 | int getEdgesForTimePop(int gen, double time, int pop, int exc_node, int* out_array); 255 | int populationPostOrder(int pop, int* ordered_pops); 256 | 257 | // event chain functions 258 | int checkAll(); 259 | int checkGtreeStructure(int gen); 260 | int synchronizeEvents(int gen); 261 | int printEventChains(FILE* stream, int gen); 262 | int createEvent(int gen, int pop, double age); 263 | int createEventBefore(int gen, int pop, int event, double elapsed_time); 264 | int constructEventChain(int gen); 265 | int computeFlatStats(); 266 | int computeNodeStats(); 267 | int computeTotalStats(); 268 | double recalcStats(int gen, int pop); 269 | int recalcStats_partitioned(int gen, int pop); 270 | int computeGenetreeStats(int gen); 271 | int computeGenetreeStats_partitioned(void); 272 | double gtreeLnLikelihood(int gen); 273 | double computeDeltaLnLd(int gen, int instance); 274 | int computeMigStatsDelta(int instance, double bottom_age, int bottom_pop, double top_age, int num_lins_delta , int gen); 275 | int computeCoalStatsDelta(int instance, int gen, int bottom_event, int bottom_pop, int top_event, int num_lins_delta); 276 | double considerEventMove(int gen, int instance, int event_id, int source_pop, double original_age, int target_pop, double new_age); 277 | int acceptEventChainChanges(int gen, int instance); 278 | int rejectEventChainChanges(int gen, int instance); 279 | double rubberBandRipple(int gen, int do_or_redo); 280 | double rubberBand(int gen, int pop, double static_point, double moving_point, double factor, unsigned short postORpre, int* out_num_events); 281 | int replaceMigNodes(int gen, int node); 282 | int traceLineage(int gen, int node, int reconnect); 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | #endif /* SRC_PATCH_H_ */ 293 | -------------------------------------------------------------------------------- /src/readTrace.c: 
-------------------------------------------------------------------------------- 1 | /** 2 | \file readTrace.c 3 | Post-run analysis of trace output file 4 | */ 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | 14 | static struct option long_options[] = 15 | { 16 | /* These options don't set a flag. 17 | We distinguish them by their indices. */ 18 | {"block-size", required_argument, 0, 'b'}, 19 | {"discard", required_argument, 0, 'd'}, 20 | {"sub-sampling", required_argument, 0, 's'}, 21 | {"help", no_argument, 0, 'h'}, 22 | {0, 0, 0, 0} 23 | }; 24 | 25 | void printHelp() { 26 | printf("-b, --block-size SIZE Blocksize\n"); 27 | printf("-d, --discard NUMBER Number of samples from to discard from beginning of file\n"); 28 | printf("-h, --help This help page\n"); 29 | // printf("-s, --subsample NUMBER Subsampling, sample every x lines\n"); 30 | // printf("\nReport bugs to \n"); 31 | } 32 | 33 | void printUsage(char *filename) { 34 | printf("Usage: %s [options]\n", filename); 35 | printHelp(); 36 | } 37 | 38 | int main (int argc, char*argv[]) { 39 | FILE *traceFile = NULL; 40 | const int bufferLen = 4096; 41 | char **colNames; 42 | char line[bufferLen], *token, tempStr[bufferLen]; 43 | long double *sums; 44 | float temp; 45 | char delims[] = " \t"; 46 | int numCols, ancestorNum, i, numLines, count, blockCount; 47 | int discardXFromBeginning=0; 48 | int blockSize=-1; 49 | char valueLine[bufferLen]; 50 | char titleLine[bufferLen]; 51 | char formatStr[bufferLen]; 52 | double **data; 53 | 54 | int *width; 55 | 56 | int option_index; 57 | int c; 58 | 59 | opterr = 0; 60 | 61 | while (1) 62 | { 63 | option_index = 0; 64 | c = getopt_long(argc, argv, "b:d:s:ht:", long_options, &option_index); 65 | 66 | if (c == -1) 67 | break; 68 | 69 | switch (c) 70 | { 71 | case 'b': //Block size 72 | blockSize = atoi(optarg); 73 | break; 74 | case 'd': //Discard # samples from beginning 75 | discardXFromBeginning = atoi(optarg); 76 | 
break; 77 | // case 's': //Subsampling 78 | // subSample = atoi(optarg); 79 | // break; 80 | case 'h': 81 | printUsage(argv[0]); 82 | printHelp(); 83 | return 0; 84 | break; 85 | case '?': 86 | if ((optopt == 'b') || (optopt == 'd') || (optopt == 's')) 87 | fprintf (stderr, "Option -%c requires an argument.\n", optopt); 88 | else if (isprint (optopt)) 89 | fprintf (stderr, "Unknown option `-%c'.\n", optopt); 90 | else 91 | fprintf (stderr, 92 | "Unknown option character `\\x%x'.\n", 93 | optopt); 94 | return 1; 95 | default: 96 | abort (); 97 | } 98 | } 99 | 100 | 101 | if(argv[optind] == NULL) { 102 | fprintf(stderr, "Missing trace filename.\n"); 103 | printUsage(argv[0]); 104 | return 1; 105 | } 106 | 107 | //Open trace file 108 | traceFile = fopen(argv[optind], "r"); 109 | //Verify trace file was opened successfully 110 | if(traceFile == NULL) { 111 | fprintf(stderr, "Could not find trace file '%s' specified.\n", argv[optind]); 112 | printUsage(argv[0]); 113 | return 1; 114 | } 115 | 116 | numLines = 0; //Determine number of lines in the file 117 | if(NULL == fgets(line, bufferLen, traceFile)) // discard header 118 | { 119 | fprintf(stderr, "Unable to discard header of the trace file\n"); 120 | return -1; 121 | } 122 | while(NULL != fgets(line, bufferLen, traceFile)) 123 | { 124 | /** if(NULL == fgets(line, bufferLen, traceFile)) 125 | { 126 | fprintf(stderr, "Unable to count lines of the trace file\n"); 127 | return -1; 128 | } 129 | **/ 130 | numLines++; 131 | } 132 | //If user didn't specify a block size, then set the block size to be the number of lines in the file 133 | if (blockSize < 0) { 134 | blockSize = numLines; 135 | } 136 | if(discardXFromBeginning >= numLines) { 137 | fprintf(stderr, "%d lines specified to discard, but trace file contains only %d lines.\n", discardXFromBeginning , numLines); 138 | return 1; 139 | } 140 | fseek(traceFile, 0, SEEK_SET); 141 | //Get first line of trace file 142 | if(NULL == fgets(line, bufferLen, traceFile)) 143 | { 
144 | fprintf(stderr, "Unable to get the first line of the trace file.\n" ); 145 | return -1; 146 | } 147 | 148 | strcpy(tempStr, line); 149 | 150 | //Read each entry in the first line of the trace file 151 | // to determine the number of columns 152 | token = strtok(line, delims); 153 | numCols = 0; 154 | while (token != NULL) { 155 | // printf("column %s.\n",token); 156 | numCols++; 157 | token = strtok(NULL, delims); 158 | } 159 | // remove first column from considerations 160 | numCols--; 161 | 162 | // printf("Reading trace for file %s with %d columns.\n",argv[optind], numCols); 163 | 164 | 165 | width = (int*)malloc(sizeof(int) * numCols); 166 | //Allocate space to hold each float from the file 167 | data = (double**)malloc(sizeof(double*) * (((numLines-1)/blockSize)+1)); 168 | for(i=0;i<=((numLines-1)/blockSize);i++) { 169 | data[i] = (double*)malloc(sizeof(double) * numCols); 170 | } 171 | //Allocate space for column names 172 | colNames = (char**)malloc(sizeof(char*) * numCols); 173 | //Allocate memory for sums of numbers read in from file 174 | sums = (long double*)malloc(sizeof(long double) * numCols); 175 | 176 | if(tempStr[strlen(tempStr)-1] == '\n') //Strip newline off title line 177 | tempStr[strlen(tempStr)-1] = ' '; 178 | 179 | //Parse each column name (header) from the line we read in 180 | token = strtok(tempStr, delims); 181 | token = strtok(NULL, delims); 182 | for(i=0;i width[ancestorNum]) 225 | width[ancestorNum] = strlen(tempStr); 226 | //Save the average we just computed to memory so we can display it later once we know all column widths 227 | data[blockCount][ancestorNum] = sums[ancestorNum] / blockSize; 228 | //Reset the sums 229 | sums[ancestorNum] = 0; 230 | } 231 | 232 | //read in next entry 233 | token = strtok(NULL, delims); 234 | } 235 | if(count == blockSize) { 236 | count = 0; 237 | blockCount++; 238 | //printf("block count %d\n",blockCount); 239 | } 240 | 241 | //Update line number 242 | } 243 | 244 | //All trace file in memory 
245 | fclose(traceFile); 246 | 247 | if(count > 0) { 248 | for(ancestorNum=0; ancestorNum < numCols; ancestorNum++) { 249 | //Format the float as a string 250 | sprintf(tempStr, "%.6Lf ", sums[ancestorNum] / count); 251 | //If this new float (in characters) is longer than any previous, update the width this column will display at 252 | if(strlen(tempStr) > width[ancestorNum]) { 253 | width[ancestorNum] = strlen(tempStr); 254 | //Save the average we just computed to memory so we can display it later once we know all column widths 255 | data[blockCount][ancestorNum] = sums[ancestorNum] / count; 256 | } 257 | } 258 | blockCount++; 259 | } 260 | 261 | sprintf(valueLine, "%s", ""); 262 | sprintf(titleLine, "%s", ""); 263 | //Print results to screen 264 | for(i=0; i < blockCount; i++) { 265 | for(ancestorNum=0; ancestorNum < numCols; ancestorNum++) { 266 | //Just as before, format the float as a string w/ precision of 6 267 | sprintf(tempStr, "%.6f ", data[i][ancestorNum]); 268 | //Create the format string that takes the column width into account 269 | sprintf(formatStr, "%%s%%-%ds", width[ancestorNum]); 270 | //Add the float to the string to be printed formatted for the right width 271 | sprintf(valueLine, formatStr, valueLine, tempStr); 272 | //Add the title to the string to be printed for the right width 273 | if(i==0) //Only create the title once 274 | sprintf(titleLine, formatStr, titleLine, colNames[ancestorNum]); 275 | if((((ancestorNum+1) % 90) == 0) || ancestorNum == numCols-1) { 276 | if(i==0)//Only print the title on the first line 277 | printf("%s\n", titleLine); 278 | printf("%s\n", valueLine); //Print the line of formatted values 279 | sprintf(titleLine, "%s", ""); //Clear the titleLine & valueLine strings 280 | sprintf(valueLine, "%s", ""); 281 | } 282 | } 283 | } 284 | 285 | 286 | printf("\n"); 287 | 288 | 289 | //Completed successfully 290 | return 0; 291 | } 292 | -------------------------------------------------------------------------------- 
/src/utils.c: -------------------------------------------------------------------------------- 1 | /** 2 | \file utils.c 3 | Utility definitions & functions (miscellaneous). 4 | 5 | Contains some utility functions and definitions. 6 | 7 | */ 8 | #include "MultiCoreUtils.h" 9 | 10 | #include "utils.h" 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | 21 | int debug; 22 | int verbose; 23 | char parseFileDelims[] = " \t\n"; 24 | 25 | 26 | 27 | /*********************************************************************************** 28 | * logSumOfExponents 29 | * - takes an array of (log) values and returns the log of the sums of the exponents of the log-values 30 | * - does this by exponentiating only ratios, to prevent under/over flow. 31 | * - indsToSum is a boolean array indicating for each index whether to sum it or not. (if NULL, then sum all) 32 | ***********************************************************************************/ 33 | double logSumOfExponents(double* logArray, int arrayLength, unsigned short* indsToSum) { 34 | 35 | double MAX_RATIO = 10.0; 36 | int ind, maxInd; 37 | 38 | double maxLog; // records maximum value of array 39 | double sumExpRatios; // sum of the ratios e^{A[ind]-maxLog} 40 | double diff; // difference maxLog - A[ind] 41 | 42 | if(arrayLength <= 0) { 43 | return logArray[0]; 44 | } 45 | 46 | // perform first loop to figure out maxLog (and potentially avoid taking too many exponents 47 | maxLog = 0.0; 48 | maxInd = -1; 49 | for(ind=0; ind maxLog) { 54 | maxLog = logArray[ind]; 55 | maxInd = ind; 56 | } 57 | } 58 | 59 | if(maxInd < 0) { 60 | // printf("\n\n----- no valid values to sum in log array -----\n\n"); 61 | return -1e34; 62 | } 63 | 64 | sumExpRatios = 1; // contribution of maxInd 65 | for(ind=0; ind 0) { 112 | return length; 113 | } 114 | } 115 | 116 | // finish reading the srting, and update length, but not buffer 117 | //We have to read until a whitespace, 
just don't add to the buffer 118 | while(!isspace(fgetc(stream)) && !feof(stream)) { 119 | length++; 120 | } 121 | 122 | return length; 123 | } 124 | /** end of readStringFromFile **/ 125 | 126 | 127 | /*********************************************************************************** 128 | * mergeArrays 129 | * - merges two sorted portions of a single array (first part should typically be shorter than second) 130 | * - borderPoint indicates the start point of the second array 131 | * - tmpSpace array is provided as a temporary workspace (of size borderPoint) 132 | ***********************************************************************************/ 133 | void mergeArrays(double array[], int numEntries, int borderPoint, double tmpArray[]) { 134 | 135 | int i,j; 136 | 137 | // copy first half onto temp space 138 | for(i=0; iarray[i]) { 179 | swap2(array[j],array[i]); 180 | } 181 | } 182 | } 183 | return sum; 184 | } 185 | 186 | i = numEntries/2; 187 | sum = mergeSort(array , i , tmpArray); 188 | sum += mergeSort(array+i, numEntries-i , tmpArray); 189 | 190 | mergeArrays(array, numEntries, i, tmpArray); 191 | 192 | return sum; 193 | } 194 | 195 | void resetBooleanArray1(unsigned short* booleanArray, int arrayLength){ 196 | int i; 197 | for(i=0; i 200) { 207 | resetBooleanArray1(booleanArray,arrayLength); 208 | } else { 209 | memcpy(booleanArray, zeros, arrayLength*sizeof(unsigned short)); 210 | } 211 | return; 212 | } 213 | 214 | void turnOnBooleanArray(unsigned short* booleanArray, int arrayLength) { 215 | for( arrayLength--; arrayLength>=0; arrayLength--) { 216 | booleanArray[arrayLength] = 1; 217 | } 218 | return; 219 | } 220 | 221 | 222 | 223 | // time functions 224 | 225 | static time_t time_start; 226 | 227 | void starttime (void) 228 | { 229 | time_start=time(NULL); 230 | } 231 | 232 | 233 | static TIMERS runTimes; 234 | 235 | void setStartTimeMethod(enum METHOD_NAME method) 236 | { 237 | time_t tic = time(NULL); 238 | switch(method) 239 | { 240 | case 
T_UpdateGB_InternalNode: runTimes.UpdateGB_InternalNode.start = tic; break; 241 | case T_UpdateGB_MigrationNode: runTimes.UpdateGB_MigrationNode.start = tic;break; 242 | case T_UpdateGB_MigSPR: runTimes.UpdateGB_MigSPR.start = tic;break; 243 | case T_UpdateTau: runTimes.UpdateTau.start = tic;break; 244 | case T_UpdateMigRates: runTimes.UpdateMigRates.start = tic;break; 245 | case T_mixing: runTimes.mixing.start = tic;break; 246 | case T_UpdateTheta: runTimes.UpdateTheta.start = tic;break; 247 | case T_UpdateSampleAge: runTimes.UpdateSampleAge.start = tic;break; 248 | case T_UpdateLocusRate: runTimes.UpdateLocusRate.start = tic;break; 249 | case T_UpdateAdmixCoeffs: runTimes.UpdateAdmixCoeffs.start = tic;break; 250 | case T_MCMCIterations: runTimes.MCMCIterations.start = tic;break; 251 | } 252 | } 253 | 254 | void setEndTimeMethod(enum METHOD_NAME method) 255 | { 256 | time_t toc = time(NULL); 257 | switch(method) 258 | { 259 | case T_UpdateGB_InternalNode: runTimes.UpdateGB_InternalNode.accumulated += (toc - runTimes.UpdateGB_InternalNode.start); break; 260 | case T_UpdateGB_MigrationNode: runTimes.UpdateGB_MigrationNode.accumulated += (toc - runTimes.UpdateGB_MigrationNode.start); break; 261 | case T_UpdateGB_MigSPR: runTimes.UpdateGB_MigSPR.accumulated += (toc - runTimes.UpdateGB_MigSPR.start); break; 262 | case T_UpdateTau: runTimes.UpdateTau.accumulated += (toc - runTimes.UpdateTau.start); break; 263 | case T_UpdateMigRates: runTimes.UpdateMigRates.accumulated += (toc - runTimes.UpdateMigRates.start); break; 264 | case T_mixing: runTimes.mixing.accumulated += (toc - runTimes.mixing.start); break; 265 | case T_UpdateTheta: runTimes.UpdateTheta.accumulated += (toc - runTimes.UpdateTheta.start); break; 266 | case T_UpdateSampleAge: runTimes.UpdateSampleAge.accumulated += (toc - runTimes.UpdateSampleAge.start); break; 267 | case T_UpdateLocusRate: runTimes.UpdateLocusRate.accumulated += (toc - runTimes.UpdateLocusRate.start); break; 268 | case T_UpdateAdmixCoeffs: 
runTimes.UpdateAdmixCoeffs.accumulated += (toc - runTimes.UpdateAdmixCoeffs.start); break; 269 | case T_MCMCIterations: runTimes.MCMCIterations.accumulated += (toc - runTimes.MCMCIterations.start); break; 270 | 271 | } 272 | } 273 | void printMethodTimes() 274 | { 275 | #ifdef RECORD_METHOD_TIMES 276 | 277 | char timeString[STRING_LENGTH]; 278 | printf("===== METHOD RUN TIME ======\n"); 279 | printf("UpdateGB_InternalNode (sec): %s\n", printtime_i(runTimes.UpdateGB_InternalNode.accumulated , timeString)); 280 | printf("UpdateGB_MigrationNode (sec): %s\n", printtime_i(runTimes.UpdateGB_MigrationNode.accumulated, timeString)); 281 | printf("UpdateGB_MigSPR (sec): %s\n", printtime_i(runTimes.UpdateGB_MigSPR.accumulated, timeString)); 282 | printf("UpdateTau (sec): %s\n", printtime_i(runTimes.UpdateTau.accumulated, timeString)); 283 | printf("UpdateMigRates (sec): %s\n", printtime_i(runTimes.UpdateMigRates.accumulated, timeString)); 284 | printf("mixing (sec): %s\n", printtime_i(runTimes.mixing.accumulated, timeString)); 285 | printf("UpdateTheta (sec): %s\n", printtime_i(runTimes.UpdateTheta.accumulated, timeString)); 286 | printf("UpdateSampleAge (sec): %s\n", printtime_i(runTimes.UpdateSampleAge.accumulated, timeString)); 287 | printf("UpdateLocusRate NO MT(sec): %s\n", printtime_i(runTimes.UpdateLocusRate.accumulated, timeString)); 288 | printf("UpdateAdmixCoeffs NO MT(sec): %s\n", printtime_i(runTimes.UpdateAdmixCoeffs.accumulated, timeString)); 289 | time_t total = 0; 290 | total += runTimes.UpdateGB_InternalNode.accumulated; 291 | total += runTimes.UpdateGB_MigrationNode.accumulated; 292 | total += runTimes.UpdateGB_MigSPR.accumulated; 293 | total += runTimes.UpdateTau.accumulated; 294 | total += runTimes.UpdateMigRates.accumulated; 295 | total += runTimes.mixing.accumulated; 296 | total += runTimes.UpdateTheta.accumulated; 297 | total += runTimes.UpdateSampleAge.accumulated; 298 | printf("===== Total in MT Methods: %s\n" , printtime_i(total, timeString)); 299 | 
total += runTimes.UpdateLocusRate.accumulated; 300 | total += runTimes.UpdateAdmixCoeffs.accumulated; 301 | printf("===== Total in All MCMCM Methods %s\n" , printtime_i(total, timeString)); 302 | printf("===== Total in MCMCM Iterations: %s\n" , printtime_i(runTimes.MCMCIterations.accumulated, timeString)); 303 | #endif 304 | } 305 | char *printtime_i(int t , char timestr[]) 306 | { 307 | int h, m, s; 308 | 309 | h=t/3600; m=(t%3600)/60; s=t-(t/60)*60; 310 | if(h) sprintf(timestr,"%d:%02d:%02d", h,m,s); 311 | else sprintf(timestr,"00:%02d:%02d", m,s); 312 | return(timestr); 313 | } 314 | char* printtime (char timestr[]) 315 | { 316 | /* print time elapsed since last call to starttime() 317 | */ 318 | time_t t; 319 | int h, m, s; 320 | 321 | t=time(NULL)-time_start; 322 | h=t/3600; m=(t%3600)/60; s=t-(t/60)*60; 323 | if(h) sprintf(timestr,"%d:%02d:%02d", h,m,s); 324 | else sprintf(timestr,"%2d:%02d", m,s); 325 | return(timestr); 326 | } 327 | 328 | 329 | /*----------------------------------------------------------------------------- 330 | * This returns a variable in the range (a,b) by reflecting 331 | * x back into the range 332 | */ 333 | double reflect(double x, double a, double b ) 334 | { 335 | 336 | // safety margins for upper and lower bounds 337 | static double slack = 0.000000001; 338 | double xnew, double_interval; 339 | 340 | a += slack; 341 | b -= slack; 342 | 343 | // if interval is empty (due to slackness), 344 | // return middle of interval 345 | if( b <= a ) 346 | { 347 | //fprintf(stderr, "very small interval in reflect(%g,%g,%g)" 348 | // " [slackness = %g].\n",x,a-slack,b+slack,slack); 349 | return (a+b)/2.; 350 | } 351 | 352 | if( x < b && x > a ) 353 | return x; 354 | 355 | // reflect upwards, if necessary 356 | xnew = x; 357 | if( xnew <= a ) 358 | xnew = 2. * a - xnew; 359 | 360 | // "fold twice" as many time as needed 361 | double_interval = 2. 
* ( b - a ); 362 | xnew = xnew - double_interval*floor( (xnew-a) / double_interval ); 363 | 364 | // reflect downwards one last time, if necessary 365 | if( xnew >= b ) 366 | xnew = 2. * b - xnew; 367 | 368 | // value should be within interval at this stage, but numerical 369 | // precision might put it slightly outside 370 | while( xnew <= a || xnew >= b ) 371 | { 372 | if( xnew >= b ) 373 | { 374 | if( debug ) 375 | { 376 | fprintf(stderr, "reflect percision in reflect(%g,%g,%g): " 377 | "obtaining reflection at %g (%g greater than upper " 378 | "bound)\n", x, 379 | a - slack, 380 | b + slack, 381 | xnew, xnew-b); 382 | } 383 | xnew = 2. * b - xnew; 384 | } 385 | else 386 | { 387 | if( debug ) 388 | { 389 | fprintf(stderr, "reflect percision in reflect(%g,%g,%g): obtaining " 390 | "reflection at %g (%g smaller than lower bound)\n", 391 | x, a - slack, 392 | b + slack, xnew, a-xnew); 393 | } 394 | xnew = 2*a - xnew; 395 | } 396 | } 397 | return xnew; 398 | } 399 | 400 | //================= Random Generator related functions ======================== 401 | RandGeneratorContext RndCtx; 402 | 403 | //----------------------------------------------------------------------------- 404 | #define MALLOC_AND_ASSIGN(ptr, t, sz, val) \ 405 | ptr = (t*) malloc(sz); \ 406 | if (NULL == ptr ) \ 407 | printf("Error on Random context allocation"); \ 408 | for(i=0; i < RndCtx.nOfSlots; ++i) \ 409 | ptr[i] = val; 410 | 411 | void initRandomGenerator( int nNumLoci, unsigned int unSeed ) 412 | { 413 | if( 4 != sizeof(int) ) 414 | puts("oh-oh, we are in trouble. 
int is not 32-bit?"); 415 | 416 | //The last extra slot is for general purpose computations 417 | RndCtx.nOfSlots = nNumLoci + 1; 418 | 419 | int bs = sizeof(int) * RndCtx.nOfSlots; 420 | int i = 0; 421 | unsigned int v = 170 * (unSeed % 178) + 137; 422 | MALLOC_AND_ASSIGN( RndCtx.rndu_z, unsigned int, bs, v ) //137 423 | v = unSeed*127773; 424 | MALLOC_AND_ASSIGN( RndCtx.rndu_w, unsigned int, bs, v ) //123456757 425 | MALLOC_AND_ASSIGN( RndCtx.rndu_x, unsigned int, bs, 11 ) 426 | MALLOC_AND_ASSIGN( RndCtx.rndu_y, unsigned int, bs, 23 ) 427 | bs = sizeof(double) * RndCtx.nOfSlots; 428 | MALLOC_AND_ASSIGN( RndCtx.m2s2_kernel, double, bs, 8. ) 429 | double dv = sqrt(RndCtx.m2s2_kernel[0]/(RndCtx.m2s2_kernel[0] + 1.)); 430 | MALLOC_AND_ASSIGN( RndCtx.m2N_kernel, double, bs, dv ) 431 | dv = sqrt(1./(RndCtx.m2s2_kernel[0] + 1.)); 432 | MALLOC_AND_ASSIGN( RndCtx.s2N_kernel, double, bs, dv ) 433 | 434 | MALLOC_AND_ASSIGN( RndCtx.rndgamma2_b, double, bs, 0.0 ) 435 | MALLOC_AND_ASSIGN( RndCtx.rndgamma2_h, double, bs, 0.0 ) 436 | MALLOC_AND_ASSIGN( RndCtx.rndgamma2_ss, double, bs, 0.0 ) //0.0 437 | 438 | MALLOC_AND_ASSIGN( RndCtx.rndgamma1_a, double, bs, 0.0 ) 439 | MALLOC_AND_ASSIGN( RndCtx.rndgamma1_p, double, bs, 0.0 ) 440 | MALLOC_AND_ASSIGN( RndCtx.rndgamma1_uf, double, bs, 0.0 ) 441 | MALLOC_AND_ASSIGN( RndCtx.rndgamma1_ss, double, bs, 10.0 ) //10.0 442 | MALLOC_AND_ASSIGN( RndCtx.rndgamma1_d, double, bs, 0.0 ) 443 | /* 444 | rndu_z=170*(seed%178)+137; 445 | rndu_w = seed*127773; 446 | 447 | m2N_kernel = sqrt(m2s2_kernel/(m2s2_kernel+1)); 448 | s2N_kernel = sqrt(1/(m2s2_kernel+1)); 449 | */ 450 | } 451 | 452 | /*----------------------------------------------------------------------------- 453 | standard normal variate, using the Box-Muller method (1958), improved by 454 | Marsaglia and Bray (1964). The method generates a pair of random 455 | variates, and only one used. 456 | See N. L. Johnson et al. (1994), Continuous univariate distributions, 457 | vol 1. p.153. 
458 | */ 459 | double rndnormal( int nLocusIdx ) 460 | { 461 | double u, v, s; 462 | while( 1 ) 463 | { 464 | u = 2*rndu( nLocusIdx ) - 1; 465 | v = 2*rndu( nLocusIdx ) - 1; 466 | s = u * u + v * v; 467 | if( s > 0 && s < 1 ) 468 | break; 469 | } 470 | s = sqrt( -2. * log( s ) / s ); 471 | return u * s; 472 | } 473 | 474 | /*----------------------------------------------------------------------------- 475 | This returns a variate from the mixture of two normals 476 | N(-m, s2) and N(m, s2), with mean 0 and variance m^2 + s2 = 1 477 | and m^2/s^2 = 8. 478 | 479 | Let this standard variate be z. Then mean + z * sigma will be a variate 480 | with mean mean and SD sigma. This is useful for generating MCMC proposals 481 | */ 482 | double rnd2normal8( int nLocusIdx ) 483 | { 484 | double z = RndCtx.m2N_kernel[nLocusIdx] 485 | + rndnormal( nLocusIdx ) * RndCtx.s2N_kernel[nLocusIdx]; 486 | z = rndu( nLocusIdx ) < 0.5 ? z : -z; 487 | return z; 488 | } 489 | 490 | /*----------------------------------------------------------------------------- 491 | U(0,1): AS 183: Appl. Stat. 31:188-190 492 | Wichmann BA & Hill ID. 1982. An efficient and portable 493 | pseudo-random number generator. Appl. Stat. 31:188-190 494 | 495 | x, y, z are any numbers in the range 1-30000. Integer operation up 496 | to 30323 required. 
497 | */ 498 | double rndu( int nLocusIdx ) 499 | { 500 | double r; 501 | 502 | RndCtx.rndu_x[nLocusIdx] = 171 * ( RndCtx.rndu_x[nLocusIdx] % 177 ) 503 | - 2 * ( RndCtx.rndu_x[nLocusIdx] / 177 ); 504 | RndCtx.rndu_y[nLocusIdx] = 172 * ( RndCtx.rndu_y[nLocusIdx] % 176 ) 505 | - 35 * ( RndCtx.rndu_y[nLocusIdx] / 176 ); 506 | RndCtx.rndu_z[nLocusIdx] = 170 * ( RndCtx.rndu_z[nLocusIdx] % 178 ) 507 | - 63 * ( RndCtx.rndu_z[nLocusIdx] / 178 ); 508 | r = RndCtx.rndu_x[nLocusIdx] / 30269.0 509 | + RndCtx.rndu_y[nLocusIdx] / 30307.0 510 | + RndCtx.rndu_z[nLocusIdx] / 30323.0; 511 | r = ( r - (int)r ); 512 | return r; 513 | } 514 | 515 | /*----------------------------------------------------------------------------- 516 | random standard gamma (Mean=Var=s, with shape parameter=s, scale para=1) 517 | r^(s-1)*exp(-r) 518 | J. Dagpunar (1988) Principles of random variate generation, 519 | Clarendon Press, Oxford 520 | calling rndgamma1() if s<1 or 521 | rndgamma2() if s>1 or 522 | exponential if s=1 523 | @@TODO: (still actual?) 524 | This is unsafe, and is found to return 0 when s is very small. 
525 | */ 526 | double rndgamma( int nLocusIdx, double s ) 527 | { 528 | double r=0; 529 | if ( s <= 0 ) 530 | puts ("jgl gamma.."); 531 | else if( s < 1) 532 | r = rndgamma1( nLocusIdx, s ); 533 | else if( s > 1 ) 534 | r = rndgamma2( nLocusIdx, s ); 535 | else 536 | r = -log( rndu( nLocusIdx ) ); 537 | return r; 538 | } 539 | 540 | //----------------------------------------------------------------------------- 541 | double rndgamma1( int nLocusIdx, double s ) 542 | { 543 | /* random standard gamma for s<1 544 | switching method 545 | */ 546 | double r, x=0,small=1e-37,w; 547 | if( s != RndCtx.rndgamma1_ss[nLocusIdx] ) 548 | { 549 | RndCtx.rndgamma1_a[nLocusIdx] = 1 - s; 550 | RndCtx.rndgamma1_p[nLocusIdx] = 551 | RndCtx.rndgamma1_a[nLocusIdx]/(RndCtx.rndgamma1_a[nLocusIdx] 552 | + s * exp(-RndCtx.rndgamma1_a[nLocusIdx])); 553 | RndCtx.rndgamma1_uf[nLocusIdx] = RndCtx.rndgamma1_p[nLocusIdx] 554 | * pow( small/RndCtx.rndgamma1_a[nLocusIdx], 555 | s ); 556 | RndCtx.rndgamma1_d[nLocusIdx] = RndCtx.rndgamma1_a[nLocusIdx] 557 | * log(RndCtx.rndgamma1_a[nLocusIdx]); 558 | RndCtx.rndgamma1_ss[nLocusIdx] = s; 559 | } 560 | while( 1 ) 561 | { 562 | r = rndu( nLocusIdx ); 563 | if( r > RndCtx.rndgamma1_p[nLocusIdx] ) 564 | { 565 | x = RndCtx.rndgamma1_a[nLocusIdx] 566 | - log((1 - r) / (1 - RndCtx.rndgamma1_p[nLocusIdx])); 567 | w = RndCtx.rndgamma1_a[nLocusIdx] * log(x) 568 | - RndCtx.rndgamma1_d[nLocusIdx]; 569 | } 570 | else if( r > RndCtx.rndgamma1_uf[nLocusIdx] ) 571 | { 572 | x = RndCtx.rndgamma1_a[nLocusIdx] 573 | * pow(r / RndCtx.rndgamma1_p[nLocusIdx], 1 / s); 574 | w = x; 575 | } 576 | else 577 | return (0); 578 | 579 | r = rndu( nLocusIdx ); 580 | if( (1. - r) <= w && r > 0. 
) 581 | if( r * ( w + 1 ) >= 1 || -log( r ) <= w ) 582 | continue; 583 | break; 584 | } 585 | return x; 586 | } 587 | 588 | //----------------------------------------------------------------------------- 589 | // random standard gamma for s>1 590 | // Best's (1978) t distribution method 591 | 592 | double rndgamma2( int nLocusIdx, double s ) 593 | { 594 | double r,d,f,g,x; 595 | if( s != RndCtx.rndgamma2_ss[nLocusIdx]) 596 | { 597 | RndCtx.rndgamma2_b[nLocusIdx] = s-1; 598 | RndCtx.rndgamma2_h[nLocusIdx] = sqrt(3*s-0.75); 599 | RndCtx.rndgamma2_ss[nLocusIdx] = s; 600 | } 601 | while( 1 ) 602 | { 603 | r = rndu( nLocusIdx ); 604 | g = r - r * r; 605 | f = (r -0.5) * RndCtx.rndgamma2_h[nLocusIdx]/sqrt(g); 606 | x = RndCtx.rndgamma2_b[nLocusIdx] + f; 607 | if (x <= 0) 608 | continue; 609 | r=rndu( nLocusIdx ); 610 | d = 64 * r * r * g * g * g; 611 | if( d * x < x - 2 * f * f 612 | || log(d) < 2 * ( RndCtx.rndgamma2_b[nLocusIdx] 613 | * log ( x / RndCtx.rndgamma2_b[nLocusIdx]) - f ) ) 614 | break; 615 | } 616 | return x; 617 | } 618 | //----------------------------------------------------------------------------- 619 | 620 | /* 621 | double PointChi2 (double prob, double v) 622 | { 623 | . 
returns z so that Prob{x1-small) return(9999); 635 | if (v<=0) return (-1); 636 | 637 | g = LnGamma (v/2); 638 | xx=v/2; c=xx-1; 639 | if (v >= -1.24*log(p)) goto l1; 640 | 641 | ch=pow((p*xx*exp(g+xx*aa)), 1/xx); 642 | if (ch-e<0) return (ch); 643 | goto l4; 644 | l1: 645 | if (v>.32) goto l3; 646 | ch=0.4; a=log(1-p); 647 | l2: 648 | q=ch; p1=1+ch*(4.67+ch); p2=ch*(6.73+ch*(6.66+ch)); 649 | t=-0.5+(4.67+2*ch)/p1 - (6.73+ch*(13.32+3*ch))/p2; 650 | ch-=(1-exp(a+g+.5*ch+c*aa)*p2/p1)/t; 651 | if (fabs(q/ch-1)-.01 <= 0) goto l4; 652 | else goto l2; 653 | 654 | l3: 655 | x=InverseCDFNormal(p); 656 | p1=0.222222/v; ch=v*pow((x*sqrt(p1)+1-p1), 3.0); 657 | if (ch>2.2*v+6) ch=-2*(log(1-p)-c*log(.5*ch)+g); 658 | l4: 659 | q=ch; p1=.5*ch; 660 | if ((t=IncompleteGamma (p1, xx, g))<0) 661 | printf ("\nIncompleteGamma!!!\n\n"); 662 | p2=p-t; 663 | t=p2*exp(xx*aa+g+p1-c*log(ch)); 664 | b=t/ch; a=0.5*t-b*c; 665 | 666 | s1=(210+a*(140+a*(105+a*(84+a*(70+60*a))))) / 420; 667 | s2=(420+a*(735+a*(966+a*(1141+1278*a))))/2520; 668 | s3=(210+a*(462+a*(707+932*a)))/2520; 669 | s4=(252+a*(672+1182*a)+c*(294+a*(889+1740*a)))/5040; 670 | s5=(84+264*a+c*(175+606*a))/2520; 671 | s6=(120+c*(346+127*c))/5040; 672 | ch+=t*(1+0.5*t*s1-b*c*(s1-b*(s2-b*(s3-b*(s4-b*(s5-b*s6)))))); 673 | if (fabs(q/ch-1) > e) goto l4; 674 | 675 | return (ch); 676 | } 677 | */ 678 | 679 | 680 | //----------------------------------------------------------------------------- 681 | void flushLine(FILE* readFile) 682 | { 683 | static char restOfLine[16000] = {'\0'}; 684 | char* p_res = fgets(restOfLine, 16000, readFile); 685 | if(NULL == p_res) 686 | // Just to please the compiler. We might do some error logging here. 
687 | return; 688 | } 689 | 690 | /*----------------------------------------------------------------------------- 691 | * Comment Aware version of strtok 692 | * If string retrieved starts with '#' it is a comment 693 | * and this function returns NULL 694 | ----------------------------------------------------------------------------*/ 695 | char *strtokCS( char * str, const char * delimiters) 696 | { 697 | char * result; 698 | result = strtok(str, delimiters); 699 | if (result == NULL || result[0] == '#') 700 | return NULL; 701 | else 702 | return result; 703 | } 704 | 705 | //============================= END OF FILE =================================== -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | /** 4 | \file utils.h 5 | Utility definitions (Miscellaneous). 6 | 7 | Contains some utility functions and definitions. 
8 | 9 | */ 10 | 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | 22 | #define STRING_LENGTH 500 23 | #define NAME_LENGTH 200 24 | #define min2(a,b) ((a)<(b)?(a):(b)) 25 | #define max2(a,b) ((a)>(b)?(a):(b)) 26 | #define swap2(a,b) ({a += b ; b = a-b ; a -= b ;}) 27 | #define rndexp(nLocusIdx, mean) (-(mean)*log(rndu(nLocusIdx))) 28 | 29 | #define ROOT_SLACK 0.001 // default length of top-most interval 30 | 31 | double reflect(double x, double a, double b); 32 | 33 | //---- Random Generator related stuff ----------------------------------------- 34 | #define RAND_GENERAL_SLOT (RndCtx.nOfSlots-1) 35 | typedef struct _RandGeneratorContext 36 | { 37 | int nOfSlots; 38 | 39 | unsigned int* rndu_z; 40 | unsigned int* rndu_w; 41 | unsigned int* rndu_x; 42 | unsigned int* rndu_y; 43 | 44 | double* m2s2_kernel; 45 | double* m2N_kernel; 46 | double* s2N_kernel; 47 | 48 | double* rndgamma2_b; 49 | double* rndgamma2_h; 50 | double* rndgamma2_ss; 51 | 52 | double* rndgamma1_a; 53 | double* rndgamma1_p; 54 | double* rndgamma1_uf; 55 | double* rndgamma1_ss; 56 | double* rndgamma1_d; 57 | } RandGeneratorContext; 58 | 59 | void initRandomGenerator( int nNumLoci, unsigned int seed ); 60 | double rndnormal( int nLocusIdx ); 61 | double rnd2normal8( int nLocusIdx ); 62 | double rndu( int nLocusIdx ); 63 | double rndgamma( int nLocusIdx, double s ); 64 | double rndgamma1( int nLocusIdx, double s ); 65 | double rndgamma2( int nLocusIdx, double s ); 66 | //----------------------------------------------------------------------------- 67 | 68 | double PointChi2 (double prob, double v); 69 | char* printtime(char timestr[]); 70 | void starttime(void); 71 | void resetBooleanArray(unsigned short* booleanArray, int arrayLength); 72 | void turnOnBooleanArray(unsigned short* booleanArray, int arrayLength); 73 | double logSumOfExponents(double* logArray, int arrayLength, unsigned short* indsToSum); 74 | 75 | 76 | void 
mergeArrays(double array[], int numEntries, int borderPoint, double tmpArray[]); 77 | double mergeSort(double array[], int numEntries, double tmpArray[]); 78 | 79 | void flushLine(FILE* readFile); 80 | int readStringFromFile(FILE *stream, int bufferLength, char *destination); 81 | char * strtokCS( char * str, const char * delimiters); 82 | 83 | extern int debug; 84 | extern int verbose; 85 | extern char parseFileDelims[]; 86 | 87 | 88 | 89 | typedef struct { 90 | time_t start; 91 | time_t accumulated; 92 | } TIMER_METHOD; 93 | typedef struct { 94 | TIMER_METHOD UpdateGB_InternalNode; 95 | TIMER_METHOD UpdateGB_MigrationNode; 96 | TIMER_METHOD UpdateGB_MigSPR; 97 | TIMER_METHOD UpdateTau; 98 | TIMER_METHOD UpdateMigRates; 99 | TIMER_METHOD mixing; 100 | TIMER_METHOD UpdateTheta; 101 | TIMER_METHOD UpdateSampleAge; 102 | TIMER_METHOD UpdateLocusRate; 103 | TIMER_METHOD UpdateAdmixCoeffs; 104 | TIMER_METHOD MCMCIterations; 105 | } TIMERS; 106 | 107 | enum METHOD_NAME 108 | { 109 | T_UpdateGB_InternalNode, 110 | T_UpdateGB_MigrationNode, 111 | T_UpdateGB_MigSPR, 112 | T_UpdateTau, 113 | T_UpdateMigRates, 114 | T_mixing, 115 | T_UpdateTheta, 116 | T_UpdateSampleAge, 117 | T_UpdateLocusRate, 118 | T_UpdateAdmixCoeffs, 119 | T_MCMCIterations 120 | } ; 121 | 122 | 123 | void setEndTimeMethod(enum METHOD_NAME method); 124 | void printMethodTimes(); 125 | void setStartTimeMethod(enum METHOD_NAME method); 126 | char *printtime_i(int t , char timestr[]); 127 | #endif 128 | --------------------------------------------------------------------------------