├── .gitignore ├── LICENSE.md ├── Makefile ├── README.md ├── bin └── run_index.pl └── src ├── dbg.h ├── index_genome.c ├── mem.c ├── mem.h ├── mpd.c ├── mpd.h ├── mpd_lessGreedy.c ├── mpd_moreGreedy.c ├── pool_check.c └── primer_compat.c /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | 31 | # project-specific 32 | *.snp 33 | *.dSYM/ 34 | build/ 35 | bin/ 36 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. 
Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. 
Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. 
Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. 
A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 
163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <http://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <http://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>.
675 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ## Makefile 2 | 3 | CC = gcc 4 | CC_OPTIONS = -Wall -g -v -O3 -std=gnu11 -DNDEBUG 5 | INCLUDES = 6 | CFLAGS = $(CC_OPTIONS) $(INCLUDES) 7 | LIBS = -lm 8 | 9 | OUT = bin 10 | PRIMER_LG_EXE = ${OUT}/mpd 11 | PRIMER_LG_SRC = src/mem.c src/mpd.c src/mpd_lessGreedy.c 12 | PRIMER_LG_OBJ = $(PRIMER_LG_SRC:.c=.o) 13 | 14 | PRIMER_MG_EXE = ${OUT}/mpd_greedy 15 | PRIMER_MG_SRC = src/mem.c src/mpd.c src/mpd_moreGreedy.c 16 | PRIMER_MG_OBJ = $(PRIMER_MG_SRC:.c=.o) 17 | 18 | POOL_EXE = ${OUT}/pool_check 19 | POOL_SRC = src/mem.c src/mpd.c src/pool_check.c 20 | POOL_OBJ = $(POOL_SRC:.c=.o) 21 | 22 | INDEX_EXE = ${OUT}/index_genome 23 | INDEX_SRC = src/mem.c src/mpd.c src/index_genome.c 24 | INDEX_OBJ = $(INDEX_SRC:.c=.o) 25 | 26 | PCOMP_EXE = ${OUT}/primer_compat 27 | PCOMP_SRC = src/mem.c src/mpd.c src/primer_compat.c 28 | PCOMP_OBJ = $(PCOMP_SRC:.c=.o) 29 | 30 | 31 | PROGS = $(PRIMER_LG_EXE) $(PRIMER_MG_EXE) $(POOL_EXE) $(INDEX_EXE) $(PCOMP_EXE) 32 | 33 | all: introduce $(PROGS) 34 | @echo done. 35 | 36 | $(PRIMER_LG_EXE): $(PRIMER_LG_OBJ) 37 | $(CC) -o $@ $(CFLAGS) $(PRIMER_LG_OBJ) $(LIBS) 38 | 39 | $(PRIMER_MG_EXE): $(PRIMER_MG_OBJ) 40 | $(CC) -o $@ $(CFLAGS) $(PRIMER_MG_OBJ) $(LIBS) 41 | 42 | $(POOL_EXE): $(POOL_OBJ) 43 | $(CC) -o $@ $(CFLAGS) $(POOL_OBJ) $(LIBS) 44 | 45 | $(INDEX_EXE): $(INDEX_OBJ) 46 | $(CC) -o $@ $(CFLAGS) $(INDEX_OBJ) $(LIBS) 47 | 48 | $(PCOMP_EXE): $(PCOMP_OBJ) 49 | $(CC) -o $@ $(CFLAGS) $(PCOMP_OBJ) $(LIBS) 50 | 51 | introduce: 52 | @echo "Building..." 53 | mkdir -p ${OUT} 54 | 55 | clean: 56 | rm -f src/*.o 57 | 58 | distclean: clean 59 | rm -f $(INDEX_EXE) $(PRIMER_LG_EXE) $(PRIMER_MG_EXE) $(POOL_EXE) 60 | 61 | ## end of Makefile 62 | # DO NOT DELETE THIS LINE -- make depend depends on it. 
63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | MPD - Multiplex PCR Design 2 | ============================ 3 | 4 | by Thomas Wingo and David Cutler 5 | 6 | ## Citation 7 | 8 | Please cite our [paper](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-016-1453-3) if you use MPD in your work. Thanks. 9 | 10 | ## Description 11 | 12 | MPD is a program, written in C, designed to automate multiplex PCR primer design. The `mpd_lessGreedy` and `mpd_moreGreedy` binaries differ in which primer pool they choose to start with for pool creation. Either binary can be used stand-alone or in conjunction with the [MPD perl package](http://github.com/wingolab-org/mpd-perl). 13 | 14 | ## Installation 15 | - Clone the repository 16 | - Make with `make all` 17 | - Binaries will be compiled and saved to the `bin` directory 18 | 19 | ## Required files 20 | - You will need a hashed copy of the genome to run the primer software. 21 | - Instructions below show how you can create one yourself. A prebuilt hg38 genome with flat dbSnp files is available from [this repository](https://bitbucket.org/wingolab/mpd-dat/). It may be cloned like so, `git clone https://bitbucket.org/wingolab/mpd-dat.git`. 22 | 23 | ### Build Hashed Genome 24 | - Download the genome of interest as a fasta file 25 | - Use `bin/run_index.pl`, which creates a sh script to run `index_genome` 26 | - You'll need to install these perl packages to use this script: `Path::Tiny`, `Data::Dump`, and `Getopt::Long`, which can be installed using [`cpanm`][1] like so `cpanm Path::Tiny Data::Dump Getopt::Long`. 27 | 28 | ### Flat dbSnp 29 | - These can be obtained from [this repository](https://bitbucket.org/wingolab/mpd-dat/), which were prepared from dbSNP version 140.
30 | - To create your own flat snp file set based on criteria of your own devising, each line should contain the following tab-delimited fields: 31 | ``` 32 | name numberOfReporters chrom position MinorAlleleFrequency allele1/allele2 33 | ``` 34 | - The `numberOfReporters` field is no longer used but is retained for backwards compatibility. 35 | - Prepare an `sdx` file that contains the number of chromosome files to include as the 1st line and then a list of the names of all chromosome files. On the command line you might try: `ls -1 *.line | wc -l > db_flat.sdx; ls -1 >> db_flat.sdx`. Note that the sdx should be in the same order that the chromosomes are in for the indexed genome. See the genome's sdx file (e.g., `cat hg38.d14.sdx`) to see the order. 36 | 37 | ## Run mpd 38 | - The easiest way of using MPD is to use the [Perl package MPD](http://github.com/wingolab-org/mpd-perl), but either of the `mpd_lessGreedy` and `mpd_moreGreedy` binaries may be executed from the command line interactively.
39 | 40 | [1]: https://metacpan.org/pod/App::cpanminus 41 | -------------------------------------------------------------------------------- /bin/run_index.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Name: run_index.pl 3 | # Date Created: Mon Mar 21 13:59:19 2016 4 | # Date Modified: Mon Mar 21 13:59:19 2016 5 | # By: TS Wingo 6 | # 7 | # Description: 8 | 9 | use 5.10.0; 10 | use warnings; 11 | use strict; 12 | 13 | use Getopt::Long; 14 | use Path::Tiny; 15 | use Data::Dump qw/ dump /; 16 | 17 | # variables 18 | my ( $dir_name, $out_ext ); 19 | my $ext = "fa"; 20 | my $indexDepth = 14; 21 | 22 | # get options 23 | die "Usage: $0 [-e ] [-i index depth] -d -o \n" 24 | unless GetOptions( 25 | 'o|out=s' => \$out_ext, 26 | 'e|ext=s' => \$ext, 27 | 'd|dir=s' => \$dir_name, 28 | 'i|index=n' => \$indexDepth, 29 | ) 30 | and $dir_name 31 | and $indexDepth 32 | and $out_ext 33 | and $ext; 34 | 35 | my ( $filesAref, $sizesAref ) = FastaList( $dir_name, $ext ); 36 | WriteIn( $out_ext, $filesAref, $sizesAref ); 37 | WriteSh($out_ext); 38 | 39 | sub WriteIn { 40 | my $out_ext = shift; 41 | my $filesAref = shift; 42 | my $sizesAref = shift; 43 | 44 | my @sSizes = sort { $b <=> $a } @$sizesAref; 45 | my $maxSize = shift @sSizes; 46 | 47 | my $fh = path("$out_ext.in")->filehandle(">"); 48 | say {$fh} join "\n", ( "d", "in.index_genome.in", scalar @$filesAref ); 49 | say {$fh} join "\n", @$filesAref; 50 | say {$fh} "$out_ext.d$indexDepth"; 51 | say {$fh} ( $maxSize + 1 ); 52 | say {$fh} $indexDepth; 53 | } 54 | 55 | sub WriteSh { 56 | my $out_ext = shift; 57 | my $fh = path("$out_ext.sh")->filehandle(">"); 58 | say {$fh} qq{#!/bin/sh 59 | ./index_genome < $out_ext.in}; 60 | } 61 | 62 | sub FastaList { 63 | my $dir = shift; 64 | my $ext = shift; 65 | 66 | my ( @fastas, @sizes ); 67 | 68 | my $pt = path($dir); 69 | my @files = path($dir)->children(qr{$ext\z}); 70 | my @chrs = ( 1 .. 
26, "M", "X", "Y", "Un" ); 71 | my %files = map { $_->basename() => $_ } @files; 72 | 73 | for my $chr (@chrs) { 74 | my $f = sprintf( "chr%s.fa", $chr ); 75 | if ( exists $files{$f} ) { 76 | push @fastas, $files{$f}->stringify; 77 | push @sizes, ( -s $files{$f} ); 78 | delete $files{$f}; 79 | } 80 | } 81 | 82 | for my $chr (@chrs) { 83 | for my $file ( sort keys %files ) { 84 | if ( $file =~ m/\Achr$chr/ ) { 85 | push @fastas, $files{$file}->stringify; 86 | push @sizes, ( -s $files{$file} ); 87 | delete $files{$file}; 88 | } 89 | } 90 | } 91 | if ( scalar @fastas == 0 ) { 92 | say "No fasta files to process. Exiting..."; 93 | exit(1); 94 | } 95 | return ( \@fastas, \@sizes ); 96 | } 97 | -------------------------------------------------------------------------------- /src/dbg.h: -------------------------------------------------------------------------------- 1 | #ifndef __dbg_h__ 2 | #define __dbg_h__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #ifdef NDEBUG 10 | #define debug(M, ...) 11 | #else 12 | #define debug(M, ...) fprintf(stderr, "DEBUG %s:%d %s(): " M "\n", __FILE__, __LINE__, __func__, ##__VA_ARGS__) 13 | #endif 14 | 15 | #define usage(A, M, ...) if(!(A)) { fprintf(stdout, "USAGE: " M "\n", ##__VA_ARGS__); goto error; } 16 | 17 | #define clean_errno() (errno == 0 ? "None" : strerror(errno)) 18 | 19 | #define log_err(M, ...) fprintf(stderr, "[ERROR] (%s:%d %s(): errno: %s) " M "\n", __FILE__, __LINE__, __func__, clean_errno(), ##__VA_ARGS__) 20 | 21 | #define log_warn(M, ...) fprintf(stderr, "[WARN] (%s:%d %s(): errno: %s) " M "\n", __FILE__, __LINE__, __func__, clean_errno(), ##__VA_ARGS__) 22 | 23 | #define log_info(M, ...) fprintf(stderr, "[INFO] (%s:%d %s()) " M "\n", __FILE__, __LINE__, __func__, ##__VA_ARGS__) 24 | 25 | #define check(A, M, ...) if(!(A)) { log_err(M, ##__VA_ARGS__); errno=0; goto error; } 26 | 27 | #define sentinel(M, ...) 
{ log_err(M, ##__VA_ARGS__); errno=0; goto error; } 28 | 29 | #define check_mem(A) check((A), "Out of memory.") 30 | 31 | #define check_debug(A, M, ...) if(!(A)) { debug(M, ##__VA_ARGS__); errno=0; goto error; } 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /src/index_genome.c: -------------------------------------------------------------------------------- 1 | #include "mpd.h" 2 | 3 | static FILE *outfile, **innfile; 4 | 5 | int 6 | main () 7 | { 8 | char **filename, ss[256], sss[4196], basename[1024]; 9 | char *scratch_pad, **contig_descript; 10 | unsigned char **compressed_map, *double_high; 11 | int i, j, k, N, max_sites, *contig_length, not_done, newpos, high_size, high_depth; 12 | int fasta, idepth, total_index, *repeat_count; 13 | int *flat_index, in_repeat; 14 | FILE *sfile, *cfile, *idfile, *rpfile, *highfile; 15 | 16 | 17 | outfile = stdout; 18 | read_var ("\nSend Output to Screen or Disk? [S,D]\n", ss); 19 | 20 | if ((strchr (ss, 'D')) || (strchr (ss, 'd'))) 21 | { 22 | read_var ("Please Enter File Name for Output\n", ss); 23 | if ((outfile = fopen (ss, "w")) == (FILE *) NULL) 24 | { 25 | printf ("\n Can not open file %s\n", ss); 26 | exit (1); 27 | } 28 | } 29 | else 30 | outfile = stdout; 31 | 32 | read_var ("Number of Contig Fasta Files to Process\n", ss); 33 | N = atoi (ss); 34 | filename = cmatrix (0, N, 0, 256); 35 | repeat_count = ivector (0, N); 36 | 37 | innfile = (FILE **) malloc ((unsigned) (N + 1) * sizeof (FILE *)); 38 | if (!innfile) 39 | log_err ("allocation failure for innfile"); 40 | 41 | compressed_map = (unsigned char **) malloc ((unsigned) (N + 1) * sizeof (unsigned char *)); 42 | if (!compressed_map) 43 | log_err ("allocation failure for compressed_map"); 44 | 45 | contig_descript = cmatrix (0, N, 0, 4196); 46 | contig_length = ivector (0, N); 47 | 48 | for (i = 0; i < N; i++) 49 | { 50 | sprintf (sss, "Please Enter Name For Contig Fasta File %d\n", i + 1); 51 | read_var (sss, 
filename[i]); 52 | if ((innfile[i] = fopen (filename[i], "r")) == (FILE *) NULL) 53 | { 54 | printf ("\n Can not open file %s\n", filename[i]); 55 | exit (1); 56 | } 57 | } 58 | read_var ("Basename to save compressed Genome and Indexes\n", basename); 59 | 60 | read_var ("Maximum Number Of Sites in a contig\n", ss); 61 | max_sites = atoi (ss); 62 | 63 | read_var ("Index Depth\n", ss); 64 | idepth = atoi (ss); 65 | sprintf (sss, "%s.sdx", basename); 66 | if ((sfile = fopen (sss, "w")) == (FILE *) NULL) 67 | { 68 | printf ("\nCould Not Open file %s\n", sss); 69 | exit (1); 70 | } 71 | fprintf (sfile, "%d\n", N); 72 | 73 | j = 4; 74 | for (i = 0; i < idepth; i++) 75 | j *= 4; 76 | j = (j - 4) / 3; 77 | 78 | total_index = j; 79 | printf ("\n Determined the size of flat index to be %d * %ld = %ld bytes\n", 80 | total_index, sizeof (int), total_index * sizeof (int)); 81 | 82 | flat_index = ivector (0, total_index); 83 | 84 | for (i = 0; i <= total_index; i++) 85 | flat_index[i] = 0; 86 | 87 | scratch_pad = cvector (0, max_sites); 88 | not_done = TRUE; 89 | 90 | sprintf (sss, "%s.cdx", basename); 91 | if ((cfile = fopen (sss, "w")) == (FILE *) NULL) 92 | { 93 | printf ("\nCould Not Open file %s\n", sss); 94 | exit (1); 95 | } 96 | sprintf (sss, "%s.rdx", basename); 97 | if ((rpfile = fopen (sss, "w")) == (FILE *) NULL) 98 | { 99 | printf ("\nCould Not Open file %s\n", sss); 100 | exit (1); 101 | } 102 | sprintf (sss, "%s.15x", basename); 103 | if ((highfile = fopen (sss, "w")) == (FILE *) NULL) 104 | { 105 | printf ("\nCould Not Open file %s\n", sss); 106 | exit (1); 107 | } 108 | high_depth = 15; 109 | high_size = 1; 110 | for (i = 0; i < high_depth; i++) 111 | high_size *= 4; 112 | high_size /= 8; 113 | 114 | double_high = ucvector (0, high_size); 115 | for (i = 0; i <= high_size; i++) 116 | double_high[i] = 0; 117 | 118 | for (fasta = 0; fasta < N; fasta++) 119 | { 120 | not_done = TRUE; 121 | 122 | in_repeat = FALSE; 123 | newpos = 0; 124 | 125 | fgets (sss, 4195, 
innfile[fasta]); 126 | j = strlen (sss); 127 | for (i = 0; i <= j; i++) 128 | if ((sss[i] == '\n') || (sss[i] == 10) || (sss[i] == 13)) 129 | contig_descript[fasta][i] = ' '; 130 | else 131 | contig_descript[fasta][i] = sss[i]; 132 | 133 | 134 | contig_descript[fasta][i] = '\0'; 135 | 136 | fgets (sss, 256, innfile[fasta]); 137 | repeat_count[fasta] = 0; 138 | 139 | while (not_done) 140 | { 141 | /* printf("%s",sss); */ 142 | j = strlen (sss); 143 | 144 | for (i = 0; i < j; i++) 145 | { 146 | if (isalpha (sss[i])) 147 | { 148 | scratch_pad[newpos] = toupper (sss[i]); 149 | if (islower (sss[i]) || (sss[i] == 'N')) 150 | { 151 | if (!in_repeat) 152 | { 153 | in_repeat = TRUE; 154 | repeat_count[fasta]++; 155 | fwrite (&newpos, sizeof (int), 1, rpfile); 156 | } 157 | } 158 | else if (in_repeat) 159 | { 160 | in_repeat = FALSE; 161 | k = newpos - 1; 162 | fwrite (&k, sizeof (int), 1, rpfile); 163 | } 164 | newpos++; 165 | } 166 | } 167 | 168 | if (feof (innfile[fasta]) != 0) 169 | not_done = FALSE; 170 | else 171 | not_done = TRUE; 172 | 173 | if (not_done) 174 | { 175 | /* sprintf(sss, ""); what is this doing? 
*/ 176 | fgets (sss, 256, innfile[fasta]); 177 | if (strlen (sss) < 1) 178 | not_done = FALSE; 179 | } 180 | } 181 | contig_length[fasta] = newpos; 182 | if (in_repeat) 183 | fwrite (&newpos, sizeof (int), 1, rpfile); 184 | 185 | printf ("\n Finished reading fasta %d \n\n", fasta); 186 | 187 | for (i = 0; i < 4; i++) 188 | scratch_pad[newpos + i] = 'A'; /* Pad with A's */ 189 | printf ("\n\t\tBeginning to index contig %s which has length %d\n\n", contig_descript[fasta], contig_length[fasta]); 190 | flat_index_contig_high(flat_index, scratch_pad, contig_length[fasta], idepth, high_depth, double_high); 191 | printf ("\nFinished indexing contig %d \n\n", fasta); 192 | 193 | i = contig_length[fasta] % 4; 194 | int ts; 195 | if (i == 0) 196 | ts = newpos / 4; 197 | else 198 | ts = newpos / 4 + 1; 199 | 200 | compressed_map[fasta] = ucvector (0, ts); 201 | j = 0; 202 | for (i = 0; i < contig_length[fasta]; i += 4, j++) 203 | compressed_map[fasta][j] = (unsigned char) encode_basepairs (&scratch_pad[i], 4); 204 | fwrite (compressed_map[fasta], sizeof (unsigned char), j, cfile); 205 | free_ucvector (compressed_map[fasta], 0, ts); 206 | } 207 | printf ("\n Finishing up now \n\n"); 208 | fclose (cfile); 209 | fwrite (double_high, sizeof (unsigned char), high_size, highfile); 210 | fclose (highfile); 211 | for (i = 0; i < N; i++) 212 | fprintf (sfile, "%d\t%d\t%s\n", contig_length[i], repeat_count[i], contig_descript[i]); 213 | fprintf (sfile, "%d\n", idepth); 214 | 215 | sprintf (sss, "%s.idx", basename); 216 | if ((idfile = fopen (sss, "w")) == (FILE *) NULL) 217 | { 218 | printf ("\nCould Not Open file %s\n", sss); 219 | exit (1); 220 | } 221 | fwrite (flat_index, sizeof (int), total_index, idfile); 222 | fclose (idfile); 223 | 224 | fprintf (sfile, "%s.cdx\n", basename); 225 | fprintf (sfile, "%s.idx\n", basename); 226 | fprintf (sfile, "%s.rdx\n", basename); 227 | fprintf (sfile, "%s.15x\n", basename); 228 | fclose (sfile); 229 | printf ("\n Finished reading fasta got 
here.\n"); 230 | return (0); 231 | } 232 | -------------------------------------------------------------------------------- /src/mem.c: -------------------------------------------------------------------------------- 1 | #include "mem.h" 2 | #include "dbg.h" 3 | 4 | /*---------------------------------------------------------------------*/ 5 | 6 | char *cvector(int nl, int nh) 7 | { 8 | char *v; 9 | 10 | v = (char *) malloc((unsigned) (nh - nl + 1) * sizeof(char)); 11 | if (!v) 12 | log_err("allocation failure in cvector()"); 13 | return v - nl; 14 | } 15 | 16 | /*---------------------------------------------------------------------*/ 17 | 18 | uchar *ucvector(int nl, int nh) 19 | { 20 | uchar *v; 21 | 22 | v = (uchar *) malloc((unsigned) (nh - nl + 1) * sizeof(uchar)); 23 | if (!v) 24 | log_err("allocation failure in cvector()"); 25 | return v - nl; 26 | } 27 | 28 | /*---------------------------------------------------------------------*/ 29 | 30 | int *ivector(int nl, int nh) 31 | { 32 | int *v; 33 | 34 | v = (int *) malloc((unsigned) (nh - nl + 1) * sizeof(int)); 35 | if (!v) 36 | log_err("allocation failure in ivector()"); 37 | return v - nl; 38 | } 39 | 40 | /*---------------------------------------------------------------------*/ 41 | 42 | double *dvector(int nl, int nh) 43 | { 44 | double *v; 45 | 46 | v = (double *) malloc((unsigned) (nh - nl + 1) * sizeof(double)); 47 | if (!v) 48 | log_err("allocation failure in dvector()"); 49 | return v - nl; 50 | } 51 | 52 | /*---------------------------------------------------------------------*/ 53 | 54 | int **imatrix(int nrl, int nrh, int ncl, int nch) 55 | { 56 | int i, **m; 57 | 58 | m = (int **) malloc((unsigned) (nrh - nrl + 1) * sizeof(int *)); 59 | if (!m) 60 | log_err("allocation failure 1 in imatrix()"); 61 | m -= nrl; 62 | 63 | for (i = nrl; i <= nrh; i++) { 64 | m[i] = (int *) malloc((unsigned) (nch - ncl + 1) * sizeof(int)); 65 | if (!m[i]) 66 | log_err("allocation failure 2 in imatrix()"); 67 | m[i] -= 
ncl; 68 | } 69 | return m; 70 | } 71 | 72 | /*---------------------------------------------------------------------*/ 73 | 74 | void free_imatrix(int **m, int nrl, int nrh, int ncl, int nch) 75 | { 76 | int i; 77 | 78 | for (i = nrh; i >= nrl; i--) 79 | free((void *) (m[i] + ncl)); 80 | free((void *) (m + nrl)); 81 | } 82 | 83 | /*---------------------------------------------------------------------*/ 84 | 85 | double **dmatrix(int nrl, int nrh, int ncl, int nch) 86 | { 87 | int i; 88 | double **m; 89 | 90 | m = (double **) malloc((unsigned) (nrh - nrl + 1) * sizeof(double *)); 91 | if (!m) 92 | log_err("allocation failure 1 in dmatrix()"); 93 | m -= nrl; 94 | 95 | for (i = nrl; i <= nrh; i++) { 96 | m[i] = (double *) malloc((unsigned) (nch - ncl + 1) * sizeof(double)); 97 | if (!m[i]) 98 | log_err("allocation failure 2 in dmatrix()"); 99 | m[i] -= ncl; 100 | } 101 | return m; 102 | } 103 | 104 | /*---------------------------------------------------------------------*/ 105 | 106 | void free_dmatrix(double **m, int nrl, int nrh, int ncl, int nch) 107 | { 108 | int i; 109 | 110 | for (i = nrh; i >= nrl; i--) 111 | free((void *) (m[i] + ncl)); 112 | free((void *) (m + nrl)); 113 | } 114 | 115 | /*---------------------------------------------------------------------*/ 116 | 117 | char **cmatrix(int nrl, int nrh, int ncl, int nch) 118 | { 119 | int i; 120 | char **m; 121 | 122 | m = (char **) malloc((unsigned) (nrh - nrl + 1) * sizeof(char *)); 123 | if (!m) 124 | log_err("allocation failure 1 in cmatrix()"); 125 | m -= nrl; 126 | 127 | for (i = nrl; i <= nrh; i++) { 128 | m[i] = (char *) malloc((unsigned) (nch - ncl + 1) * sizeof(char)); 129 | if (!m[i]) 130 | log_err("allocation failure 2 in cmatrix()"); 131 | m[i] -= ncl; 132 | } 133 | return m; 134 | } 135 | 136 | /*---------------------------------------------------------------------*/ 137 | 138 | uchar **ucmatrix(int nrl, int nrh, int ncl, int nch) 139 | { 140 | int i; 141 | uchar **m; 142 | 143 | m = (uchar **) 
malloc((unsigned) (nrh - nrl + 1) * sizeof(uchar *)); 144 | if (!m) 145 | log_err("allocation failure 1 in cmatrix()"); 146 | m -= nrl; 147 | 148 | for (i = nrl; i <= nrh; i++) { 149 | m[i] = (uchar *) malloc((unsigned) (nch - ncl + 1) * sizeof(uchar)); 150 | if (!m[i]) 151 | log_err("allocation failure 2 in cmatrix()"); 152 | m[i] -= ncl; 153 | } 154 | return m; 155 | } 156 | 157 | /*---------------------------------------------------------------------*/ 158 | 159 | void free_cmatrix(char **m, int nrl, int nrh, int ncl, int nch) 160 | { 161 | int i; 162 | 163 | for (i = nrh; i >= nrl; i--) 164 | free((void *) (m[i] + ncl)); 165 | free((void *) (m + nrl)); 166 | } 167 | 168 | /*---------------------------------------------------------------------*/ 169 | 170 | void free_ucmatrix(uchar ** m, int nrl, int nrh, int ncl, int nch) 171 | { 172 | int i; 173 | 174 | for (i = nrh; i >= nrl; i--) 175 | free((void *) (m[i] + ncl)); 176 | free((void *) (m + nrl)); 177 | } 178 | 179 | /*---------------------------------------------------------------------*/ 180 | 181 | void free_cvector(char *v, int nl, int nh) 182 | { 183 | free((void *) (v + nl)); 184 | } 185 | 186 | /*---------------------------------------------------------------------*/ 187 | 188 | void free_ucvector(uchar * v, int nl, int nh) 189 | { 190 | free((void *) (v + nl)); 191 | } 192 | 193 | /*---------------------------------------------------------------------*/ 194 | 195 | void free_ivector(int *v, int nl, int nh) 196 | { 197 | free((void *) (v + nl)); 198 | } 199 | 200 | /*---------------------------------------------------------------------*/ 201 | 202 | void free_dvector(double *v, int nl, int nh) 203 | { 204 | free((void *) (v + nl)); 205 | } 206 | 207 | /*---------------------------------------------------------------------*/ 208 | 209 | int * 210 | create_ivec (int row) 211 | { 212 | int *v = (int *) malloc ((unsigned) row * sizeof (int)); 213 | check_mem(v); 214 | for (int i = 0; i < row; i++ ) 215 | { 
216 | v[i] = 0; 217 | } 218 | return v; 219 | 220 | error: 221 | exit(1); 222 | } 223 | 224 | /*---------------------------------------------------------------------*/ 225 | 226 | char * 227 | create_cvec (int row) 228 | { 229 | char *v = (char *) malloc ((unsigned) row * sizeof (char)); 230 | check_mem(v); 231 | for (int i = 0; i < row; i++ ) 232 | { 233 | v[i] = 0; 234 | } 235 | return v; 236 | 237 | error: 238 | exit(1); 239 | } 240 | 241 | /*---------------------------------------------------------------------*/ 242 | 243 | int ** 244 | create_imat (int row, int col) 245 | { 246 | int **m = (int **) malloc ((unsigned) row * sizeof (int *)); 247 | check_mem(m); 248 | 249 | for (int i = 0; i < col; i++) 250 | { 251 | m[i] = (int *) malloc ((unsigned) col * sizeof (int)); 252 | check_mem(m[i]); 253 | for (int j = 0; j < col; j++ ) 254 | { 255 | m[i][j] = 0; 256 | } 257 | } 258 | return m; 259 | 260 | error: 261 | exit(1); 262 | } 263 | 264 | /*---------------------------------------------------------------------*/ 265 | 266 | char ** 267 | create_cmat (int row, int col) 268 | { 269 | char **m = (char **) malloc ((unsigned) row * sizeof (char *)); 270 | check_mem(m); 271 | 272 | for (int i = 0; i < col; i++) 273 | { 274 | m[i] = (char *) malloc ((unsigned) col * sizeof (char)); 275 | check_mem(m[i]); 276 | for (int j = 0; j < col; j++) 277 | { 278 | m[i][j] = 0; 279 | } 280 | } 281 | return m; 282 | 283 | error: 284 | exit(1); 285 | } 286 | 287 | /*---------------------------------------------------------------------*/ 288 | 289 | -------------------------------------------------------------------------------- /src/mem.h: -------------------------------------------------------------------------------- 1 | #ifndef __mem_h__ 2 | #define __mem_h__ 3 | #endif 4 | 5 | #ifndef UINT_TYPE 6 | typedef unsigned int uint; 7 | typedef unsigned char uchar; 8 | #define UINT_TYPE 1 9 | #endif 10 | 11 | int *ivector(int, int); 12 | char *cvector(int, int); 13 | uchar 
*ucvector(int, int); 14 | double *dvector(int, int); 15 | double **dmatrix(int, int, int, int); 16 | char **cmatrix(int, int, int, int); 17 | uchar **ucmatrix(int, int, int, int); 18 | void free_cvector(char *, int, int); 19 | void free_ucvector(uchar *, int, int); 20 | void free_ivector(int *, int, int); 21 | void free_dvector(double *, int, int); 22 | void free_dmatrix(double **, int, int, int, int); 23 | void free_cmatrix(char **, int, int, int, int); 24 | void free_ucmatrix(uchar **, int, int, int, int); 25 | int **imatrix(int, int, int, int); 26 | void free_imatrix(int **, int, int, int, int); 27 | -------------------------------------------------------------------------------- /src/mpd.c: -------------------------------------------------------------------------------- 1 | #include "mpd.h" 2 | 3 | /*---------------------------------------------------------------------*/ 4 | void 5 | make_greedy_pools (FILE *outfile, PNODE **plist, char **cmat, int *pc, int **redund, int *bstart, int Nregs, int N, int still, int *current_pool, int this_pool, int max_pool) 6 | { 7 | int i, j; 8 | if (still < 1) 9 | return; 10 | 11 | int this_p = -1; 12 | if (this_pool == 0) 13 | { 14 | // fprintf (outfile, "\nStarting a New Pool\n"); 15 | int best = -1; 16 | for (i = 0; i < Nregs; i++) 17 | if (bstart[i] >= 0) 18 | if (pc[bstart[i]] > best) 19 | { 20 | best = pc[bstart[i]]; 21 | this_p = bstart[i]; 22 | } 23 | if (this_p < 1) 24 | { 25 | printf ("\n This is impossible \n"); 26 | for (i = 0; i < Nregs; i++) 27 | printf ("\n Best start region %d is %d", i, bstart[i]); 28 | exit (1); 29 | } 30 | } 31 | else 32 | { 33 | int best = -1; 34 | for (i = 0; i < N; i++) 35 | if (plist[i] != NULL) 36 | if (pc[i] > best) 37 | { 38 | int it_fits = TRUE; 39 | for (j = 0; j < this_pool; j++) 40 | if (!cmat[i][current_pool[j]]) 41 | it_fits = FALSE; 42 | if (it_fits) 43 | { 44 | best = pc[i]; 45 | this_p = i; 46 | } 47 | } 48 | } 49 | 50 | printf ("\n Got here with this_p = %d \n", this_p); 51 | 
if (this_p >= 0) 52 | { 53 | fprintf (outfile, "%d\t%s\t%g\t%g\t%s\t%g\t%g\t%d\t%d\t%d\t%d\t%d\t%d\t%g\t%g\t%s\n", 54 | this_pool, 55 | plist[this_p]->forward->sequence, plist[this_p]->forward->tm, plist[this_p]->forward->gc, 56 | plist[this_p]->reverse->sequence, plist[this_p]->reverse->tm, plist[this_p]->reverse->gc, 57 | plist[this_p]->chrom, 58 | plist[this_p]->forward->start, plist[this_p]->forward->end, 59 | plist[this_p]->reverse->start, plist[this_p]->reverse->end, 60 | plist[this_p]->length, plist[this_p]->gc, plist[this_p]->tm, plist[this_p]->sequence); 61 | current_pool[this_pool] = this_p; 62 | for (i = 0; i < MAX_PAIRS; i++) 63 | if (redund[this_p][i] >= 0) 64 | { 65 | int ii = redund[this_p][i]; 66 | for (j = 0; j < Nregs; j++) 67 | if (bstart[j] == ii) 68 | bstart[j] = -1; 69 | plist[ii] = NULL; 70 | pc[ii] = 0; 71 | still--; 72 | } 73 | this_pool++; 74 | this_pool %= max_pool; 75 | } 76 | else 77 | this_pool = 0; 78 | 79 | printf ("\ngoing to make_pools with N = %d, still = %d, this_pool = %d, max_pool = %d\n\n", N, still, this_pool, 80 | max_pool); 81 | 82 | make_greedy_pools (outfile, plist, cmat, pc, redund, bstart, Nregs, N, still, current_pool, this_pool, max_pool); 83 | } 84 | 85 | /*---------------------------------------------------------------------*/ 86 | void 87 | make_less_greedy_pools (FILE *outfile, PNODE **plist, char **cmat, int *pc, int **redund, int *bstart, int Nregs, int N, int still, 88 | int *current_pool, int this_pool, int max_pool) 89 | { 90 | int i, j; 91 | if (still < 1) 92 | return; 93 | 94 | int this_p = -1; 95 | if (this_pool == 0) 96 | { 97 | // fprintf (outfile, "\nStarting a New Pool\n"); 98 | int best = 100000000; 99 | for (i = 0; i < Nregs; i++) 100 | if (bstart[i] >= 0) 101 | if (pc[bstart[i]] < best) 102 | { 103 | best = pc[bstart[i]]; 104 | this_p = bstart[i]; 105 | } 106 | if (this_p < 1) 107 | { 108 | printf ("\n This is impossible \n"); 109 | for (i = 0; i < Nregs; i++) 110 | printf ("\n Best start region %d 
is %d", i, bstart[i]); 111 | exit (1); 112 | } 113 | } 114 | else 115 | { 116 | int best = -1; 117 | for (i = 0; i < N; i++) 118 | if (plist[i] != NULL) 119 | if (pc[i] > best) 120 | { 121 | int it_fits = TRUE; 122 | for (j = 0; j < this_pool; j++) 123 | if (!cmat[i][current_pool[j]]) 124 | it_fits = FALSE; 125 | if (it_fits) 126 | { 127 | best = pc[i]; 128 | this_p = i; 129 | } 130 | } 131 | } 132 | 133 | printf ("\n Got here with this_p = %d \n", this_p); 134 | if (this_p >= 0) 135 | { 136 | fprintf (outfile, "%d\t%s\t%g\t%g\t%s\t%g\t%g\t%d\t%d\t%d\t%d\t%d\t%d\t%g\t%g\t%s\n", 137 | this_pool, 138 | plist[this_p]->forward->sequence, plist[this_p]->forward->tm, plist[this_p]->forward->gc, 139 | plist[this_p]->reverse->sequence, plist[this_p]->reverse->tm, plist[this_p]->reverse->gc, 140 | plist[this_p]->chrom, 141 | plist[this_p]->forward->start, plist[this_p]->forward->end, 142 | plist[this_p]->reverse->start, plist[this_p]->reverse->end, 143 | plist[this_p]->length, plist[this_p]->gc, plist[this_p]->tm, plist[this_p]->sequence); 144 | current_pool[this_pool] = this_p; 145 | for (i = 0; i < MAX_PAIRS; i++) 146 | if (redund[this_p][i] >= 0) 147 | { 148 | int ii = redund[this_p][i]; 149 | for (j = 0; j < Nregs; j++) 150 | if (bstart[j] == ii) 151 | bstart[j] = -1; 152 | plist[ii] = NULL; 153 | pc[ii] = 0; 154 | still--; 155 | } 156 | this_pool++; 157 | this_pool %= max_pool; 158 | } 159 | else 160 | this_pool = 0; 161 | 162 | printf ("\ngoing to make_pools with N = %d, still = %d, this_pool = %d, max_pool = %d\n\n", N, still, this_pool, 163 | max_pool); 164 | 165 | make_less_greedy_pools (outfile, plist, cmat, pc, redund, bstart, Nregs, N, still, current_pool, this_pool, max_pool); 166 | } 167 | /*---------------------------------------------------------------------*/ 168 | 169 | int 170 | is_poolable_primer (PNODE * p1, PNODE * p2, int size_diff_threshold, int tm_diff_threshold) 171 | { 172 | int i, f1, r1, f2, r2; 173 | char flipf1[80], flipf2[80], flipr1[80], 
flipr2[80]; 174 | 175 | for (i = p1->forward->start; i <= p1->reverse->end; i++) 176 | if ((i >= p2->forward->start) && (i <= p2->reverse->end)) 177 | return FALSE; 178 | 179 | for (i = p2->forward->start; i <= p2->reverse->end; i++) 180 | if ((i >= p1->forward->start) && (i <= p1->reverse->end)) 181 | return FALSE; 182 | 183 | f1 = abs (p1->forward->end - p1->forward->start) + 1; 184 | r1 = abs (p1->reverse->end - p1->reverse->start) + 1; 185 | f2 = abs (p2->forward->end - p2->forward->start) + 1; 186 | r2 = abs (p2->reverse->end - p2->reverse->start) + 1; 187 | 188 | reverse_string (p1->reverse->sequence, flipr1, r1); 189 | reverse_string (p2->reverse->sequence, flipr2, r2); 190 | reverse_string (p1->forward->sequence, flipf1, f1); 191 | reverse_string (p2->forward->sequence, flipf2, f2); 192 | 193 | if (check_uneven_dimer (p1->forward->sequence, flipf2, f1, f2)) 194 | return FALSE; 195 | 196 | if (check_uneven_dimer (p1->reverse->sequence, flipr2, r1, r2)) 197 | return FALSE; 198 | 199 | if (check_uneven_dimer (p1->forward->sequence, flipr2, f1, r2)) 200 | return FALSE; 201 | 202 | if (check_uneven_dimer (p1->reverse->sequence, flipf2, r1, f2)) 203 | return FALSE; 204 | 205 | if (fabs (p1->forward->tm - p2->forward->tm) > tm_diff_threshold) 206 | return FALSE; 207 | 208 | if (fabs (p1->reverse->tm - p2->reverse->tm) > tm_diff_threshold) 209 | return FALSE; 210 | 211 | if (abs (p1->length - p2->length) > size_diff_threshold) 212 | return FALSE; 213 | 214 | return TRUE; 215 | } 216 | /*---------------------------------------------------------------------*/ 217 | 218 | double 219 | fill_dist (SNODE ** list, int n, int *priority, int *selected, int which, int start, int stop, double *dist) 220 | { 221 | int i, j, flag, m; 222 | double totd, dtemp; 223 | 224 | totd = 0; 225 | j = m = 0; 226 | 227 | for (i = 0; i < n; i++) 228 | { 229 | if (priority[i] == which) 230 | { 231 | flag = TRUE; 232 | j = i - 1; 233 | dtemp = 0; 234 | while ((j >= 0) && (flag)) 235 | { 236 | 
j--; 237 | if (j >= 0) 238 | if (selected[j] == 1) 239 | flag = FALSE; 240 | } 241 | if (j >= 0) 242 | dtemp = list[i]->pos - list[j]->pos; 243 | else 244 | dtemp = list[i]->pos - start; 245 | 246 | dist[i] = dtemp * dtemp; 247 | 248 | j = i + 1; 249 | 250 | flag = TRUE; 251 | dtemp = 0; 252 | while ((j < n) && (flag)) 253 | { 254 | j++; 255 | if (j < n) 256 | if (selected[j] == 1) 257 | flag = FALSE; 258 | } 259 | if (j < n) 260 | dtemp = list[j]->pos - list[i]->pos; 261 | else 262 | dtemp = stop - list[i]->pos; 263 | 264 | dist[i] += dtemp * dtemp; 265 | } 266 | else 267 | dist[i] = 0; 268 | 269 | totd += dist[i]; 270 | if (selected[i] == 1) 271 | m++; 272 | } 273 | 274 | if (totd > 0) 275 | for (i = 0; i < n; i++) 276 | dist[i] /= totd; 277 | 278 | return totd; 279 | 280 | } 281 | 282 | /*---------------------------------------------------------------------*/ 283 | SNODE * 284 | snp_alloc (void) 285 | { 286 | SNODE *tn; 287 | 288 | tn = (SNODE *) malloc ((unsigned) sizeof (struct snp_node)); 289 | if (!tn) 290 | log_err ("allocation failure in snp_alloc()"); 291 | 292 | sprintf (tn->name, "tempname"); 293 | tn->no_disc = 0; 294 | tn->chrom = 0; 295 | tn->pos = 0; 296 | tn->het = 0.0; 297 | tn->baseA = 'N'; 298 | tn->baseB = 'N'; 299 | tn->no_pairs = 0; 300 | tn->pair = (PNODE **) malloc ((unsigned) (MAX_PAIRS + 1) * sizeof (PNODE *)); 301 | if (!tn->pair) 302 | log_err ("allocation failure 2 in snp_alloc()"); 303 | 304 | return tn; 305 | } 306 | 307 | /*---------------------------------------------------------------------*/ 308 | AMPNODE * 309 | amp_alloc (void) 310 | { 311 | AMPNODE *tn; 312 | 313 | tn = (AMPNODE *) malloc ((unsigned) sizeof (struct amp_node)); 314 | if (!tn) 315 | log_err ("allocation failure in amp_alloc()"); 316 | 317 | sprintf (tn->name, "tempname"); 318 | tn->chrom = 0; 319 | tn->start_pos = 0; 320 | tn->stop_pos = 0; 321 | tn->no_pairs = 0; 322 | tn->pair = (PNODE **) malloc ((unsigned) (MAX_PAIRS + 1) * sizeof (PNODE *)); 323 | if 
(!tn->pair) 324 | log_err ("allocation failure 2 in amp_alloc()"); 325 | 326 | return tn; 327 | } 328 | 329 | /*---------------------------------------------------------------------*/ 330 | int 331 | isbase (char c) 332 | { 333 | char C; 334 | 335 | C = toupper (c); 336 | 337 | if ((C == 'A') || (C == 'C') || (C == 'G') || (C == 'T')) 338 | return TRUE; 339 | 340 | return FALSE; 341 | } 342 | 343 | /*---------------------------------------------------------------------*/ 344 | SNODE ** 345 | fill_snp_list (FILE * sfile, int *n, unsigned int chrom) 346 | { 347 | int flag, i, temp_no, final_no; 348 | char sss[4096], s[256]; 349 | SNODE **temp_snp; 350 | 351 | i = 0; 352 | flag = TRUE; 353 | 354 | printf ("reading snps for chr %d\n", chrom); 355 | temp_snp = (SNODE **) malloc ((unsigned) (2000000) * sizeof (SNODE *)); 356 | if (!temp_snp) 357 | log_err ("Allocation failure in Temporary SNP storage"); 358 | 359 | fgets (sss, 4094, sfile); 360 | 361 | while (flag) 362 | { 363 | temp_snp[i] = snp_alloc (); 364 | sscanf (sss, "%s\t%d\t%d\t%d\t%s\t%c/%c", temp_snp[i]->name, &temp_snp[i]->no_disc, &temp_snp[i]->chrom, &temp_snp[i]->pos, s, /* het frequency */ 365 | &temp_snp[i]->baseA, &temp_snp[i]->baseB); 366 | 367 | temp_snp[i]->pos--; 368 | temp_snp[i]->het = (double) atof (s); 369 | temp_snp[i]->no_pairs = 0; 370 | 371 | if ((isbase (temp_snp[i]->baseA)) && (isbase (temp_snp[i]->baseB)) 372 | && (temp_snp[i]->chrom == chrom) && (temp_snp[i]->het > 0.01)) 373 | { 374 | i++; 375 | } 376 | else 377 | { 378 | free (temp_snp[i]); 379 | } 380 | 381 | sss[0] = '\0'; 382 | if ((!feof (sfile)) && (i < 2000000)) 383 | { 384 | fgets (sss, 4094, sfile); 385 | if (strlen (sss) < 3) 386 | flag = FALSE; 387 | } 388 | else 389 | flag = FALSE; 390 | } 391 | 392 | temp_no = i; 393 | qsort (temp_snp, temp_no, sizeof (SNODE *), sort_compare_struct); 394 | final_no = i; 395 | (*n) = final_no; 396 | printf ("\n Total number of SNPs found = %d\n", final_no); 397 | 398 | return temp_snp; 399 
| } 400 | 401 | /*---------------------------------------------------------------------*/ 402 | AMPNODE ** 403 | fill_amp_list (FILE * sfile, int n) 404 | { 405 | int flag, i, temp_no; 406 | char sss[4096]; 407 | AMPNODE **temp_amp; 408 | 409 | i = 0; 410 | temp_no = n; 411 | flag = TRUE; 412 | 413 | temp_amp = (AMPNODE **) malloc ((unsigned) (temp_no + 1) * sizeof (AMPNODE *)); 414 | if (!temp_amp) 415 | log_err ("Allocation failure in Temporary SNP storage"); 416 | 417 | fgets (sss, 4094, sfile); 418 | 419 | while (flag) 420 | { 421 | temp_amp[i] = amp_alloc (); 422 | sscanf (sss, "chr%d\t%d\t%d\t%s", 423 | &temp_amp[i]->chrom, &temp_amp[i]->start_pos, &temp_amp[i]->stop_pos, temp_amp[i]->name); 424 | 425 | temp_amp[i]->start_pos--; 426 | temp_amp[i]->stop_pos--; 427 | temp_amp[i]->no_pairs = 0; 428 | i++; 429 | sss[0] = '\0'; 430 | if ((!feof (sfile)) && (i < n)) 431 | { 432 | fgets (sss, 4094, sfile); 433 | if (strlen (sss) < 3) 434 | flag = FALSE; 435 | } 436 | else 437 | flag = FALSE; 438 | } 439 | printf ("\n Total number of regions found = %d\n", i); 440 | return temp_amp; 441 | } 442 | 443 | /*---------------------------------------------------------------------*/ 444 | int 445 | sort_compare_struct (const void *a, const void *b) 446 | { 447 | //printf("\n Comparing %d with %d ", (*(SNODE **) a)->pos, (*(SNODE **) b)->pos); 448 | if ((*(SNODE **) a)->pos < (*(SNODE **) b)->pos) 449 | return -1; 450 | else if ((*(SNODE **) a)->pos > (*(SNODE **) b)->pos) 451 | return 1; 452 | else 453 | return 0; 454 | } 455 | 456 | /*---------------------------------------------------------------------*/ 457 | void 458 | fill_hs (char a, char b, double *h, double *s) 459 | { 460 | if (a == 'A') 461 | { 462 | if (b == 'A') 463 | { 464 | (*h) += 9100; 465 | (*s) += 24; 466 | } 467 | else if (b == 'C') 468 | { 469 | (*h) += 6500; 470 | (*s) += 17.3; 471 | } 472 | else if (b == 'G') 473 | { 474 | (*h) += 7800; 475 | (*s) += 20.8; 476 | } 477 | else if (b == 'T') 478 | { 479 | 
(*h) += 8600; 480 | (*s) += 23.9; 481 | } 482 | } 483 | else if (a == 'C') 484 | { 485 | if (b == 'A') 486 | { 487 | (*h) += 5800; 488 | (*s) += 12.9; 489 | } 490 | else if (b == 'C') 491 | { 492 | (*h) += 11000; 493 | (*s) += 26.6; 494 | } 495 | else if (b == 'G') 496 | { 497 | (*h) += 11900; 498 | (*s) += 27.8; 499 | } 500 | else if (b == 'T') 501 | { 502 | (*h) += 7800; 503 | (*s) += 20.8; 504 | } 505 | } 506 | else if (a == 'G') 507 | { 508 | if (b == 'A') 509 | { 510 | (*h) += 5600; 511 | (*s) += 13.5; 512 | } 513 | else if (b == 'C') 514 | { 515 | (*h) += 11100; 516 | (*s) += 26.7; 517 | } 518 | else if (b == 'G') 519 | { 520 | (*h) += 11000; 521 | (*s) += 26.6; 522 | } 523 | else if (b == 'T') 524 | { 525 | (*h) += 6500; 526 | (*s) += 17.3; 527 | } 528 | } 529 | else if (a == 'T') 530 | { 531 | if (b == 'A') 532 | { 533 | (*h) += 6000; 534 | (*s) += 16.9; 535 | } 536 | else if (b == 'C') 537 | { 538 | (*h) += 5600; 539 | (*s) += 13.5; 540 | } 541 | else if (b == 'G') 542 | { 543 | (*h) += 5800; 544 | (*s) += 12.9; 545 | } 546 | else if (b == 'T') 547 | { 548 | (*h) += 9100; 549 | (*s) += 24.0; 550 | } 551 | } 552 | /* printf("\n For %c %c we have H = %g S = %g",a,b,(*h),(*s)); */ 553 | 554 | } 555 | 556 | /*---------------------------------------------------------------------*/ 557 | double 558 | calc_tm (char *ss, int n) 559 | { 560 | int i; 561 | double h, s, tm; 562 | 563 | h = s = 0.0; 564 | 565 | for (i = 0; i < n - 1; i++) 566 | fill_hs (ss[i], ss[i + 1], &h, &s); 567 | 568 | /* tm = h/(s + 57.6945289) - 21.4624334 - 273.15; */ 569 | /* tm = h/(s + 57.6945289) - 294.6124334; */ 570 | tm = h / (s + 47.16510465) - 294.6124334; 571 | 572 | return tm; 573 | } 574 | 575 | /*---------------------------------------------------------------------*/ 576 | 577 | void 578 | convert_int_basepairs (int i, char *s, int k) 579 | { 580 | int j, l; 581 | char ss[256]; 582 | 583 | l = k - 1; 584 | for (j = 0; j < 256; j++) 585 | ss[j] = 'A'; 586 | 587 | while (l >= 0) 
588 | { 589 | j = i % 4; 590 | if (j == 0) 591 | ss[l] = 'A'; 592 | else if (j == 1) 593 | ss[l] = 'C'; 594 | else if (j == 2) 595 | ss[l] = 'G'; 596 | else 597 | ss[l] = 'T'; 598 | i -= j; 599 | i /= 4; 600 | l--; 601 | } 602 | 603 | ss[k] = '\0'; 604 | strcpy (s, ss); 605 | 606 | } 607 | 608 | /*---------------------------------------------------------------------*/ 609 | void 610 | decode_basepairs (unsigned char *s, char *dest, int n) 611 | { 612 | int i, m, l; 613 | unsigned char j; 614 | char a, ss[5]; 615 | 616 | ss[4] = '\0'; 617 | for (i = 0; i < n; i++) 618 | { 619 | j = s[i]; 620 | 621 | //printf("\n Decoding %d ", j); 622 | for (l = 3; l >= 0; l--) 623 | { 624 | m = j % 4; 625 | if (m == 0) 626 | a = 'A'; 627 | else if (m == 1) 628 | a = 'C'; 629 | else if (m == 2) 630 | a = 'G'; 631 | else 632 | a = 'T'; 633 | 634 | ss[l] = a; 635 | j = j >> 2;; 636 | 637 | } 638 | 639 | for (m = 0; m < 4; m++) 640 | *dest++ = ss[m]; 641 | 642 | //printf("as %s", ss); 643 | } 644 | *dest = '\0'; 645 | } 646 | 647 | /*---------------------------------------------------------------------*/ 648 | unsigned int 649 | encode_basepairs (char *ss, int n) 650 | { 651 | unsigned int k; 652 | int i; 653 | 654 | k = 0; 655 | for (i = 0; i < n; i++) 656 | { 657 | k = k << 2; 658 | if (ss[i] == 'A'); 659 | else if (ss[i] == 'C') 660 | k++; 661 | else if (ss[i] == 'G') 662 | k += 2; 663 | else 664 | k += 3; 665 | } 666 | // printf("\nn = %d Encoding %c%c%c%c as %d ",n,ss[0],ss[1],ss[2],ss[3],k); 667 | 668 | return k; 669 | } 670 | 671 | /*---------------------------------------------------------------------*/ 672 | static double *FQ_LIST; 673 | 674 | int 675 | find_primers (SNODE ** snp_list, AMPNODE * tn, int no_snp, int *flat, char *contig, int L, int min_primer, int max_primer, int amp_max, int amp_min, double min_gc, double max_gc, double min_tm, double max_tm, int depth, int local_depth, int target_base, // originally the bp of the SNP that dave's original primer_snp program was 
looking for 676 | int start_pos, unsigned char *highmer, int **repeats, int no_repeats, int end_region, int chrom) 677 | { 678 | int i, j, k, *index_left, *index_right, fl, fr, this_amp, pairs_todump; 679 | int *amp_size, *right_side, *left_side, *local_index, local_size, temp_length; 680 | int *plen_l, *plen_r; 681 | double *gc_left, *gc_right, *fq_left, *fq_right; 682 | double *total_fq, *best_gc_left, *best_gc_right, tm_l, tm_r; 683 | char ss[256], sss[256], flip[256], *rt_contig, **best_left, **best_right; 684 | PNODE *product; 685 | PRIMER *p_left, *p_right; 686 | 687 | /* 688 | printf("\nForward sequence\n\n"); 689 | printf("\n In tile contig with L = %d; amp_max = %d; amp_min = %d; depth = %d\n\n", L, amp_max, amp_min, depth); 690 | for (i = 0; i < L; i++) 691 | { 692 | printf("%c", contig[i]); 693 | //if ((i + 1 == target_base) || (i == target_base)) printf(" * "); 694 | if (i % 80 == 79) printf("\n"); 695 | } 696 | printf("\n"); 697 | */ 698 | 699 | fq_left = dvector (0, L); 700 | fq_right = dvector (0, L); 701 | gc_left = dvector (0, L); 702 | gc_right = dvector (0, L); 703 | index_left = ivector (0, L); 704 | index_right = ivector (0, L); 705 | plen_l = ivector (0, L); 706 | plen_r = ivector (0, L); 707 | printf("\n Local Depth is %d \n\n",local_depth); 708 | 709 | j = 4; 710 | for (i = 0; i < local_depth; i++) 711 | j *= 4; 712 | j = (j - 4) / 3; 713 | 714 | local_size = j; 715 | local_index = ivector (0, local_size); 716 | for (i = 0; i <= local_size; i++) 717 | local_index[i] = 0; 718 | 719 | flat_index_contig (local_index, contig, L, local_depth); 720 | 721 | // printf("\n About to fill quality in forward direction\n\n"); 722 | 723 | fill_quality_scores (flat, local_index, contig, L, minim (target_base - 30, end_region), depth, local_depth, 724 | min_primer, max_primer, fq_left, gc_left, index_left, plen_l, 725 | min_gc, max_gc, min_tm, max_tm, repeats, no_repeats, highmer, start_pos); 726 | rt_contig = cvector (0, L); 727 | reverse_transcribe 
(contig, rt_contig, L); 728 | 729 | /* 730 | printf("\nReverse Transcribe\n\n"); 731 | printf("\n");for (i = 0; i < L; i++) {printf("%c", rt_contig[i]); 732 | if ((i + 1 == L - target_base) || (i + 2 == L - target_base)) printf(" * "); 733 | if (i % 80 == 79) 734 | printf("\n"); 735 | } 736 | printf("\n"); 737 | */ 738 | 739 | // printf ("\n About to fill quality in reverse direction\n"); 740 | 741 | fill_quality_scores (flat, local_index, rt_contig, L, minim (target_base - 30, end_region), depth, local_depth, 742 | min_primer, max_primer, fq_right, gc_right, index_right, plen_r, 743 | min_gc, max_gc, min_tm, max_tm, repeats, no_repeats, highmer, start_pos); 744 | 745 | i = 0; 746 | fl = 0; 747 | pairs_todump = MAX_PAIRS; 748 | amp_size = ivector (0, pairs_todump); 749 | best_left = cmatrix (0, pairs_todump, 0, 256); 750 | best_right = cmatrix (0, pairs_todump, 0, 256); 751 | total_fq = dvector (0, pairs_todump); 752 | right_side = ivector (0, pairs_todump); 753 | left_side = ivector (0, pairs_todump); 754 | best_gc_left = dvector (0, pairs_todump); 755 | best_gc_right = dvector (0, pairs_todump); 756 | 757 | printf("\n About to start finding primers\n"); 758 | 759 | while ((fq_left[index_left[i]] < 1e7) && (fl < pairs_todump)) 760 | { 761 | k = start_pos + index_left[i]; 762 | //printf("\n pos (k) = %d\n\n", k); 763 | if ((!poly_under_primer (k, k + plen_l[index_left[i]], snp_list, 0, no_snp - 1, (no_snp) / 2)) && 764 | (gc_left[index_left[i]] >= min_gc) && (gc_left[index_left[i]] <= max_gc) && 765 | (is_not_repeat (k, k + plen_l[index_left[i]], repeats, no_repeats, 0.001))) 766 | { 767 | j = 0; 768 | fr = 0; 769 | strncpy (ss, contig + index_left[i], plen_l[index_left[i]]); 770 | ss[plen_l[index_left[i]]] = '\0'; 771 | tm_l = calc_tm (ss, plen_l[index_left[i]]); 772 | 773 | if (fq_right[index_right[j]] >= 1e7) 774 | { 775 | printf("\n No right side primers \n\n"); 776 | break; 777 | } 778 | printf("\nMatching 5' %s 3' (fq = %g, gc = %g, len = %d tm = %g) with", 
ss, fq_left[index_left[i]], gc_left[index_left[i]], plen_l[index_left[i]], tm_l); 779 | 780 | while ((fq_right[index_right[j]] < 1e7) && (fr < 1)) 781 | { 782 | temp_length = L - index_right[j] - index_left[i]; 783 | 784 | /* 785 | * strncpy(sss,rt_contig+index_right[j],primer 786 | * ); sss[primer] = '\0'; printf("\n This 787 | * primer pair %s appears to %d 788 | * \n",sss,temp_length); 789 | */ 790 | 791 | k = start_pos + L - index_right[j]; 792 | if ((!poly_under_primer (k - plen_r[index_right[j]], k, snp_list, 0, no_snp - 1, (no_snp) / 2)) && 793 | (temp_length >= amp_min) && (temp_length <= amp_max) && 794 | (is_not_repeat (k - plen_r[index_right[j]], k, repeats, no_repeats, 0.001))) 795 | { 796 | strncpy (sss, rt_contig + index_right[j], plen_r[index_right[j]]); 797 | sss[plen_r[index_right[j]]] = '\0'; 798 | reverse_string (sss, flip, plen_r[index_right[j]]); 799 | if (check_uneven_dimer (ss, flip, plen_l[index_left[i]], plen_r[index_right[j]])) 800 | { 801 | printf("\n\t\t\tNot 5' %s 3' because of a dimer", sss); 802 | } 803 | else 804 | { 805 | tm_r = calc_tm (sss, plen_r[index_right[j]]); 806 | printf("\n In here with tm_l = %g and tm_r = %g\n\n",tm_r,tm_l); 807 | if (fabs (tm_l - tm_r) < 5.0) 808 | { 809 | if (fr == 0) 810 | { 811 | this_amp = L - index_right[j]; 812 | right_side[fl] = this_amp; 813 | left_side[fl] = index_left[i]; 814 | amp_size[fl] = temp_length; 815 | sprintf (best_left[fl], "%s", ss); 816 | sprintf (best_right[fl], "%s", sss); 817 | total_fq[fl] = fq_right[index_right[j]] + fq_left[index_left[i]]; 818 | best_gc_left[fl] = gc_left[index_left[i]]; 819 | best_gc_right[fl] = gc_right[index_right[j]]; 820 | } 821 | product = product_alloc (); 822 | p_left = primer_alloc (); 823 | p_right = primer_alloc (); 824 | p_left->sequence = cvector (0, plen_l[index_left[i]] + 1); 825 | sprintf (p_left->sequence, "%s", ss); 826 | p_right->sequence = cvector (0, plen_r[index_right[j]] + 1); 827 | sprintf (p_right->sequence, "%s", sss); 828 | 
p_left->tm = tm_l; 829 | p_right->tm = tm_r; 830 | p_left->gc = gc_left[index_left[i]]; 831 | p_right->gc = gc_right[index_right[j]]; 832 | p_left->start = start_pos + index_left[i]; 833 | p_left->end = p_left->start + plen_l[index_left[i]]; 834 | p_right->end = start_pos + L - (index_right[j]); 835 | p_right->start = p_right->end - plen_r[index_right[j]]; 836 | p_left->end--; 837 | p_right->end--; 838 | product->forward = p_left; 839 | product->reverse = p_right; 840 | product->sequence = cvector (0, temp_length + 1); 841 | strncpy (product->sequence, contig + index_left[i], temp_length); 842 | product->sequence[temp_length] = '\0'; 843 | product->length = temp_length; 844 | product->gc = calc_gc (product->sequence, temp_length); 845 | product->tm = 41.0 * product->gc - 675.0 / (double) product->length - 21.4624334; 846 | product->chrom = chrom; 847 | tn->pair[tn->no_pairs++] = product; 848 | 849 | printf("\n\tSuccess with 5' %s 3' (fq = %g, gc = %g, len = %d tm = %g)", sss, fq_right[index_right[j]], gc_right[index_right[j]], plen_r[index_right[j]], tm_r); 850 | fr++; 851 | } 852 | } 853 | } 854 | j++; 855 | } 856 | if (fr > 0) 857 | fl++; 858 | } 859 | i++; 860 | } 861 | 862 | 863 | if (fl > 0) 864 | { 865 | for (i = 0; i <= pairs_todump; i++) 866 | index_left[i] = i; 867 | 868 | 869 | FQ_LIST = total_fq; 870 | qsort ((void *) index_left, fl, sizeof (int), sort_compare_index); 871 | 872 | 873 | this_amp = right_side[index_left[0]]; 874 | 875 | //for(i=0;ipos); 921 | 922 | if ((list[which]->pos >= p_start) && (list[which]->pos <= p_end)) 923 | { 924 | printf("\nA primer which goes from %d to %d appears to have %s under it at pos %d\n\n", p_start,p_end,list[which]->name,list[which]->pos); 925 | return TRUE; 926 | } 927 | if (stop - start <= 1) 928 | return FALSE; 929 | 930 | if (list[which]->pos > p_end) 931 | { 932 | if (start >= which) 933 | return FALSE; 934 | else 935 | return poly_under_primer (p_start, p_end, list, start, which, (which + start) / 2); 936 | } 
937 | if (list[which]->pos < p_start) 938 | { 939 | if (which >= stop) 940 | return FALSE; 941 | else 942 | return poly_under_primer (p_start, p_end, list, which, stop, (stop + which) / 2); 943 | } 944 | return FALSE; 945 | 946 | 947 | } 948 | 949 | /*---------------------------------------------------------------------*/ 950 | double 951 | calc_gc (char *s, int n) 952 | { 953 | char c; 954 | int i, gc; 955 | 956 | if (n <= 0) 957 | return 0; 958 | 959 | gc = 0; 960 | for (i = 0; i < n; i++) 961 | { 962 | c = toupper (s[i]); 963 | if ((c == 'G') || (c == 'C')) 964 | gc++; 965 | } 966 | 967 | return (double) gc / (double) n; 968 | } 969 | 970 | /*---------------------------------------------------------------------*/ 971 | PRIMER * 972 | primer_alloc (void) 973 | { 974 | PRIMER *tn; 975 | 976 | tn = (PRIMER *) malloc ((unsigned) sizeof (struct primer_node)); 977 | if (!tn) 978 | log_err ("allocation failure in primer_alloc"); 979 | 980 | tn->start = 0; 981 | tn->end = 0; 982 | tn->sequence = NULL; 983 | tn->tm = 0; 984 | tn->gc = 0; 985 | 986 | return tn; 987 | 988 | } 989 | 990 | /*---------------------------------------------------------------------*/ 991 | PNODE * 992 | product_alloc (void) 993 | { 994 | PNODE *tn; 995 | 996 | tn = (PNODE *) malloc ((unsigned) sizeof (struct primer_pair)); 997 | if (!tn) 998 | log_err ("allocation failure in primer_alloc()"); 999 | 1000 | tn->forward = NULL; 1001 | tn->reverse = NULL; 1002 | tn->sequence = NULL; 1003 | tn->length = 0; 1004 | tn->gc = 0; 1005 | tn->tm = 0; 1006 | tn->chrom = 0; 1007 | 1008 | return tn; 1009 | } 1010 | 1011 | /*---------------------------------------------------------------------*/ 1012 | void 1013 | fill_quality_scores (int *flat, int *local, char *contig, int L, int window, int depth, int ld, int min_primer, 1014 | int max_primer, double *fq_left, double *gc_left, int *index_left, int *plen, double min_gc, 1015 | double max_gc, double min_tm, double max_tm, int **repeats, int no_repeats, unsigned 
char *highmer, 1016 | int start_pos) 1017 | { 1018 | int **reps_left, **local_reps, tail, primer; 1019 | int i, j, k, m, rm, min_match, offset, *uflag; 1020 | char flip[256]; 1021 | double discount, thisd, tm, fq_temp, gc_temp; 1022 | 1023 | reps_left = imatrix (0, window, 0, depth); 1024 | local_reps = imatrix (0, window, 0, depth); 1025 | 1026 | for (j = 0; j <= window; j++) 1027 | for (i = 0; i <= depth; i++) 1028 | local_reps[j][i] = reps_left[j][i] = 0; 1029 | 1030 | uflag = ivector (0, window); 1031 | k = no_repeats / 2; 1032 | 1033 | for (i = 0; i < window; i++) 1034 | { 1035 | count_copys (local, contig + i, ld, local_reps[i]); 1036 | count_copys (flat, contig + i, depth, reps_left[i]); 1037 | 1038 | if (check_15mer (contig + i, highmer, 15) > 0) 1039 | uflag[i] = TRUE; 1040 | else 1041 | uflag[i] = FALSE; 1042 | 1043 | //for(j=1;j<=depth;j++) printf("\nAt window position %d, depth %d Found %d copys to the left %c", i,j,reps_left[i][j],*(contig+i+j-1)); 1044 | //for(j=1;j<=ld;j++) printf("\nAt window position %d, local depth %d Found %d local copys to the left %c", i,j,local_reps[i][j],*(contig+i+j-1)); 1045 | } 1046 | 1047 | // discount = 0.25; One base less lowers score by 1/4 1048 | // discount = 0.125; One base less lowers score by 1/8 1049 | // discount = 0.0625; One base less lowers score by 1/16 1050 | discount = 0.0625; 1051 | 1052 | for (i = 0; i <= window; i++) 1053 | { 1054 | fq_left[i] = 1e9; 1055 | gc_left[i] = 2.0; 1056 | } 1057 | min_match = 0; 1058 | for (i = 0; i < window - max_primer; i++) 1059 | { 1060 | for (primer = min_primer; primer <= max_primer; primer++) 1061 | { 1062 | offset = primer - ld; 1063 | tail = primer - 15; 1064 | tm = calc_tm (contig + i, primer); 1065 | fq_temp = 1e8; 1066 | 1067 | if ((tm >= min_tm) && (tm <= max_tm)) 1068 | { 1069 | fq_temp = gc_temp = 0.0; 1070 | for (j = 0; j < primer; j++) 1071 | { 1072 | rm = minim (depth, primer - j); 1073 | thisd = 1.0; 1074 | for (m = depth; m > rm; m--) 1075 | thisd *= 
discount; 1076 | k = i + j; 1077 | for (m = rm; m > min_match; m--) 1078 | { 1079 | fq_temp += thisd * reps_left[k][m]; 1080 | thisd *= discount; 1081 | } 1082 | 1083 | k = i + j; 1084 | if ((contig[k] == 'G') || (contig[k] == 'C')) 1085 | gc_temp += 1.0; 1086 | 1087 | if (j + tail < primer) 1088 | { 1089 | if (uflag[k]) 1090 | { 1091 | if (j + tail + 1 < primer) 1092 | { 1093 | fq_temp += 100; 1094 | } 1095 | else 1096 | { 1097 | fq_temp += 2000; 1098 | } 1099 | } 1100 | } 1101 | } 1102 | gc_temp /= (double) primer; 1103 | fq_temp /= (double) primer; 1104 | 1105 | //printf("\n Primer = %d i = %d fq = %g gc = %g \n\n",primer,i,fq_left[i],gc_left[i]); 1106 | 1107 | if ((gc_left[i] >= min_gc) && (gc_left[i] <= max_gc)) 1108 | { 1109 | if (check_hairpin (contig + i, primer)) 1110 | { 1111 | /* 1112 | * strncpy(ss,contig+i,primer); ss[primer] = '\0'; printf("\n\t\tDetermined %s to be a hairpin %g",ss,fq_left[i]); 1113 | */ 1114 | fq_temp += 1e7; 1115 | } 1116 | else 1117 | { 1118 | reverse_string (contig + i, flip, primer); 1119 | if (check_dimer (contig + i, flip, primer)) 1120 | { 1121 | // strncpy(ss,contig+i,primer); ss[primer] = '\0'; printf("\n\t\tDetermined %s to be a self-dimer %g",ss,fq_left[i]); 1122 | fq_temp += 1e7; 1123 | } 1124 | } 1125 | } 1126 | else 1127 | { 1128 | // strncpy(ss,contig+i,primer); ss[primer] = '\0'; printf("\n\t\tDetermined %s to be outside the gc window %g",ss,fq_left[i]); 1129 | fq_temp += 1e7; 1130 | } 1131 | 1132 | // if(uflag[i+tail]) fq_left[i] += 1e8; 1133 | 1134 | // printf("\nLeft i = %d fq=%g gc=%g ",i,fq_left[i],gc_left[i]); 1135 | } 1136 | if (fq_temp < fq_left[i]) 1137 | { 1138 | plen[i] = primer; 1139 | fq_left[i] = fq_temp; 1140 | gc_left[i] = gc_temp; 1141 | } 1142 | } 1143 | } 1144 | 1145 | for (i = 0; i <= window; i++) 1146 | index_left[i] = i; 1147 | 1148 | FQ_LIST = fq_left; 1149 | qsort ((void *) index_left, window - max_primer, sizeof (int), sort_compare_index); 1150 | 1151 | free_ivector (uflag, 0, window); 1152 | 
free_imatrix (reps_left, 0, window, 0, depth); 1153 | free_imatrix (local_reps, 0, window, 0, depth); 1154 | 1155 | // for(i=0;i= 0) 1200 | count += minim (list[i][1], y) - maxim (x, list[i][0]) + 1; 1201 | 1202 | j = find_frag (y, list, 0, no_frags - 1, no_frags / 2); 1203 | if (j >= 0) 1204 | count += minim (list[j][1], y) - maxim (x, list[j][0]) + 1; 1205 | 1206 | if ((i < 0) && (j < 0)) 1207 | { 1208 | k = find_seg (x, y, list, 0, no_frags - 1, no_frags / 2); 1209 | if (k > 0) 1210 | count += minim (list[k][1], y) - maxim (x, list[k][0]) + 1; 1211 | } 1212 | if ((double) count / (double) (y - x + 1) >= max) 1213 | return FALSE; 1214 | else 1215 | return TRUE; 1216 | } 1217 | 1218 | /*---------------------------------------------------------------------*/ 1219 | int 1220 | find_frag (int x, int **list, int start, int end, int guess) 1221 | { 1222 | /* 1223 | * if(PRINT_ME) printf("\n In find_frag x = %lu start = %d end = %d guess = %d (%lu,%lu)", x,start,nd,guess,list[guess][0],list[guess][1]); 1224 | */ 1225 | 1226 | if (x < 0) 1227 | return -1; 1228 | 1229 | if ((x >= list[guess][0]) && (x <= list[guess][1])) 1230 | return guess; 1231 | 1232 | /* 1233 | * if( (start == guess) && (end == guess) ) return -1; 1234 | */ 1235 | 1236 | if (x < list[guess][0]) 1237 | { 1238 | if (guess <= start) 1239 | return -1; 1240 | 1241 | end = guess - 1; 1242 | } 1243 | else if (x > list[guess][1]) 1244 | { 1245 | if (guess >= end) 1246 | return -1; 1247 | 1248 | start = guess + 1; 1249 | } 1250 | guess = (start + end) / 2; 1251 | return find_frag (x, list, start, end, guess); 1252 | 1253 | } 1254 | 1255 | /*---------------------------------------------------------------------*/ 1256 | int 1257 | find_seg (int x, int y, int **list, int start, int end, int guess) 1258 | { 1259 | /* 1260 | * if(PRINT_ME) printf("\n In find_frag x = %lu start = %d end = %d guess = %d (%lu,%lu)", x,start,end,guess,list[guess][0],list[guess][1]); 1261 | */ 1262 | 1263 | if ((x <= list[guess][0]) && 
(y >= list[guess][1])) 1264 | return guess; 1265 | 1266 | /* 1267 | * if( (start == guess) && (end == guess) ) return -1; 1268 | */ 1269 | 1270 | if (x < list[guess][0]) 1271 | { 1272 | if (guess <= start) 1273 | return -1; 1274 | 1275 | end = guess - 1; 1276 | } 1277 | else if (x > list[guess][1]) 1278 | { 1279 | if (guess >= end) 1280 | return -1; 1281 | 1282 | start = guess + 1; 1283 | } 1284 | guess = (start + end) / 2; 1285 | return find_seg (x, y, list, start, end, guess); 1286 | 1287 | } 1288 | 1289 | /*---------------------------------------------------------------------*/ 1290 | 1291 | int 1292 | check_hairpin (char *ss, int n) 1293 | { 1294 | int min; 1295 | char flip[1024]; 1296 | 1297 | if (n > 1000) 1298 | { 1299 | printf ("\n Dude what's with a %d base primer .... PPPPLEEASE \n", n); 1300 | exit (1); 1301 | } 1302 | reverse_string (ss, flip, n); 1303 | 1304 | for (min = 3; min <= n / 2 - 4; min++) 1305 | if (check_hairpin_min (ss, flip, n, min)) 1306 | return TRUE; 1307 | 1308 | return FALSE; 1309 | } 1310 | 1311 | /*---------------------------------------------------------------------*/ 1312 | 1313 | int 1314 | check_hairpin_min (char *ss, char *flip, int n, int min) 1315 | { 1316 | int half; 1317 | 1318 | half = n / 2 - min; 1319 | 1320 | if (half < 4) 1321 | return FALSE; 1322 | 1323 | return check_dimer (ss, flip, half); 1324 | 1325 | } 1326 | 1327 | /*---------------------------------------------------------------------*/ 1328 | int 1329 | test_dimer (char *p1, char *p2, int n) 1330 | { 1331 | int i, matches; 1332 | 1333 | if (n < 3) 1334 | return FALSE; 1335 | 1336 | matches = 0; 1337 | 1338 | for (i = 0; i < n; i++) 1339 | if (check_watson_crick (p1[i], p2[i])) 1340 | matches++; 1341 | 1342 | if ((double) matches / (double) n >= 0.75) 1343 | return TRUE; 1344 | 1345 | return FALSE; 1346 | 1347 | } 1348 | 1349 | /*---------------------------------------------------------------------*/ 1350 | int 1351 | check_uneven_dimer (char *p1, char *p2, int 
n1, int n2) 1352 | { 1353 | int i, n; 1354 | 1355 | if (n1 == n2) 1356 | return check_dimer (p1, p2, n1); 1357 | 1358 | 1359 | if ((n1 < 3) || (n2 < 3)) 1360 | return FALSE; 1361 | 1362 | n = minim (n1, n2); 1363 | if (test_dimer (p1, p2, n)) 1364 | return TRUE; 1365 | 1366 | for (i = 1; i < n1; i++) 1367 | { 1368 | if (test_dimer (p1 + i, p2, minim (n1 - i, n))) 1369 | return TRUE; 1370 | } 1371 | 1372 | for (i = 1; i < n2; i++) 1373 | { 1374 | if (test_dimer (p1, p2 + i, minim (n2 - i, n))) 1375 | return TRUE; 1376 | } 1377 | 1378 | return FALSE; 1379 | } 1380 | 1381 | /*---------------------------------------------------------------------*/ 1382 | int 1383 | check_dimer (char *p1, char *p2, int n) 1384 | { 1385 | int i; 1386 | 1387 | 1388 | if (n < 3) 1389 | return FALSE; 1390 | 1391 | if (test_dimer (p1, p2, n)) 1392 | return TRUE; 1393 | 1394 | 1395 | for (i = 1; i < n; i++) 1396 | { 1397 | if (test_dimer (p1 + i, p2, n - i)) 1398 | return TRUE; 1399 | if (test_dimer (p1, p2 + i, n - i)) 1400 | return TRUE; 1401 | } 1402 | 1403 | return FALSE; 1404 | 1405 | } 1406 | 1407 | /*---------------------------------------------------------------------*/ 1408 | int 1409 | check_watson_crick (char a, char b) 1410 | { 1411 | if (a == 'A') 1412 | { 1413 | if (b == 'T') 1414 | { 1415 | return TRUE; 1416 | } 1417 | else 1418 | { 1419 | return FALSE; 1420 | } 1421 | } 1422 | if (a == 'T') 1423 | { 1424 | if (b == 'A') 1425 | { 1426 | return TRUE; 1427 | } 1428 | else 1429 | { 1430 | return FALSE; 1431 | } 1432 | } 1433 | if (a == 'G') 1434 | { 1435 | if (b == 'C') 1436 | { 1437 | return TRUE; 1438 | } 1439 | else 1440 | { 1441 | return FALSE; 1442 | } 1443 | } 1444 | if (a == 'C') 1445 | { 1446 | if (b == 'G') 1447 | { 1448 | return TRUE; 1449 | } 1450 | else 1451 | { 1452 | return FALSE; 1453 | } 1454 | } 1455 | // added on 06-09-2014 to silence the compiler warning that this 1456 | // function might not return anything 1457 | return FALSE; 1458 | } 1459 | 1460 | 
/*---------------------------------------------------------------------*/ 1461 | void 1462 | count_copys (int *flat, char *s, int n, int *reps) 1463 | { 1464 | char ss[256]; 1465 | 1466 | simple_count_copys (flat, s, n, reps, TRUE); 1467 | reverse_transcribe (s, ss, n); 1468 | simple_count_copys (flat, ss, n, reps, FALSE); 1469 | } 1470 | 1471 | /*---------------------------------------------------------------------*/ 1472 | 1473 | void 1474 | simple_count_copys (int *flat, char *s, int n, int *reps, int forward) 1475 | { 1476 | int i, j; 1477 | unsigned int offset, k; 1478 | 1479 | offset = 0; 1480 | for (i = 1; i <= n; i++) 1481 | { 1482 | if (s[i - 1] == 'N') 1483 | { 1484 | for (j = 1; j <= n; j++) 1485 | reps[j] += 1e8; 1486 | return; 1487 | } 1488 | if (forward) 1489 | k = encode_basepairs (s, i); 1490 | else 1491 | k = encode_basepairs (s + n - i, i); 1492 | 1493 | reps[i] += flat[k + offset]; 1494 | 1495 | /* 1496 | * if(i == 1) printf("\n n = %d forward k = %d",n,k+offset); 1497 | */ 1498 | 1499 | offset++; 1500 | offset = offset << 2; 1501 | } 1502 | 1503 | } 1504 | 1505 | /*---------------------------------------------------------------------*/ 1506 | void 1507 | reverse_string (char *contig, char *s, int n) 1508 | { 1509 | int i; 1510 | 1511 | for (i = n - 1; i > -1; i--) 1512 | { 1513 | *s++ = *(contig + i); 1514 | } 1515 | 1516 | } 1517 | 1518 | /*---------------------------------------------------------------------*/ 1519 | void 1520 | reverse_transcribe (char *contig, char *s, int n) 1521 | { 1522 | int i; 1523 | char c; 1524 | 1525 | for (i = n - 1; i > -1; i--) 1526 | { 1527 | if (*(contig + i) == 'A') 1528 | c = 'T'; 1529 | else if (*(contig + i) == 'C') 1530 | c = 'G'; 1531 | else if (*(contig + i) == 'G') 1532 | c = 'C'; 1533 | else if (*(contig + i) == 'T') 1534 | c = 'A'; 1535 | else 1536 | c = 'N'; 1537 | 1538 | *s++ = c; 1539 | } 1540 | /* 1541 | * *s = '\0'; printf("\n%s"); 1542 | */ 1543 | 1544 | } 1545 | 1546 | 
/*---------------------------------------------------------------------*/ 1547 | void 1548 | transcribe (char *contig, char *s, int n) 1549 | { 1550 | int i; 1551 | char c; 1552 | 1553 | for (i = 0; i < n; i++) 1554 | { 1555 | if (*(contig + i) == 'A') 1556 | c = 'T'; 1557 | else if (*(contig + i) == 'C') 1558 | c = 'G'; 1559 | else if (*(contig + i) == 'G') 1560 | c = 'C'; 1561 | else if (*(contig + i) == 'T') 1562 | c = 'A'; 1563 | else 1564 | c = 'N'; 1565 | 1566 | s[i] = c; 1567 | } 1568 | } 1569 | 1570 | /*---------------------------------------------------------------------*/ 1571 | 1572 | void 1573 | flat_index_contig (int *index, char *contig, int L, int depth) 1574 | { 1575 | int i, stop; 1576 | 1577 | for (i = 0; i < L; i++) 1578 | { 1579 | stop = minim (depth, L - i); 1580 | 1581 | // if(i%100000 == 0) printf("\nIndex %d bases out of %d at depth %d",i,L,stop); 1582 | 1583 | if ((*(contig + i)) != 'N') 1584 | index_string (index, contig + i, stop); 1585 | } 1586 | } 1587 | 1588 | /*---------------------------------------------------------------------*/ 1589 | void 1590 | index_string (int *index, char *s, int n) 1591 | { 1592 | unsigned int i, offset; 1593 | int j; 1594 | 1595 | offset = 0; 1596 | for (j = 1; j <= n; j++) 1597 | { 1598 | i = encode_basepairs (s, j); 1599 | index[i + offset]++; 1600 | offset++; 1601 | offset = offset << 2; 1602 | } 1603 | } 1604 | 1605 | /*---------------------------------------------------------------------*/ 1606 | 1607 | int 1608 | sort_compare (const void *a, const void *b) 1609 | { 1610 | if (*((double *) a) < *((double *) b)) 1611 | return -1; 1612 | else if (*((double *) a) > *((double *) b)) 1613 | return 1; 1614 | else 1615 | return 0; 1616 | } 1617 | 1618 | /*---------------------------------------------------------------------*/ 1619 | 1620 | int 1621 | sort_compare_index (const void *a, const void *b) 1622 | { 1623 | double ad, bd; 1624 | 1625 | ad = FQ_LIST[*((int *) a)]; 1626 | bd = FQ_LIST[*((int *) b)]; 
1627 | 1628 | if (ad < bd) 1629 | return -1; 1630 | else if (ad > bd) 1631 | return 1; 1632 | else 1633 | return 0; 1634 | } 1635 | 1636 | /*---------------------------------------------------------------------*/ 1637 | 1638 | void 1639 | read_var (char *line, char *result) 1640 | { 1641 | 1642 | char line1[256]; 1643 | unsigned int i; 1644 | 1645 | sprintf (line1, "%s", line); 1646 | printf ("%s", line1); 1647 | fgets (result, 250, stdin); 1648 | result[strlen (result) - 1] = '\0'; 1649 | for (i = 0; i < minim (strlen (line1), 255); i++) 1650 | if (line1[i] == '\n') 1651 | line1[i] = '\0'; 1652 | } 1653 | 1654 | /*---------------------------------------------------------------------*/ 1655 | 1656 | int 1657 | line_count (FILE * sfile) 1658 | { 1659 | int c, nl; 1660 | c = nl = 0; 1661 | while ((c = getc (sfile)) != EOF) 1662 | { 1663 | if (c == '\n') 1664 | nl++; 1665 | } 1666 | return nl; 1667 | } 1668 | 1669 | /*---------------------------------------------------------------------*/ 1670 | 1671 | int 1672 | read_primer_pools (const char *filename, int max_primer_pairs, int max_primers_in_pool, 1673 | int *primers_in_pool, PNODE ***primer_pool) 1674 | { 1675 | // open primer file 1676 | FILE *primer_file; 1677 | primer_file = fopen (filename, "r"); 1678 | check(primer_file, "cannot open primer file, '%s'", filename); 1679 | 1680 | int ppairs_count = 0; 1681 | int final_pool_number = 0; 1682 | 1683 | char header[1024]; 1684 | fgets( header, 1024, primer_file ); 1685 | char *fields; 1686 | fields = strtok( header, "\t"); 1687 | check((strcmp(fields, "Name")==0), "%s does not start with header of primer file", filename); 1688 | 1689 | while (!feof(primer_file)) 1690 | { 1691 | debug("creating new temp_ppair"); 1692 | PNODE *temp_ppair = create_ppair( 1000, 50 ); 1693 | 1694 | char primer_name[1024]; 1695 | int this_primer_num, this_pool_num; 1696 | this_primer_num = this_pool_num = 0; 1697 | 1698 | int read_line = 1699 | fscanf (primer_file, 1700 | 
"%s\t%d\t%d\t%s\t%lg\t%lg\t%s\t%lg\t%lg\t%d\t%d\t%d\t%d\t%d\t%d\t%lg\t%lg\t%s\n", 1701 | primer_name, 1702 | &this_pool_num, 1703 | &this_primer_num, 1704 | temp_ppair->forward->sequence, 1705 | &temp_ppair->forward->tm, 1706 | &temp_ppair->forward->gc, 1707 | temp_ppair->reverse->sequence, 1708 | &temp_ppair->reverse->tm, 1709 | &temp_ppair->reverse->gc, 1710 | &temp_ppair->chrom, 1711 | &temp_ppair->forward->start, 1712 | &temp_ppair->forward->end, 1713 | &temp_ppair->reverse->start, 1714 | &temp_ppair->reverse->end, 1715 | &temp_ppair->length, 1716 | &temp_ppair->gc, 1717 | &temp_ppair->tm, 1718 | temp_ppair->sequence); 1719 | check((read_line == 18 ), "Error processing file: %s", filename); 1720 | check((ppairs_count < max_primer_pairs), "Out of Memory. Increase primer_pool size."); 1721 | check((this_primer_num < max_primers_in_pool), "Too many primer pairs in pool %d", this_pool_num); 1722 | 1723 | primer_pool[this_pool_num][this_primer_num] = temp_ppair; 1724 | debug("assigned primer pair id '%d' to pool number '%d'", this_primer_num, this_pool_num); 1725 | 1726 | primers_in_pool[this_pool_num]++; 1727 | ppairs_count++; 1728 | debug("\n\n\tpool = %d, pool primer pairs count = %d, total primer pairs count = %d\n", 1729 | this_pool_num, primers_in_pool[this_pool_num], ppairs_count); 1730 | final_pool_number = this_pool_num; 1731 | } 1732 | 1733 | debug("returning this_pool_num: %d", final_pool_number); 1734 | return final_pool_number; 1735 | 1736 | error: 1737 | exit(1); 1738 | } 1739 | 1740 | /*---------------------------------------------------------------------*/ 1741 | 1742 | void 1743 | Print_isPcr (const char *filename, int max_pools, int *primers_in_pool, PNODE ***primer_pool) 1744 | { 1745 | FILE *isPcr_File; 1746 | isPcr_File = fopen (filename, "w"); 1747 | check(isPcr_File, "cannot open primer file, '%s'", filename); 1748 | for (int i = 0; i <= max_pools; i++ ) 1749 | { 1750 | for (int j = 0; j < primers_in_pool[i]; j++) 1751 | { 1752 | 
fprintf(isPcr_File, "pool_%d_%02d\t%s\t%s\n", (i+1), (j+1), 1753 | primer_pool[i][j]->forward->sequence, 1754 | primer_pool[i][j]->reverse->sequence ); 1755 | } 1756 | } 1757 | 1758 | error: 1759 | exit(1); 1760 | } 1761 | 1762 | void 1763 | Check_all_pools ( int max_pools, int *primers_in_pool, PNODE ***primer_pool, int max_amplicon_length ) 1764 | { 1765 | double max_amp_diff = (double) (max_amplicon_length * 0.15) + 1; 1766 | printf("%s\t%s\t%s\t%s\n", "Pool Number", "Primer Pair Count", "Compatable", "Comparisons"); 1767 | for (int i = 0; i <= max_pools; i++ ) 1768 | { 1769 | if (primers_in_pool[i] > 1 ) 1770 | { 1771 | check_poolability( primer_pool[i], primers_in_pool[i], i, (int) max_amp_diff); 1772 | } 1773 | else 1774 | { 1775 | printf("%d\t1\tYes\t%d\n", (i + 1), 0); 1776 | } 1777 | } 1778 | } 1779 | 1780 | /*---------------------------------------------------------------------*/ 1781 | 1782 | void 1783 | die ( char *message ) 1784 | { 1785 | if (errno) 1786 | { 1787 | perror(message); 1788 | } 1789 | else 1790 | { 1791 | printf("ERROR: %s\n", message); 1792 | } 1793 | exit(1); 1794 | } 1795 | 1796 | /*---------------------------------------------------------------------*/ 1797 | 1798 | void 1799 | check_poolability (PNODE ** primer_pool, int primers_in_pool, int pool_number, int max_amp_diff) 1800 | { 1801 | int i, j, k, *poolable_count; 1802 | char **cmat; 1803 | 1804 | poolable_count = create_ivec (primers_in_pool); 1805 | cmat = create_cmat (primers_in_pool, primers_in_pool); 1806 | 1807 | for (i = 0; i < primers_in_pool; i++) 1808 | for (j = i + 1; j < primers_in_pool; j++) 1809 | { 1810 | // debug("checking, fwd: %s rev: %s with fwd: %s rev: %s\n", 1811 | // primer_pool[i]->forward->sequence, 1812 | // primer_pool[i]->reverse->sequence, 1813 | // primer_pool[j]->forward->sequence, 1814 | // primer_pool[j]->reverse->sequence 1815 | // ); 1816 | 1817 | cmat[i][j] = is_poolable_primer (primer_pool[i], primer_pool[j], max_amp_diff, 2); 1818 | cmat[j][i] 
= cmat[i][j]; 1819 | poolable_count[i] += cmat[i][j]; 1820 | // debug("%d", poolable_count[i]); 1821 | poolable_count[j] += cmat[i][j]; 1822 | // debug("%d", poolable_count[j]); 1823 | } 1824 | 1825 | k = 0; 1826 | for (i = 0; i < primers_in_pool; i++) 1827 | k += poolable_count[i]; 1828 | 1829 | if ((primers_in_pool - 1) * primers_in_pool == k) 1830 | printf ("%d\t%d\tYes\t%d\n", (pool_number + 1), (primers_in_pool + 1), k); 1831 | else 1832 | printf ("%d\t%d\tNo\t%d\n", (pool_number + 1), (primers_in_pool + 1), k); 1833 | 1834 | // print_cmat (cmat, primers_in_pool); 1835 | // printf ("\n"); 1836 | 1837 | } 1838 | 1839 | /*---------------------------------------------------------------------*/ 1840 | 1841 | void 1842 | print_cmat (char **cmat, int N) 1843 | { 1844 | int i, j; 1845 | for (i = 0; i < N; i++) 1846 | { 1847 | for (j = 0; j < N; j++) 1848 | if (i == j) 1849 | printf ("."); 1850 | else 1851 | printf ("%d", cmat[i][j]); 1852 | 1853 | printf ("\n"); 1854 | } 1855 | } 1856 | 1857 | /*---------------------------------------------------------------------*/ 1858 | 1859 | PNODE *** 1860 | primer_pool_create ( int max_primers_in_pool, int max_ppairs_count) 1861 | { 1862 | PNODE ***primer_pool = (PNODE ***) malloc ((unsigned) max_ppairs_count * sizeof (PNODE **)); 1863 | check_mem(primer_pool); 1864 | 1865 | for (int i = 0; i < max_ppairs_count; i++) 1866 | { 1867 | primer_pool[i] = (PNODE **) malloc ((unsigned) max_primers_in_pool * sizeof (PNODE *)); 1868 | check_mem(primer_pool[i]); 1869 | } 1870 | return primer_pool; 1871 | 1872 | error: 1873 | exit(1); 1874 | } 1875 | 1876 | /*---------------------------------------------------------------------*/ 1877 | 1878 | PRIMER * 1879 | create_primer (int max_primer_length) 1880 | { 1881 | PRIMER *tn; 1882 | 1883 | tn = (PRIMER *) malloc( (unsigned) sizeof(PRIMER) ); 1884 | assert( tn != NULL ); 1885 | check_mem(tn); 1886 | 1887 | tn->start = 0; 1888 | tn->end = 0; 1889 | tn->sequence = 
create_cvec(max_primer_length); 1890 | tn->tm = 0; 1891 | tn->gc = 0; 1892 | 1893 | return tn; 1894 | 1895 | error: 1896 | exit(1); 1897 | } 1898 | 1899 | /*---------------------------------------------------------------------*/ 1900 | 1901 | PNODE * 1902 | create_ppair (int max_primer_length, int max_amplicon_length) 1903 | { 1904 | PNODE *tn; 1905 | 1906 | tn = (PNODE *) malloc( (unsigned) sizeof(PNODE) ); 1907 | assert( tn != NULL ); 1908 | check_mem(tn); 1909 | 1910 | tn->forward = create_primer(max_primer_length); 1911 | tn->reverse = create_primer(max_primer_length); 1912 | tn->sequence = create_cvec(max_amplicon_length); 1913 | tn->length = 0; 1914 | tn->gc = 0; 1915 | tn->tm = 0; 1916 | tn->chrom = 0; 1917 | 1918 | return tn; 1919 | 1920 | error: 1921 | exit(1); 1922 | } 1923 | 1924 | /*---------------------------------------------------------------------*/ 1925 | 1926 | void 1927 | flat_index_contig_high (int *index, char *contig, int L, int depth, int high_depth, unsigned char *double_high) 1928 | { 1929 | int i, stop; 1930 | char flip[256]; 1931 | 1932 | 1933 | for (i = 0; i < L; i++) 1934 | { 1935 | stop = minim (depth, L - i); 1936 | if (i % 1000000 == 0) 1937 | printf ("\nIndex %d bases out of %d at depth %d", i, L, stop); 1938 | 1939 | if ((*(contig + i)) != 'N') 1940 | { 1941 | index_string (index, contig + i, stop); 1942 | if (i + high_depth < L) 1943 | { 1944 | high_index (double_high, contig + i, high_depth); 1945 | reverse_transcribe (contig + i, flip, high_depth); 1946 | high_index (double_high, flip, high_depth); 1947 | } 1948 | } 1949 | } 1950 | 1951 | 1952 | } 1953 | 1954 | /*---------------------------------------------------------------------*/ 1955 | 1956 | void 1957 | high_index (unsigned char *dhigh, char *s, int n) 1958 | { 1959 | int i, j; 1960 | unsigned char k, bit; 1961 | 1962 | i = encode_basepairs (s, n); 1963 | /* printf("\nEntered High Index n = %d i = %u\n\n",n,i); */ 1964 | if (i >= 0) 1965 | { 1966 | j = i / 8; 1967 | k = i 
% 8; 1968 | bit = 1 << k; 1969 | dhigh[j] = dhigh[j] | bit; 1970 | } 1971 | } 1972 | 1973 | /*---------------------------------------------------------------------*/ 1974 | -------------------------------------------------------------------------------- /src/mpd.h: -------------------------------------------------------------------------------- 1 | #ifndef _mpp_h 2 | #define _mpp_h 3 | #define _GNU_SOURCE // for asprintf from stdio.h 4 | #define TRUE 1 5 | #define FALSE 0 6 | #define MAX_PAIRS 10 7 | #define minim(a,b) ((ab)?a:b) 9 | #endif 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "dbg.h" 21 | #include "mem.h" 22 | 23 | typedef struct primer_node 24 | { 25 | int start; 26 | int end; 27 | char *sequence; 28 | double tm; 29 | double gc; 30 | } PRIMER; 31 | 32 | typedef struct primer_pair 33 | { 34 | PRIMER *forward; 35 | PRIMER *reverse; 36 | char *sequence; 37 | int length; 38 | double gc; 39 | double tm; 40 | int chrom; 41 | } PNODE; 42 | 43 | typedef struct snp_node 44 | { 45 | char baseA; 46 | char baseB; 47 | char name[32]; 48 | unsigned int pos; 49 | unsigned int chrom; 50 | int no_pairs; 51 | int no_disc; 52 | double het; 53 | PNODE **pair; 54 | } SNODE; 55 | 56 | typedef struct amp_node 57 | { 58 | char name[32]; 59 | unsigned int start_pos; 60 | unsigned int stop_pos; 61 | unsigned int chrom; 62 | int no_pairs; 63 | PNODE **pair; 64 | } AMPNODE; 65 | 66 | void die ( char *message ); 67 | int read_primer_pools (const char *filename, int max_ppairs, int max_primer_count, 68 | int *pool_count, PNODE ***primer_pool); 69 | void Print_isPcr (const char *filename, int max_pools, int *primers_in_pool, PNODE ***primer_pool); 70 | void reverse_string (char *contig, char *s, int n); 71 | void check_poolability (PNODE ** primer_pool, int primers_in_pool, int pool_number, int max_amp_diff); 72 | void Check_all_pools ( int max_pools, int *primers_in_pool, PNODE 
***primer_pool, int max_amplicon_length ); 73 | 74 | PNODE ***primer_pool_create ( int max_ppairs, int max_ppairs_count); 75 | PRIMER *create_primer (int max_primer_length); 76 | PNODE *create_ppair (int max_primer_length, int max_amplicon_length); 77 | void flat_index_contig_high (int *index, char *contig, int L, int depth, int high_depth, unsigned char *double_high); 78 | void flat_index_contig (int *index, char *contig, int L, int depth); 79 | void high_index (unsigned char *dhigh, char *s, int n); 80 | int 81 | find_primers (SNODE ** snp_list, AMPNODE * tn, int no_snp, int *flat, char *contig, 82 | int L, int min_primer, int max_primer, int amp_max, int amp_min, 83 | double min_gc, double max_gc, double min_tm, double max_tm, 84 | int depth, int local_depth, int target_base, int start_pos, 85 | unsigned char *highmer, int **repeats, int no_repeats, int end_region, int chrom); 86 | void count_copys (int *flat, char *s, int n, int *reps); 87 | void simple_count_copys (int *flat, char *s, int n, int *reps, int forward); 88 | void reverse_transcribe (char *contig, char *s, int n); 89 | void transcribe (char *contig, char *s, int n); 90 | void reverse_string (char *contig, char *s, int n); 91 | int sort_compare_index (const void *a, const void *b); 92 | int check_hairpin (char *ss, int n); 93 | int check_hairpin_min (char *ss, char *flip, int n, int min); 94 | int check_dimer (char *p1, char *p2, int n); 95 | int check_uneven_dimer (char *p1, char *p2, int n1, int n2); 96 | int check_watson_crick (char a, char b); 97 | int test_dimer (char *p1, char *p2, int n); 98 | int find_seg (int x, int y, int **list, int start, int end, int guess); 99 | unsigned int encode_basepairs (char *ss, int n); 100 | void decode_basepairs (unsigned char *s, char *dest, int n); 101 | void index_string (int *index, char *s, int n); 102 | void convert_int_basepairs (int i, char *s, int k); 103 | void 104 | fill_quality_scores (int *flat, int *local, char *contig, int L, int window, int 
depth, int ld, int min_primer, 105 | int max_primer, double *fq_left, double *gc_left, int *index_left, int *plen_l, 106 | double min_gc, double max_gc, double min_tm, double max_tm, int **repeats, 107 | int no_repeats, unsigned char *highmer, int start_pos); 108 | int find_frag (int x, int **list, int start, int end, int guess); 109 | void fill_hs (char a, char b, double *h, double *s); 110 | double calc_tm (char *s, int n); 111 | int sort_compare_struct (const void *a, const void *b); 112 | int check_15mer (char *string, unsigned char *map, int n); 113 | int is_not_repeat (int x, int y, int **list, int no_frags, double max); 114 | SNODE **fill_snp_list (FILE * sfile, int *n, unsigned int chrom); 115 | AMPNODE **fill_amp_list (FILE * sfile, int n); 116 | SNODE *snp_alloc (void); 117 | AMPNODE *amp_alloc (void); 118 | int isbase (char c); 119 | double calc_gc (char *s, int n); 120 | PRIMER *primer_alloc (void); 121 | PNODE *product_alloc (void); 122 | int poly_under_primer (unsigned int p_start, unsigned int p_end, SNODE ** list, int start, int stop, int which); 123 | int *select_snps (SNODE ** list, int n, int target, int start, int stop); 124 | int pick_random (double *x, int n); 125 | int select_subset (SNODE ** list, int n, int pick, int *priority, int *selected, int which, int start, int stop); 126 | double fill_dist (SNODE ** list, int n, int *priority, int *selected, int which, int start, int stop, double *dist); 127 | int line_count (FILE * sfile); 128 | int count_compatable_primers (int **poolable, int total_primers, int k); 129 | int **zero_matrix (int **poolable, int total_primers, int k); 130 | int is_poolable_amp (AMPNODE * a1, AMPNODE * a2, int i1, int i2); 131 | void find_min_pools (int n, int *need_pooling, int **same_amplicon, int **poolable, PNODE ** primer_list); 132 | int is_poolable_primer (PNODE * p1, PNODE * p2, int size_diff_threshold, int tm_diff_threshold); 133 | int count_poolable (int t, int tot_primer, int *need_pooling, int 
**same_amplicon, int **poolable); 134 | void make_greedy_pools (FILE *outfile, PNODE **plist, char **cmat, int *pc, int **redund, int *bstart, int Nregs, int N, int still, int *current_pool, int this_pool, int max_pool); 135 | void make_less_greedy_pools (FILE *outfile, PNODE ** plist, char **cmat, int *pc, int **redund, int *bstart, int Nregs, int N, int still, int *current_pool, int this_pool, int max_pool); 136 | void read_var (char *line, char *result); 137 | int *create_ivec (int row); 138 | char *create_cvec (int row); 139 | int **create_imat (int row, int col); 140 | char **create_cmat (int row, int col); 141 | -------------------------------------------------------------------------------- /src/mpd_lessGreedy.c: -------------------------------------------------------------------------------- 1 | #include "mpd.h" 2 | #include "mem.h" 3 | 4 | static FILE *outfile; 5 | 6 | int 7 | main () 8 | { 9 | char ss[256], sss[4196], **filename; 10 | char *scratch_pad, **contig_descript; 11 | unsigned char **compressed_map, *highmer; 12 | int i, j, k, N, N_snpfiles, *contig_snp_count, *contig_length; 13 | int pad_size, fasta, idepth, target_contig, total_index; 14 | int *flat_index, old_index, ***repeat_list, *no_repeats; 15 | int amp_min, amp_max, pool_size, N_targets; 16 | int min_primer, max_primer, genome_start, genome_stop; 17 | FILE *sfile, *cfile, *idfile, *rfile, *highfile, *snpfile_idx, *snpfile, *target_ampfile; 18 | double max_gc, min_gc, min_tm, max_tm, tm_inc; 19 | SNODE ***snp_list; /* remember, an array of typedef is always an array of pointers this gives us a 2d array, i.e., matrix */ 20 | AMPNODE **target_amp_list; 21 | 22 | outfile = stdout; 23 | read_var ("\nSend Output to Screen or Disk? 
[S,D]\n", ss); 24 | 25 | if ((strchr (ss, 'D')) || (strchr (ss, 'd'))) 26 | { 27 | read_var ("Please Enter File Name for Output\n", ss); 28 | if ((outfile = fopen (ss, "w")) == (FILE *) NULL) 29 | { 30 | printf ("\n Can not open file %s\n", ss); 31 | exit (1); 32 | } 33 | } 34 | else 35 | outfile = stdout; 36 | 37 | old_index = TRUE; 38 | 39 | read_var ("Primer Picker Summary Filename (e.g., index summary like hg19.sdx)\n", ss); 40 | if ((sfile = fopen (ss, "r")) == (FILE *) NULL) 41 | { 42 | printf ("\nCould Not Open file %s\n", ss); 43 | exit (1); 44 | } 45 | 46 | // sdx file: read the 1st line that contains an int of contigs that are indexed 47 | fgets (sss, 4195, sfile); 48 | N = atoi (sss); 49 | printf ("\n There are N chromosomes %d \n\n", N); 50 | 51 | compressed_map = (unsigned char **) malloc ((unsigned) (N + 1) * sizeof (unsigned char *)); 52 | if (!compressed_map) 53 | log_err("allocation failure for compressed_map"); 54 | 55 | contig_descript = cmatrix (0, N, 0, 4196); 56 | contig_length = ivector (0, N); 57 | no_repeats = ivector (0, N); 58 | 59 | repeat_list = (int ***) malloc ((unsigned) (N + 1) * sizeof (int **)); 60 | if (!repeat_list) 61 | log_err ("allocation failure for repeat_list"); 62 | 63 | 64 | // sdx file: read in contig information that is in the format: contig_lenght number_repeats contig_description 65 | for (i = 0; i < N; i++) 66 | { 67 | fgets (sss, 4195, sfile); 68 | sscanf (sss, "%d\t%d\t%s", &contig_length[i], &no_repeats[i], contig_descript[i]); 69 | repeat_list[i] = imatrix (0, no_repeats[i], 0, 1); 70 | printf ("\n Contig %d is named %s and is length %d\n", i, contig_descript[i], contig_length[i]); 71 | } 72 | 73 | // sdx file: read in int representing depth of coverage 74 | fgets (sss, 4195, sfile); 75 | idepth = atoi (sss); 76 | 77 | // printf("\n Indexing to a depth of %d \n",idepth); 78 | 79 | // sdx file: read next line - should be hg19.cdx 80 | fgets (sss, 4195, sfile); 81 | for (i = 0; i < 4194; i++) 82 | if (isspace 
(sss[i])) 83 | { 84 | sss[i] = '\0'; 85 | i = 4195; 86 | } 87 | 88 | // cdx file: set file 89 | if ((cfile = fopen (sss, "r")) == (FILE *) NULL) 90 | { 91 | printf ("\nCould Not Open 1 file \"%s\"\n", sss); 92 | exit (1); 93 | } 94 | 95 | // sdx file: read next line - should be hg19.idx 96 | fgets (sss, 4195, sfile); 97 | for (i = 0; i < 4194; i++) 98 | if (isspace (sss[i])) 99 | { 100 | sss[i] = '\0'; 101 | i = 4195; 102 | } 103 | 104 | // idx file: set file 105 | if ((idfile = fopen (sss, "r")) == (FILE *) NULL) 106 | { 107 | printf ("\nCould Not Open 2 file \"%s\"\n", sss); 108 | exit (1); 109 | } 110 | 111 | // sdx file: read next line - should be hg19.rdx 112 | fgets (sss, 4195, sfile); 113 | for (i = 0; i < 4194; i++) 114 | if (isspace (sss[i])) 115 | { 116 | sss[i] = '\0'; 117 | i = 4195; 118 | } 119 | 120 | // rdx file: set file 121 | if ((rfile = fopen (sss, "r")) == (FILE *) NULL) 122 | { 123 | printf ("\nCould Not Open 3 file \"%s\"\n", sss); 124 | exit (1); 125 | } 126 | 127 | //sdx file: read next line - should be hg19.15x 128 | fgets (sss, 4195, sfile); 129 | for (i = 0; i < 4194; i++) 130 | if (isspace (sss[i])) 131 | { 132 | sss[i] = '\0'; 133 | i = 4195; 134 | } 135 | 136 | // 'highfile' or 'hg19.15x': set file 137 | if ((highfile = fopen (sss, "r")) == (FILE *) NULL) 138 | { 139 | printf ("\nCould Not Open 4 file \"%s\"\n", sss); 140 | exit (1); 141 | } 142 | fclose (sfile); 143 | 144 | printf ("\nAbout to read repeat file\n\n"); 145 | 146 | // index Description i number of contigs: N j number of repeats in a contig: no_repeats[i] k not sure location of start of repeat until end of repeat 147 | for (i = 0; i < N; i++) 148 | { 149 | for (j = 0; j < no_repeats[i]; j++) 150 | for (k = 0; k < 2; k++) 151 | if ((fread (&repeat_list[i][j][k], sizeof (int), 1, rfile)) < 1) 152 | { 153 | printf ("\n Expected to read %d repeats for contig %d, but got %d\n\n", no_repeats[i], i, j); 154 | exit (1); 155 | } 156 | // for(j=0;j<1;j++) printf("\nFor contig %d 
repeat %d goes from %d %d\n\n", i,j,repeat_list[i][j][0],repeat_list[i][j][1]); 157 | } 158 | fclose (rfile); 159 | printf ("\nFinished reading repeat file"); 160 | 161 | printf ("\nAbout to read 15mer file\n\n"); 162 | highmer = ucvector (0, 134217728); 163 | if ((j = fread (highmer, sizeof (unsigned char), 134217728, highfile)) != 134217728) 164 | { 165 | printf ("\n Expected to read %d 15mers but got %d\n", 134217728, j); 166 | exit (1); 167 | } 168 | fclose (highfile); 169 | printf ("\nFinished reading 15mer file\n"); 170 | 171 | //read dbSNP information -> summary file => actual files fill ***snp_list 172 | read_var ("Name of dbSNP summary file\n", ss); 173 | if ((snpfile_idx = fopen (ss, "r")) == (FILE *) NULL) 174 | { 175 | printf ("\nCould Not Open dbsnp file %s\n", ss); 176 | exit (1); 177 | } 178 | 179 | // dbsnp summary file line 1 = number of contigs/chr 180 | fgets (sss, 4195, snpfile_idx); 181 | N_snpfiles = atoi (sss); 182 | 183 | // read dbsnp files 184 | contig_snp_count = ivector (0, N_snpfiles); 185 | snp_list = (SNODE ***) malloc (N_snpfiles * sizeof (SNODE **)); 186 | filename = cmatrix (0, N_snpfiles, 0, 256); 187 | for (i = 0; i < N_snpfiles; i++) 188 | { 189 | fgets (sss, 4195, snpfile_idx); 190 | sscanf (sss, "%s", filename[i]); 191 | if ((snpfile = fopen (filename[i], "r")) == (FILE *) NULL) 192 | { 193 | printf ("\n Can not open file %s\n", filename[i]); 194 | exit (1); 195 | } 196 | snp_list[i] = fill_snp_list (snpfile, &contig_snp_count[i], i + 1); 197 | fclose (snpfile); 198 | printf ("\nFinished reading dbSNP file: %s. 
Found %d SNPs.\n", filename[i], contig_snp_count[i]); 199 | } 200 | fclose(snpfile_idx); 201 | 202 | // read target file 203 | read_var ("\nName of file with amplicon target coordinates\n", ss); 204 | if ((target_ampfile = fopen (ss, "r")) == (FILE *) NULL) 205 | { 206 | printf ("\nCould Not Open file with amplicon target coordinates %s\n", ss); 207 | exit (1); 208 | } 209 | N_targets = line_count (target_ampfile); 210 | fseek (target_ampfile, 0, SEEK_SET); 211 | target_amp_list = fill_amp_list (target_ampfile, N_targets); 212 | unsigned int total_size_toamp = 0; 213 | fclose (target_ampfile); 214 | 215 | for (i = 0; i < N_targets; i++) 216 | { 217 | printf ("\nSearching for Primers target #%d: %s chr%d:%d-%d\n", 218 | i + 1, 219 | target_amp_list[i]->name, 220 | target_amp_list[i]->chrom, target_amp_list[i]->start_pos + 1, target_amp_list[i]->stop_pos + 1); 221 | total_size_toamp += 1 + target_amp_list[i]->stop_pos - target_amp_list[i]->start_pos; 222 | } 223 | printf ("\nFinished reading amplicon target coordinates\n\tFound %d targets with a total_size of %u\n", N_targets,total_size_toamp); 224 | 225 | //PCR primer parameters 226 | read_var ("Minimum Primer Length\n", ss); 227 | min_primer = atoi (ss); 228 | 229 | read_var ("Maximum Primer Length\n", ss); 230 | max_primer = atoi (ss); 231 | 232 | read_var ("Minimum Amplicon Length\n", ss); 233 | amp_min = atoi (ss); 234 | 235 | read_var ("Maximum Amplicon Length\n", ss); 236 | amp_max = atoi (ss); 237 | 238 | read_var ("Minimum GC content [0..1.0]\n", ss); 239 | min_gc = (double) atof (ss); 240 | 241 | read_var ("Maximum GC content [0..1.0]\n", ss); 242 | max_gc = (double) atof (ss); 243 | 244 | read_var ("Minimum tm_primer in degrees C\n", ss); 245 | min_tm = (double) atof (ss); 246 | 247 | read_var ("Maximum tm_primer in degrees C\n", ss); 248 | max_tm = (double) atof (ss); 249 | 250 | read_var ("Maximum number of primer pairs to pool together\n", ss); 251 | pool_size = atoi (ss); 252 | 253 | read_var ("Pad 
size\n", ss); 254 | pad_size = atoi (ss); 255 | 256 | // sanity check that pad size 257 | if (pad_size <= max_primer) 258 | { 259 | printf("\nERROR: pad size must be larger than the maximum size of a primer\n"); 260 | exit(1); 261 | } 262 | else if (pad_size > 3 * amp_max) 263 | { 264 | printf("\nERROR: pad size is unrealistically large, i.e. 3 times the size of the maximum amplicon length\n"); 265 | exit(1); 266 | } 267 | 268 | read_var ("Tm increment (0.5 to 4.0)\n", ss); 269 | tm_inc = (double) atof (ss); 270 | 271 | if (tm_inc > 4 || tm_inc <0.5) 272 | { 273 | printf("\nError please choose an increment between 0.5 and 4 C\n"); 274 | exit(1); 275 | } 276 | 277 | // print header for outfile 278 | fprintf (outfile, "Primer_number\tForward_primer\tForward_Tm\tForward_GC\tReverse_primer\tReverse_Tm\tReverse_GC\tChr\t"); 279 | fprintf (outfile, "Forward_start_position\tForward_stop_position\tReverse_start_position\tReverse_stop_position\t"); 280 | fprintf (outfile, "Product_length\tProduct_GC\tProduct_tm\tProduct\n"); 281 | 282 | // allocate some memory for indexed genome 283 | j = 4; 284 | for (i = 0; i < idepth; i++) 285 | j *= 4; 286 | j = (j - 4) / 3; 287 | 288 | total_index = j; 289 | printf ("\n Determined the size of flat index to be %d * %lu = %lu bytes\n", 290 | total_index, sizeof (int), total_index * sizeof (int)); 291 | 292 | flat_index = ivector (0, total_index); 293 | 294 | for (i = 0; i <= total_index; i++) 295 | flat_index[i] = 0; 296 | 297 | for (fasta = 0; fasta < N; fasta++) 298 | { 299 | j = contig_length[fasta]; 300 | if (j % 4 == 0) 301 | j /= 4; 302 | else 303 | j = j / 4 + 1; 304 | 305 | compressed_map[fasta] = ucvector (0, j + 1); 306 | printf ("\n For chromosome %d we are going to read %d bytes\n", fasta, j); 307 | k = fread (compressed_map[fasta], sizeof (unsigned char), j, cfile); 308 | 309 | if (k != j) 310 | { 311 | printf ("\nCompressed Sequence %d %s should have been length %d but was %d\n", 312 | fasta, contig_descript[fasta], j, k); 
313 | exit (1); 314 | } 315 | } 316 | fclose (cfile); 317 | k = fread (flat_index, sizeof (int), total_index, idfile); 318 | 319 | if (k < total_index) 320 | { 321 | printf ("\nIndexed of N'mers should have been length %d but was %d\n", total_index, k); 322 | exit (1); 323 | } 324 | fclose (idfile); 325 | 326 | // allocated memory for amp_pool => array of regions captured. 327 | PNODE **all_primer_pairs; 328 | int max_regions = (1 + maxim(5*total_size_toamp / amp_min,N_targets*2)); 329 | int max_ppairs = MAX_PAIRS * max_regions; 330 | printf ("\n We have max_regions = %d max_pairs = %d \n", max_regions, max_ppairs); 331 | all_primer_pairs = malloc ((unsigned) max_ppairs * sizeof (PNODE *)); 332 | if (!all_primer_pairs) 333 | { 334 | printf ("\n Could not allocate space for %u primer pairs \n", max_ppairs); 335 | exit (1); 336 | } 337 | int **redundant_list, *best_start; 338 | redundant_list = imatrix (0, max_ppairs, 0, MAX_PAIRS); 339 | best_start = ivector (0, max_regions); 340 | for(i=0;i<=max_regions;i++) 341 | best_start[i] = -1; 342 | for (i = 0; i <= max_ppairs; i++) 343 | { 344 | for (j = 0; j <= MAX_PAIRS; j++) 345 | redundant_list[i][j] = -1; 346 | } 347 | int amp_pool_count = 0; 348 | int primer_count = 0; 349 | 350 | // start finding primers for targets 351 | AMPNODE *loop_amp; 352 | loop_amp = amp_alloc (); 353 | printf("\n loop_amp is located at position %ld in memory \n\n",(long)loop_amp); 354 | for (k = 0; k < N_targets; k++) 355 | { 356 | // set chromosome 357 | target_contig = target_amp_list[k]->chrom - 1; 358 | 359 | // set target start and stop 360 | int target_start = target_amp_list[k]->start_pos + 1; 361 | int target_stop = target_amp_list[k]->stop_pos + 1; 362 | 363 | // set temporary start and stop 364 | int this_start, this_stop, this_midpoint; 365 | if (target_stop - target_start > amp_max) 366 | { 367 | this_start = target_start; 368 | this_stop = target_start + amp_max; 369 | } 370 | else 371 | { 372 | this_midpoint = ((target_stop - 
target_start) / 2) + target_start; 373 | this_start = this_midpoint - (amp_min / 2); 374 | this_stop = this_midpoint + (amp_min / 2); 375 | } 376 | 377 | // while loop variable: call it the "region" loop 378 | int not_covered = 1; 379 | 380 | // print out target info 381 | printf ("\nSearching for Primers target #%d: %s chr%d:%d-%d\n\n", 382 | k + 1, 383 | target_amp_list[k]->name, 384 | target_amp_list[k]->chrom, target_amp_list[k]->start_pos + 1, target_amp_list[k]->stop_pos + 1); 385 | printf ("\nStarting params this_start = %d, this_stop = %d\n\n", this_start, this_stop); 386 | 387 | // variables: 388 | // target_start, target_stop => region to cover with amplicons 389 | // this_start, this_stop => region to cover with the specific iteration of the while loop 390 | // genome_start, genome_stop => coordinates used to extract actual genomic region to target 391 | // 392 | 393 | 394 | while (not_covered) 395 | { 396 | printf("\n 2 loop_amp is located at position %ld in memory \n\n",(long)loop_amp); 397 | 398 | // start site - make sure it's a multiple of 4 399 | genome_start = maxim (1, this_start - pad_size); 400 | j = genome_start / 4; 401 | genome_start = j * 4; 402 | 403 | // stop site - make sure it's a multiple of 4 404 | genome_stop = (this_stop + pad_size) / 4; 405 | genome_stop = minim (genome_stop * 4, contig_length[target_contig]); 406 | 407 | // length of region 408 | j = (genome_stop - genome_start); 409 | j = minim (j, contig_length[target_contig]); 410 | 411 | // set loop_amp attributes 412 | loop_amp->start_pos = this_start + 1; 413 | loop_amp->stop_pos = this_stop + 1; 414 | loop_amp->chrom = target_amp_list[k]->chrom; 415 | loop_amp->no_pairs = 0; 416 | sprintf (loop_amp->name, "%s_%02d", target_amp_list[k]->name, amp_pool_count); 417 | 418 | // tm range 419 | int this_min_tm = min_tm; 420 | int this_max_tm = max_tm; 421 | 422 | // while loop variables: call it the "specific target" loop 423 | int found_count = 0; 424 | int this_trial = 0; 425 | 
426 | // scratch pad 427 | if (j > 2000) 428 | { 429 | printf ("\nj (length) is too big %d\n\n", j); 430 | exit (1); 431 | } 432 | // allocate memory for scratch pad 433 | scratch_pad = cvector (0, j + 5); 434 | scratch_pad[0] = '\0'; 435 | decode_basepairs (&compressed_map[target_contig][genome_start / 4], scratch_pad, j / 4); 436 | 437 | do 438 | { 439 | found_count = find_primers (snp_list[target_contig], 440 | loop_amp, 441 | contig_snp_count[target_contig], // number of snps in the contig 442 | flat_index, scratch_pad, // copy of the target 443 | j, // length of region 444 | min_primer, max_primer, amp_max, amp_min, min_gc, max_gc, this_min_tm, this_max_tm, 445 | idepth, // index depth 446 | 10, // local depth 447 | j / 2, // target base (from old primer_snp.c program) 448 | genome_start + 1, 449 | highmer, 450 | repeat_list[target_contig], 451 | no_repeats[target_contig], 452 | pad_size, // size on either end of the contig to look for primer 453 | loop_amp->chrom); 454 | this_min_tm -= tm_inc; 455 | this_max_tm += tm_inc; 456 | this_trial++; 457 | if (this_min_tm < min_tm || this_max_tm > max_tm) { 458 | break; 459 | } 460 | } while (this_trial < 10 && found_count < 1); 461 | printf("\n 3 loop_amp is located at position %ld in memory \n\n",(long)loop_amp); 462 | 463 | free_cvector (scratch_pad, 0, j + 5); 464 | int nearest_stop = 0; 465 | if (found_count > 0) 466 | { 467 | if (loop_amp->no_pairs > 0) 468 | { 469 | if (amp_pool_count == max_ppairs) 470 | { 471 | printf ("ERROR: exceeded the maximum number of primer pairs allocated: %d\n\n", max_ppairs); 472 | exit (1); 473 | } 474 | 475 | printf ("\nBED: chr%d\t%d\t%d\n", loop_amp->chrom, this_start, this_stop); 476 | 477 | int start_primer_count = primer_count; 478 | 479 | printf("\n\nabout to fill the array of primers start = %d with %d pairs coming\n\n", 480 | start_primer_count,loop_amp->no_pairs); 481 | 482 | for (j = 0; j < loop_amp->no_pairs; j++) 483 | all_primer_pairs[primer_count++] = 
loop_amp->pair[j]; 484 | int jj; 485 | for (j = start_primer_count; j < primer_count; j++) 486 | { 487 | for (jj = 0; jj < loop_amp->no_pairs; jj++) 488 | { 489 | printf("\n\nj = %d, jj = %d, start_primer_count = %d, primer_count = %d\n\n", j, jj, start_primer_count, primer_count); 490 | redundant_list[j][jj] = start_primer_count + jj; 491 | printf("\n 4 loop_amp is located at position %ld in memory \n\n",(long)loop_amp); 492 | } 493 | } 494 | printf("\n Made it here \n\n"); 495 | best_start[amp_pool_count] = start_primer_count; 496 | amp_pool_count++; 497 | 498 | for (j = 0; j < loop_amp->no_pairs; j++) 499 | { 500 | if (nearest_stop == 0) 501 | nearest_stop = loop_amp->pair[j]->reverse->start; 502 | else if (loop_amp->pair[j]->reverse->start < nearest_stop) 503 | nearest_stop = loop_amp->pair[j]->reverse->start; 504 | } 505 | } 506 | } 507 | printf("\n\nnearest stop is %d\n\n", nearest_stop); 508 | 509 | if (nearest_stop > 0) 510 | { 511 | this_start = nearest_stop; 512 | this_stop = this_start + amp_min; 513 | } 514 | else 515 | { 516 | this_start += pad_size; 517 | this_stop = this_start + amp_min; 518 | } 519 | if (nearest_stop >= target_stop || this_start > target_stop) 520 | not_covered = 0; 521 | } 522 | } 523 | 524 | char **poolable_matrix; 525 | int *poolable_count; 526 | poolable_count = ivector (0, primer_count); 527 | for (i = 0; i < primer_count; i++) 528 | poolable_count[i] = 0; 529 | poolable_matrix = cmatrix (0, primer_count, 0, primer_count); 530 | for (i = 0; i < primer_count; i++) 531 | for (j = i + 1; j < primer_count; j++) 532 | { 533 | double max_amp_diff = (double) (amp_max * 0.15) + 1; 534 | poolable_matrix[i][j] = is_poolable_primer (all_primer_pairs[i], all_primer_pairs[j], (int) max_amp_diff, 2); 535 | poolable_matrix[j][i] = poolable_matrix[i][j]; 536 | poolable_count[i] += poolable_matrix[i][j]; 537 | poolable_count[j] += poolable_matrix[i][j]; 538 | } 539 | for (i = 0; i < amp_pool_count; i++) 540 | if (best_start[i] >= 0) 541 | { 542 
| int k = best_start[i]; 543 | for (j = 0; j < MAX_PAIRS; j++) 544 | if (redundant_list[k][j] >= 0) 545 | if (poolable_count[redundant_list[k][j]] > poolable_count[best_start[i]]) 546 | best_start[i] = redundant_list[k][j]; 547 | } 548 | // 549 | // print cmat 550 | // 551 | //printf ("\n"); 552 | //for (i = 0; i < primer_count; i++) 553 | //printf ("\t%s_%s_%03d", all_primer_pairs[i]->forward->sequence, all_primer_pairs[i]->reverse->sequence, i); 554 | //for (i = 0; i < primer_count; i++) 555 | //{ 556 | //printf ("\n%s_%s_%03d", all_primer_pairs[i]->forward->sequence, all_primer_pairs[i]->reverse->sequence, i); 557 | //for (j = 0; j < primer_count; j++) 558 | //if (i != j) 559 | //printf ("\t%d", (int) poolable_matrix[i][j]); 560 | //else 561 | //printf ("\t."); 562 | //} 563 | //printf ("\n"); 564 | // 565 | // print best start matrix 566 | // 567 | // for (i = 0; i < amp_pool_count; i++) 568 | // { 569 | // int k = best_start[i]; 570 | // for (j = 0; j < MAX_PAIRS; j++) 571 | // if (redundant_list[k][j] >= 0) 572 | // printf (" %03d", redundant_list[k][j]); 573 | // else 574 | // printf (" ."); 575 | // printf ("\t| Region: %03d\t| Primer: %03d\t| Poolable Count: %03d\n", i, best_start[i], poolable_count[best_start[i]]); 576 | // } 577 | // 578 | // redundant matrix 579 | // 580 | // printf("\n\n"); 581 | // for (i=0; i= 0) 585 | // printf (" %03d", redundant_list[i][j]); 586 | // else 587 | // printf (" ."); 588 | // printf("\n"); 589 | // } 590 | 591 | printf ("\n\n going to make_pools with amp_pools = %d, primer_count = %d\n", amp_pool_count, primer_count); 592 | int *current_pool; 593 | current_pool = ivector (0, 20); 594 | make_less_greedy_pools (outfile, all_primer_pairs, poolable_matrix, poolable_count, redundant_list, best_start, amp_pool_count, 595 | primer_count, primer_count, current_pool, 0, pool_size); 596 | return 0; 597 | } 598 | 599 | -------------------------------------------------------------------------------- /src/mpd_moreGreedy.c: 
-------------------------------------------------------------------------------- 1 | #include "mpd.h" 2 | #include "mem.h" 3 | 4 | static FILE *outfile; 5 | 6 | int 7 | main () 8 | { 9 | char ss[256], sss[4196], **filename; 10 | char *scratch_pad, **contig_descript; 11 | unsigned char **compressed_map, *highmer; 12 | int i, j, k, N, N_snpfiles, *contig_snp_count, *contig_length; 13 | int pad_size, fasta, idepth, target_contig, total_index; 14 | int *flat_index, old_index, ***repeat_list, *no_repeats; 15 | int amp_min, amp_max, pool_size, N_targets; 16 | int min_primer, max_primer, genome_start, genome_stop; 17 | FILE *sfile, *cfile, *idfile, *rfile, *highfile, *snpfile_idx, *snpfile, *target_ampfile; 18 | double max_gc, min_gc, min_tm, max_tm, tm_inc; 19 | SNODE ***snp_list; 20 | AMPNODE **target_amp_list; 21 | 22 | outfile = stdout; 23 | read_var ("\nSend Output to Screen or Disk? [S,D]\n", ss); 24 | 25 | if ((strchr (ss, 'D')) || (strchr (ss, 'd'))) 26 | { 27 | read_var ("Please Enter File Name for Output\n", ss); 28 | if ((outfile = fopen (ss, "w")) == (FILE *) NULL) 29 | { 30 | printf ("\n Can not open file %s\n", ss); 31 | exit (1); 32 | } 33 | } 34 | else 35 | outfile = stdout; 36 | 37 | old_index = TRUE; 38 | 39 | read_var ("Primer Picker Summary Filename (e.g., index summary like hg19.sdx)\n", ss); 40 | if ((sfile = fopen (ss, "r")) == (FILE *) NULL) 41 | { 42 | printf ("\nCould Not Open file %s\n", ss); 43 | exit (1); 44 | } 45 | 46 | // sdx file: read the 1st line that contains an int of contigs that are indexed 47 | fgets (sss, 4195, sfile); 48 | N = atoi (sss); 49 | printf ("\n There are N chromosomes %d \n\n", N); 50 | 51 | compressed_map = (unsigned char **) malloc ((unsigned) (N + 1) * sizeof (unsigned char *)); 52 | if (!compressed_map) 53 | log_err ("allocation failure for compressed_map"); 54 | 55 | contig_descript = cmatrix (0, N, 0, 4196); 56 | contig_length = ivector (0, N); 57 | no_repeats = ivector (0, N); 58 | 59 | repeat_list = (int ***) 
malloc ((unsigned) (N + 1) * sizeof (int **)); 60 | if (!repeat_list) 61 | log_err ("allocation failure for repeat_list"); 62 | 63 | 64 | // sdx file: read in contig information that is in the format: contig_lenght number_repeats contig_description 65 | for (i = 0; i < N; i++) 66 | { 67 | fgets (sss, 4195, sfile); 68 | sscanf (sss, "%d\t%d\t%s", &contig_length[i], &no_repeats[i], contig_descript[i]); 69 | repeat_list[i] = imatrix (0, no_repeats[i], 0, 1); 70 | printf ("\n Contig %d is named %s and is length %d\n", i, contig_descript[i], contig_length[i]); 71 | } 72 | 73 | // sdx file: read in int representing depth of coverage 74 | fgets (sss, 4195, sfile); 75 | idepth = atoi (sss); 76 | 77 | // printf("\n Indexing to a depth of %d \n",idepth); 78 | 79 | // sdx file: read next line - should be hg19.cdx 80 | fgets (sss, 4195, sfile); 81 | for (i = 0; i < 4194; i++) 82 | if (isspace (sss[i])) 83 | { 84 | sss[i] = '\0'; 85 | i = 4195; 86 | } 87 | 88 | // cdx file: set file 89 | if ((cfile = fopen (sss, "r")) == (FILE *) NULL) 90 | { 91 | printf ("\nCould Not Open 1 file \"%s\"\n", sss); 92 | exit (1); 93 | } 94 | 95 | // sdx file: read next line - should be hg19.idx 96 | fgets (sss, 4195, sfile); 97 | for (i = 0; i < 4194; i++) 98 | if (isspace (sss[i])) 99 | { 100 | sss[i] = '\0'; 101 | i = 4195; 102 | } 103 | 104 | // idx file: set file 105 | if ((idfile = fopen (sss, "r")) == (FILE *) NULL) 106 | { 107 | printf ("\nCould Not Open 2 file \"%s\"\n", sss); 108 | exit (1); 109 | } 110 | 111 | // sdx file: read next line - should be hg19.rdx 112 | fgets (sss, 4195, sfile); 113 | for (i = 0; i < 4194; i++) 114 | if (isspace (sss[i])) 115 | { 116 | sss[i] = '\0'; 117 | i = 4195; 118 | } 119 | 120 | // rdx file: set file 121 | if ((rfile = fopen (sss, "r")) == (FILE *) NULL) 122 | { 123 | printf ("\nCould Not Open 3 file \"%s\"\n", sss); 124 | exit (1); 125 | } 126 | 127 | //sdx file: read next line - should be hg19.15x 128 | fgets (sss, 4195, sfile); 129 | for (i = 0; i < 
4194; i++) 130 | if (isspace (sss[i])) 131 | { 132 | sss[i] = '\0'; 133 | i = 4195; 134 | } 135 | 136 | // 'highfile' or 'hg19.15x': set file 137 | if ((highfile = fopen (sss, "r")) == (FILE *) NULL) 138 | { 139 | printf ("\nCould Not Open 4 file \"%s\"\n", sss); 140 | exit (1); 141 | } 142 | fclose (sfile); 143 | 144 | printf ("\nAbout to read repeat file\n\n"); 145 | 146 | // index Description i number of contigs: N j number of repeats in a contig: no_repeats[i] k not sure location of start of repeat until end of repeat 147 | for (i = 0; i < N; i++) 148 | { 149 | for (j = 0; j < no_repeats[i]; j++) 150 | for (k = 0; k < 2; k++) 151 | if ((fread (&repeat_list[i][j][k], sizeof (int), 1, rfile)) < 1) 152 | { 153 | printf ("\n Expected to read %d repeats for contig %d, but got %d\n\n", no_repeats[i], i, j); 154 | exit (1); 155 | } 156 | // for(j=0;j<1;j++) printf("\nFor contig %d repeat %d goes from %d %d\n\n", i,j,repeat_list[i][j][0],repeat_list[i][j][1]); 157 | } 158 | fclose (rfile); 159 | printf ("\nFinished reading repeat file"); 160 | 161 | printf ("\nAbout to read 15mer file\n\n"); 162 | highmer = ucvector (0, 134217728); 163 | if ((j = fread (highmer, sizeof (unsigned char), 134217728, highfile)) != 134217728) 164 | { 165 | printf ("\n Expected to read %d 15mers but got %d\n", 134217728, j); 166 | exit (1); 167 | } 168 | fclose (highfile); 169 | printf ("\nFinished reading 15mer file\n"); 170 | 171 | //read dbSNP information -> summary file => actual files fill ***snp_list 172 | read_var ("Name of dbSNP summary file\n", ss); 173 | if ((snpfile_idx = fopen (ss, "r")) == (FILE *) NULL) 174 | { 175 | printf ("\nCould Not Open dbsnp file %s\n", ss); 176 | exit (1); 177 | } 178 | 179 | // dbsnp summary file line 1 = number of contigs/chr 180 | fgets (sss, 4195, snpfile_idx); 181 | N_snpfiles = atoi (sss); 182 | 183 | // read dbsnp files 184 | contig_snp_count = ivector (0, N_snpfiles); 185 | snp_list = (SNODE ***) malloc (N_snpfiles * sizeof (SNODE **)); 186 | 
filename = cmatrix (0, N_snpfiles, 0, 256); 187 | for (i = 0; i < N_snpfiles; i++) 188 | { 189 | fgets (sss, 4195, snpfile_idx); 190 | sscanf (sss, "%s", filename[i]); 191 | if ((snpfile = fopen (filename[i], "r")) == (FILE *) NULL) 192 | { 193 | printf ("\n Can not open file %s\n", filename[i]); 194 | exit (1); 195 | } 196 | snp_list[i] = fill_snp_list (snpfile, &contig_snp_count[i], i + 1); 197 | fclose (snpfile); 198 | printf ("\nFinished reading dbSNP file: %s. Found %d SNPs.\n", filename[i], contig_snp_count[i]); 199 | } 200 | fclose(snpfile_idx); 201 | 202 | // read target file 203 | read_var ("\nName of file with amplicon target coordinates\n", ss); 204 | if ((target_ampfile = fopen (ss, "r")) == (FILE *) NULL) 205 | { 206 | printf ("\nCould Not Open file with amplicon target coordinates %s\n", ss); 207 | exit (1); 208 | } 209 | N_targets = line_count (target_ampfile); 210 | fseek (target_ampfile, 0, SEEK_SET); 211 | target_amp_list = fill_amp_list (target_ampfile, N_targets); 212 | unsigned int total_size_toamp = 0; 213 | fclose (target_ampfile); 214 | 215 | for (i = 0; i < N_targets; i++) 216 | { 217 | printf ("\nSearching for Primers target #%d: %s chr%d:%d-%d\n", 218 | i + 1, 219 | target_amp_list[i]->name, 220 | target_amp_list[i]->chrom, target_amp_list[i]->start_pos + 1, target_amp_list[i]->stop_pos + 1); 221 | total_size_toamp += 1 + target_amp_list[i]->stop_pos - target_amp_list[i]->start_pos; 222 | } 223 | printf ("\nFinished reading amplicon target coordinates\n\tFound %d targets with a total_size of %u\n", N_targets,total_size_toamp); 224 | 225 | //PCR primer parameters 226 | read_var ("Minimum Primer Length\n", ss); 227 | min_primer = atoi (ss); 228 | 229 | read_var ("Maximum Primer Length\n", ss); 230 | max_primer = atoi (ss); 231 | 232 | read_var ("Minimum Amplicon Length\n", ss); 233 | amp_min = atoi (ss); 234 | 235 | read_var ("Maximum Amplicon Length\n", ss); 236 | amp_max = atoi (ss); 237 | 238 | read_var ("Minimum GC content [0..1.0]\n", 
ss); 239 | min_gc = (double) atof (ss); 240 | 241 | read_var ("Maximum GC content [0..1.0]\n", ss); 242 | max_gc = (double) atof (ss); 243 | 244 | read_var ("Minimum tm_primer in degrees C\n", ss); 245 | min_tm = (double) atof (ss); 246 | 247 | read_var ("Maximum tm_primer in degrees C\n", ss); 248 | max_tm = (double) atof (ss); 249 | 250 | read_var ("Maximum number of primer pairs to pool together\n", ss); 251 | pool_size = atoi (ss); 252 | 253 | read_var ("Pad size\n", ss); 254 | pad_size = atoi (ss); 255 | 256 | // sanity check that pad size 257 | if (pad_size <= max_primer) 258 | { 259 | printf("\nERROR: pad size must be larger than the maximum size of a primer\n"); 260 | exit(1); 261 | } 262 | else if (pad_size > 3 * amp_max) 263 | { 264 | printf("\nERROR: pad size is unrealistically large, i.e. 3 times the size of the maximum amplicon length\n"); 265 | exit(1); 266 | } 267 | 268 | read_var ("Tm increment (0.5 to 4.0)\n", ss); 269 | tm_inc = (double) atof (ss); 270 | 271 | if (tm_inc > 4 || tm_inc <0.5) 272 | { 273 | printf("\nError please choose an increment between 0.5 and 4 C\n"); 274 | exit(1); 275 | } 276 | 277 | // print header for outfile 278 | fprintf (outfile, "Primer_number\tForward_primer\tForward_Tm\tForward_GC\tReverse_primer\tReverse_Tm\tReverse_GC\tChr\t"); 279 | fprintf (outfile, "Forward_start_position\tForward_stop_position\tReverse_start_position\tReverse_stop_position\t"); 280 | fprintf (outfile, "Product_length\tProduct_GC\tProduct_tm\tProduct\n"); 281 | 282 | // allocate some memory for indexed genome 283 | j = 4; 284 | for (i = 0; i < idepth; i++) 285 | j *= 4; 286 | j = (j - 4) / 3; 287 | 288 | total_index = j; 289 | printf ("\n Determined the size of flat index to be %d * %lu = %lu bytes\n", 290 | total_index, sizeof (int), total_index * sizeof (int)); 291 | 292 | flat_index = ivector (0, total_index); 293 | 294 | for (i = 0; i <= total_index; i++) 295 | flat_index[i] = 0; 296 | 297 | for (fasta = 0; fasta < N; fasta++) 298 | { 299 | j 
= contig_length[fasta]; 300 | if (j % 4 == 0) 301 | j /= 4; 302 | else 303 | j = j / 4 + 1; 304 | 305 | compressed_map[fasta] = ucvector (0, j + 1); 306 | printf ("\n For chromosome %d we are going to read %d bytes\n", fasta, j); 307 | k = fread (compressed_map[fasta], sizeof (unsigned char), j, cfile); 308 | 309 | if (k != j) 310 | { 311 | printf ("\nCompressed Sequence %d %s should have been length %d but was %d\n", 312 | fasta, contig_descript[fasta], j, k); 313 | exit (1); 314 | } 315 | } 316 | fclose (cfile); 317 | k = fread (flat_index, sizeof (int), total_index, idfile); 318 | 319 | if (k < total_index) 320 | { 321 | printf ("\nIndexed of N'mers should have been length %d but was %d\n", total_index, k); 322 | exit (1); 323 | } 324 | fclose (idfile); 325 | 326 | // allocated memory for amp_pool => array of regions captured. 327 | PNODE **all_primer_pairs; 328 | int max_regions = (1 + maxim(5*total_size_toamp / amp_min,N_targets*2)); 329 | int max_ppairs = MAX_PAIRS * max_regions; 330 | printf ("\n We have max_regions = %d max_pairs = %d \n", max_regions, max_ppairs); 331 | all_primer_pairs = malloc ((unsigned) max_ppairs * sizeof (PNODE *)); 332 | if (!all_primer_pairs) 333 | { 334 | printf ("\n Could not allocate space for %u primer pairs \n", max_ppairs); 335 | exit (1); 336 | } 337 | int **redundant_list, *best_start; 338 | redundant_list = imatrix (0, max_ppairs, 0, MAX_PAIRS); 339 | best_start = ivector (0, max_regions); 340 | for(i=0;i<=max_regions;i++) 341 | best_start[i] = -1; 342 | for (i = 0; i <= max_ppairs; i++) 343 | { 344 | for (j = 0; j <= MAX_PAIRS; j++) 345 | redundant_list[i][j] = -1; 346 | } 347 | int amp_pool_count = 0; 348 | int primer_count = 0; 349 | 350 | // start finding primers for targets 351 | AMPNODE *loop_amp; 352 | loop_amp = amp_alloc (); 353 | printf("\n loop_amp is located at position %ld in memory \n\n",(long)loop_amp); 354 | for (k = 0; k < N_targets; k++) 355 | { 356 | // set chromosome 357 | target_contig = 
target_amp_list[k]->chrom - 1; 358 | 359 | // set target start and stop 360 | int target_start = target_amp_list[k]->start_pos + 1; 361 | int target_stop = target_amp_list[k]->stop_pos + 1; 362 | 363 | // set temporary start and stop 364 | int this_start, this_stop, this_midpoint; 365 | if (target_stop - target_start > amp_max) 366 | { 367 | this_start = target_start; 368 | this_stop = target_start + amp_max; 369 | } 370 | else 371 | { 372 | this_midpoint = ((target_stop - target_start) / 2) + target_start; 373 | this_start = this_midpoint - (amp_min / 2); 374 | this_stop = this_midpoint + (amp_min / 2); 375 | } 376 | 377 | // while loop variable: call it the "region" loop 378 | int not_covered = 1; 379 | 380 | // print out target info 381 | printf ("\nSearching for Primers target #%d: %s chr%d:%d-%d\n\n", 382 | k + 1, 383 | target_amp_list[k]->name, 384 | target_amp_list[k]->chrom, target_amp_list[k]->start_pos + 1, target_amp_list[k]->stop_pos + 1); 385 | printf ("\nStarting params this_start = %d, this_stop = %d\n\n", this_start, this_stop); 386 | 387 | // variables: 388 | // target_start, target_stop => region to cover with amplicons 389 | // this_start, this_stop => region to cover with the specific iteration of the while loop 390 | // genome_start, genome_stop => coordinates used to extract actual genomic region to target 391 | // 392 | 393 | 394 | while (not_covered) 395 | { 396 | printf("\n 2 loop_amp is located at position %ld in memory \n\n",(long)loop_amp); 397 | 398 | // start site - make sure it's a multiple of 4 399 | genome_start = maxim (1, this_start - pad_size); 400 | j = genome_start / 4; 401 | genome_start = j * 4; 402 | 403 | // stop site - make sure it's a multiple of 4 404 | genome_stop = (this_stop + pad_size) / 4; 405 | genome_stop = minim (genome_stop * 4, contig_length[target_contig]); 406 | 407 | // length of region 408 | j = (genome_stop - genome_start); 409 | j = minim (j, contig_length[target_contig]); 410 | 411 | // set loop_amp 
attributes 412 | loop_amp->start_pos = this_start + 1; 413 | loop_amp->stop_pos = this_stop + 1; 414 | loop_amp->chrom = target_amp_list[k]->chrom; 415 | loop_amp->no_pairs = 0; 416 | sprintf (loop_amp->name, "%s_%02d", target_amp_list[k]->name, amp_pool_count); 417 | 418 | // tm range 419 | int this_min_tm = min_tm; 420 | int this_max_tm = max_tm; 421 | 422 | // while loop variables: call it the "specific target" loop 423 | int found_count = 0; 424 | int this_trial = 0; 425 | 426 | // scratch pad 427 | if (j > 2000) 428 | { 429 | printf ("\nj (length) is too big %d\n\n", j); 430 | exit (1); 431 | } 432 | // allocate memory for scratch pad 433 | scratch_pad = cvector (0, j + 5); 434 | scratch_pad[0] = '\0'; 435 | decode_basepairs (&compressed_map[target_contig][genome_start / 4], scratch_pad, j / 4); 436 | 437 | do 438 | { 439 | found_count = find_primers (snp_list[target_contig], 440 | loop_amp, 441 | contig_snp_count[target_contig], // number of snps in the contig 442 | flat_index, 443 | scratch_pad, // copy of the target 444 | j, // length of region 445 | min_primer, max_primer, amp_max, amp_min, min_gc, max_gc, this_min_tm, this_max_tm, 446 | idepth, // index depth 447 | 10, // local depth 448 | j / 2, // target base (from old primer_snp.c program) 449 | genome_start + 1, 450 | highmer, 451 | repeat_list[target_contig], 452 | no_repeats[target_contig], 453 | pad_size, // size on either end of the contig to look for primer 454 | loop_amp->chrom); 455 | this_min_tm -= tm_inc; 456 | this_max_tm += tm_inc; 457 | this_trial++; 458 | if (this_min_tm < min_tm || this_max_tm > max_tm) { 459 | break; 460 | } 461 | } while (this_trial < 10 && found_count < 1); 462 | printf("\n 3 loop_amp is located at position %ld in memory \n\n",(long)loop_amp); 463 | 464 | 465 | free_cvector (scratch_pad, 0, j + 5); 466 | int nearest_stop = 0; 467 | if (found_count > 0) 468 | { 469 | if (loop_amp->no_pairs > 0) 470 | { 471 | if (amp_pool_count == max_ppairs) 472 | { 473 | printf 
("ERROR: exceeded the maximum number of primer pairs allocated: %d\n\n", max_ppairs); 474 | exit (1); 475 | } 476 | 477 | printf ("\nBED: chr%d\t%d\t%d\n", loop_amp->chrom, this_start, this_stop); 478 | 479 | int start_primer_count = primer_count; 480 | 481 | printf("\n\nabout to fill the array of primers start = %d with %d pairs coming\n\n", 482 | start_primer_count,loop_amp->no_pairs); 483 | 484 | for (j = 0; j < loop_amp->no_pairs; j++) 485 | all_primer_pairs[primer_count++] = loop_amp->pair[j]; 486 | int jj; 487 | for (j = start_primer_count; j < primer_count; j++) 488 | { 489 | for (jj = 0; jj < loop_amp->no_pairs; jj++) 490 | { 491 | printf("\n\nj = %d, jj = %d, start_primer_count = %d, primer_count = %d\n\n", j, jj, start_primer_count, primer_count); 492 | redundant_list[j][jj] = start_primer_count + jj; 493 | printf("\n 4 loop_amp is located at position %ld in memory \n\n",(long)loop_amp); 494 | } 495 | } 496 | printf("\n Made it here \n\n"); 497 | best_start[amp_pool_count] = start_primer_count; 498 | amp_pool_count++; 499 | 500 | for (j = 0; j < loop_amp->no_pairs; j++) 501 | { 502 | if (nearest_stop == 0) 503 | nearest_stop = loop_amp->pair[j]->reverse->start; 504 | else if (loop_amp->pair[j]->reverse->start < nearest_stop) 505 | nearest_stop = loop_amp->pair[j]->reverse->start; 506 | } 507 | } 508 | } 509 | printf("\n\nnearest stop is %d\n\n", nearest_stop); 510 | 511 | if (nearest_stop > 0) 512 | { 513 | this_start = nearest_stop; 514 | this_stop = this_start + amp_min; 515 | } 516 | else 517 | { 518 | this_start += pad_size; 519 | this_stop = this_start + amp_min; 520 | } 521 | if (nearest_stop >= target_stop || this_start > target_stop) 522 | not_covered = 0; 523 | } 524 | } 525 | 526 | char **poolable_matrix; 527 | int *poolable_count; 528 | poolable_count = ivector (0, primer_count); 529 | for (i = 0; i < primer_count; i++) 530 | poolable_count[i] = 0; 531 | poolable_matrix = cmatrix (0, primer_count, 0, primer_count); 532 | for (i = 0; i < 
primer_count; i++) 533 | for (j = i + 1; j < primer_count; j++) 534 | { 535 | double max_amp_diff = (double) (amp_max * 0.15) + 1; 536 | poolable_matrix[i][j] = is_poolable_primer (all_primer_pairs[i], all_primer_pairs[j], (int) max_amp_diff, 2); 537 | poolable_matrix[j][i] = poolable_matrix[i][j]; 538 | poolable_count[i] += poolable_matrix[i][j]; 539 | poolable_count[j] += poolable_matrix[i][j]; 540 | } 541 | for (i = 0; i < amp_pool_count; i++) 542 | if (best_start[i] >= 0) 543 | { 544 | int k = best_start[i]; 545 | for (j = 0; j < MAX_PAIRS; j++) 546 | if (redundant_list[k][j] >= 0) 547 | if (poolable_count[redundant_list[k][j]] > poolable_count[best_start[i]]) 548 | best_start[i] = redundant_list[k][j]; 549 | } 550 | // 551 | // print cmat 552 | // 553 | //printf ("\n"); 554 | //for (i = 0; i < primer_count; i++) 555 | //printf ("\t%s_%s_%03d", all_primer_pairs[i]->forward->sequence, all_primer_pairs[i]->reverse->sequence, i); 556 | //for (i = 0; i < primer_count; i++) 557 | //{ 558 | //printf ("\n%s_%s_%03d", all_primer_pairs[i]->forward->sequence, all_primer_pairs[i]->reverse->sequence, i); 559 | //for (j = 0; j < primer_count; j++) 560 | //if (i != j) 561 | //printf ("\t%d", (int) poolable_matrix[i][j]); 562 | //else 563 | //printf ("\t."); 564 | //} 565 | //printf ("\n"); 566 | // 567 | // print best start matrix 568 | // 569 | // for (i = 0; i < amp_pool_count; i++) 570 | // { 571 | // int k = best_start[i]; 572 | // for (j = 0; j < MAX_PAIRS; j++) 573 | // if (redundant_list[k][j] >= 0) 574 | // printf (" %03d", redundant_list[k][j]); 575 | // else 576 | // printf (" ."); 577 | // printf ("\t| Region: %03d\t| Primer: %03d\t| Poolable Count: %03d\n", i, best_start[i], poolable_count[best_start[i]]); 578 | // } 579 | // 580 | // redundant matrix 581 | // 582 | // printf("\n\n"); 583 | // for (i=0; i= 0) 587 | // printf (" %03d", redundant_list[i][j]); 588 | // else 589 | // printf (" ."); 590 | // printf("\n"); 591 | // } 592 | 593 | printf ("\n\n going to 
make_pools with amp_pools = %d, primer_count = %d\n", amp_pool_count, primer_count); 594 | int *current_pool; 595 | current_pool = ivector (0, 20); 596 | make_greedy_pools ( outfile, all_primer_pairs, poolable_matrix, poolable_count, redundant_list, best_start, amp_pool_count, primer_count, primer_count, current_pool, 0, pool_size); 597 | return 0; 598 | } 599 | -------------------------------------------------------------------------------- /src/pool_check.c: -------------------------------------------------------------------------------- 1 | #include "mpd.h" 2 | 3 | // TS Wingo 4 | // created: 2013-10-28 5 | // updated: 2015-08-02 6 | // checks that the pools created by primer4amplicons.c 7 | // are actually poolable. It needs the input stripped of 8 | // lines that don't contain primers and this is done 9 | // using p4a_2_pool_check.pl 10 | 11 | int main (int argc, char **argv) 12 | { 13 | usage(argc == 4, "pool_check " ); 14 | 15 | // initialize 16 | int max_primers_in_pool = 20; 17 | int max_primer_pairs = 1000; 18 | int *primers_in_pool = create_ivec (max_primer_pairs); 19 | PNODE ***primer_pool = primer_pool_create (max_primers_in_pool, max_primer_pairs); 20 | 21 | int max_amplicon_length = atoi(argv[3]); 22 | 23 | // read data 24 | int max_pools = read_primer_pools( argv[1], max_primer_pairs, max_primers_in_pool, 25 | primers_in_pool, primer_pool); 26 | 27 | // check pools 28 | Check_all_pools( max_pools, primers_in_pool, primer_pool, max_amplicon_length); 29 | 30 | // print isPcr 31 | Print_isPcr( argv[2], max_pools, primers_in_pool, primer_pool ); 32 | 33 | return 0; 34 | 35 | error: 36 | return 1; 37 | } 38 | -------------------------------------------------------------------------------- /src/primer_compat.c: -------------------------------------------------------------------------------- 1 | #include "mpd.h" 2 | 3 | // TS Wingo 4 | // 10-30-2013 5 | // checks a primer is compat with another primer 6 | 7 | int main (int argc, char **argv) 8 | { 9 | char 
fp1[80], fp2[80], rp1[80], rp2[80]; 10 | int i, f1, r1, f2, r2; 11 | char flipf1[80], flipf2[80], flipr1[80], flipr2[80]; 12 | 13 | if (argc != 5) 14 | { 15 | printf("\nUsage: primer_compat forward_primer_1 reverse_primer_1 forward_primer_2 reverse_primer_2\n"); 16 | exit(1); 17 | } 18 | 19 | sprintf(fp1, "%s", argv[1]); 20 | sprintf(rp1, "%s", argv[2]); 21 | sprintf(fp2, "%s", argv[3]); 22 | sprintf(rp2, "%s", argv[4]); 23 | 24 | printf("\nprimer 1 fwd: %s\trev: %s\nprimer 2 fwd: %s\trev: %s\n", fp1, rp1, fp2, rp2); 25 | 26 | f1 = strlen(fp1); 27 | r1 = strlen(rp1); 28 | f2 = strlen(fp2); 29 | r2 = strlen(rp2); 30 | 31 | for(i=0;i<80;i++) 32 | flipf1[i] = flipf2[i] = flipr1[i] = flipr2[i] = '\0'; 33 | 34 | printf("\n\nlen primer 1 fwd: %d\trev: %d\nlen primer 2 fwd: %d\trev: %d\n", f1, r1, f2, r2); 35 | reverse_string (rp1, flipr1, r1); 36 | reverse_string (rp2, flipr2, r2); 37 | reverse_string (fp1, flipf1, f1); 38 | reverse_string (fp2, flipf2, f2); 39 | 40 | printf("\n\ncomplement of primer 1 fwd: %s\trev: %s\ncomplement of primer 2 fwd: %s\trev: %s\n", flipf1, flipr1, flipf2, flipr2); 41 | 42 | if (check_uneven_dimer (fp1, flipf2, f1, f2)) 43 | printf ("\nprimer 1 forward (%s) makes dimer with primer 2 forward (%s)\n", fp1, fp2); 44 | else if (check_uneven_dimer (rp1, flipr2, r1, r2)) 45 | printf ("\nprimer 1 reverse (%s) makes dimer with primer 2 reverse (%s)\n", rp1, rp2); 46 | else if (check_uneven_dimer (fp1, flipr2, f1, r2)) 47 | printf ("\nprimer 1 forward (%s) makes dimer with primer 2 reverse (%s)\n", fp1, rp2); 48 | else if (check_uneven_dimer (rp1, flipf2, r1, f2)) 49 | printf ("\nprimer 1 reverse (%s) makes dimer with primer 2 forward (%s)\n", rp1, fp2); 50 | else 51 | printf("\nprimer pair 1 and 2 seem compatable.\n"); 52 | } 53 | --------------------------------------------------------------------------------