├── .gitignore
├── LICENSE.md
├── Makefile
├── README.md
├── bin
│   └── run_index.pl
└── src
    ├── dbg.h
    ├── index_genome.c
    ├── mem.c
    ├── mem.h
    ├── mpd.c
    ├── mpd.h
    ├── mpd_lessGreedy.c
    ├── mpd_moreGreedy.c
    ├── pool_check.c
    └── primer_compat.c
/.gitignore:
--------------------------------------------------------------------------------
1 | # Object files
2 | *.o
3 | *.ko
4 | *.obj
5 | *.elf
6 |
7 | # Precompiled Headers
8 | *.gch
9 | *.pch
10 |
11 | # Libraries
12 | *.lib
13 | *.a
14 | *.la
15 | *.lo
16 |
17 | # Shared objects (inc. Windows DLLs)
18 | *.dll
19 | *.so
20 | *.so.*
21 | *.dylib
22 |
23 | # Executables
24 | *.exe
25 | *.out
26 | *.app
27 | *.i*86
28 | *.x86_64
29 | *.hex
30 |
31 | # project-specific
32 | *.snp
33 | *.dSYM/
34 | build/
35 | bin/
36 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | <one line to give the program's name and a brief idea of what it does.>
635 | Copyright (C) <year> <name of author>
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | <program> Copyright (C) <year> <name of author>
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
## Makefile
##
## Builds the MPD command-line tools into $(OUT).  Every tool links the
## shared sources src/mem.c and src/mpd.c together with its own driver file.

CC = gcc
CC_OPTIONS = -Wall -g -v -O3 -std=gnu11 -DNDEBUG
INCLUDES =
CFLAGS = $(CC_OPTIONS) $(INCLUDES)
LIBS = -lm

OUT = bin
PRIMER_LG_EXE = ${OUT}/mpd
PRIMER_LG_SRC = src/mem.c src/mpd.c src/mpd_lessGreedy.c
PRIMER_LG_OBJ = $(PRIMER_LG_SRC:.c=.o)

PRIMER_MG_EXE = ${OUT}/mpd_greedy
PRIMER_MG_SRC = src/mem.c src/mpd.c src/mpd_moreGreedy.c
PRIMER_MG_OBJ = $(PRIMER_MG_SRC:.c=.o)

POOL_EXE = ${OUT}/pool_check
POOL_SRC = src/mem.c src/mpd.c src/pool_check.c
POOL_OBJ = $(POOL_SRC:.c=.o)

INDEX_EXE = ${OUT}/index_genome
INDEX_SRC = src/mem.c src/mpd.c src/index_genome.c
INDEX_OBJ = $(INDEX_SRC:.c=.o)

PCOMP_EXE = ${OUT}/primer_compat
PCOMP_SRC = src/mem.c src/mpd.c src/primer_compat.c
PCOMP_OBJ = $(PCOMP_SRC:.c=.o)


PROGS = $(PRIMER_LG_EXE) $(PRIMER_MG_EXE) $(POOL_EXE) $(INDEX_EXE) $(PCOMP_EXE)

# These targets name actions, not files; without .PHONY a stray file called
# "all" or "clean" would silently disable them.
.PHONY: all introduce clean distclean

all: introduce $(PROGS)
	@echo done.

$(PRIMER_LG_EXE): $(PRIMER_LG_OBJ)
	$(CC) -o $@ $(CFLAGS) $(PRIMER_LG_OBJ) $(LIBS)

$(PRIMER_MG_EXE): $(PRIMER_MG_OBJ)
	$(CC) -o $@ $(CFLAGS) $(PRIMER_MG_OBJ) $(LIBS)

$(POOL_EXE): $(POOL_OBJ)
	$(CC) -o $@ $(CFLAGS) $(POOL_OBJ) $(LIBS)

$(INDEX_EXE): $(INDEX_OBJ)
	$(CC) -o $@ $(CFLAGS) $(INDEX_OBJ) $(LIBS)

$(PCOMP_EXE): $(PCOMP_OBJ)
	$(CC) -o $@ $(CFLAGS) $(PCOMP_OBJ) $(LIBS)

# Announce the build and make sure the output directory exists.
introduce:
	@echo "Building..."
	mkdir -p ${OUT}

# Remove object files only.
clean:
	rm -f src/*.o

# Remove object files and every built executable (all of $(PROGS),
# including primer_compat).
distclean: clean
	rm -f $(PROGS)

## end of Makefile
# DO NOT DELETE THIS LINE -- make depend depends on it.
63 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | MPD - Multiplex PCR Design
2 | ============================
3 |
4 | by Thomas Wingo and David Cutler
5 |
6 | ## Citation
7 |
8 | Please cite our [paper](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-016-1453-3) if you use MPD in your work. Thanks.
9 |
10 | ## Description
11 |
12 | MPD is a program, written in C, that automates multiplex PCR primer design. The `mpd` and `mpd_greedy` binaries (built from `mpd_lessGreedy.c` and `mpd_moreGreedy.c`, respectively) differ in which primer pool they choose to start with for pool creation. Either binary can be used as stand-alone or in conjunction with the [MPD perl package](http://github.com/wingolab-org/mpd-perl).
13 |
14 | ## Installation
15 | - Clone the repository
16 | - Make with `make all`
17 | - Binaries will be compiled and saved to the `bin` directory
18 |
19 | ## Required files
20 | - You will need a hashed copy of the genome to run the primer software.
21 | - Instructions below show how you can create one yourself. A prebuilt hg38 genome with flat dbSnp files is available from [this repository](https://bitbucket.org/wingolab/mpd-dat/). It may be cloned like so, `git clone https://bitbucket.org/wingolab/mpd-dat.git`.
22 |
23 | ### Build Hashed Genome
24 | - Download the genome of interest as a fasta file
25 | - Use `bin/run_index.pl`, which creates a sh script to run `index_genome`
26 | - You'll need to install these perl packages to use this script: `Path::Tiny`, `Data::Dump`, and `Getopt::Long`, which can be installed using [`cpanm`][1] like so `cpanm Path::Tiny Data::Dump Getopt::Long`.
27 |
28 | ### Flat dbSnp
29 | - These can be obtained from [this repository](https://bitbucket.org/wingolab/mpd-dat/), which were prepared from dbSNP version 140.
30 | - To create your own flat snp file set based on criteria of your own devising, each line should contain tab-delimited fields of the following:
31 | ```
32 | name numberOfReporters chrom position MinorAlleleFrequency allele1/allele2
33 | ```
34 | - The `numberOfReporters` field is no longer used but retained for backwards compatibility.
35 | - Prepare a `sdx` file that contains the number of chromosome files to include as the 1st line and then a list of the names of all chromosome files. On the command line you might try: `ls -1 *.line | wc -l > db_flat.sdx; ls -1 *.line >> db_flat.sdx`. Note that the sdx should list the chromosomes in the same order as the indexed genome. See the genome's sdx file (e.g., `cat hg38.d14.sdx`) to see the order.
36 |
37 | ## Run mpd
38 | - The easiest way of using MPD is to use the [Perl package MPD](http://github.com/wingolab-org/mpd-perl), but either the `mpd` or the `mpd_greedy` binary may be executed from the command line interactively.
39 |
40 | [1]: https://metacpan.org/pod/App::cpanminus
41 |
--------------------------------------------------------------------------------
/bin/run_index.pl:
--------------------------------------------------------------------------------
#!/usr/bin/env perl
# Name: run_index.pl
# Date Created: Mon Mar 21 13:59:19 2016
# Date Modified: Mon Mar 21 13:59:19 2016
# By: TS Wingo
#
# Description: Collects the chromosome fasta files in a directory and writes
#              two files: "<out>.in", holding the answers to the interactive
#              prompts of index_genome, and "<out>.sh", a shell script that
#              feeds that file to ./index_genome.

use 5.10.0;
use warnings;
use strict;

use Getopt::Long;
use Path::Tiny;
use Data::Dump qw/ dump /;

# variables: -d and -o are required; -e and -i fall back to these defaults
my ( $dir_name, $out_ext );
my $ext        = "fa";
my $indexDepth = 14;

# get options; die with the usage text when parsing fails or when any of the
# values is missing or false (so an index depth of 0 is rejected as well)
die "Usage: $0 [-e <fasta extension>] [-i <index depth>] -d <fasta directory> -o <output basename>\n"
  unless GetOptions(
  'o|out=s'   => \$out_ext,
  'e|ext=s'   => \$ext,
  'd|dir=s'   => \$dir_name,
  'i|index=n' => \$indexDepth,
  )
  and $dir_name
  and $indexDepth
  and $out_ext
  and $ext;

my ( $filesAref, $sizesAref ) = FastaList( $dir_name, $ext );
WriteIn( $out_ext, $filesAref, $sizesAref );
WriteSh($out_ext);
38 |
# WriteIn( $out_ext, \@files, \@sizes )
# Writes "$out_ext.in": one answer per line, in the order index_genome asks
# for them (output destination, output file name, number of fasta files, the
# fasta file names, the basename for the index, the maximum number of sites
# in a contig, and the index depth).  Reads the file-level $indexDepth.
sub WriteIn {
  my ( $out_ext, $filesAref, $sizesAref ) = @_;

  # The largest file size (plus one) is used as the per-contig site limit.
  my ($maxSize) = sort { $b <=> $a } @$sizesAref;

  my $fh = path("$out_ext.in")->filehandle(">");

  say {$fh} $_ for ( "d", "in.index_genome.in", scalar @$filesAref );
  say {$fh} join( "\n", @$filesAref );
  say {$fh} $_ for ( "$out_ext.d$indexDepth", $maxSize + 1, $indexDepth );
}
54 |
# WriteSh( $out_ext )
# Writes "$out_ext.sh", a two-line shell script that runs ./index_genome with
# "$out_ext.in" on standard input.
sub WriteSh {
  my ($out_ext) = @_;

  my $script = "#!/bin/sh\n./index_genome < $out_ext.in";
  my $fh     = path("$out_ext.sh")->filehandle(">");
  say {$fh} $script;
}
61 |
# FastaList( $dir, $ext )
# Returns ( \@paths, \@sizes ) for the files in $dir whose names end in $ext.
# Files named exactly chr<N>.<ext> come first, in the order of @chrs; any
# remaining files whose names start with chr<N> follow, again grouped by
# @chrs and sorted by name within a group.  Exits with status 1 when nothing
# matches.
sub FastaList {
  my $dir = shift;
  my $ext = shift;

  my ( @fastas, @sizes );

  # Accept the extension with or without a leading dot when building the
  # canonical file names below.
  ( my $bareExt = $ext ) =~ s/\A\.//;

  # \Q...\E keeps characters such as '.' in the extension from acting as
  # regex metacharacters.
  my @files = path($dir)->children(qr{\Q$ext\E\z});
  my @chrs  = ( 1 .. 26, "M", "X", "Y", "Un" );
  my %files = map { $_->basename() => $_ } @files;

  # Pass 1: canonical per-chromosome files, honouring the requested
  # extension instead of a hard-coded ".fa".
  for my $chr (@chrs) {
    my $f = sprintf( "chr%s.%s", $chr, $bareExt );
    if ( exists $files{$f} ) {
      push @fastas, $files{$f}->stringify;
      push @sizes, ( -s $files{$f} );
      delete $files{$f};
    }
  }

  # Pass 2: everything else that starts with chr<N>.
  for my $chr (@chrs) {
    for my $file ( sort keys %files ) {
      if ( $file =~ m/\Achr$chr/ ) {
        push @fastas, $files{$file}->stringify;
        push @sizes, ( -s $files{$file} );
        delete $files{$file};
      }
    }
  }
  if ( scalar @fastas == 0 ) {
    say "No fasta files to process. Exiting...";
    exit(1);
  }
  return ( \@fastas, \@sizes );
}
97 |
--------------------------------------------------------------------------------
/src/dbg.h:
--------------------------------------------------------------------------------
#ifndef __dbg_h__
#define __dbg_h__

/*
 * Logging and error-checking macros.
 *
 * usage(), check(), sentinel(), check_mem() and check_debug() jump to a label
 * named `error`, so every function that uses them must define that label.
 * NOTE(review): the conditional macros expand to a bare `if`, so they should
 * not be used as the unbraced body of an if/else.
 */

#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>

/* debug(): trace message on stderr; compiled out when NDEBUG is defined. */
#ifdef NDEBUG
#define debug(M, ...)
#else
#define debug(M, ...) fprintf(stderr, "DEBUG %s:%d %s(): " M "\n", __FILE__, __LINE__, __func__, ##__VA_ARGS__)
#endif

/* usage(): when A is false, print a usage message on stdout and goto error. */
#define usage(A, M, ...) if(!(A)) { fprintf(stdout, "USAGE: " M "\n", ##__VA_ARGS__); goto error; }

/* clean_errno(): text for the current errno, or "None" when errno is 0. */
#define clean_errno() (errno == 0 ? "None" : strerror(errno))

/* log_err / log_warn / log_info: tagged message on stderr with source location. */
#define log_err(M, ...) fprintf(stderr, "[ERROR] (%s:%d %s(): errno: %s) " M "\n", __FILE__, __LINE__, __func__, clean_errno(), ##__VA_ARGS__)

#define log_warn(M, ...) fprintf(stderr, "[WARN] (%s:%d %s(): errno: %s) " M "\n", __FILE__, __LINE__, __func__, clean_errno(), ##__VA_ARGS__)

#define log_info(M, ...) fprintf(stderr, "[INFO] (%s:%d %s()) " M "\n", __FILE__, __LINE__, __func__, ##__VA_ARGS__)

/* check(): when A is false, log the error, reset errno and goto error. */
#define check(A, M, ...) if(!(A)) { log_err(M, ##__VA_ARGS__); errno=0; goto error; }

/* sentinel(): unconditionally log the error, reset errno and goto error. */
#define sentinel(M, ...) { log_err(M, ##__VA_ARGS__); errno=0; goto error; }

/* check_mem(): check() specialised for a failed allocation. */
#define check_mem(A) check((A), "Out of memory.")

/* check_debug(): like check(), but reports through debug() instead of log_err(). */
#define check_debug(A, M, ...) if(!(A)) { debug(M, ##__VA_ARGS__); errno=0; goto error; }

#endif
34 |
--------------------------------------------------------------------------------
/src/index_genome.c:
--------------------------------------------------------------------------------
1 | #include "mpd.h"
2 |
3 | static FILE *outfile, **innfile;
4 |
5 | int
6 | main ()
7 | {
8 | char **filename, ss[256], sss[4196], basename[1024];
9 | char *scratch_pad, **contig_descript;
10 | unsigned char **compressed_map, *double_high;
11 | int i, j, k, N, max_sites, *contig_length, not_done, newpos, high_size, high_depth;
12 | int fasta, idepth, total_index, *repeat_count;
13 | int *flat_index, in_repeat;
14 | FILE *sfile, *cfile, *idfile, *rpfile, *highfile;
15 |
16 |
17 | outfile = stdout;
18 | read_var ("\nSend Output to Screen or Disk? [S,D]\n", ss);
19 |
20 | if ((strchr (ss, 'D')) || (strchr (ss, 'd')))
21 | {
22 | read_var ("Please Enter File Name for Output\n", ss);
23 | if ((outfile = fopen (ss, "w")) == (FILE *) NULL)
24 | {
25 | printf ("\n Can not open file %s\n", ss);
26 | exit (1);
27 | }
28 | }
29 | else
30 | outfile = stdout;
31 |
32 | read_var ("Number of Contig Fasta Files to Process\n", ss);
33 | N = atoi (ss);
34 | filename = cmatrix (0, N, 0, 256);
35 | repeat_count = ivector (0, N);
36 |
37 | innfile = (FILE **) malloc ((unsigned) (N + 1) * sizeof (FILE *));
38 | if (!innfile)
39 | log_err ("allocation failure for innfile");
40 |
41 | compressed_map = (unsigned char **) malloc ((unsigned) (N + 1) * sizeof (unsigned char *));
42 | if (!compressed_map)
43 | log_err ("allocation failure for compressed_map");
44 |
45 | contig_descript = cmatrix (0, N, 0, 4196);
46 | contig_length = ivector (0, N);
47 |
48 | for (i = 0; i < N; i++)
49 | {
50 | sprintf (sss, "Please Enter Name For Contig Fasta File %d\n", i + 1);
51 | read_var (sss, filename[i]);
52 | if ((innfile[i] = fopen (filename[i], "r")) == (FILE *) NULL)
53 | {
54 | printf ("\n Can not open file %s\n", filename[i]);
55 | exit (1);
56 | }
57 | }
58 | read_var ("Basename to save compressed Genome and Indexes\n", basename);
59 |
60 | read_var ("Maximum Number Of Sites in a contig\n", ss);
61 | max_sites = atoi (ss);
62 |
63 | read_var ("Index Depth\n", ss);
64 | idepth = atoi (ss);
65 | sprintf (sss, "%s.sdx", basename);
66 | if ((sfile = fopen (sss, "w")) == (FILE *) NULL)
67 | {
68 | printf ("\nCould Not Open file %s\n", sss);
69 | exit (1);
70 | }
71 | fprintf (sfile, "%d\n", N);
72 |
73 | j = 4;
74 | for (i = 0; i < idepth; i++)
75 | j *= 4;
76 | j = (j - 4) / 3;
77 |
78 | total_index = j;
79 | printf ("\n Determined the size of flat index to be %d * %ld = %ld bytes\n",
80 | total_index, sizeof (int), total_index * sizeof (int));
81 |
82 | flat_index = ivector (0, total_index);
83 |
84 | for (i = 0; i <= total_index; i++)
85 | flat_index[i] = 0;
86 |
87 | scratch_pad = cvector (0, max_sites);
88 | not_done = TRUE;
89 |
90 | sprintf (sss, "%s.cdx", basename);
91 | if ((cfile = fopen (sss, "w")) == (FILE *) NULL)
92 | {
93 | printf ("\nCould Not Open file %s\n", sss);
94 | exit (1);
95 | }
96 | sprintf (sss, "%s.rdx", basename);
97 | if ((rpfile = fopen (sss, "w")) == (FILE *) NULL)
98 | {
99 | printf ("\nCould Not Open file %s\n", sss);
100 | exit (1);
101 | }
102 | sprintf (sss, "%s.15x", basename);
103 | if ((highfile = fopen (sss, "w")) == (FILE *) NULL)
104 | {
105 | printf ("\nCould Not Open file %s\n", sss);
106 | exit (1);
107 | }
108 | high_depth = 15;
109 | high_size = 1;
110 | for (i = 0; i < high_depth; i++)
111 | high_size *= 4;
112 | high_size /= 8;
113 |
114 | double_high = ucvector (0, high_size);
115 | for (i = 0; i <= high_size; i++)
116 | double_high[i] = 0;
117 |
118 | for (fasta = 0; fasta < N; fasta++)
119 | {
120 | not_done = TRUE;
121 |
122 | in_repeat = FALSE;
123 | newpos = 0;
124 |
125 | fgets (sss, 4195, innfile[fasta]);
126 | j = strlen (sss);
127 | for (i = 0; i <= j; i++)
128 | if ((sss[i] == '\n') || (sss[i] == 10) || (sss[i] == 13))
129 | contig_descript[fasta][i] = ' ';
130 | else
131 | contig_descript[fasta][i] = sss[i];
132 |
133 |
134 | contig_descript[fasta][i] = '\0';
135 |
136 | fgets (sss, 256, innfile[fasta]);
137 | repeat_count[fasta] = 0;
138 |
139 | while (not_done)
140 | {
141 | /* printf("%s",sss); */
142 | j = strlen (sss);
143 |
144 | for (i = 0; i < j; i++)
145 | {
146 | if (isalpha (sss[i]))
147 | {
148 | scratch_pad[newpos] = toupper (sss[i]);
149 | if (islower (sss[i]) || (sss[i] == 'N'))
150 | {
151 | if (!in_repeat)
152 | {
153 | in_repeat = TRUE;
154 | repeat_count[fasta]++;
155 | fwrite (&newpos, sizeof (int), 1, rpfile);
156 | }
157 | }
158 | else if (in_repeat)
159 | {
160 | in_repeat = FALSE;
161 | k = newpos - 1;
162 | fwrite (&k, sizeof (int), 1, rpfile);
163 | }
164 | newpos++;
165 | }
166 | }
167 |
168 | if (feof (innfile[fasta]) != 0)
169 | not_done = FALSE;
170 | else
171 | not_done = TRUE;
172 |
173 | if (not_done)
174 | {
175 | /* sprintf(sss, ""); what is this doing? */
176 | fgets (sss, 256, innfile[fasta]);
177 | if (strlen (sss) < 1)
178 | not_done = FALSE;
179 | }
180 | }
181 | contig_length[fasta] = newpos;
182 | if (in_repeat)
183 | fwrite (&newpos, sizeof (int), 1, rpfile);
184 |
185 | printf ("\n Finished reading fasta %d \n\n", fasta);
186 |
187 | for (i = 0; i < 4; i++)
188 | scratch_pad[newpos + i] = 'A'; /* Pad with A's */
189 | printf ("\n\t\tBeginning to index contig %s which has length %d\n\n", contig_descript[fasta], contig_length[fasta]);
190 | flat_index_contig_high(flat_index, scratch_pad, contig_length[fasta], idepth, high_depth, double_high);
191 | printf ("\nFinished indexing contig %d \n\n", fasta);
192 |
193 | i = contig_length[fasta] % 4;
194 | int ts;
195 | if (i == 0)
196 | ts = newpos / 4;
197 | else
198 | ts = newpos / 4 + 1;
199 |
200 | compressed_map[fasta] = ucvector (0, ts);
201 | j = 0;
202 | for (i = 0; i < contig_length[fasta]; i += 4, j++)
203 | compressed_map[fasta][j] = (unsigned char) encode_basepairs (&scratch_pad[i], 4);
204 | fwrite (compressed_map[fasta], sizeof (unsigned char), j, cfile);
205 | free_ucvector (compressed_map[fasta], 0, ts);
206 | }
207 | printf ("\n Finishing up now \n\n");
208 | fclose (cfile);
209 | fwrite (double_high, sizeof (unsigned char), high_size, highfile);
210 | fclose (highfile);
211 | for (i = 0; i < N; i++)
212 | fprintf (sfile, "%d\t%d\t%s\n", contig_length[i], repeat_count[i], contig_descript[i]);
213 | fprintf (sfile, "%d\n", idepth);
214 |
215 | sprintf (sss, "%s.idx", basename);
216 | if ((idfile = fopen (sss, "w")) == (FILE *) NULL)
217 | {
218 | printf ("\nCould Not Open file %s\n", sss);
219 | exit (1);
220 | }
221 | fwrite (flat_index, sizeof (int), total_index, idfile);
222 | fclose (idfile);
223 |
224 | fprintf (sfile, "%s.cdx\n", basename);
225 | fprintf (sfile, "%s.idx\n", basename);
226 | fprintf (sfile, "%s.rdx\n", basename);
227 | fprintf (sfile, "%s.15x\n", basename);
228 | fclose (sfile);
229 | printf ("\n Finished reading fasta got here.\n");
230 | return (0);
231 | }
232 |
--------------------------------------------------------------------------------
/src/mem.c:
--------------------------------------------------------------------------------
1 | #include "mem.h"
2 | #include "dbg.h"
3 |
4 | /*---------------------------------------------------------------------*/
5 |
/*
 * Allocate a char vector whose valid indices run from nl to nh inclusive.
 * The returned pointer is shifted by -nl; release it with
 * free_cvector(v, nl, nh).  Allocation failure is fatal (logs and exits with
 * status 1) instead of handing back an unusable offset pointer.
 */
char *cvector(int nl, int nh)
{
    size_t count = (size_t) ((long long) nh - nl + 1);
    /* Never request zero bytes: malloc(0) may legitimately return NULL. */
    char *v = malloc((count ? count : 1) * sizeof *v);

    if (!v) {
        log_err("allocation failure in cvector()");
        exit(1);
    }
    return v - nl;
}
15 |
16 | /*---------------------------------------------------------------------*/
17 |
18 | uchar *ucvector(int nl, int nh)
19 | {
20 | uchar *v;
21 |
22 | v = (uchar *) malloc((unsigned) (nh - nl + 1) * sizeof(uchar));
23 | if (!v)
24 | log_err("allocation failure in cvector()");
25 | return v - nl;
26 | }
27 |
28 | /*---------------------------------------------------------------------*/
29 |
/*
 * Allocate an int vector whose valid indices run from nl to nh inclusive.
 * The returned pointer is shifted by -nl; release it with
 * free_ivector(v, nl, nh).  Allocation failure is fatal (logs and exits with
 * status 1).
 */
int *ivector(int nl, int nh)
{
    size_t count = (size_t) ((long long) nh - nl + 1);
    /* Never request zero bytes: malloc(0) may legitimately return NULL. */
    int *v = malloc((count ? count : 1) * sizeof *v);

    if (!v) {
        log_err("allocation failure in ivector()");
        exit(1);
    }
    return v - nl;
}
39 |
40 | /*---------------------------------------------------------------------*/
41 |
/*
 * Allocate a double vector whose valid indices run from nl to nh inclusive.
 * The returned pointer is shifted by -nl; release it with
 * free_dvector(v, nl, nh).  Allocation failure is fatal (logs and exits with
 * status 1).
 */
double *dvector(int nl, int nh)
{
    size_t count = (size_t) ((long long) nh - nl + 1);
    /* Never request zero bytes: malloc(0) may legitimately return NULL. */
    double *v = malloc((count ? count : 1) * sizeof *v);

    if (!v) {
        log_err("allocation failure in dvector()");
        exit(1);
    }
    return v - nl;
}
51 |
52 | /*---------------------------------------------------------------------*/
53 |
/*
 * Allocate an int matrix addressable as m[nrl..nrh][ncl..nch] (bounds
 * inclusive).  Each row is a separate allocation; release the whole matrix
 * with free_imatrix() using the same bounds.  Allocation failure is fatal
 * (logs and exits with status 1).
 */
int **imatrix(int nrl, int nrh, int ncl, int nch)
{
    size_t rows = (size_t) ((long long) nrh - nrl + 1);
    size_t cols = (size_t) ((long long) nch - ncl + 1);
    /* Never request zero bytes: malloc(0) may legitimately return NULL. */
    int **m = malloc((rows ? rows : 1) * sizeof *m);

    if (!m) {
        log_err("allocation failure 1 in imatrix()");
        exit(1);
    }
    m -= nrl;

    for (int i = nrl; i <= nrh; i++) {
        m[i] = malloc((cols ? cols : 1) * sizeof *m[i]);
        if (!m[i]) {
            log_err("allocation failure 2 in imatrix()");
            exit(1);
        }
        m[i] -= ncl;
    }
    return m;
}
71 |
72 | /*---------------------------------------------------------------------*/
73 |
/*
 * Release a matrix obtained from imatrix() with the same bounds: rows are
 * freed from nrh down to nrl, then the row-pointer array.  nch is unused.
 */
void free_imatrix(int **m, int nrl, int nrh, int ncl, int nch)
{
    int row = nrh;

    (void) nch;
    while (row >= nrl) {
        int *row_base = m[row] + ncl;   /* undo the column offset */

        free(row_base);
        row--;
    }
    free(m + nrl);                      /* undo the row offset */
}
82 |
83 | /*---------------------------------------------------------------------*/
84 |
/*
 * Allocate a double matrix addressable as m[nrl..nrh][ncl..nch] (bounds
 * inclusive).  Each row is a separate allocation; release the whole matrix
 * with free_dmatrix() using the same bounds.  Allocation failure is fatal
 * (logs and exits with status 1).
 */
double **dmatrix(int nrl, int nrh, int ncl, int nch)
{
    size_t rows = (size_t) ((long long) nrh - nrl + 1);
    size_t cols = (size_t) ((long long) nch - ncl + 1);
    /* Never request zero bytes: malloc(0) may legitimately return NULL. */
    double **m = malloc((rows ? rows : 1) * sizeof *m);

    if (!m) {
        log_err("allocation failure 1 in dmatrix()");
        exit(1);
    }
    m -= nrl;

    for (int i = nrl; i <= nrh; i++) {
        m[i] = malloc((cols ? cols : 1) * sizeof *m[i]);
        if (!m[i]) {
            log_err("allocation failure 2 in dmatrix()");
            exit(1);
        }
        m[i] -= ncl;
    }
    return m;
}
103 |
104 | /*---------------------------------------------------------------------*/
105 |
/*
 * Release a matrix obtained from dmatrix() with the same bounds: rows are
 * freed from nrh down to nrl, then the row-pointer array.  nch is unused.
 */
void free_dmatrix(double **m, int nrl, int nrh, int ncl, int nch)
{
    int row = nrh;

    (void) nch;
    while (row >= nrl) {
        double *row_base = m[row] + ncl;    /* undo the column offset */

        free(row_base);
        row--;
    }
    free(m + nrl);                          /* undo the row offset */
}
114 |
115 | /*---------------------------------------------------------------------*/
116 |
/*
 * Allocate a char matrix addressable as m[nrl..nrh][ncl..nch] (bounds
 * inclusive).  Each row is a separate allocation; release the whole matrix
 * with free_cmatrix() using the same bounds.  Allocation failure is fatal
 * (logs and exits with status 1).
 */
char **cmatrix(int nrl, int nrh, int ncl, int nch)
{
    size_t rows = (size_t) ((long long) nrh - nrl + 1);
    size_t cols = (size_t) ((long long) nch - ncl + 1);
    /* Never request zero bytes: malloc(0) may legitimately return NULL. */
    char **m = malloc((rows ? rows : 1) * sizeof *m);

    if (!m) {
        log_err("allocation failure 1 in cmatrix()");
        exit(1);
    }
    m -= nrl;

    for (int i = nrl; i <= nrh; i++) {
        m[i] = malloc((cols ? cols : 1) * sizeof *m[i]);
        if (!m[i]) {
            log_err("allocation failure 2 in cmatrix()");
            exit(1);
        }
        m[i] -= ncl;
    }
    return m;
}
135 |
136 | /*---------------------------------------------------------------------*/
137 |
138 | uchar **ucmatrix(int nrl, int nrh, int ncl, int nch)
139 | {
140 | int i;
141 | uchar **m;
142 |
143 | m = (uchar **) malloc((unsigned) (nrh - nrl + 1) * sizeof(uchar *));
144 | if (!m)
145 | log_err("allocation failure 1 in cmatrix()");
146 | m -= nrl;
147 |
148 | for (i = nrl; i <= nrh; i++) {
149 | m[i] = (uchar *) malloc((unsigned) (nch - ncl + 1) * sizeof(uchar));
150 | if (!m[i])
151 | log_err("allocation failure 2 in cmatrix()");
152 | m[i] -= ncl;
153 | }
154 | return m;
155 | }
156 |
157 | /*---------------------------------------------------------------------*/
158 |
/*
 * Release a matrix obtained from cmatrix() with the same bounds: rows are
 * freed from nrh down to nrl, then the row-pointer array.  nch is unused.
 */
void free_cmatrix(char **m, int nrl, int nrh, int ncl, int nch)
{
    int row = nrh;

    (void) nch;
    while (row >= nrl) {
        char *row_base = m[row] + ncl;  /* undo the column offset */

        free(row_base);
        row--;
    }
    free(m + nrl);                      /* undo the row offset */
}
167 |
168 | /*---------------------------------------------------------------------*/
169 |
170 | void free_ucmatrix(uchar ** m, int nrl, int nrh, int ncl, int nch)
171 | {
172 | int i;
173 |
174 | for (i = nrh; i >= nrl; i--)
175 | free((void *) (m[i] + ncl));
176 | free((void *) (m + nrl));
177 | }
178 |
179 | /*---------------------------------------------------------------------*/
180 |
/*
 * Release a vector obtained from cvector(nl, nh).  nh is unused; nl undoes
 * the index offset applied at allocation time.
 */
void free_cvector(char *v, int nl, int nh)
{
    char *base = v + nl;

    (void) nh;
    free(base);
}
185 |
186 | /*---------------------------------------------------------------------*/
187 |
188 | void free_ucvector(uchar * v, int nl, int nh)
189 | {
190 | free((void *) (v + nl));
191 | }
192 |
193 | /*---------------------------------------------------------------------*/
194 |
/*
 * Release a vector obtained from ivector(nl, nh).  nh is unused; nl undoes
 * the index offset applied at allocation time.
 */
void free_ivector(int *v, int nl, int nh)
{
    int *base = v + nl;

    (void) nh;
    free(base);
}
199 |
200 | /*---------------------------------------------------------------------*/
201 |
/*
 * Release a vector obtained from dvector(nl, nh).  nh is unused; nl undoes
 * the index offset applied at allocation time.
 */
void free_dvector(double *v, int nl, int nh)
{
    double *base = v + nl;

    (void) nh;
    free(base);
}
206 |
207 | /*---------------------------------------------------------------------*/
208 |
/*
 * Allocate a zero-based vector of `row` ints with every element set to 0.
 * Exits with status 1 when the allocation fails.
 */
int *
create_ivec (int row)
{
  int *vec = (int *) malloc ((unsigned) row * sizeof (int));
  int idx = 0;

  check_mem(vec);
  while (idx < row)
    {
      vec[idx] = 0;
      idx++;
    }
  return vec;

error:
  exit(1);
}
223 |
224 | /*---------------------------------------------------------------------*/
225 |
/*
 * Allocate a zero-based vector of `row` chars with every element set to 0.
 * Exits with status 1 when the allocation fails.
 */
char *
create_cvec (int row)
{
  char *vec = (char *) malloc ((unsigned) row * sizeof (char));
  int idx = 0;

  check_mem(vec);
  while (idx < row)
    {
      vec[idx] = 0;
      idx++;
    }
  return vec;

error:
  exit(1);
}
240 |
241 | /*---------------------------------------------------------------------*/
242 |
/*
 * Allocate a zero-based row x col matrix of ints with every element set
 * to 0.  Each row is a separate allocation.  Exits with status 1 when any
 * allocation fails.
 */
int **
create_imat (int row, int col)
{
  int **m = (int **) malloc ((unsigned) row * sizeof (int *));
  check_mem(m);

  /* Iterate over the rows: the row-pointer array holds `row` entries, so
     bounding this loop by `col` overruns it whenever col > row and leaves
     rows unallocated whenever col < row. */
  for (int i = 0; i < row; i++)
    {
      m[i] = (int *) malloc ((unsigned) col * sizeof (int));
      check_mem(m[i]);
      for (int j = 0; j < col; j++ )
        {
          m[i][j] = 0;
        }
    }
  return m;

error:
  exit(1);
}
263 |
264 | /*---------------------------------------------------------------------*/
265 |
/*
 * Allocate a zero-based row x col matrix of chars with every element set
 * to 0.  Each row is a separate allocation.  Exits with status 1 when any
 * allocation fails.
 */
char **
create_cmat (int row, int col)
{
  char **m = (char **) malloc ((unsigned) row * sizeof (char *));
  check_mem(m);

  /* Iterate over the rows: the row-pointer array holds `row` entries, so
     bounding this loop by `col` overruns it whenever col > row and leaves
     rows unallocated whenever col < row. */
  for (int i = 0; i < row; i++)
    {
      m[i] = (char *) malloc ((unsigned) col * sizeof (char));
      check_mem(m[i]);
      for (int j = 0; j < col; j++)
        {
          m[i][j] = 0;
        }
    }
  return m;

error:
  exit(1);
}
286 |
287 | /*---------------------------------------------------------------------*/
288 |
289 |
--------------------------------------------------------------------------------
/src/mem.h:
--------------------------------------------------------------------------------
#ifndef __mem_h__
#define __mem_h__

/*
 * Allocation helpers implemented in mem.c.
 *
 * The *vector / *matrix functions take inclusive index bounds (nl..nh, or
 * nrl..nrh by ncl..nch) and return pointers offset so that those indices are
 * valid; release them with the matching free_* function and the same bounds.
 * The create_* functions return zero-based, zero-filled storage.
 */

#ifndef UINT_TYPE
typedef unsigned int uint;
typedef unsigned char uchar;
#define UINT_TYPE 1
#endif

int *ivector(int nl, int nh);
char *cvector(int nl, int nh);
uchar *ucvector(int nl, int nh);
double *dvector(int nl, int nh);
double **dmatrix(int nrl, int nrh, int ncl, int nch);
char **cmatrix(int nrl, int nrh, int ncl, int nch);
uchar **ucmatrix(int nrl, int nrh, int ncl, int nch);
void free_cvector(char *v, int nl, int nh);
void free_ucvector(uchar *v, int nl, int nh);
void free_ivector(int *v, int nl, int nh);
void free_dvector(double *v, int nl, int nh);
void free_dmatrix(double **m, int nrl, int nrh, int ncl, int nch);
void free_cmatrix(char **m, int nrl, int nrh, int ncl, int nch);
void free_ucmatrix(uchar **m, int nrl, int nrh, int ncl, int nch);
int **imatrix(int nrl, int nrh, int ncl, int nch);
void free_imatrix(int **m, int nrl, int nrh, int ncl, int nch);

int *create_ivec(int row);
char *create_cvec(int row);
int **create_imat(int row, int col);
char **create_cmat(int row, int col);

#endif /* __mem_h__ */
--------------------------------------------------------------------------------
/src/mpd.c:
--------------------------------------------------------------------------------
1 | #include "mpd.h"
2 |
3 | /*---------------------------------------------------------------------*/
4 | void
5 | make_greedy_pools (FILE *outfile, PNODE **plist, char **cmat, int *pc, int **redund, int *bstart, int Nregs, int N, int still, int *current_pool, int this_pool, int max_pool)
6 | {
7 | int i, j;
8 | if (still < 1)
9 | return;
10 |
11 | int this_p = -1;
12 | if (this_pool == 0)
13 | {
14 | // fprintf (outfile, "\nStarting a New Pool\n");
15 | int best = -1;
16 | for (i = 0; i < Nregs; i++)
17 | if (bstart[i] >= 0)
18 | if (pc[bstart[i]] > best)
19 | {
20 | best = pc[bstart[i]];
21 | this_p = bstart[i];
22 | }
23 | if (this_p < 1)
24 | {
25 | printf ("\n This is impossible \n");
26 | for (i = 0; i < Nregs; i++)
27 | printf ("\n Best start region %d is %d", i, bstart[i]);
28 | exit (1);
29 | }
30 | }
31 | else
32 | {
33 | int best = -1;
34 | for (i = 0; i < N; i++)
35 | if (plist[i] != NULL)
36 | if (pc[i] > best)
37 | {
38 | int it_fits = TRUE;
39 | for (j = 0; j < this_pool; j++)
40 | if (!cmat[i][current_pool[j]])
41 | it_fits = FALSE;
42 | if (it_fits)
43 | {
44 | best = pc[i];
45 | this_p = i;
46 | }
47 | }
48 | }
49 |
50 | printf ("\n Got here with this_p = %d \n", this_p);
51 | if (this_p >= 0)
52 | {
53 | fprintf (outfile, "%d\t%s\t%g\t%g\t%s\t%g\t%g\t%d\t%d\t%d\t%d\t%d\t%d\t%g\t%g\t%s\n",
54 | this_pool,
55 | plist[this_p]->forward->sequence, plist[this_p]->forward->tm, plist[this_p]->forward->gc,
56 | plist[this_p]->reverse->sequence, plist[this_p]->reverse->tm, plist[this_p]->reverse->gc,
57 | plist[this_p]->chrom,
58 | plist[this_p]->forward->start, plist[this_p]->forward->end,
59 | plist[this_p]->reverse->start, plist[this_p]->reverse->end,
60 | plist[this_p]->length, plist[this_p]->gc, plist[this_p]->tm, plist[this_p]->sequence);
61 | current_pool[this_pool] = this_p;
62 | for (i = 0; i < MAX_PAIRS; i++)
63 | if (redund[this_p][i] >= 0)
64 | {
65 | int ii = redund[this_p][i];
66 | for (j = 0; j < Nregs; j++)
67 | if (bstart[j] == ii)
68 | bstart[j] = -1;
69 | plist[ii] = NULL;
70 | pc[ii] = 0;
71 | still--;
72 | }
73 | this_pool++;
74 | this_pool %= max_pool;
75 | }
76 | else
77 | this_pool = 0;
78 |
79 | printf ("\ngoing to make_pools with N = %d, still = %d, this_pool = %d, max_pool = %d\n\n", N, still, this_pool,
80 | max_pool);
81 |
82 | make_greedy_pools (outfile, plist, cmat, pc, redund, bstart, Nregs, N, still, current_pool, this_pool, max_pool);
83 | }
84 |
85 | /*---------------------------------------------------------------------*/
86 | void
87 | make_less_greedy_pools (FILE *outfile, PNODE **plist, char **cmat, int *pc, int **redund, int *bstart, int Nregs, int N, int still,
88 | int *current_pool, int this_pool, int max_pool)
89 | {
90 | int i, j;
91 | if (still < 1)
92 | return;
93 |
94 | int this_p = -1;
95 | if (this_pool == 0)
96 | {
97 | // fprintf (outfile, "\nStarting a New Pool\n");
98 | int best = 100000000;
99 | for (i = 0; i < Nregs; i++)
100 | if (bstart[i] >= 0)
101 | if (pc[bstart[i]] < best)
102 | {
103 | best = pc[bstart[i]];
104 | this_p = bstart[i];
105 | }
106 | if (this_p < 1)
107 | {
108 | printf ("\n This is impossible \n");
109 | for (i = 0; i < Nregs; i++)
110 | printf ("\n Best start region %d is %d", i, bstart[i]);
111 | exit (1);
112 | }
113 | }
114 | else
115 | {
116 | int best = -1;
117 | for (i = 0; i < N; i++)
118 | if (plist[i] != NULL)
119 | if (pc[i] > best)
120 | {
121 | int it_fits = TRUE;
122 | for (j = 0; j < this_pool; j++)
123 | if (!cmat[i][current_pool[j]])
124 | it_fits = FALSE;
125 | if (it_fits)
126 | {
127 | best = pc[i];
128 | this_p = i;
129 | }
130 | }
131 | }
132 |
133 | printf ("\n Got here with this_p = %d \n", this_p);
134 | if (this_p >= 0)
135 | {
136 | fprintf (outfile, "%d\t%s\t%g\t%g\t%s\t%g\t%g\t%d\t%d\t%d\t%d\t%d\t%d\t%g\t%g\t%s\n",
137 | this_pool,
138 | plist[this_p]->forward->sequence, plist[this_p]->forward->tm, plist[this_p]->forward->gc,
139 | plist[this_p]->reverse->sequence, plist[this_p]->reverse->tm, plist[this_p]->reverse->gc,
140 | plist[this_p]->chrom,
141 | plist[this_p]->forward->start, plist[this_p]->forward->end,
142 | plist[this_p]->reverse->start, plist[this_p]->reverse->end,
143 | plist[this_p]->length, plist[this_p]->gc, plist[this_p]->tm, plist[this_p]->sequence);
144 | current_pool[this_pool] = this_p;
145 | for (i = 0; i < MAX_PAIRS; i++)
146 | if (redund[this_p][i] >= 0)
147 | {
148 | int ii = redund[this_p][i];
149 | for (j = 0; j < Nregs; j++)
150 | if (bstart[j] == ii)
151 | bstart[j] = -1;
152 | plist[ii] = NULL;
153 | pc[ii] = 0;
154 | still--;
155 | }
156 | this_pool++;
157 | this_pool %= max_pool;
158 | }
159 | else
160 | this_pool = 0;
161 |
162 | printf ("\ngoing to make_pools with N = %d, still = %d, this_pool = %d, max_pool = %d\n\n", N, still, this_pool,
163 | max_pool);
164 |
165 | make_less_greedy_pools (outfile, plist, cmat, pc, redund, bstart, Nregs, N, still, current_pool, this_pool, max_pool);
166 | }
167 | /*---------------------------------------------------------------------*/
168 |
169 | int
170 | is_poolable_primer (PNODE * p1, PNODE * p2, int size_diff_threshold, int tm_diff_threshold)
171 | {
172 | int i, f1, r1, f2, r2;
173 | char flipf1[80], flipf2[80], flipr1[80], flipr2[80];
174 |
175 | for (i = p1->forward->start; i <= p1->reverse->end; i++)
176 | if ((i >= p2->forward->start) && (i <= p2->reverse->end))
177 | return FALSE;
178 |
179 | for (i = p2->forward->start; i <= p2->reverse->end; i++)
180 | if ((i >= p1->forward->start) && (i <= p1->reverse->end))
181 | return FALSE;
182 |
183 | f1 = abs (p1->forward->end - p1->forward->start) + 1;
184 | r1 = abs (p1->reverse->end - p1->reverse->start) + 1;
185 | f2 = abs (p2->forward->end - p2->forward->start) + 1;
186 | r2 = abs (p2->reverse->end - p2->reverse->start) + 1;
187 |
188 | reverse_string (p1->reverse->sequence, flipr1, r1);
189 | reverse_string (p2->reverse->sequence, flipr2, r2);
190 | reverse_string (p1->forward->sequence, flipf1, f1);
191 | reverse_string (p2->forward->sequence, flipf2, f2);
192 |
193 | if (check_uneven_dimer (p1->forward->sequence, flipf2, f1, f2))
194 | return FALSE;
195 |
196 | if (check_uneven_dimer (p1->reverse->sequence, flipr2, r1, r2))
197 | return FALSE;
198 |
199 | if (check_uneven_dimer (p1->forward->sequence, flipr2, f1, r2))
200 | return FALSE;
201 |
202 | if (check_uneven_dimer (p1->reverse->sequence, flipf2, r1, f2))
203 | return FALSE;
204 |
205 | if (fabs (p1->forward->tm - p2->forward->tm) > tm_diff_threshold)
206 | return FALSE;
207 |
208 | if (fabs (p1->reverse->tm - p2->reverse->tm) > tm_diff_threshold)
209 | return FALSE;
210 |
211 | if (abs (p1->length - p2->length) > size_diff_threshold)
212 | return FALSE;
213 |
214 | return TRUE;
215 | }
216 | /*---------------------------------------------------------------------*/
217 |
218 | double
219 | fill_dist (SNODE ** list, int n, int *priority, int *selected, int which, int start, int stop, double *dist)
220 | {
221 | int i, j, flag, m;
222 | double totd, dtemp;
223 |
224 | totd = 0;
225 | j = m = 0;
226 |
227 | for (i = 0; i < n; i++)
228 | {
229 | if (priority[i] == which)
230 | {
231 | flag = TRUE;
232 | j = i - 1;
233 | dtemp = 0;
234 | while ((j >= 0) && (flag))
235 | {
236 | j--;
237 | if (j >= 0)
238 | if (selected[j] == 1)
239 | flag = FALSE;
240 | }
241 | if (j >= 0)
242 | dtemp = list[i]->pos - list[j]->pos;
243 | else
244 | dtemp = list[i]->pos - start;
245 |
246 | dist[i] = dtemp * dtemp;
247 |
248 | j = i + 1;
249 |
250 | flag = TRUE;
251 | dtemp = 0;
252 | while ((j < n) && (flag))
253 | {
254 | j++;
255 | if (j < n)
256 | if (selected[j] == 1)
257 | flag = FALSE;
258 | }
259 | if (j < n)
260 | dtemp = list[j]->pos - list[i]->pos;
261 | else
262 | dtemp = stop - list[i]->pos;
263 |
264 | dist[i] += dtemp * dtemp;
265 | }
266 | else
267 | dist[i] = 0;
268 |
269 | totd += dist[i];
270 | if (selected[i] == 1)
271 | m++;
272 | }
273 |
274 | if (totd > 0)
275 | for (i = 0; i < n; i++)
276 | dist[i] /= totd;
277 |
278 | return totd;
279 |
280 | }
281 |
282 | /*---------------------------------------------------------------------*/
283 | SNODE *
284 | snp_alloc (void)
285 | {
286 | SNODE *tn;
287 |
288 | tn = (SNODE *) malloc ((unsigned) sizeof (struct snp_node));
289 | if (!tn)
290 | log_err ("allocation failure in snp_alloc()");
291 |
292 | sprintf (tn->name, "tempname");
293 | tn->no_disc = 0;
294 | tn->chrom = 0;
295 | tn->pos = 0;
296 | tn->het = 0.0;
297 | tn->baseA = 'N';
298 | tn->baseB = 'N';
299 | tn->no_pairs = 0;
300 | tn->pair = (PNODE **) malloc ((unsigned) (MAX_PAIRS + 1) * sizeof (PNODE *));
301 | if (!tn->pair)
302 | log_err ("allocation failure 2 in snp_alloc()");
303 |
304 | return tn;
305 | }
306 |
307 | /*---------------------------------------------------------------------*/
308 | AMPNODE *
309 | amp_alloc (void)
310 | {
311 | AMPNODE *tn;
312 |
313 | tn = (AMPNODE *) malloc ((unsigned) sizeof (struct amp_node));
314 | if (!tn)
315 | log_err ("allocation failure in amp_alloc()");
316 |
317 | sprintf (tn->name, "tempname");
318 | tn->chrom = 0;
319 | tn->start_pos = 0;
320 | tn->stop_pos = 0;
321 | tn->no_pairs = 0;
322 | tn->pair = (PNODE **) malloc ((unsigned) (MAX_PAIRS + 1) * sizeof (PNODE *));
323 | if (!tn->pair)
324 | log_err ("allocation failure 2 in amp_alloc()");
325 |
326 | return tn;
327 | }
328 |
329 | /*---------------------------------------------------------------------*/
/*
 * Return non-zero when c is one of A, C, G or T in either case, zero otherwise.
 *
 * The argument is converted to unsigned char before toupper(): passing a
 * negative plain char to the <ctype.h> functions is undefined behaviour.
 */
int
isbase (char c)
{
  const int upper = toupper ((unsigned char) c);

  return (upper == 'A') || (upper == 'C') || (upper == 'G') || (upper == 'T');
}
342 |
343 | /*---------------------------------------------------------------------*/
344 | SNODE **
345 | fill_snp_list (FILE * sfile, int *n, unsigned int chrom)
346 | {
347 | int flag, i, temp_no, final_no;
348 | char sss[4096], s[256];
349 | SNODE **temp_snp;
350 |
351 | i = 0;
352 | flag = TRUE;
353 |
354 | printf ("reading snps for chr %d\n", chrom);
355 | temp_snp = (SNODE **) malloc ((unsigned) (2000000) * sizeof (SNODE *));
356 | if (!temp_snp)
357 | log_err ("Allocation failure in Temporary SNP storage");
358 |
359 | fgets (sss, 4094, sfile);
360 |
361 | while (flag)
362 | {
363 | temp_snp[i] = snp_alloc ();
364 | sscanf (sss, "%s\t%d\t%d\t%d\t%s\t%c/%c", temp_snp[i]->name, &temp_snp[i]->no_disc, &temp_snp[i]->chrom, &temp_snp[i]->pos, s, /* het frequency */
365 | &temp_snp[i]->baseA, &temp_snp[i]->baseB);
366 |
367 | temp_snp[i]->pos--;
368 | temp_snp[i]->het = (double) atof (s);
369 | temp_snp[i]->no_pairs = 0;
370 |
371 | if ((isbase (temp_snp[i]->baseA)) && (isbase (temp_snp[i]->baseB))
372 | && (temp_snp[i]->chrom == chrom) && (temp_snp[i]->het > 0.01))
373 | {
374 | i++;
375 | }
376 | else
377 | {
378 | free (temp_snp[i]);
379 | }
380 |
381 | sss[0] = '\0';
382 | if ((!feof (sfile)) && (i < 2000000))
383 | {
384 | fgets (sss, 4094, sfile);
385 | if (strlen (sss) < 3)
386 | flag = FALSE;
387 | }
388 | else
389 | flag = FALSE;
390 | }
391 |
392 | temp_no = i;
393 | qsort (temp_snp, temp_no, sizeof (SNODE *), sort_compare_struct);
394 | final_no = i;
395 | (*n) = final_no;
396 | printf ("\n Total number of SNPs found = %d\n", final_no);
397 |
398 | return temp_snp;
399 | }
400 |
401 | /*---------------------------------------------------------------------*/
402 | AMPNODE **
403 | fill_amp_list (FILE * sfile, int n)
404 | {
405 | int flag, i, temp_no;
406 | char sss[4096];
407 | AMPNODE **temp_amp;
408 |
409 | i = 0;
410 | temp_no = n;
411 | flag = TRUE;
412 |
413 | temp_amp = (AMPNODE **) malloc ((unsigned) (temp_no + 1) * sizeof (AMPNODE *));
414 | if (!temp_amp)
415 | log_err ("Allocation failure in Temporary SNP storage");
416 |
417 | fgets (sss, 4094, sfile);
418 |
419 | while (flag)
420 | {
421 | temp_amp[i] = amp_alloc ();
422 | sscanf (sss, "chr%d\t%d\t%d\t%s",
423 | &temp_amp[i]->chrom, &temp_amp[i]->start_pos, &temp_amp[i]->stop_pos, temp_amp[i]->name);
424 |
425 | temp_amp[i]->start_pos--;
426 | temp_amp[i]->stop_pos--;
427 | temp_amp[i]->no_pairs = 0;
428 | i++;
429 | sss[0] = '\0';
430 | if ((!feof (sfile)) && (i < n))
431 | {
432 | fgets (sss, 4094, sfile);
433 | if (strlen (sss) < 3)
434 | flag = FALSE;
435 | }
436 | else
437 | flag = FALSE;
438 | }
439 | printf ("\n Total number of regions found = %d\n", i);
440 | return temp_amp;
441 | }
442 |
443 | /*---------------------------------------------------------------------*/
444 | int
445 | sort_compare_struct (const void *a, const void *b)
446 | {
447 | //printf("\n Comparing %d with %d ", (*(SNODE **) a)->pos, (*(SNODE **) b)->pos);
448 | if ((*(SNODE **) a)->pos < (*(SNODE **) b)->pos)
449 | return -1;
450 | else if ((*(SNODE **) a)->pos > (*(SNODE **) b)->pos)
451 | return 1;
452 | else
453 | return 0;
454 | }
455 |
456 | /*---------------------------------------------------------------------*/
/* Row/column of an upper-case base in the fill_hs() tables, or -1 for anything else. */
static int
hs_base_slot (char base)
{
  switch (base)
    {
    case 'A':
      return 0;
    case 'C':
      return 1;
    case 'G':
      return 2;
    case 'T':
      return 3;
    default:
      return -1;
    }
}

/*
 * Add the contribution of the dinucleotide (a, b) to the running sums *h
 * and *s.  Only upper-case A, C, G and T are recognised; for any other
 * character in either position both sums are left untouched.
 *
 * (Presumably h and s are nearest-neighbour enthalpy and entropy terms,
 * given how calc_tm() combines them -- confirm against the source of the
 * constants.)
 */
void
fill_hs (char a, char b, double *h, double *s)
{
  /* Rows: first base; columns: second base; both in A, C, G, T order. */
  static const double h_table[4][4] = {
    {9100, 6500, 7800, 8600},
    {5800, 11000, 11900, 7800},
    {5600, 11100, 11000, 6500},
    {6000, 5600, 5800, 9100}
  };
  static const double s_table[4][4] = {
    {24, 17.3, 20.8, 23.9},
    {12.9, 26.6, 27.8, 20.8},
    {13.5, 26.7, 26.6, 17.3},
    {16.9, 13.5, 12.9, 24.0}
  };
  const int row = hs_base_slot (a);
  const int col = hs_base_slot (b);

  if ((row < 0) || (col < 0))
    return;

  (*h) += h_table[row][col];
  (*s) += s_table[row][col];
}
555 |
556 | /*---------------------------------------------------------------------*/
/*
 * Melting-temperature estimate for the first n characters of ss.
 *
 * The fill_hs() contributions of every adjacent pair of characters are
 * accumulated, then combined as  h / (s + 47.16510465) - 294.6124334.
 * With n < 2 no pair is visited and both sums stay at zero.
 */
double
calc_tm (char *ss, int n)
{
  double h_sum = 0.0;
  double s_sum = 0.0;
  int left;

  for (left = 0; left < n - 1; left++)
    fill_hs (ss[left], ss[left + 1], &h_sum, &s_sum);

  return h_sum / (s_sum + 47.16510465) - 294.6124334;
}
574 |
575 | /*---------------------------------------------------------------------*/
576 |
/*
 * Write the k-letter base-4 representation of i into s, most significant
 * digit first, using 0 -> A, 1 -> C, 2 -> G, 3 -> T, followed by '\0'.
 * s must have room for k + 1 characters.
 *
 * The digits are written straight into s.  The previous version built the
 * string in a 256-byte temporary, which overflowed for k >= 256 and was
 * indexed at ss[k] for negative k.  A negative k is now treated as 0.
 *
 * For a negative i the remainder is never 1 or 2, so every non-zero digit
 * comes out as 'T' (same as before).
 */
void
convert_int_basepairs (int i, char *s, int k)
{
  int slot;

  if (k < 0)
    k = 0;

  for (slot = k - 1; slot >= 0; slot--)
    {
      const int digit = i % 4;

      if (digit == 0)
        s[slot] = 'A';
      else if (digit == 1)
        s[slot] = 'C';
      else if (digit == 2)
        s[slot] = 'G';
      else
        s[slot] = 'T';

      i /= 4;   /* truncating division, identical to (i - digit) / 4 */
    }

  s[k] = '\0';
}
607 |
608 | /*---------------------------------------------------------------------*/
/*
 * Expand n packed bytes from s into 4 * n letters in dest, followed by '\0'.
 * Each byte holds four 2-bit codes, most significant pair first, with
 * 0 -> A, 1 -> C, 2 -> G, 3 -> T.  dest needs room for 4 * n + 1 characters.
 */
void
decode_basepairs (unsigned char *s, char *dest, int n)
{
  static const char letters[] = "ACGT";
  char *out = dest;
  int byte_no;

  for (byte_no = 0; byte_no < n; byte_no++)
    {
      const unsigned char packed = s[byte_no];
      int shift;

      for (shift = 6; shift >= 0; shift -= 2)
        *out++ = letters[(packed >> shift) & 3];
    }

  *out = '\0';
}
646 |
647 | /*---------------------------------------------------------------------*/
/*
 * Pack the first n characters of ss into an unsigned integer, two bits per
 * character, first character in the most significant position:
 * 'A' -> 0, 'C' -> 1, 'G' -> 2, and every other character (including
 * lower-case letters) -> 3.
 */
unsigned int
encode_basepairs (char *ss, int n)
{
  unsigned int packed = 0;
  int pos;

  for (pos = 0; pos < n; pos++)
    {
      unsigned int code;

      switch (ss[pos])
        {
        case 'A':
          code = 0;
          break;
        case 'C':
          code = 1;
          break;
        case 'G':
          code = 2;
          break;
        default:
          code = 3;
          break;
        }
      packed = (packed << 2) + code;
    }

  return packed;
}
670 |
671 | /*---------------------------------------------------------------------*/
/*
 * Score array for index sorts.  It is pointed at the relevant score vector
 * immediately before each qsort() call that passes sort_compare_index;
 * presumably that comparator (not visible here) reads it to order the
 * indices by score -- TODO confirm.  Being file-scope state, it makes those
 * sorts non-reentrant.
 */
static double *FQ_LIST;
673 |
674 | int
675 | find_primers (SNODE ** snp_list, AMPNODE * tn, int no_snp, int *flat, char *contig, int L, int min_primer, int max_primer, int amp_max, int amp_min, double min_gc, double max_gc, double min_tm, double max_tm, int depth, int local_depth, int target_base, // originally the bp of the SNP that dave's original primer_snp program was looking for
676 | int start_pos, unsigned char *highmer, int **repeats, int no_repeats, int end_region, int chrom)
677 | {
678 | int i, j, k, *index_left, *index_right, fl, fr, this_amp, pairs_todump;
679 | int *amp_size, *right_side, *left_side, *local_index, local_size, temp_length;
680 | int *plen_l, *plen_r;
681 | double *gc_left, *gc_right, *fq_left, *fq_right;
682 | double *total_fq, *best_gc_left, *best_gc_right, tm_l, tm_r;
683 | char ss[256], sss[256], flip[256], *rt_contig, **best_left, **best_right;
684 | PNODE *product;
685 | PRIMER *p_left, *p_right;
686 |
687 | /*
688 | printf("\nForward sequence\n\n");
689 | printf("\n In tile contig with L = %d; amp_max = %d; amp_min = %d; depth = %d\n\n", L, amp_max, amp_min, depth);
690 | for (i = 0; i < L; i++)
691 | {
692 | printf("%c", contig[i]);
693 | //if ((i + 1 == target_base) || (i == target_base)) printf(" * ");
694 | if (i % 80 == 79) printf("\n");
695 | }
696 | printf("\n");
697 | */
698 |
699 | fq_left = dvector (0, L);
700 | fq_right = dvector (0, L);
701 | gc_left = dvector (0, L);
702 | gc_right = dvector (0, L);
703 | index_left = ivector (0, L);
704 | index_right = ivector (0, L);
705 | plen_l = ivector (0, L);
706 | plen_r = ivector (0, L);
707 | printf("\n Local Depth is %d \n\n",local_depth);
708 |
709 | j = 4;
710 | for (i = 0; i < local_depth; i++)
711 | j *= 4;
712 | j = (j - 4) / 3;
713 |
714 | local_size = j;
715 | local_index = ivector (0, local_size);
716 | for (i = 0; i <= local_size; i++)
717 | local_index[i] = 0;
718 |
719 | flat_index_contig (local_index, contig, L, local_depth);
720 |
721 | // printf("\n About to fill quality in forward direction\n\n");
722 |
723 | fill_quality_scores (flat, local_index, contig, L, minim (target_base - 30, end_region), depth, local_depth,
724 | min_primer, max_primer, fq_left, gc_left, index_left, plen_l,
725 | min_gc, max_gc, min_tm, max_tm, repeats, no_repeats, highmer, start_pos);
726 | rt_contig = cvector (0, L);
727 | reverse_transcribe (contig, rt_contig, L);
728 |
729 | /*
730 | printf("\nReverse Transcribe\n\n");
731 | printf("\n");for (i = 0; i < L; i++) {printf("%c", rt_contig[i]);
732 | if ((i + 1 == L - target_base) || (i + 2 == L - target_base)) printf(" * ");
733 | if (i % 80 == 79)
734 | printf("\n");
735 | }
736 | printf("\n");
737 | */
738 |
739 | // printf ("\n About to fill quality in reverse direction\n");
740 |
741 | fill_quality_scores (flat, local_index, rt_contig, L, minim (target_base - 30, end_region), depth, local_depth,
742 | min_primer, max_primer, fq_right, gc_right, index_right, plen_r,
743 | min_gc, max_gc, min_tm, max_tm, repeats, no_repeats, highmer, start_pos);
744 |
745 | i = 0;
746 | fl = 0;
747 | pairs_todump = MAX_PAIRS;
748 | amp_size = ivector (0, pairs_todump);
749 | best_left = cmatrix (0, pairs_todump, 0, 256);
750 | best_right = cmatrix (0, pairs_todump, 0, 256);
751 | total_fq = dvector (0, pairs_todump);
752 | right_side = ivector (0, pairs_todump);
753 | left_side = ivector (0, pairs_todump);
754 | best_gc_left = dvector (0, pairs_todump);
755 | best_gc_right = dvector (0, pairs_todump);
756 |
757 | printf("\n About to start finding primers\n");
758 |
759 | while ((fq_left[index_left[i]] < 1e7) && (fl < pairs_todump))
760 | {
761 | k = start_pos + index_left[i];
762 | //printf("\n pos (k) = %d\n\n", k);
763 | if ((!poly_under_primer (k, k + plen_l[index_left[i]], snp_list, 0, no_snp - 1, (no_snp) / 2)) &&
764 | (gc_left[index_left[i]] >= min_gc) && (gc_left[index_left[i]] <= max_gc) &&
765 | (is_not_repeat (k, k + plen_l[index_left[i]], repeats, no_repeats, 0.001)))
766 | {
767 | j = 0;
768 | fr = 0;
769 | strncpy (ss, contig + index_left[i], plen_l[index_left[i]]);
770 | ss[plen_l[index_left[i]]] = '\0';
771 | tm_l = calc_tm (ss, plen_l[index_left[i]]);
772 |
773 | if (fq_right[index_right[j]] >= 1e7)
774 | {
775 | printf("\n No right side primers \n\n");
776 | break;
777 | }
778 | printf("\nMatching 5' %s 3' (fq = %g, gc = %g, len = %d tm = %g) with", ss, fq_left[index_left[i]], gc_left[index_left[i]], plen_l[index_left[i]], tm_l);
779 |
780 | while ((fq_right[index_right[j]] < 1e7) && (fr < 1))
781 | {
782 | temp_length = L - index_right[j] - index_left[i];
783 |
784 | /*
785 | * strncpy(sss,rt_contig+index_right[j],primer
786 | * ); sss[primer] = '\0'; printf("\n This
787 | * primer pair %s appears to %d
788 | * \n",sss,temp_length);
789 | */
790 |
791 | k = start_pos + L - index_right[j];
792 | if ((!poly_under_primer (k - plen_r[index_right[j]], k, snp_list, 0, no_snp - 1, (no_snp) / 2)) &&
793 | (temp_length >= amp_min) && (temp_length <= amp_max) &&
794 | (is_not_repeat (k - plen_r[index_right[j]], k, repeats, no_repeats, 0.001)))
795 | {
796 | strncpy (sss, rt_contig + index_right[j], plen_r[index_right[j]]);
797 | sss[plen_r[index_right[j]]] = '\0';
798 | reverse_string (sss, flip, plen_r[index_right[j]]);
799 | if (check_uneven_dimer (ss, flip, plen_l[index_left[i]], plen_r[index_right[j]]))
800 | {
801 | printf("\n\t\t\tNot 5' %s 3' because of a dimer", sss);
802 | }
803 | else
804 | {
805 | tm_r = calc_tm (sss, plen_r[index_right[j]]);
806 | printf("\n In here with tm_l = %g and tm_r = %g\n\n",tm_r,tm_l);
807 | if (fabs (tm_l - tm_r) < 5.0)
808 | {
809 | if (fr == 0)
810 | {
811 | this_amp = L - index_right[j];
812 | right_side[fl] = this_amp;
813 | left_side[fl] = index_left[i];
814 | amp_size[fl] = temp_length;
815 | sprintf (best_left[fl], "%s", ss);
816 | sprintf (best_right[fl], "%s", sss);
817 | total_fq[fl] = fq_right[index_right[j]] + fq_left[index_left[i]];
818 | best_gc_left[fl] = gc_left[index_left[i]];
819 | best_gc_right[fl] = gc_right[index_right[j]];
820 | }
821 | product = product_alloc ();
822 | p_left = primer_alloc ();
823 | p_right = primer_alloc ();
824 | p_left->sequence = cvector (0, plen_l[index_left[i]] + 1);
825 | sprintf (p_left->sequence, "%s", ss);
826 | p_right->sequence = cvector (0, plen_r[index_right[j]] + 1);
827 | sprintf (p_right->sequence, "%s", sss);
828 | p_left->tm = tm_l;
829 | p_right->tm = tm_r;
830 | p_left->gc = gc_left[index_left[i]];
831 | p_right->gc = gc_right[index_right[j]];
832 | p_left->start = start_pos + index_left[i];
833 | p_left->end = p_left->start + plen_l[index_left[i]];
834 | p_right->end = start_pos + L - (index_right[j]);
835 | p_right->start = p_right->end - plen_r[index_right[j]];
836 | p_left->end--;
837 | p_right->end--;
838 | product->forward = p_left;
839 | product->reverse = p_right;
840 | product->sequence = cvector (0, temp_length + 1);
841 | strncpy (product->sequence, contig + index_left[i], temp_length);
842 | product->sequence[temp_length] = '\0';
843 | product->length = temp_length;
844 | product->gc = calc_gc (product->sequence, temp_length);
845 | product->tm = 41.0 * product->gc - 675.0 / (double) product->length - 21.4624334;
846 | product->chrom = chrom;
847 | tn->pair[tn->no_pairs++] = product;
848 |
849 | printf("\n\tSuccess with 5' %s 3' (fq = %g, gc = %g, len = %d tm = %g)", sss, fq_right[index_right[j]], gc_right[index_right[j]], plen_r[index_right[j]], tm_r);
850 | fr++;
851 | }
852 | }
853 | }
854 | j++;
855 | }
856 | if (fr > 0)
857 | fl++;
858 | }
859 | i++;
860 | }
861 |
862 |
863 | if (fl > 0)
864 | {
865 | for (i = 0; i <= pairs_todump; i++)
866 | index_left[i] = i;
867 |
868 |
869 | FQ_LIST = total_fq;
870 | qsort ((void *) index_left, fl, sizeof (int), sort_compare_index);
871 |
872 |
873 | this_amp = right_side[index_left[0]];
874 |
875 | //for(i=0;ipos);
921 |
922 | if ((list[which]->pos >= p_start) && (list[which]->pos <= p_end))
923 | {
924 | printf("\nA primer which goes from %d to %d appears to have %s under it at pos %d\n\n", p_start,p_end,list[which]->name,list[which]->pos);
925 | return TRUE;
926 | }
927 | if (stop - start <= 1)
928 | return FALSE;
929 |
930 | if (list[which]->pos > p_end)
931 | {
932 | if (start >= which)
933 | return FALSE;
934 | else
935 | return poly_under_primer (p_start, p_end, list, start, which, (which + start) / 2);
936 | }
937 | if (list[which]->pos < p_start)
938 | {
939 | if (which >= stop)
940 | return FALSE;
941 | else
942 | return poly_under_primer (p_start, p_end, list, which, stop, (stop + which) / 2);
943 | }
944 | return FALSE;
945 |
946 |
947 | }
948 |
949 | /*---------------------------------------------------------------------*/
/*
 * Fraction of the first n characters of s that are G or C (either case).
 * Returns 0 when n <= 0.
 *
 * Each character is converted to unsigned char before toupper(): passing a
 * negative plain char to the <ctype.h> functions is undefined behaviour.
 */
double
calc_gc (char *s, int n)
{
  int i, gc;

  if (n <= 0)
    return 0;

  gc = 0;
  for (i = 0; i < n; i++)
    {
      const int c = toupper ((unsigned char) s[i]);

      if ((c == 'G') || (c == 'C'))
        gc++;
    }

  return (double) gc / (double) n;
}
969 |
970 | /*---------------------------------------------------------------------*/
971 | PRIMER *
972 | primer_alloc (void)
973 | {
974 | PRIMER *tn;
975 |
976 | tn = (PRIMER *) malloc ((unsigned) sizeof (struct primer_node));
977 | if (!tn)
978 | log_err ("allocation failure in primer_alloc");
979 |
980 | tn->start = 0;
981 | tn->end = 0;
982 | tn->sequence = NULL;
983 | tn->tm = 0;
984 | tn->gc = 0;
985 |
986 | return tn;
987 |
988 | }
989 |
990 | /*---------------------------------------------------------------------*/
991 | PNODE *
992 | product_alloc (void)
993 | {
994 | PNODE *tn;
995 |
996 | tn = (PNODE *) malloc ((unsigned) sizeof (struct primer_pair));
997 | if (!tn)
998 | log_err ("allocation failure in primer_alloc()");
999 |
1000 | tn->forward = NULL;
1001 | tn->reverse = NULL;
1002 | tn->sequence = NULL;
1003 | tn->length = 0;
1004 | tn->gc = 0;
1005 | tn->tm = 0;
1006 | tn->chrom = 0;
1007 |
1008 | return tn;
1009 | }
1010 |
1011 | /*---------------------------------------------------------------------*/
1012 | void
1013 | fill_quality_scores (int *flat, int *local, char *contig, int L, int window, int depth, int ld, int min_primer,
1014 | int max_primer, double *fq_left, double *gc_left, int *index_left, int *plen, double min_gc,
1015 | double max_gc, double min_tm, double max_tm, int **repeats, int no_repeats, unsigned char *highmer,
1016 | int start_pos)
1017 | {
1018 | int **reps_left, **local_reps, tail, primer;
1019 | int i, j, k, m, rm, min_match, offset, *uflag;
1020 | char flip[256];
1021 | double discount, thisd, tm, fq_temp, gc_temp;
1022 |
1023 | reps_left = imatrix (0, window, 0, depth);
1024 | local_reps = imatrix (0, window, 0, depth);
1025 |
1026 | for (j = 0; j <= window; j++)
1027 | for (i = 0; i <= depth; i++)
1028 | local_reps[j][i] = reps_left[j][i] = 0;
1029 |
1030 | uflag = ivector (0, window);
1031 | k = no_repeats / 2;
1032 |
1033 | for (i = 0; i < window; i++)
1034 | {
1035 | count_copys (local, contig + i, ld, local_reps[i]);
1036 | count_copys (flat, contig + i, depth, reps_left[i]);
1037 |
1038 | if (check_15mer (contig + i, highmer, 15) > 0)
1039 | uflag[i] = TRUE;
1040 | else
1041 | uflag[i] = FALSE;
1042 |
1043 | //for(j=1;j<=depth;j++) printf("\nAt window position %d, depth %d Found %d copys to the left %c", i,j,reps_left[i][j],*(contig+i+j-1));
1044 | //for(j=1;j<=ld;j++) printf("\nAt window position %d, local depth %d Found %d local copys to the left %c", i,j,local_reps[i][j],*(contig+i+j-1));
1045 | }
1046 |
1047 | // discount = 0.25; One base less lowers score by 1/4
1048 | // discount = 0.125; One base less lowers score by 1/8
1049 | // discount = 0.0625; One base less lowers score by 1/16
1050 | discount = 0.0625;
1051 |
1052 | for (i = 0; i <= window; i++)
1053 | {
1054 | fq_left[i] = 1e9;
1055 | gc_left[i] = 2.0;
1056 | }
1057 | min_match = 0;
1058 | for (i = 0; i < window - max_primer; i++)
1059 | {
1060 | for (primer = min_primer; primer <= max_primer; primer++)
1061 | {
1062 | offset = primer - ld;
1063 | tail = primer - 15;
1064 | tm = calc_tm (contig + i, primer);
1065 | fq_temp = 1e8;
1066 |
1067 | if ((tm >= min_tm) && (tm <= max_tm))
1068 | {
1069 | fq_temp = gc_temp = 0.0;
1070 | for (j = 0; j < primer; j++)
1071 | {
1072 | rm = minim (depth, primer - j);
1073 | thisd = 1.0;
1074 | for (m = depth; m > rm; m--)
1075 | thisd *= discount;
1076 | k = i + j;
1077 | for (m = rm; m > min_match; m--)
1078 | {
1079 | fq_temp += thisd * reps_left[k][m];
1080 | thisd *= discount;
1081 | }
1082 |
1083 | k = i + j;
1084 | if ((contig[k] == 'G') || (contig[k] == 'C'))
1085 | gc_temp += 1.0;
1086 |
1087 | if (j + tail < primer)
1088 | {
1089 | if (uflag[k])
1090 | {
1091 | if (j + tail + 1 < primer)
1092 | {
1093 | fq_temp += 100;
1094 | }
1095 | else
1096 | {
1097 | fq_temp += 2000;
1098 | }
1099 | }
1100 | }
1101 | }
1102 | gc_temp /= (double) primer;
1103 | fq_temp /= (double) primer;
1104 |
1105 | //printf("\n Primer = %d i = %d fq = %g gc = %g \n\n",primer,i,fq_left[i],gc_left[i]);
1106 |
1107 | if ((gc_left[i] >= min_gc) && (gc_left[i] <= max_gc))
1108 | {
1109 | if (check_hairpin (contig + i, primer))
1110 | {
1111 | /*
1112 | * strncpy(ss,contig+i,primer); ss[primer] = '\0'; printf("\n\t\tDetermined %s to be a hairpin %g",ss,fq_left[i]);
1113 | */
1114 | fq_temp += 1e7;
1115 | }
1116 | else
1117 | {
1118 | reverse_string (contig + i, flip, primer);
1119 | if (check_dimer (contig + i, flip, primer))
1120 | {
1121 | // strncpy(ss,contig+i,primer); ss[primer] = '\0'; printf("\n\t\tDetermined %s to be a self-dimer %g",ss,fq_left[i]);
1122 | fq_temp += 1e7;
1123 | }
1124 | }
1125 | }
1126 | else
1127 | {
1128 | // strncpy(ss,contig+i,primer); ss[primer] = '\0'; printf("\n\t\tDetermined %s to be outside the gc window %g",ss,fq_left[i]);
1129 | fq_temp += 1e7;
1130 | }
1131 |
1132 | // if(uflag[i+tail]) fq_left[i] += 1e8;
1133 |
1134 | // printf("\nLeft i = %d fq=%g gc=%g ",i,fq_left[i],gc_left[i]);
1135 | }
1136 | if (fq_temp < fq_left[i])
1137 | {
1138 | plen[i] = primer;
1139 | fq_left[i] = fq_temp;
1140 | gc_left[i] = gc_temp;
1141 | }
1142 | }
1143 | }
1144 |
1145 | for (i = 0; i <= window; i++)
1146 | index_left[i] = i;
1147 |
1148 | FQ_LIST = fq_left;
1149 | qsort ((void *) index_left, window - max_primer, sizeof (int), sort_compare_index);
1150 |
1151 | free_ivector (uflag, 0, window);
1152 | free_imatrix (reps_left, 0, window, 0, depth);
1153 | free_imatrix (local_reps, 0, window, 0, depth);
1154 |
1155 | // for(i=0;i= 0)
1200 | count += minim (list[i][1], y) - maxim (x, list[i][0]) + 1;
1201 |
1202 | j = find_frag (y, list, 0, no_frags - 1, no_frags / 2);
1203 | if (j >= 0)
1204 | count += minim (list[j][1], y) - maxim (x, list[j][0]) + 1;
1205 |
1206 | if ((i < 0) && (j < 0))
1207 | {
1208 | k = find_seg (x, y, list, 0, no_frags - 1, no_frags / 2);
1209 | if (k > 0)
1210 | count += minim (list[k][1], y) - maxim (x, list[k][0]) + 1;
1211 | }
1212 | if ((double) count / (double) (y - x + 1) >= max)
1213 | return FALSE;
1214 | else
1215 | return TRUE;
1216 | }
1217 |
1218 | /*---------------------------------------------------------------------*/
/*
 * Binary search for the interval that contains x.
 *
 * list[k][0] and list[k][1] are the inclusive bounds of interval k; the
 * search covers indices start..end and probes `guess` first.  The halving
 * only works if the intervals are in ascending order (assumed, not checked).
 *
 * Returns the index of an interval with list[k][0] <= x <= list[k][1], or
 * -1 when x is negative or no interval in the range contains it.
 */
int
find_frag (int x, int **list, int start, int end, int guess)
{
  if (x < 0)
    return -1;

  for (;;)
    {
      const int lo = list[guess][0];
      const int hi = list[guess][1];

      if ((x >= lo) && (x <= hi))
        return guess;

      if (x < lo)
        {
          /* Target lies to the left of the probe. */
          if (guess <= start)
            return -1;
          end = guess - 1;
        }
      else if (x > hi)
        {
          /* Target lies to the right of the probe. */
          if (guess >= end)
            return -1;
          start = guess + 1;
        }

      guess = (start + end) / 2;
    }
}
1254 |
1255 | /*---------------------------------------------------------------------*/
/*
 * Binary search for a fragment that is completely enclosed by the segment
 * [x, y], i.e. x <= list[k][0] and y >= list[k][1].
 *
 * list  - array of fragments; list[k][0] / list[k][1] are the inclusive
 *         first / last positions of fragment k (expected to be sorted and
 *         non-overlapping - confirm against the caller)
 * start - lowest index still under consideration
 * end   - highest index still under consideration
 * guess - index to probe first
 *
 * Returns the index of an enclosed fragment, or -1 when none is found.
 *
 * The previous recursive version moved neither bound when x fell inside
 * list[guess] without the segment enclosing that fragment, so it probed the
 * same index forever.  A fragment that starts at or before x (and is not
 * enclosed) can only be followed by enclosed fragments further right, so the
 * search now continues to the right in that case and always terminates.
 */
int
find_seg (int x, int y, int **list, int start, int end, int guess)
{
  for (;;)
    {
      const int lo = list[guess][0];
      const int hi = list[guess][1];

      if ((x <= lo) && (y >= hi))
        return guess;

      if (x < lo)
        {
          /* segment starts before this fragment but does not reach its end */
          if (guess <= start)
            return -1;
          end = guess - 1;
        }
      else
        {
          /* x >= lo: covers both "x is past the fragment" and "x is inside it" */
          if (guess >= end)
            return -1;
          start = guess + 1;
        }

      /* every pass removes at least the probed index from [start, end] */
      guess = (start + end) / 2;
    }
}
1288 |
1289 | /*---------------------------------------------------------------------*/
1290 |
1291 | int
1292 | check_hairpin (char *ss, int n)
1293 | {
1294 | int min;
1295 | char flip[1024];
1296 |
1297 | if (n > 1000)
1298 | {
1299 | printf ("\n Dude what's with a %d base primer .... PPPPLEEASE \n", n);
1300 | exit (1);
1301 | }
1302 | reverse_string (ss, flip, n);
1303 |
1304 | for (min = 3; min <= n / 2 - 4; min++)
1305 | if (check_hairpin_min (ss, flip, n, min))
1306 | return TRUE;
1307 |
1308 | return FALSE;
1309 | }
1310 |
1311 | /*---------------------------------------------------------------------*/
1312 |
1313 | int
1314 | check_hairpin_min (char *ss, char *flip, int n, int min)
1315 | {
1316 | int half;
1317 |
1318 | half = n / 2 - min;
1319 |
1320 | if (half < 4)
1321 | return FALSE;
1322 |
1323 | return check_dimer (ss, flip, half);
1324 |
1325 | }
1326 |
1327 | /*---------------------------------------------------------------------*/
1328 | int
1329 | test_dimer (char *p1, char *p2, int n)
1330 | {
1331 | int i, matches;
1332 |
1333 | if (n < 3)
1334 | return FALSE;
1335 |
1336 | matches = 0;
1337 |
1338 | for (i = 0; i < n; i++)
1339 | if (check_watson_crick (p1[i], p2[i]))
1340 | matches++;
1341 |
1342 | if ((double) matches / (double) n >= 0.75)
1343 | return TRUE;
1344 |
1345 | return FALSE;
1346 |
1347 | }
1348 |
1349 | /*---------------------------------------------------------------------*/
1350 | int
1351 | check_uneven_dimer (char *p1, char *p2, int n1, int n2)
1352 | {
1353 | int i, n;
1354 |
1355 | if (n1 == n2)
1356 | return check_dimer (p1, p2, n1);
1357 |
1358 |
1359 | if ((n1 < 3) || (n2 < 3))
1360 | return FALSE;
1361 |
1362 | n = minim (n1, n2);
1363 | if (test_dimer (p1, p2, n))
1364 | return TRUE;
1365 |
1366 | for (i = 1; i < n1; i++)
1367 | {
1368 | if (test_dimer (p1 + i, p2, minim (n1 - i, n)))
1369 | return TRUE;
1370 | }
1371 |
1372 | for (i = 1; i < n2; i++)
1373 | {
1374 | if (test_dimer (p1, p2 + i, minim (n2 - i, n)))
1375 | return TRUE;
1376 | }
1377 |
1378 | return FALSE;
1379 | }
1380 |
1381 | /*---------------------------------------------------------------------*/
1382 | int
1383 | check_dimer (char *p1, char *p2, int n)
1384 | {
1385 | int i;
1386 |
1387 |
1388 | if (n < 3)
1389 | return FALSE;
1390 |
1391 | if (test_dimer (p1, p2, n))
1392 | return TRUE;
1393 |
1394 |
1395 | for (i = 1; i < n; i++)
1396 | {
1397 | if (test_dimer (p1 + i, p2, n - i))
1398 | return TRUE;
1399 | if (test_dimer (p1, p2 + i, n - i))
1400 | return TRUE;
1401 | }
1402 |
1403 | return FALSE;
1404 |
1405 | }
1406 |
1407 | /*---------------------------------------------------------------------*/
/*
 * Report whether bases a and b form a Watson-Crick pair (A-T, T-A, G-C,
 * C-G).  Only upper-case letters are recognised; anything else never pairs.
 *
 * Returns 1 (TRUE) for a pair and 0 (FALSE) otherwise.
 */
int
check_watson_crick (char a, char b)
{
  char partner;

  switch (a)
    {
    case 'A':
      partner = 'T';
      break;
    case 'T':
      partner = 'A';
      break;
    case 'G':
      partner = 'C';
      break;
    case 'C':
      partner = 'G';
      break;
    default:
      /* a is not a plain base, so nothing can pair with it */
      return 0;
    }

  return b == partner;
}
1459 |
1460 | /*---------------------------------------------------------------------*/
1461 | void
1462 | count_copys (int *flat, char *s, int n, int *reps)
1463 | {
1464 | char ss[256];
1465 |
1466 | simple_count_copys (flat, s, n, reps, TRUE);
1467 | reverse_transcribe (s, ss, n);
1468 | simple_count_copys (flat, ss, n, reps, FALSE);
1469 | }
1470 |
1471 | /*---------------------------------------------------------------------*/
1472 |
/*
 * Add, for every length len = 1..n, the table count of one len-mer of s to
 * reps[len].
 *
 * flat    - count table; the entry used is flat[code + base] where code is
 *           encode_basepairs() of the len-mer and base advances as
 *           0, 4, 20, 84, ... (the same scheme index_string() writes with)
 * s       - sequence of at least n bases
 * reps    - accumulators, indexed 1..n
 * forward - non-zero: use the first len bases of s;
 *           zero: use the last len bases of s
 *
 * If s[len - 1] is 'N' for some len, 1e8 is added to every reps[1..n] and
 * the function returns immediately.
 *
 * NOTE(review): the 'N' test always looks at the front of s, also in
 * non-forward mode where the len-mer is taken from the back - confirm this
 * is intended.
 */
void
simple_count_copys (int *flat, char *s, int n, int *reps, int forward)
{
  unsigned int base = 0;
  int len;

  for (len = 1; len <= n; len++)
    {
      unsigned int code;
      char *kmer;

      if (s[len - 1] == 'N')
        {
          int m;

          /* penalise every length and give up on this sequence */
          for (m = 1; m <= n; m++)
            reps[m] += 1e8;
          return;
        }

      kmer = forward ? s : s + n - len;
      code = encode_basepairs (kmer, len);

      reps[len] += flat[code + base];

      /* step to the table section of the next length */
      base = (base + 1) << 2;
    }
}
1504 |
1505 | /*---------------------------------------------------------------------*/
/*
 * Write the first n characters of contig into s in reverse order.
 * s must have room for n characters; no terminating '\0' is written.
 * Nothing is written when n <= 0.
 */
void
reverse_string (char *contig, char *s, int n)
{
  int k;

  for (k = 0; k < n; k++)
    s[k] = contig[n - 1 - k];
}
1517 |
1518 | /*---------------------------------------------------------------------*/
/*
 * Write the reverse complement of the first n bases of contig into s.
 *
 * A<->T and C<->G are swapped; every other character becomes 'N'.
 * s must have room for n characters; no terminating '\0' is written.
 */
void
reverse_transcribe (char *contig, char *s, int n)
{
  int k;

  for (k = 0; k < n; k++)
    {
      char comp;

      /* walk the source backwards while filling the destination forwards */
      switch (contig[n - 1 - k])
        {
        case 'A':
          comp = 'T';
          break;
        case 'C':
          comp = 'G';
          break;
        case 'G':
          comp = 'C';
          break;
        case 'T':
          comp = 'A';
          break;
        default:
          comp = 'N';
          break;
        }

      s[k] = comp;
    }
}
1545 |
1546 | /*---------------------------------------------------------------------*/
/*
 * Write the complement (same orientation) of the first n bases of contig
 * into s.
 *
 * A<->T and C<->G are swapped; every other character becomes 'N'.
 * s must have room for n characters; no terminating '\0' is written.
 */
void
transcribe (char *contig, char *s, int n)
{
  const char *src = contig;
  const char *stop = contig + (n > 0 ? n : 0);
  char *dst = s;

  while (src < stop)
    {
      switch (*src++)
        {
        case 'A':
          *dst++ = 'T';
          break;
        case 'C':
          *dst++ = 'G';
          break;
        case 'G':
          *dst++ = 'C';
          break;
        case 'T':
          *dst++ = 'A';
          break;
        default:
          *dst++ = 'N';
          break;
        }
    }
}
1569 |
1570 | /*---------------------------------------------------------------------*/
1571 |
/*
 * Feed every position of the L-base contig to index_string().
 *
 * For each position that does not hold an 'N', the prefixes of length
 * 1..min(depth, bases remaining) starting there are counted in index.
 */
void
flat_index_contig (int *index, char *contig, int L, int depth)
{
  int pos;

  for (pos = 0; pos < L; pos++)
    {
      int span;

      /* words may not start on an unknown base */
      if (contig[pos] == 'N')
        continue;

      /* never read past the end of the contig */
      span = minim (depth, L - pos);
      index_string (index, contig + pos, span);
    }
}
1587 |
1588 | /*---------------------------------------------------------------------*/
/*
 * Count every prefix of s with length 1..n in the flat table index.
 *
 * The prefix of length len is stored at index[code + base], where code is
 * encode_basepairs (s, len) and base advances as 0, 4, 20, 84, ... so that
 * each length gets its own section of the table.
 */
void
index_string (int *index, char *s, int n)
{
  unsigned int base = 0;
  int len;

  for (len = 1; len <= n; len++)
    {
      const unsigned int code = encode_basepairs (s, len);

      index[code + base]++;

      /* first slot of the section for the next length */
      base = (base + 1) << 2;
    }
}
1604 |
1605 | /*---------------------------------------------------------------------*/
1606 |
/*
 * qsort() comparator for arrays of double, ascending order.
 * Returns -1, 0 or 1; values that compare neither smaller nor larger
 * (including NaN) are reported as equal.
 */
int
sort_compare (const void *a, const void *b)
{
  const double lhs = *(const double *) a;
  const double rhs = *(const double *) b;

  return (lhs > rhs) - (lhs < rhs);
}
1617 |
1618 | /*---------------------------------------------------------------------*/
1619 |
1620 | int
1621 | sort_compare_index (const void *a, const void *b)
1622 | {
1623 | double ad, bd;
1624 |
1625 | ad = FQ_LIST[*((int *) a)];
1626 | bd = FQ_LIST[*((int *) b)];
1627 |
1628 | if (ad < bd)
1629 | return -1;
1630 | else if (ad > bd)
1631 | return 1;
1632 | else
1633 | return 0;
1634 | }
1635 |
1636 | /*---------------------------------------------------------------------*/
1637 |
/*
 * Print the prompt line on stdout and read one line of input from stdin.
 *
 * line   - prompt text, printed verbatim
 * result - receives the answer without its trailing newline; must have room
 *          for at least 250 bytes (at most 249 characters are read)
 *
 * On end of input or a read error result is set to the empty string.
 * A final line that lacks a newline is returned complete.
 */
void
read_var (char *line, char *result)
{
  printf ("%s", line);

  if (fgets (result, 250, stdin) == NULL)
    {
      /* nothing was read; leave a defined, empty answer */
      result[0] = '\0';
      return;
    }

  /* cut at the newline if there is one, otherwise keep the text as is */
  result[strcspn (result, "\n")] = '\0';
}
1653 |
1654 | /*---------------------------------------------------------------------*/
1655 |
1656 | int
1657 | line_count (FILE * sfile)
1658 | {
1659 | int c, nl;
1660 | c = nl = 0;
1661 | while ((c = getc (sfile)) != EOF)
1662 | {
1663 | if (c == '\n')
1664 | nl++;
1665 | }
1666 | return nl;
1667 | }
1668 |
1669 | /*---------------------------------------------------------------------*/
1670 |
1671 | int
1672 | read_primer_pools (const char *filename, int max_primer_pairs, int max_primers_in_pool,
1673 | int *primers_in_pool, PNODE ***primer_pool)
1674 | {
1675 | // open primer file
1676 | FILE *primer_file;
1677 | primer_file = fopen (filename, "r");
1678 | check(primer_file, "cannot open primer file, '%s'", filename);
1679 |
1680 | int ppairs_count = 0;
1681 | int final_pool_number = 0;
1682 |
1683 | char header[1024];
1684 | fgets( header, 1024, primer_file );
1685 | char *fields;
1686 | fields = strtok( header, "\t");
1687 | check((strcmp(fields, "Name")==0), "%s does not start with header of primer file", filename);
1688 |
1689 | while (!feof(primer_file))
1690 | {
1691 | debug("creating new temp_ppair");
1692 | PNODE *temp_ppair = create_ppair( 1000, 50 );
1693 |
1694 | char primer_name[1024];
1695 | int this_primer_num, this_pool_num;
1696 | this_primer_num = this_pool_num = 0;
1697 |
1698 | int read_line =
1699 | fscanf (primer_file,
1700 | "%s\t%d\t%d\t%s\t%lg\t%lg\t%s\t%lg\t%lg\t%d\t%d\t%d\t%d\t%d\t%d\t%lg\t%lg\t%s\n",
1701 | primer_name,
1702 | &this_pool_num,
1703 | &this_primer_num,
1704 | temp_ppair->forward->sequence,
1705 | &temp_ppair->forward->tm,
1706 | &temp_ppair->forward->gc,
1707 | temp_ppair->reverse->sequence,
1708 | &temp_ppair->reverse->tm,
1709 | &temp_ppair->reverse->gc,
1710 | &temp_ppair->chrom,
1711 | &temp_ppair->forward->start,
1712 | &temp_ppair->forward->end,
1713 | &temp_ppair->reverse->start,
1714 | &temp_ppair->reverse->end,
1715 | &temp_ppair->length,
1716 | &temp_ppair->gc,
1717 | &temp_ppair->tm,
1718 | temp_ppair->sequence);
1719 | check((read_line == 18 ), "Error processing file: %s", filename);
1720 | check((ppairs_count < max_primer_pairs), "Out of Memory. Increase primer_pool size.");
1721 | check((this_primer_num < max_primers_in_pool), "Too many primer pairs in pool %d", this_pool_num);
1722 |
1723 | primer_pool[this_pool_num][this_primer_num] = temp_ppair;
1724 | debug("assigned primer pair id '%d' to pool number '%d'", this_primer_num, this_pool_num);
1725 |
1726 | primers_in_pool[this_pool_num]++;
1727 | ppairs_count++;
1728 | debug("\n\n\tpool = %d, pool primer pairs count = %d, total primer pairs count = %d\n",
1729 | this_pool_num, primers_in_pool[this_pool_num], ppairs_count);
1730 | final_pool_number = this_pool_num;
1731 | }
1732 |
1733 | debug("returning this_pool_num: %d", final_pool_number);
1734 | return final_pool_number;
1735 |
1736 | error:
1737 | exit(1);
1738 | }
1739 |
1740 | /*---------------------------------------------------------------------*/
1741 |
1742 | void
1743 | Print_isPcr (const char *filename, int max_pools, int *primers_in_pool, PNODE ***primer_pool)
1744 | {
1745 | FILE *isPcr_File;
1746 | isPcr_File = fopen (filename, "w");
1747 | check(isPcr_File, "cannot open primer file, '%s'", filename);
1748 | for (int i = 0; i <= max_pools; i++ )
1749 | {
1750 | for (int j = 0; j < primers_in_pool[i]; j++)
1751 | {
1752 | fprintf(isPcr_File, "pool_%d_%02d\t%s\t%s\n", (i+1), (j+1),
1753 | primer_pool[i][j]->forward->sequence,
1754 | primer_pool[i][j]->reverse->sequence );
1755 | }
1756 | }
1757 |
1758 | error:
1759 | exit(1);
1760 | }
1761 |
1762 | void
1763 | Check_all_pools ( int max_pools, int *primers_in_pool, PNODE ***primer_pool, int max_amplicon_length )
1764 | {
1765 | double max_amp_diff = (double) (max_amplicon_length * 0.15) + 1;
1766 | printf("%s\t%s\t%s\t%s\n", "Pool Number", "Primer Pair Count", "Compatable", "Comparisons");
1767 | for (int i = 0; i <= max_pools; i++ )
1768 | {
1769 | if (primers_in_pool[i] > 1 )
1770 | {
1771 | check_poolability( primer_pool[i], primers_in_pool[i], i, (int) max_amp_diff);
1772 | }
1773 | else
1774 | {
1775 | printf("%d\t1\tYes\t%d\n", (i + 1), 0);
1776 | }
1777 | }
1778 | }
1779 |
1780 | /*---------------------------------------------------------------------*/
1781 |
/*
 * Report a fatal error and terminate the program with exit status 1.
 *
 * When errno is set, the message is passed to perror() so the system error
 * text is appended; otherwise "ERROR: <message>" is printed on stdout.
 */
void
die ( char *message )
{
  if (!errno)
    printf("ERROR: %s\n", message);
  else
    perror(message);

  exit(1);
}
1795 |
1796 | /*---------------------------------------------------------------------*/
1797 |
1798 | void
1799 | check_poolability (PNODE ** primer_pool, int primers_in_pool, int pool_number, int max_amp_diff)
1800 | {
1801 | int i, j, k, *poolable_count;
1802 | char **cmat;
1803 |
1804 | poolable_count = create_ivec (primers_in_pool);
1805 | cmat = create_cmat (primers_in_pool, primers_in_pool);
1806 |
1807 | for (i = 0; i < primers_in_pool; i++)
1808 | for (j = i + 1; j < primers_in_pool; j++)
1809 | {
1810 | // debug("checking, fwd: %s rev: %s with fwd: %s rev: %s\n",
1811 | // primer_pool[i]->forward->sequence,
1812 | // primer_pool[i]->reverse->sequence,
1813 | // primer_pool[j]->forward->sequence,
1814 | // primer_pool[j]->reverse->sequence
1815 | // );
1816 |
1817 | cmat[i][j] = is_poolable_primer (primer_pool[i], primer_pool[j], max_amp_diff, 2);
1818 | cmat[j][i] = cmat[i][j];
1819 | poolable_count[i] += cmat[i][j];
1820 | // debug("%d", poolable_count[i]);
1821 | poolable_count[j] += cmat[i][j];
1822 | // debug("%d", poolable_count[j]);
1823 | }
1824 |
1825 | k = 0;
1826 | for (i = 0; i < primers_in_pool; i++)
1827 | k += poolable_count[i];
1828 |
1829 | if ((primers_in_pool - 1) * primers_in_pool == k)
1830 | printf ("%d\t%d\tYes\t%d\n", (pool_number + 1), (primers_in_pool + 1), k);
1831 | else
1832 | printf ("%d\t%d\tNo\t%d\n", (pool_number + 1), (primers_in_pool + 1), k);
1833 |
1834 | // print_cmat (cmat, primers_in_pool);
1835 | // printf ("\n");
1836 |
1837 | }
1838 |
1839 | /*---------------------------------------------------------------------*/
1840 |
/*
 * Print the N x N matrix cmat on stdout, one row per line.
 * Each cell is printed as a decimal number without separators; the diagonal
 * is shown as '.'.
 */
void
print_cmat (char **cmat, int N)
{
  int row, col;

  for (row = 0; row < N; row++)
    {
      for (col = 0; col < N; col++)
        {
          if (row != col)
            printf ("%d", cmat[row][col]);
          else
            printf (".");
        }

      printf ("\n");
    }
}
1856 |
1857 | /*---------------------------------------------------------------------*/
1858 |
1859 | PNODE ***
1860 | primer_pool_create ( int max_primers_in_pool, int max_ppairs_count)
1861 | {
1862 | PNODE ***primer_pool = (PNODE ***) malloc ((unsigned) max_ppairs_count * sizeof (PNODE **));
1863 | check_mem(primer_pool);
1864 |
1865 | for (int i = 0; i < max_ppairs_count; i++)
1866 | {
1867 | primer_pool[i] = (PNODE **) malloc ((unsigned) max_primers_in_pool * sizeof (PNODE *));
1868 | check_mem(primer_pool[i]);
1869 | }
1870 | return primer_pool;
1871 |
1872 | error:
1873 | exit(1);
1874 | }
1875 |
1876 | /*---------------------------------------------------------------------*/
1877 |
1878 | PRIMER *
1879 | create_primer (int max_primer_length)
1880 | {
1881 | PRIMER *tn;
1882 |
1883 | tn = (PRIMER *) malloc( (unsigned) sizeof(PRIMER) );
1884 | assert( tn != NULL );
1885 | check_mem(tn);
1886 |
1887 | tn->start = 0;
1888 | tn->end = 0;
1889 | tn->sequence = create_cvec(max_primer_length);
1890 | tn->tm = 0;
1891 | tn->gc = 0;
1892 |
1893 | return tn;
1894 |
1895 | error:
1896 | exit(1);
1897 | }
1898 |
1899 | /*---------------------------------------------------------------------*/
1900 |
1901 | PNODE *
1902 | create_ppair (int max_primer_length, int max_amplicon_length)
1903 | {
1904 | PNODE *tn;
1905 |
1906 | tn = (PNODE *) malloc( (unsigned) sizeof(PNODE) );
1907 | assert( tn != NULL );
1908 | check_mem(tn);
1909 |
1910 | tn->forward = create_primer(max_primer_length);
1911 | tn->reverse = create_primer(max_primer_length);
1912 | tn->sequence = create_cvec(max_amplicon_length);
1913 | tn->length = 0;
1914 | tn->gc = 0;
1915 | tn->tm = 0;
1916 | tn->chrom = 0;
1917 |
1918 | return tn;
1919 |
1920 | error:
1921 | exit(1);
1922 | }
1923 |
1924 | /*---------------------------------------------------------------------*/
1925 |
/*
 * Index an L-base contig in two tables at once.
 *
 * For every position that does not hold an 'N':
 *   - index_string() counts the prefixes of length 1..min(depth, bases
 *     remaining) in index;
 *   - if position + high_depth < L, the high_depth-mer starting there and
 *     its reverse complement are both marked in the double_high bitmap via
 *     high_index().
 *
 * A progress line is printed every 1,000,000 positions.
 *
 * NOTE(review): the reverse complement is built in a 256-byte buffer, so
 * high_depth must not exceed 256.  The "< L" test also leaves out the
 * word that ends exactly on the last base - confirm that is intended.
 */
void
flat_index_contig_high (int *index, char *contig, int L, int depth, int high_depth, unsigned char *double_high)
{
  char revcomp[256];
  int pos;

  for (pos = 0; pos < L; pos++)
    {
      const int span = minim (depth, L - pos);

      if (pos % 1000000 == 0)
        printf ("\nIndex %d bases out of %d at depth %d", pos, L, span);

      /* words may not start on an unknown base */
      if (contig[pos] == 'N')
        continue;

      index_string (index, contig + pos, span);

      /* not enough bases left for a full high_depth-mer */
      if (pos + high_depth >= L)
        continue;

      high_index (double_high, contig + pos, high_depth);
      reverse_transcribe (contig + pos, revcomp, high_depth);
      high_index (double_high, revcomp, high_depth);
    }
}
1953 |
1954 | /*---------------------------------------------------------------------*/
1955 |
/*
 * Mark the n-base word at s as present in the bitmap dhigh.
 *
 * The word is turned into a number with encode_basepairs(); bit (code % 8)
 * of byte (code / 8) is then set.  Nothing is marked when the code, read as
 * a signed int, is negative.
 */
void
high_index (unsigned char *dhigh, char *s, int n)
{
  const int code = encode_basepairs (s, n);

  if (code < 0)
    return;

  dhigh[code / 8] |= (unsigned char) (1 << (code % 8));
}
1972 |
1973 | /*---------------------------------------------------------------------*/
1974 |
--------------------------------------------------------------------------------
/src/mpd.h:
--------------------------------------------------------------------------------
#ifndef _mpp_h
#define _mpp_h
#define _GNU_SOURCE             // for asprintf from stdio.h
#define TRUE 1
#define FALSE 0
#define MAX_PAIRS 10
// Smaller / larger of two values.  Arguments are fully parenthesised so that
// expressions can be passed safely; each argument may be evaluated twice, so
// do not pass expressions with side effects.
#define minim(a,b) (((a) < (b)) ? (a) : (b))
#define maxim(a,b) (((a) > (b)) ? (a) : (b))
// NOTE(review): the guard closes here, so only the macros above are
// protected against repeated inclusion, not the typedefs and prototypes.
#endif
10 |
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 | #include
19 | #include
20 | #include "dbg.h"
21 | #include "mem.h"
22 |
/*
 * One primer of a primer pair.
 * create_primer() zeroes the numeric fields and attaches the sequence
 * buffer; read_primer_pools() fills every field from a pool file record.
 */
typedef struct primer_node
{
  int start;                    /* start coordinate */
  int end;                      /* end coordinate */
  char *sequence;               /* primer bases; buffer comes from create_cvec() */
  double tm;                    /* presumably melting temperature in degrees C */
  double gc;                    /* presumably GC content */
} PRIMER;
31 |
/*
 * A primer pair together with the product it amplifies.
 * create_ppair() allocates both primers and the sequence buffer and zeroes
 * the numeric fields.
 */
typedef struct primer_pair
{
  struct primer_node *forward;  /* forward primer (PRIMER) */
  struct primer_node *reverse;  /* reverse primer (PRIMER) */
  char *sequence;               /* amplicon sequence; buffer comes from create_cvec() */
  int length;                   /* amplicon length */
  double gc;                    /* amplicon value, presumably GC content */
  double tm;                    /* amplicon value, presumably melting temperature */
  int chrom;                    /* chromosome number */
} PNODE;
42 |
/*
 * One polymorphism (SNP) record.  Field meanings below are taken from the
 * names - confirm against fill_snp_list().
 */
typedef struct snp_node
{
  char baseA;                   /* presumably first allele */
  char baseB;                   /* presumably second allele */
  char name[32];                /* identifier of the record */
  unsigned int pos;             /* position on the chromosome */
  unsigned int chrom;           /* chromosome number */
  int no_pairs;                 /* presumably number of entries in pair */
  int no_disc;
  double het;                   /* presumably heterozygosity */
  struct primer_pair **pair;    /* primer pairs (PNODE) attached to this record */
} SNODE;
55 |
/*
 * One amplification target: a named region on a chromosome and the primer
 * pairs found for it.  The driver prints targets as
 * chr<chrom>:<start_pos + 1>-<stop_pos + 1>.
 */
typedef struct amp_node
{
  char name[32];                /* target name */
  unsigned int start_pos;       /* first position of the target */
  unsigned int stop_pos;        /* last position of the target */
  unsigned int chrom;           /* chromosome number */
  int no_pairs;                 /* presumably number of entries in pair */
  struct primer_pair **pair;    /* primer pairs (PNODE) for this target */
} AMPNODE;
65 |
66 | void die ( char *message );
67 | int read_primer_pools (const char *filename, int max_ppairs, int max_primer_count,
68 | int *pool_count, PNODE ***primer_pool);
69 | void Print_isPcr (const char *filename, int max_pools, int *primers_in_pool, PNODE ***primer_pool);
70 | void reverse_string (char *contig, char *s, int n);
71 | void check_poolability (PNODE ** primer_pool, int primers_in_pool, int pool_number, int max_amp_diff);
72 | void Check_all_pools ( int max_pools, int *primers_in_pool, PNODE ***primer_pool, int max_amplicon_length );
73 |
74 | PNODE ***primer_pool_create ( int max_ppairs, int max_ppairs_count);
75 | PRIMER *create_primer (int max_primer_length);
76 | PNODE *create_ppair (int max_primer_length, int max_amplicon_length);
77 | void flat_index_contig_high (int *index, char *contig, int L, int depth, int high_depth, unsigned char *double_high);
78 | void flat_index_contig (int *index, char *contig, int L, int depth);
79 | void high_index (unsigned char *dhigh, char *s, int n);
80 | int
81 | find_primers (SNODE ** snp_list, AMPNODE * tn, int no_snp, int *flat, char *contig,
82 | int L, int min_primer, int max_primer, int amp_max, int amp_min,
83 | double min_gc, double max_gc, double min_tm, double max_tm,
84 | int depth, int local_depth, int target_base, int start_pos,
85 | unsigned char *highmer, int **repeats, int no_repeats, int end_region, int chrom);
86 | void count_copys (int *flat, char *s, int n, int *reps);
87 | void simple_count_copys (int *flat, char *s, int n, int *reps, int forward);
88 | void reverse_transcribe (char *contig, char *s, int n);
89 | void transcribe (char *contig, char *s, int n);
90 | void reverse_string (char *contig, char *s, int n);
91 | int sort_compare_index (const void *a, const void *b);
92 | int check_hairpin (char *ss, int n);
93 | int check_hairpin_min (char *ss, char *flip, int n, int min);
94 | int check_dimer (char *p1, char *p2, int n);
95 | int check_uneven_dimer (char *p1, char *p2, int n1, int n2);
96 | int check_watson_crick (char a, char b);
97 | int test_dimer (char *p1, char *p2, int n);
98 | int find_seg (int x, int y, int **list, int start, int end, int guess);
99 | unsigned int encode_basepairs (char *ss, int n);
100 | void decode_basepairs (unsigned char *s, char *dest, int n);
101 | void index_string (int *index, char *s, int n);
102 | void convert_int_basepairs (int i, char *s, int k);
103 | void
104 | fill_quality_scores (int *flat, int *local, char *contig, int L, int window, int depth, int ld, int min_primer,
105 | int max_primer, double *fq_left, double *gc_left, int *index_left, int *plen_l,
106 | double min_gc, double max_gc, double min_tm, double max_tm, int **repeats,
107 | int no_repeats, unsigned char *highmer, int start_pos);
108 | int find_frag (int x, int **list, int start, int end, int guess);
109 | void fill_hs (char a, char b, double *h, double *s);
110 | double calc_tm (char *s, int n);
111 | int sort_compare_struct (const void *a, const void *b);
112 | int check_15mer (char *string, unsigned char *map, int n);
113 | int is_not_repeat (int x, int y, int **list, int no_frags, double max);
114 | SNODE **fill_snp_list (FILE * sfile, int *n, unsigned int chrom);
115 | AMPNODE **fill_amp_list (FILE * sfile, int n);
116 | SNODE *snp_alloc (void);
117 | AMPNODE *amp_alloc (void);
118 | int isbase (char c);
119 | double calc_gc (char *s, int n);
120 | PRIMER *primer_alloc (void);
121 | PNODE *product_alloc (void);
122 | int poly_under_primer (unsigned int p_start, unsigned int p_end, SNODE ** list, int start, int stop, int which);
123 | int *select_snps (SNODE ** list, int n, int target, int start, int stop);
124 | int pick_random (double *x, int n);
125 | int select_subset (SNODE ** list, int n, int pick, int *priority, int *selected, int which, int start, int stop);
126 | double fill_dist (SNODE ** list, int n, int *priority, int *selected, int which, int start, int stop, double *dist);
127 | int line_count (FILE * sfile);
128 | int count_compatable_primers (int **poolable, int total_primers, int k);
129 | int **zero_matrix (int **poolable, int total_primers, int k);
130 | int is_poolable_amp (AMPNODE * a1, AMPNODE * a2, int i1, int i2);
131 | void find_min_pools (int n, int *need_pooling, int **same_amplicon, int **poolable, PNODE ** primer_list);
132 | int is_poolable_primer (PNODE * p1, PNODE * p2, int size_diff_threshold, int tm_diff_threshold);
133 | int count_poolable (int t, int tot_primer, int *need_pooling, int **same_amplicon, int **poolable);
134 | void make_greedy_pools (FILE *outfile, PNODE **plist, char **cmat, int *pc, int **redund, int *bstart, int Nregs, int N, int still, int *current_pool, int this_pool, int max_pool);
135 | void make_less_greedy_pools (FILE *outfile, PNODE ** plist, char **cmat, int *pc, int **redund, int *bstart, int Nregs, int N, int still, int *current_pool, int this_pool, int max_pool);
136 | void read_var (char *line, char *result);
137 | int *create_ivec (int row);
138 | char *create_cvec (int row);
139 | int **create_imat (int row, int col);
140 | char **create_cmat (int row, int col);
141 |
--------------------------------------------------------------------------------
/src/mpd_lessGreedy.c:
--------------------------------------------------------------------------------
1 | #include "mpd.h"
2 | #include "mem.h"
3 |
4 | static FILE *outfile;
5 |
6 | int
7 | main ()
8 | {
9 | char ss[256], sss[4196], **filename;
10 | char *scratch_pad, **contig_descript;
11 | unsigned char **compressed_map, *highmer;
12 | int i, j, k, N, N_snpfiles, *contig_snp_count, *contig_length;
13 | int pad_size, fasta, idepth, target_contig, total_index;
14 | int *flat_index, old_index, ***repeat_list, *no_repeats;
15 | int amp_min, amp_max, pool_size, N_targets;
16 | int min_primer, max_primer, genome_start, genome_stop;
17 | FILE *sfile, *cfile, *idfile, *rfile, *highfile, *snpfile_idx, *snpfile, *target_ampfile;
18 | double max_gc, min_gc, min_tm, max_tm, tm_inc;
19 | SNODE ***snp_list; /* remember, an array of typedef is always an array of pointers this gives us a 2d array, i.e., matrix */
20 | AMPNODE **target_amp_list;
21 |
22 | outfile = stdout;
23 | read_var ("\nSend Output to Screen or Disk? [S,D]\n", ss);
24 |
25 | if ((strchr (ss, 'D')) || (strchr (ss, 'd')))
26 | {
27 | read_var ("Please Enter File Name for Output\n", ss);
28 | if ((outfile = fopen (ss, "w")) == (FILE *) NULL)
29 | {
30 | printf ("\n Can not open file %s\n", ss);
31 | exit (1);
32 | }
33 | }
34 | else
35 | outfile = stdout;
36 |
37 | old_index = TRUE;
38 |
39 | read_var ("Primer Picker Summary Filename (e.g., index summary like hg19.sdx)\n", ss);
40 | if ((sfile = fopen (ss, "r")) == (FILE *) NULL)
41 | {
42 | printf ("\nCould Not Open file %s\n", ss);
43 | exit (1);
44 | }
45 |
46 | // sdx file: read the 1st line that contains an int of contigs that are indexed
47 | fgets (sss, 4195, sfile);
48 | N = atoi (sss);
49 | printf ("\n There are N chromosomes %d \n\n", N);
50 |
51 | compressed_map = (unsigned char **) malloc ((unsigned) (N + 1) * sizeof (unsigned char *));
52 | if (!compressed_map)
53 | log_err("allocation failure for compressed_map");
54 |
55 | contig_descript = cmatrix (0, N, 0, 4196);
56 | contig_length = ivector (0, N);
57 | no_repeats = ivector (0, N);
58 |
59 | repeat_list = (int ***) malloc ((unsigned) (N + 1) * sizeof (int **));
60 | if (!repeat_list)
61 | log_err ("allocation failure for repeat_list");
62 |
63 |
64 | // sdx file: read in contig information that is in the format: contig_lenght number_repeats contig_description
65 | for (i = 0; i < N; i++)
66 | {
67 | fgets (sss, 4195, sfile);
68 | sscanf (sss, "%d\t%d\t%s", &contig_length[i], &no_repeats[i], contig_descript[i]);
69 | repeat_list[i] = imatrix (0, no_repeats[i], 0, 1);
70 | printf ("\n Contig %d is named %s and is length %d\n", i, contig_descript[i], contig_length[i]);
71 | }
72 |
73 | // sdx file: read in int representing depth of coverage
74 | fgets (sss, 4195, sfile);
75 | idepth = atoi (sss);
76 |
77 | // printf("\n Indexing to a depth of %d \n",idepth);
78 |
79 | // sdx file: read next line - should be hg19.cdx
80 | fgets (sss, 4195, sfile);
81 | for (i = 0; i < 4194; i++)
82 | if (isspace (sss[i]))
83 | {
84 | sss[i] = '\0';
85 | i = 4195;
86 | }
87 |
88 | // cdx file: set file
89 | if ((cfile = fopen (sss, "r")) == (FILE *) NULL)
90 | {
91 | printf ("\nCould Not Open 1 file \"%s\"\n", sss);
92 | exit (1);
93 | }
94 |
95 | // sdx file: read next line - should be hg19.idx
96 | fgets (sss, 4195, sfile);
97 | for (i = 0; i < 4194; i++)
98 | if (isspace (sss[i]))
99 | {
100 | sss[i] = '\0';
101 | i = 4195;
102 | }
103 |
104 | // idx file: set file
105 | if ((idfile = fopen (sss, "r")) == (FILE *) NULL)
106 | {
107 | printf ("\nCould Not Open 2 file \"%s\"\n", sss);
108 | exit (1);
109 | }
110 |
111 | // sdx file: read next line - should be hg19.rdx
112 | fgets (sss, 4195, sfile);
113 | for (i = 0; i < 4194; i++)
114 | if (isspace (sss[i]))
115 | {
116 | sss[i] = '\0';
117 | i = 4195;
118 | }
119 |
120 | // rdx file: set file
121 | if ((rfile = fopen (sss, "r")) == (FILE *) NULL)
122 | {
123 | printf ("\nCould Not Open 3 file \"%s\"\n", sss);
124 | exit (1);
125 | }
126 |
127 | //sdx file: read next line - should be hg19.15x
128 | fgets (sss, 4195, sfile);
129 | for (i = 0; i < 4194; i++)
130 | if (isspace (sss[i]))
131 | {
132 | sss[i] = '\0';
133 | i = 4195;
134 | }
135 |
136 | // 'highfile' or 'hg19.15x': set file
137 | if ((highfile = fopen (sss, "r")) == (FILE *) NULL)
138 | {
139 | printf ("\nCould Not Open 4 file \"%s\"\n", sss);
140 | exit (1);
141 | }
142 | fclose (sfile);
143 |
144 | printf ("\nAbout to read repeat file\n\n");
145 |
146 | // index Description i number of contigs: N j number of repeats in a contig: no_repeats[i] k not sure location of start of repeat until end of repeat
147 | for (i = 0; i < N; i++)
148 | {
149 | for (j = 0; j < no_repeats[i]; j++)
150 | for (k = 0; k < 2; k++)
151 | if ((fread (&repeat_list[i][j][k], sizeof (int), 1, rfile)) < 1)
152 | {
153 | printf ("\n Expected to read %d repeats for contig %d, but got %d\n\n", no_repeats[i], i, j);
154 | exit (1);
155 | }
156 | // for(j=0;j<1;j++) printf("\nFor contig %d repeat %d goes from %d %d\n\n", i,j,repeat_list[i][j][0],repeat_list[i][j][1]);
157 | }
158 | fclose (rfile);
159 | printf ("\nFinished reading repeat file");
160 |
161 | printf ("\nAbout to read 15mer file\n\n");
162 | highmer = ucvector (0, 134217728);
163 | if ((j = fread (highmer, sizeof (unsigned char), 134217728, highfile)) != 134217728)
164 | {
165 | printf ("\n Expected to read %d 15mers but got %d\n", 134217728, j);
166 | exit (1);
167 | }
168 | fclose (highfile);
169 | printf ("\nFinished reading 15mer file\n");
170 |
171 | //read dbSNP information -> summary file => actual files fill ***snp_list
172 | read_var ("Name of dbSNP summary file\n", ss);
173 | if ((snpfile_idx = fopen (ss, "r")) == (FILE *) NULL)
174 | {
175 | printf ("\nCould Not Open dbsnp file %s\n", ss);
176 | exit (1);
177 | }
178 |
179 | // dbsnp summary file line 1 = number of contigs/chr
180 | fgets (sss, 4195, snpfile_idx);
181 | N_snpfiles = atoi (sss);
182 |
183 | // read dbsnp files
184 | contig_snp_count = ivector (0, N_snpfiles);
185 | snp_list = (SNODE ***) malloc (N_snpfiles * sizeof (SNODE **));
186 | filename = cmatrix (0, N_snpfiles, 0, 256);
187 | for (i = 0; i < N_snpfiles; i++)
188 | {
189 | fgets (sss, 4195, snpfile_idx);
190 | sscanf (sss, "%s", filename[i]);
191 | if ((snpfile = fopen (filename[i], "r")) == (FILE *) NULL)
192 | {
193 | printf ("\n Can not open file %s\n", filename[i]);
194 | exit (1);
195 | }
196 | snp_list[i] = fill_snp_list (snpfile, &contig_snp_count[i], i + 1);
197 | fclose (snpfile);
198 | printf ("\nFinished reading dbSNP file: %s. Found %d SNPs.\n", filename[i], contig_snp_count[i]);
199 | }
200 | fclose(snpfile_idx);
201 |
202 | // read target file
203 | read_var ("\nName of file with amplicon target coordinates\n", ss);
204 | if ((target_ampfile = fopen (ss, "r")) == (FILE *) NULL)
205 | {
206 | printf ("\nCould Not Open file with amplicon target coordinates %s\n", ss);
207 | exit (1);
208 | }
209 | N_targets = line_count (target_ampfile);
210 | fseek (target_ampfile, 0, SEEK_SET);
211 | target_amp_list = fill_amp_list (target_ampfile, N_targets);
212 | unsigned int total_size_toamp = 0;
213 | fclose (target_ampfile);
214 |
215 | for (i = 0; i < N_targets; i++)
216 | {
217 | printf ("\nSearching for Primers target #%d: %s chr%d:%d-%d\n",
218 | i + 1,
219 | target_amp_list[i]->name,
220 | target_amp_list[i]->chrom, target_amp_list[i]->start_pos + 1, target_amp_list[i]->stop_pos + 1);
221 | total_size_toamp += 1 + target_amp_list[i]->stop_pos - target_amp_list[i]->start_pos;
222 | }
223 | printf ("\nFinished reading amplicon target coordinates\n\tFound %d targets with a total_size of %u\n", N_targets,total_size_toamp);
224 |
225 | //PCR primer parameters
226 | read_var ("Minimum Primer Length\n", ss);
227 | min_primer = atoi (ss);
228 |
229 | read_var ("Maximum Primer Length\n", ss);
230 | max_primer = atoi (ss);
231 |
232 | read_var ("Minimum Amplicon Length\n", ss);
233 | amp_min = atoi (ss);
234 |
235 | read_var ("Maximum Amplicon Length\n", ss);
236 | amp_max = atoi (ss);
237 |
238 | read_var ("Minimum GC content [0..1.0]\n", ss);
239 | min_gc = (double) atof (ss);
240 |
241 | read_var ("Maximum GC content [0..1.0]\n", ss);
242 | max_gc = (double) atof (ss);
243 |
244 | read_var ("Minimum tm_primer in degrees C\n", ss);
245 | min_tm = (double) atof (ss);
246 |
247 | read_var ("Maximum tm_primer in degrees C\n", ss);
248 | max_tm = (double) atof (ss);
249 |
250 | read_var ("Maximum number of primer pairs to pool together\n", ss);
251 | pool_size = atoi (ss);
252 |
253 | read_var ("Pad size\n", ss);
254 | pad_size = atoi (ss);
255 |
256 | // sanity check that pad size
257 | if (pad_size <= max_primer)
258 | {
259 | printf("\nERROR: pad size must be larger than the maximum size of a primer\n");
260 | exit(1);
261 | }
262 | else if (pad_size > 3 * amp_max)
263 | {
264 | printf("\nERROR: pad size is unrealistically large, i.e. 3 times the size of the maximum amplicon length\n");
265 | exit(1);
266 | }
267 |
268 | read_var ("Tm increment (0.5 to 4.0)\n", ss);
269 | tm_inc = (double) atof (ss);
270 |
271 | if (tm_inc > 4 || tm_inc <0.5)
272 | {
273 | printf("\nError please choose an increment between 0.5 and 4 C\n");
274 | exit(1);
275 | }
276 |
277 | // print header for outfile
278 | fprintf (outfile, "Primer_number\tForward_primer\tForward_Tm\tForward_GC\tReverse_primer\tReverse_Tm\tReverse_GC\tChr\t");
279 | fprintf (outfile, "Forward_start_position\tForward_stop_position\tReverse_start_position\tReverse_stop_position\t");
280 | fprintf (outfile, "Product_length\tProduct_GC\tProduct_tm\tProduct\n");
281 |
282 | // allocate some memory for indexed genome
283 | j = 4;
284 | for (i = 0; i < idepth; i++)
285 | j *= 4;
286 | j = (j - 4) / 3;
287 |
288 | total_index = j;
289 | printf ("\n Determined the size of flat index to be %d * %lu = %lu bytes\n",
290 | total_index, sizeof (int), total_index * sizeof (int));
291 |
292 | flat_index = ivector (0, total_index);
293 |
294 | for (i = 0; i <= total_index; i++)
295 | flat_index[i] = 0;
296 |
297 | for (fasta = 0; fasta < N; fasta++)
298 | {
299 | j = contig_length[fasta];
300 | if (j % 4 == 0)
301 | j /= 4;
302 | else
303 | j = j / 4 + 1;
304 |
305 | compressed_map[fasta] = ucvector (0, j + 1);
306 | printf ("\n For chromosome %d we are going to read %d bytes\n", fasta, j);
307 | k = fread (compressed_map[fasta], sizeof (unsigned char), j, cfile);
308 |
309 | if (k != j)
310 | {
311 | printf ("\nCompressed Sequence %d %s should have been length %d but was %d\n",
312 | fasta, contig_descript[fasta], j, k);
313 | exit (1);
314 | }
315 | }
316 | fclose (cfile);
317 | k = fread (flat_index, sizeof (int), total_index, idfile);
318 |
319 | if (k < total_index)
320 | {
321 | printf ("\nIndexed of N'mers should have been length %d but was %d\n", total_index, k);
322 | exit (1);
323 | }
324 | fclose (idfile);
325 |
326 | // allocated memory for amp_pool => array of regions captured.
327 | PNODE **all_primer_pairs;
328 | int max_regions = (1 + maxim(5*total_size_toamp / amp_min,N_targets*2));
329 | int max_ppairs = MAX_PAIRS * max_regions;
330 | printf ("\n We have max_regions = %d max_pairs = %d \n", max_regions, max_ppairs);
331 | all_primer_pairs = malloc ((unsigned) max_ppairs * sizeof (PNODE *));
332 | if (!all_primer_pairs)
333 | {
334 | printf ("\n Could not allocate space for %u primer pairs \n", max_ppairs);
335 | exit (1);
336 | }
337 | int **redundant_list, *best_start;
338 | redundant_list = imatrix (0, max_ppairs, 0, MAX_PAIRS);
339 | best_start = ivector (0, max_regions);
340 | for(i=0;i<=max_regions;i++)
341 | best_start[i] = -1;
342 | for (i = 0; i <= max_ppairs; i++)
343 | {
344 | for (j = 0; j <= MAX_PAIRS; j++)
345 | redundant_list[i][j] = -1;
346 | }
347 | int amp_pool_count = 0;
348 | int primer_count = 0;
349 |
350 | // start finding primers for targets
351 | AMPNODE *loop_amp;
352 | loop_amp = amp_alloc ();
353 | printf("\n loop_amp is located at position %ld in memory \n\n",(long)loop_amp);
354 | for (k = 0; k < N_targets; k++)
355 | {
356 | // set chromosome
357 | target_contig = target_amp_list[k]->chrom - 1;
358 |
359 | // set target start and stop
360 | int target_start = target_amp_list[k]->start_pos + 1;
361 | int target_stop = target_amp_list[k]->stop_pos + 1;
362 |
363 | // set temporary start and stop
364 | int this_start, this_stop, this_midpoint;
365 | if (target_stop - target_start > amp_max)
366 | {
367 | this_start = target_start;
368 | this_stop = target_start + amp_max;
369 | }
370 | else
371 | {
372 | this_midpoint = ((target_stop - target_start) / 2) + target_start;
373 | this_start = this_midpoint - (amp_min / 2);
374 | this_stop = this_midpoint + (amp_min / 2);
375 | }
376 |
377 | // while loop variable: call it the "region" loop
378 | int not_covered = 1;
379 |
380 | // print out target info
381 | printf ("\nSearching for Primers target #%d: %s chr%d:%d-%d\n\n",
382 | k + 1,
383 | target_amp_list[k]->name,
384 | target_amp_list[k]->chrom, target_amp_list[k]->start_pos + 1, target_amp_list[k]->stop_pos + 1);
385 | printf ("\nStarting params this_start = %d, this_stop = %d\n\n", this_start, this_stop);
386 |
387 | // variables:
388 | // target_start, target_stop => region to cover with amplicons
389 | // this_start, this_stop => region to cover with the specific iteration of the while loop
390 | // genome_start, genome_stop => coordinates used to extract actual genomic region to target
391 | //
392 |
393 |
394 | while (not_covered)
395 | {
396 | printf("\n 2 loop_amp is located at position %ld in memory \n\n",(long)loop_amp);
397 |
398 | // start site - make sure it's a multiple of 4
399 | genome_start = maxim (1, this_start - pad_size);
400 | j = genome_start / 4;
401 | genome_start = j * 4;
402 |
403 | // stop site - make sure it's a multiple of 4
404 | genome_stop = (this_stop + pad_size) / 4;
405 | genome_stop = minim (genome_stop * 4, contig_length[target_contig]);
406 |
407 | // length of region
408 | j = (genome_stop - genome_start);
409 | j = minim (j, contig_length[target_contig]);
410 |
411 | // set loop_amp attributes
412 | loop_amp->start_pos = this_start + 1;
413 | loop_amp->stop_pos = this_stop + 1;
414 | loop_amp->chrom = target_amp_list[k]->chrom;
415 | loop_amp->no_pairs = 0;
416 | sprintf (loop_amp->name, "%s_%02d", target_amp_list[k]->name, amp_pool_count);
417 |
418 | // tm range
419 | int this_min_tm = min_tm;
420 | int this_max_tm = max_tm;
421 |
422 | // while loop variables: call it the "specific target" loop
423 | int found_count = 0;
424 | int this_trial = 0;
425 |
426 | // scratch pad
427 | if (j > 2000)
428 | {
429 | printf ("\nj (length) is too big %d\n\n", j);
430 | exit (1);
431 | }
432 | // allocate memory for scratch pad
433 | scratch_pad = cvector (0, j + 5);
434 | scratch_pad[0] = '\0';
435 | decode_basepairs (&compressed_map[target_contig][genome_start / 4], scratch_pad, j / 4);
436 |
437 | do
438 | {
439 | found_count = find_primers (snp_list[target_contig],
440 | loop_amp,
441 | contig_snp_count[target_contig], // number of snps in the contig
442 | flat_index, scratch_pad, // copy of the target
443 | j, // length of region
444 | min_primer, max_primer, amp_max, amp_min, min_gc, max_gc, this_min_tm, this_max_tm,
445 | idepth, // index depth
446 | 10, // local depth
447 | j / 2, // target base (from old primer_snp.c program)
448 | genome_start + 1,
449 | highmer,
450 | repeat_list[target_contig],
451 | no_repeats[target_contig],
452 | pad_size, // size on either end of the contig to look for primer
453 | loop_amp->chrom);
454 | this_min_tm -= tm_inc;
455 | this_max_tm += tm_inc;
456 | this_trial++;
457 | if (this_min_tm < min_tm || this_max_tm > max_tm) {
458 | break;
459 | }
460 | } while (this_trial < 10 && found_count < 1);
461 | printf("\n 3 loop_amp is located at position %ld in memory \n\n",(long)loop_amp);
462 |
463 | free_cvector (scratch_pad, 0, j + 5);
464 | int nearest_stop = 0;
465 | if (found_count > 0)
466 | {
467 | if (loop_amp->no_pairs > 0)
468 | {
469 | if (amp_pool_count == max_ppairs)
470 | {
471 | printf ("ERROR: exceeded the maximum number of primer pairs allocated: %d\n\n", max_ppairs);
472 | exit (1);
473 | }
474 |
475 | printf ("\nBED: chr%d\t%d\t%d\n", loop_amp->chrom, this_start, this_stop);
476 |
477 | int start_primer_count = primer_count;
478 |
479 | printf("\n\nabout to fill the array of primers start = %d with %d pairs coming\n\n",
480 | start_primer_count,loop_amp->no_pairs);
481 |
482 | for (j = 0; j < loop_amp->no_pairs; j++)
483 | all_primer_pairs[primer_count++] = loop_amp->pair[j];
484 | int jj;
485 | for (j = start_primer_count; j < primer_count; j++)
486 | {
487 | for (jj = 0; jj < loop_amp->no_pairs; jj++)
488 | {
489 | printf("\n\nj = %d, jj = %d, start_primer_count = %d, primer_count = %d\n\n", j, jj, start_primer_count, primer_count);
490 | redundant_list[j][jj] = start_primer_count + jj;
491 | printf("\n 4 loop_amp is located at position %ld in memory \n\n",(long)loop_amp);
492 | }
493 | }
494 | printf("\n Made it here \n\n");
495 | best_start[amp_pool_count] = start_primer_count;
496 | amp_pool_count++;
497 |
498 | for (j = 0; j < loop_amp->no_pairs; j++)
499 | {
500 | if (nearest_stop == 0)
501 | nearest_stop = loop_amp->pair[j]->reverse->start;
502 | else if (loop_amp->pair[j]->reverse->start < nearest_stop)
503 | nearest_stop = loop_amp->pair[j]->reverse->start;
504 | }
505 | }
506 | }
507 | printf("\n\nnearest stop is %d\n\n", nearest_stop);
508 |
509 | if (nearest_stop > 0)
510 | {
511 | this_start = nearest_stop;
512 | this_stop = this_start + amp_min;
513 | }
514 | else
515 | {
516 | this_start += pad_size;
517 | this_stop = this_start + amp_min;
518 | }
519 | if (nearest_stop >= target_stop || this_start > target_stop)
520 | not_covered = 0;
521 | }
522 | }
523 |
524 | char **poolable_matrix;
525 | int *poolable_count;
526 | poolable_count = ivector (0, primer_count);
527 | for (i = 0; i < primer_count; i++)
528 | poolable_count[i] = 0;
529 | poolable_matrix = cmatrix (0, primer_count, 0, primer_count);
530 | for (i = 0; i < primer_count; i++)
531 | for (j = i + 1; j < primer_count; j++)
532 | {
533 | double max_amp_diff = (double) (amp_max * 0.15) + 1;
534 | poolable_matrix[i][j] = is_poolable_primer (all_primer_pairs[i], all_primer_pairs[j], (int) max_amp_diff, 2);
535 | poolable_matrix[j][i] = poolable_matrix[i][j];
536 | poolable_count[i] += poolable_matrix[i][j];
537 | poolable_count[j] += poolable_matrix[i][j];
538 | }
539 | for (i = 0; i < amp_pool_count; i++)
540 | if (best_start[i] >= 0)
541 | {
542 | int k = best_start[i];
543 | for (j = 0; j < MAX_PAIRS; j++)
544 | if (redundant_list[k][j] >= 0)
545 | if (poolable_count[redundant_list[k][j]] > poolable_count[best_start[i]])
546 | best_start[i] = redundant_list[k][j];
547 | }
548 | //
549 | // print cmat
550 | //
551 | //printf ("\n");
552 | //for (i = 0; i < primer_count; i++)
553 | //printf ("\t%s_%s_%03d", all_primer_pairs[i]->forward->sequence, all_primer_pairs[i]->reverse->sequence, i);
554 | //for (i = 0; i < primer_count; i++)
555 | //{
556 | //printf ("\n%s_%s_%03d", all_primer_pairs[i]->forward->sequence, all_primer_pairs[i]->reverse->sequence, i);
557 | //for (j = 0; j < primer_count; j++)
558 | //if (i != j)
559 | //printf ("\t%d", (int) poolable_matrix[i][j]);
560 | //else
561 | //printf ("\t.");
562 | //}
563 | //printf ("\n");
564 | //
565 | // print best start matrix
566 | //
567 | // for (i = 0; i < amp_pool_count; i++)
568 | // {
569 | // int k = best_start[i];
570 | // for (j = 0; j < MAX_PAIRS; j++)
571 | // if (redundant_list[k][j] >= 0)
572 | // printf (" %03d", redundant_list[k][j]);
573 | // else
574 | // printf (" .");
575 | // printf ("\t| Region: %03d\t| Primer: %03d\t| Poolable Count: %03d\n", i, best_start[i], poolable_count[best_start[i]]);
576 | // }
577 | //
578 | // redundant matrix
579 | //
580 | // printf("\n\n");
581 | // for (i=0; i= 0)
585 | // printf (" %03d", redundant_list[i][j]);
586 | // else
587 | // printf (" .");
588 | // printf("\n");
589 | // }
590 |
591 | printf ("\n\n going to make_pools with amp_pools = %d, primer_count = %d\n", amp_pool_count, primer_count);
592 | int *current_pool;
593 | current_pool = ivector (0, 20);
594 | make_less_greedy_pools (outfile, all_primer_pairs, poolable_matrix, poolable_count, redundant_list, best_start, amp_pool_count,
595 | primer_count, primer_count, current_pool, 0, pool_size);
596 | return 0;
597 | }
598 |
599 |
--------------------------------------------------------------------------------
/src/mpd_moreGreedy.c:
--------------------------------------------------------------------------------
1 | #include "mpd.h"
2 | #include "mem.h"
3 |
4 | static FILE *outfile;
5 |
6 | int
7 | main ()
8 | {
9 | char ss[256], sss[4196], **filename;
10 | char *scratch_pad, **contig_descript;
11 | unsigned char **compressed_map, *highmer;
12 | int i, j, k, N, N_snpfiles, *contig_snp_count, *contig_length;
13 | int pad_size, fasta, idepth, target_contig, total_index;
14 | int *flat_index, old_index, ***repeat_list, *no_repeats;
15 | int amp_min, amp_max, pool_size, N_targets;
16 | int min_primer, max_primer, genome_start, genome_stop;
17 | FILE *sfile, *cfile, *idfile, *rfile, *highfile, *snpfile_idx, *snpfile, *target_ampfile;
18 | double max_gc, min_gc, min_tm, max_tm, tm_inc;
19 | SNODE ***snp_list;
20 | AMPNODE **target_amp_list;
21 |
22 | outfile = stdout;
23 | read_var ("\nSend Output to Screen or Disk? [S,D]\n", ss);
24 |
25 | if ((strchr (ss, 'D')) || (strchr (ss, 'd')))
26 | {
27 | read_var ("Please Enter File Name for Output\n", ss);
28 | if ((outfile = fopen (ss, "w")) == (FILE *) NULL)
29 | {
30 | printf ("\n Can not open file %s\n", ss);
31 | exit (1);
32 | }
33 | }
34 | else
35 | outfile = stdout;
36 |
37 | old_index = TRUE;
38 |
39 | read_var ("Primer Picker Summary Filename (e.g., index summary like hg19.sdx)\n", ss);
40 | if ((sfile = fopen (ss, "r")) == (FILE *) NULL)
41 | {
42 | printf ("\nCould Not Open file %s\n", ss);
43 | exit (1);
44 | }
45 |
46 | // sdx file: read the 1st line that contains an int of contigs that are indexed
47 | fgets (sss, 4195, sfile);
48 | N = atoi (sss);
49 | printf ("\n There are N chromosomes %d \n\n", N);
50 |
51 | compressed_map = (unsigned char **) malloc ((unsigned) (N + 1) * sizeof (unsigned char *));
52 | if (!compressed_map)
53 | log_err ("allocation failure for compressed_map");
54 |
55 | contig_descript = cmatrix (0, N, 0, 4196);
56 | contig_length = ivector (0, N);
57 | no_repeats = ivector (0, N);
58 |
59 | repeat_list = (int ***) malloc ((unsigned) (N + 1) * sizeof (int **));
60 | if (!repeat_list)
61 | log_err ("allocation failure for repeat_list");
62 |
63 |
64 | // sdx file: read in contig information that is in the format: contig_lenght number_repeats contig_description
65 | for (i = 0; i < N; i++)
66 | {
67 | fgets (sss, 4195, sfile);
68 | sscanf (sss, "%d\t%d\t%s", &contig_length[i], &no_repeats[i], contig_descript[i]);
69 | repeat_list[i] = imatrix (0, no_repeats[i], 0, 1);
70 | printf ("\n Contig %d is named %s and is length %d\n", i, contig_descript[i], contig_length[i]);
71 | }
72 |
73 | // sdx file: read in int representing depth of coverage
74 | fgets (sss, 4195, sfile);
75 | idepth = atoi (sss);
76 |
77 | // printf("\n Indexing to a depth of %d \n",idepth);
78 |
79 | // sdx file: read next line - should be hg19.cdx
80 | fgets (sss, 4195, sfile);
81 | for (i = 0; i < 4194; i++)
82 | if (isspace (sss[i]))
83 | {
84 | sss[i] = '\0';
85 | i = 4195;
86 | }
87 |
88 | // cdx file: set file
89 | if ((cfile = fopen (sss, "r")) == (FILE *) NULL)
90 | {
91 | printf ("\nCould Not Open 1 file \"%s\"\n", sss);
92 | exit (1);
93 | }
94 |
95 | // sdx file: read next line - should be hg19.idx
96 | fgets (sss, 4195, sfile);
97 | for (i = 0; i < 4194; i++)
98 | if (isspace (sss[i]))
99 | {
100 | sss[i] = '\0';
101 | i = 4195;
102 | }
103 |
104 | // idx file: set file
105 | if ((idfile = fopen (sss, "r")) == (FILE *) NULL)
106 | {
107 | printf ("\nCould Not Open 2 file \"%s\"\n", sss);
108 | exit (1);
109 | }
110 |
111 | // sdx file: read next line - should be hg19.rdx
112 | fgets (sss, 4195, sfile);
113 | for (i = 0; i < 4194; i++)
114 | if (isspace (sss[i]))
115 | {
116 | sss[i] = '\0';
117 | i = 4195;
118 | }
119 |
120 | // rdx file: set file
121 | if ((rfile = fopen (sss, "r")) == (FILE *) NULL)
122 | {
123 | printf ("\nCould Not Open 3 file \"%s\"\n", sss);
124 | exit (1);
125 | }
126 |
127 | //sdx file: read next line - should be hg19.15x
128 | fgets (sss, 4195, sfile);
129 | for (i = 0; i < 4194; i++)
130 | if (isspace (sss[i]))
131 | {
132 | sss[i] = '\0';
133 | i = 4195;
134 | }
135 |
136 | // 'highfile' or 'hg19.15x': set file
137 | if ((highfile = fopen (sss, "r")) == (FILE *) NULL)
138 | {
139 | printf ("\nCould Not Open 4 file \"%s\"\n", sss);
140 | exit (1);
141 | }
142 | fclose (sfile);
143 |
144 | printf ("\nAbout to read repeat file\n\n");
145 |
146 | // index Description i number of contigs: N j number of repeats in a contig: no_repeats[i] k not sure location of start of repeat until end of repeat
147 | for (i = 0; i < N; i++)
148 | {
149 | for (j = 0; j < no_repeats[i]; j++)
150 | for (k = 0; k < 2; k++)
151 | if ((fread (&repeat_list[i][j][k], sizeof (int), 1, rfile)) < 1)
152 | {
153 | printf ("\n Expected to read %d repeats for contig %d, but got %d\n\n", no_repeats[i], i, j);
154 | exit (1);
155 | }
156 | // for(j=0;j<1;j++) printf("\nFor contig %d repeat %d goes from %d %d\n\n", i,j,repeat_list[i][j][0],repeat_list[i][j][1]);
157 | }
158 | fclose (rfile);
159 | printf ("\nFinished reading repeat file");
160 |
161 | printf ("\nAbout to read 15mer file\n\n");
162 | highmer = ucvector (0, 134217728);
163 | if ((j = fread (highmer, sizeof (unsigned char), 134217728, highfile)) != 134217728)
164 | {
165 | printf ("\n Expected to read %d 15mers but got %d\n", 134217728, j);
166 | exit (1);
167 | }
168 | fclose (highfile);
169 | printf ("\nFinished reading 15mer file\n");
170 |
171 | //read dbSNP information -> summary file => actual files fill ***snp_list
172 | read_var ("Name of dbSNP summary file\n", ss);
173 | if ((snpfile_idx = fopen (ss, "r")) == (FILE *) NULL)
174 | {
175 | printf ("\nCould Not Open dbsnp file %s\n", ss);
176 | exit (1);
177 | }
178 |
179 | // dbsnp summary file line 1 = number of contigs/chr
180 | fgets (sss, 4195, snpfile_idx);
181 | N_snpfiles = atoi (sss);
182 |
183 | // read dbsnp files
184 | contig_snp_count = ivector (0, N_snpfiles);
185 | snp_list = (SNODE ***) malloc (N_snpfiles * sizeof (SNODE **));
186 | filename = cmatrix (0, N_snpfiles, 0, 256);
187 | for (i = 0; i < N_snpfiles; i++)
188 | {
189 | fgets (sss, 4195, snpfile_idx);
190 | sscanf (sss, "%s", filename[i]);
191 | if ((snpfile = fopen (filename[i], "r")) == (FILE *) NULL)
192 | {
193 | printf ("\n Can not open file %s\n", filename[i]);
194 | exit (1);
195 | }
196 | snp_list[i] = fill_snp_list (snpfile, &contig_snp_count[i], i + 1);
197 | fclose (snpfile);
198 | printf ("\nFinished reading dbSNP file: %s. Found %d SNPs.\n", filename[i], contig_snp_count[i]);
199 | }
200 | fclose(snpfile_idx);
201 |
202 | // read target file
203 | read_var ("\nName of file with amplicon target coordinates\n", ss);
204 | if ((target_ampfile = fopen (ss, "r")) == (FILE *) NULL)
205 | {
206 | printf ("\nCould Not Open file with amplicon target coordinates %s\n", ss);
207 | exit (1);
208 | }
209 | N_targets = line_count (target_ampfile);
210 | fseek (target_ampfile, 0, SEEK_SET);
211 | target_amp_list = fill_amp_list (target_ampfile, N_targets);
212 | unsigned int total_size_toamp = 0;
213 | fclose (target_ampfile);
214 |
215 | for (i = 0; i < N_targets; i++)
216 | {
217 | printf ("\nSearching for Primers target #%d: %s chr%d:%d-%d\n",
218 | i + 1,
219 | target_amp_list[i]->name,
220 | target_amp_list[i]->chrom, target_amp_list[i]->start_pos + 1, target_amp_list[i]->stop_pos + 1);
221 | total_size_toamp += 1 + target_amp_list[i]->stop_pos - target_amp_list[i]->start_pos;
222 | }
223 | printf ("\nFinished reading amplicon target coordinates\n\tFound %d targets with a total_size of %u\n", N_targets,total_size_toamp);
224 |
225 | //PCR primer parameters
226 | read_var ("Minimum Primer Length\n", ss);
227 | min_primer = atoi (ss);
228 |
229 | read_var ("Maximum Primer Length\n", ss);
230 | max_primer = atoi (ss);
231 |
232 | read_var ("Minimum Amplicon Length\n", ss);
233 | amp_min = atoi (ss);
234 |
235 | read_var ("Maximum Amplicon Length\n", ss);
236 | amp_max = atoi (ss);
237 |
238 | read_var ("Minimum GC content [0..1.0]\n", ss);
239 | min_gc = (double) atof (ss);
240 |
241 | read_var ("Maximum GC content [0..1.0]\n", ss);
242 | max_gc = (double) atof (ss);
243 |
244 | read_var ("Minimum tm_primer in degrees C\n", ss);
245 | min_tm = (double) atof (ss);
246 |
247 | read_var ("Maximum tm_primer in degrees C\n", ss);
248 | max_tm = (double) atof (ss);
249 |
250 | read_var ("Maximum number of primer pairs to pool together\n", ss);
251 | pool_size = atoi (ss);
252 |
253 | read_var ("Pad size\n", ss);
254 | pad_size = atoi (ss);
255 |
256 | // sanity check that pad size
257 | if (pad_size <= max_primer)
258 | {
259 | printf("\nERROR: pad size must be larger than the maximum size of a primer\n");
260 | exit(1);
261 | }
262 | else if (pad_size > 3 * amp_max)
263 | {
264 | printf("\nERROR: pad size is unrealistically large, i.e. 3 times the size of the maximum amplicon length\n");
265 | exit(1);
266 | }
267 |
268 | read_var ("Tm increment (0.5 to 4.0)\n", ss);
269 | tm_inc = (double) atof (ss);
270 |
271 | if (tm_inc > 4 || tm_inc <0.5)
272 | {
273 | printf("\nError please choose an increment between 0.5 and 4 C\n");
274 | exit(1);
275 | }
276 |
277 | // print header for outfile
278 | fprintf (outfile, "Primer_number\tForward_primer\tForward_Tm\tForward_GC\tReverse_primer\tReverse_Tm\tReverse_GC\tChr\t");
279 | fprintf (outfile, "Forward_start_position\tForward_stop_position\tReverse_start_position\tReverse_stop_position\t");
280 | fprintf (outfile, "Product_length\tProduct_GC\tProduct_tm\tProduct\n");
281 |
282 | // allocate some memory for indexed genome
283 | j = 4;
284 | for (i = 0; i < idepth; i++)
285 | j *= 4;
286 | j = (j - 4) / 3;
287 |
288 | total_index = j;
289 | printf ("\n Determined the size of flat index to be %d * %lu = %lu bytes\n",
290 | total_index, sizeof (int), total_index * sizeof (int));
291 |
292 | flat_index = ivector (0, total_index);
293 |
294 | for (i = 0; i <= total_index; i++)
295 | flat_index[i] = 0;
296 |
297 | for (fasta = 0; fasta < N; fasta++)
298 | {
299 | j = contig_length[fasta];
300 | if (j % 4 == 0)
301 | j /= 4;
302 | else
303 | j = j / 4 + 1;
304 |
305 | compressed_map[fasta] = ucvector (0, j + 1);
306 | printf ("\n For chromosome %d we are going to read %d bytes\n", fasta, j);
307 | k = fread (compressed_map[fasta], sizeof (unsigned char), j, cfile);
308 |
309 | if (k != j)
310 | {
311 | printf ("\nCompressed Sequence %d %s should have been length %d but was %d\n",
312 | fasta, contig_descript[fasta], j, k);
313 | exit (1);
314 | }
315 | }
316 | fclose (cfile);
317 | k = fread (flat_index, sizeof (int), total_index, idfile);
318 |
319 | if (k < total_index)
320 | {
321 | printf ("\nIndexed of N'mers should have been length %d but was %d\n", total_index, k);
322 | exit (1);
323 | }
324 | fclose (idfile);
325 |
326 | // allocated memory for amp_pool => array of regions captured.
327 | PNODE **all_primer_pairs;
328 | int max_regions = (1 + maxim(5*total_size_toamp / amp_min,N_targets*2));
329 | int max_ppairs = MAX_PAIRS * max_regions;
330 | printf ("\n We have max_regions = %d max_pairs = %d \n", max_regions, max_ppairs);
331 | all_primer_pairs = malloc ((unsigned) max_ppairs * sizeof (PNODE *));
332 | if (!all_primer_pairs)
333 | {
334 | printf ("\n Could not allocate space for %u primer pairs \n", max_ppairs);
335 | exit (1);
336 | }
337 | int **redundant_list, *best_start;
338 | redundant_list = imatrix (0, max_ppairs, 0, MAX_PAIRS);
339 | best_start = ivector (0, max_regions);
340 | for(i=0;i<=max_regions;i++)
341 | best_start[i] = -1;
342 | for (i = 0; i <= max_ppairs; i++)
343 | {
344 | for (j = 0; j <= MAX_PAIRS; j++)
345 | redundant_list[i][j] = -1;
346 | }
347 | int amp_pool_count = 0;
348 | int primer_count = 0;
349 |
350 | // start finding primers for targets
351 | AMPNODE *loop_amp;
352 | loop_amp = amp_alloc ();
353 | printf("\n loop_amp is located at position %ld in memory \n\n",(long)loop_amp);
354 | for (k = 0; k < N_targets; k++)
355 | {
356 | // set chromosome
357 | target_contig = target_amp_list[k]->chrom - 1;
358 |
359 | // set target start and stop
360 | int target_start = target_amp_list[k]->start_pos + 1;
361 | int target_stop = target_amp_list[k]->stop_pos + 1;
362 |
363 | // set temporary start and stop
364 | int this_start, this_stop, this_midpoint;
365 | if (target_stop - target_start > amp_max)
366 | {
367 | this_start = target_start;
368 | this_stop = target_start + amp_max;
369 | }
370 | else
371 | {
372 | this_midpoint = ((target_stop - target_start) / 2) + target_start;
373 | this_start = this_midpoint - (amp_min / 2);
374 | this_stop = this_midpoint + (amp_min / 2);
375 | }
376 |
377 | // while loop variable: call it the "region" loop
378 | int not_covered = 1;
379 |
380 | // print out target info
381 | printf ("\nSearching for Primers target #%d: %s chr%d:%d-%d\n\n",
382 | k + 1,
383 | target_amp_list[k]->name,
384 | target_amp_list[k]->chrom, target_amp_list[k]->start_pos + 1, target_amp_list[k]->stop_pos + 1);
385 | printf ("\nStarting params this_start = %d, this_stop = %d\n\n", this_start, this_stop);
386 |
387 | // variables:
388 | // target_start, target_stop => region to cover with amplicons
389 | // this_start, this_stop => region to cover with the specific iteration of the while loop
390 | // genome_start, genome_stop => coordinates used to extract actual genomic region to target
391 | //
392 |
393 |
394 | while (not_covered)
395 | {
396 | printf("\n 2 loop_amp is located at position %ld in memory \n\n",(long)loop_amp);
397 |
398 | // start site - make sure it's a multiple of 4
399 | genome_start = maxim (1, this_start - pad_size);
400 | j = genome_start / 4;
401 | genome_start = j * 4;
402 |
403 | // stop site - make sure it's a multiple of 4
404 | genome_stop = (this_stop + pad_size) / 4;
405 | genome_stop = minim (genome_stop * 4, contig_length[target_contig]);
406 |
407 | // length of region
408 | j = (genome_stop - genome_start);
409 | j = minim (j, contig_length[target_contig]);
410 |
411 | // set loop_amp attributes
412 | loop_amp->start_pos = this_start + 1;
413 | loop_amp->stop_pos = this_stop + 1;
414 | loop_amp->chrom = target_amp_list[k]->chrom;
415 | loop_amp->no_pairs = 0;
416 | sprintf (loop_amp->name, "%s_%02d", target_amp_list[k]->name, amp_pool_count);
417 |
418 | // tm range
419 | int this_min_tm = min_tm;
420 | int this_max_tm = max_tm;
421 |
422 | // while loop variables: call it the "specific target" loop
423 | int found_count = 0;
424 | int this_trial = 0;
425 |
426 | // scratch pad
427 | if (j > 2000)
428 | {
429 | printf ("\nj (length) is too big %d\n\n", j);
430 | exit (1);
431 | }
432 | // allocate memory for scratch pad
433 | scratch_pad = cvector (0, j + 5);
434 | scratch_pad[0] = '\0';
435 | decode_basepairs (&compressed_map[target_contig][genome_start / 4], scratch_pad, j / 4);
436 |
437 | do
438 | {
439 | found_count = find_primers (snp_list[target_contig],
440 | loop_amp,
441 | contig_snp_count[target_contig], // number of snps in the contig
442 | flat_index,
443 | scratch_pad, // copy of the target
444 | j, // length of region
445 | min_primer, max_primer, amp_max, amp_min, min_gc, max_gc, this_min_tm, this_max_tm,
446 | idepth, // index depth
447 | 10, // local depth
448 | j / 2, // target base (from old primer_snp.c program)
449 | genome_start + 1,
450 | highmer,
451 | repeat_list[target_contig],
452 | no_repeats[target_contig],
453 | pad_size, // size on either end of the contig to look for primer
454 | loop_amp->chrom);
455 | this_min_tm -= tm_inc;
456 | this_max_tm += tm_inc;
457 | this_trial++;
458 | if (this_min_tm < min_tm || this_max_tm > max_tm) {
459 | break;
460 | }
461 | } while (this_trial < 10 && found_count < 1);
462 | printf("\n 3 loop_amp is located at position %ld in memory \n\n",(long)loop_amp);
463 |
464 |
465 | free_cvector (scratch_pad, 0, j + 5);
466 | int nearest_stop = 0;
467 | if (found_count > 0)
468 | {
469 | if (loop_amp->no_pairs > 0)
470 | {
471 | if (amp_pool_count == max_ppairs)
472 | {
473 | printf ("ERROR: exceeded the maximum number of primer pairs allocated: %d\n\n", max_ppairs);
474 | exit (1);
475 | }
476 |
477 | printf ("\nBED: chr%d\t%d\t%d\n", loop_amp->chrom, this_start, this_stop);
478 |
479 | int start_primer_count = primer_count;
480 |
481 | printf("\n\nabout to fill the array of primers start = %d with %d pairs coming\n\n",
482 | start_primer_count,loop_amp->no_pairs);
483 |
484 | for (j = 0; j < loop_amp->no_pairs; j++)
485 | all_primer_pairs[primer_count++] = loop_amp->pair[j];
486 | int jj;
487 | for (j = start_primer_count; j < primer_count; j++)
488 | {
489 | for (jj = 0; jj < loop_amp->no_pairs; jj++)
490 | {
491 | printf("\n\nj = %d, jj = %d, start_primer_count = %d, primer_count = %d\n\n", j, jj, start_primer_count, primer_count);
492 | redundant_list[j][jj] = start_primer_count + jj;
493 | printf("\n 4 loop_amp is located at position %ld in memory \n\n",(long)loop_amp);
494 | }
495 | }
496 | printf("\n Made it here \n\n");
497 | best_start[amp_pool_count] = start_primer_count;
498 | amp_pool_count++;
499 |
500 | for (j = 0; j < loop_amp->no_pairs; j++)
501 | {
502 | if (nearest_stop == 0)
503 | nearest_stop = loop_amp->pair[j]->reverse->start;
504 | else if (loop_amp->pair[j]->reverse->start < nearest_stop)
505 | nearest_stop = loop_amp->pair[j]->reverse->start;
506 | }
507 | }
508 | }
509 | printf("\n\nnearest stop is %d\n\n", nearest_stop);
510 |
511 | if (nearest_stop > 0)
512 | {
513 | this_start = nearest_stop;
514 | this_stop = this_start + amp_min;
515 | }
516 | else
517 | {
518 | this_start += pad_size;
519 | this_stop = this_start + amp_min;
520 | }
521 | if (nearest_stop >= target_stop || this_start > target_stop)
522 | not_covered = 0;
523 | }
524 | }
525 |
526 | char **poolable_matrix;
527 | int *poolable_count;
528 | poolable_count = ivector (0, primer_count);
529 | for (i = 0; i < primer_count; i++)
530 | poolable_count[i] = 0;
531 | poolable_matrix = cmatrix (0, primer_count, 0, primer_count);
532 | for (i = 0; i < primer_count; i++)
533 | for (j = i + 1; j < primer_count; j++)
534 | {
535 | double max_amp_diff = (double) (amp_max * 0.15) + 1;
536 | poolable_matrix[i][j] = is_poolable_primer (all_primer_pairs[i], all_primer_pairs[j], (int) max_amp_diff, 2);
537 | poolable_matrix[j][i] = poolable_matrix[i][j];
538 | poolable_count[i] += poolable_matrix[i][j];
539 | poolable_count[j] += poolable_matrix[i][j];
540 | }
541 | for (i = 0; i < amp_pool_count; i++)
542 | if (best_start[i] >= 0)
543 | {
544 | int k = best_start[i];
545 | for (j = 0; j < MAX_PAIRS; j++)
546 | if (redundant_list[k][j] >= 0)
547 | if (poolable_count[redundant_list[k][j]] > poolable_count[best_start[i]])
548 | best_start[i] = redundant_list[k][j];
549 | }
550 | //
551 | // print cmat
552 | //
553 | //printf ("\n");
554 | //for (i = 0; i < primer_count; i++)
555 | //printf ("\t%s_%s_%03d", all_primer_pairs[i]->forward->sequence, all_primer_pairs[i]->reverse->sequence, i);
556 | //for (i = 0; i < primer_count; i++)
557 | //{
558 | //printf ("\n%s_%s_%03d", all_primer_pairs[i]->forward->sequence, all_primer_pairs[i]->reverse->sequence, i);
559 | //for (j = 0; j < primer_count; j++)
560 | //if (i != j)
561 | //printf ("\t%d", (int) poolable_matrix[i][j]);
562 | //else
563 | //printf ("\t.");
564 | //}
565 | //printf ("\n");
566 | //
567 | // print best start matrix
568 | //
569 | // for (i = 0; i < amp_pool_count; i++)
570 | // {
571 | // int k = best_start[i];
572 | // for (j = 0; j < MAX_PAIRS; j++)
573 | // if (redundant_list[k][j] >= 0)
574 | // printf (" %03d", redundant_list[k][j]);
575 | // else
576 | // printf (" .");
577 | // printf ("\t| Region: %03d\t| Primer: %03d\t| Poolable Count: %03d\n", i, best_start[i], poolable_count[best_start[i]]);
578 | // }
579 | //
580 | // redundant matrix
581 | //
582 | // printf("\n\n");
583 | // for (i=0; i= 0)
587 | // printf (" %03d", redundant_list[i][j]);
588 | // else
589 | // printf (" .");
590 | // printf("\n");
591 | // }
592 |
593 | printf ("\n\n going to make_pools with amp_pools = %d, primer_count = %d\n", amp_pool_count, primer_count);
594 | int *current_pool;
595 | current_pool = ivector (0, 20);
596 | make_greedy_pools ( outfile, all_primer_pairs, poolable_matrix, poolable_count, redundant_list, best_start, amp_pool_count, primer_count, primer_count, current_pool, 0, pool_size);
597 | return 0;
598 | }
599 |
--------------------------------------------------------------------------------
/src/pool_check.c:
--------------------------------------------------------------------------------
1 | #include "mpd.h"
2 |
3 | // TS Wingo
4 | // created: 2013-10-28
5 | // updated: 2015-08-02
6 | // checks that the pools created by primer4amplicons.c
7 | // are actually poolable. It needs the input stripped of
8 | // lines that don't contain primers and this is done
9 | // using p4a_2_pool_check.pl
10 |
11 | // Entry point for pool_check.
12 | //   argv[1] is handed to read_primer_pools(), argv[2] to Print_isPcr() and
13 | //   argv[3] is parsed as the maximum amplicon length for Check_all_pools().
14 | // Returns 0 after all three steps ran and 1 through the `error` label, which
15 | // is reached on an invalid argv[3] (and presumably from the usage() macro
16 | // when argc != 4 -- the macro is defined outside this file, confirm there).
17 | int main (int argc, char **argv)
18 | {
19 |   usage(argc == 4, "pool_check " );
20 |
21 |   // capacity limits passed to the allocation and reading helpers below
22 |   int max_primers_in_pool = 20;
23 |   int max_primer_pairs = 1000;
24 |
25 |   // atoi() silently turned a non-numeric argv[3] into 0; parse strictly and
26 |   // bail out before anything is allocated if it is not a positive int.
27 |   char *parse_end = NULL;
28 |   long parsed_length = strtol(argv[3], &parse_end, 10);
29 |   if (parse_end == argv[3] || *parse_end != '\0' || parsed_length <= 0
30 |       || parsed_length != (long) (int) parsed_length)   // must round-trip through int
31 |   {
32 |     fprintf(stderr, "pool_check: invalid maximum amplicon length '%s'\n", argv[3]);
33 |     goto error;
34 |   }
35 |   int max_amplicon_length = (int) parsed_length;
36 |
37 |   // initialize
38 |   int *primers_in_pool = create_ivec (max_primer_pairs);
39 |   PNODE ***primer_pool = primer_pool_create (max_primers_in_pool, max_primer_pairs);
40 |
41 |   // read data
42 |   int max_pools = read_primer_pools( argv[1], max_primer_pairs, max_primers_in_pool,
43 |       primers_in_pool, primer_pool);
44 |
45 |   // check pools
46 |   Check_all_pools( max_pools, primers_in_pool, primer_pool, max_amplicon_length);
47 |
48 |   // print isPcr
49 |   Print_isPcr( argv[2], max_pools, primers_in_pool, primer_pool );
50 |
51 |   return 0;
52 |
53 | error:
54 |   return 1;
55 | }
56 |
--------------------------------------------------------------------------------
/src/primer_compat.c:
--------------------------------------------------------------------------------
1 | #include "mpd.h"
2 |
3 | // TS Wingo
4 | // 10-30-2013
5 | // checks whether one primer pair is compatible with another primer pair
6 |
7 | // Reports whether primer pair 1 (argv[1] forward, argv[2] reverse) forms a
8 | // dimer with primer pair 2 (argv[3] forward, argv[4] reverse) according to
9 | // check_uneven_dimer(), printing the first offending combination found.
10 | // Exits with status 1 on a wrong argument count or an over-long primer and
11 | // returns 0 otherwise, whether or not a dimer was reported.
12 | int main (int argc, char **argv)
13 | {
14 |   enum { PRIMER_BUF = 80 };   // size of every primer buffer, terminator included
15 |   char fp1[PRIMER_BUF], fp2[PRIMER_BUF], rp1[PRIMER_BUF], rp2[PRIMER_BUF];
16 |   // zero-filled up front, replacing the original's explicit clearing loop
17 |   char flipf1[PRIMER_BUF] = {0}, flipf2[PRIMER_BUF] = {0};
18 |   char flipr1[PRIMER_BUF] = {0}, flipr2[PRIMER_BUF] = {0};
19 |   int i, f1, r1, f2, r2;
20 |
21 |   if (argc != 5)
22 |   {
23 |     printf("\nUsage: primer_compat forward_primer_1 reverse_primer_1 forward_primer_2 reverse_primer_2\n");
24 |     exit(1);
25 |   }
26 |
27 |   // The primers are copied into fixed-size stack buffers; the previous
28 |   // unbounded sprintf() overran them for any argument of PRIMER_BUF
29 |   // characters or more, so reject such input before copying.
30 |   for (i = 1; i < argc; i++)
31 |   {
32 |     if (strlen(argv[i]) >= PRIMER_BUF)
33 |     {
34 |       fprintf(stderr, "\nprimer_compat: argument %d is longer than %d characters\n", i, PRIMER_BUF - 1);
35 |       exit(1);
36 |     }
37 |   }
38 |
39 |   snprintf(fp1, sizeof fp1, "%s", argv[1]);
40 |   snprintf(rp1, sizeof rp1, "%s", argv[2]);
41 |   snprintf(fp2, sizeof fp2, "%s", argv[3]);
42 |   snprintf(rp2, sizeof rp2, "%s", argv[4]);
43 |
44 |   printf("\nprimer 1 fwd: %s\trev: %s\nprimer 2 fwd: %s\trev: %s\n", fp1, rp1, fp2, rp2);
45 |
46 |   // lengths are bounded by PRIMER_BUF - 1, so they fit an int
47 |   f1 = (int) strlen(fp1);
48 |   r1 = (int) strlen(rp1);
49 |   f2 = (int) strlen(fp2);
50 |   r2 = (int) strlen(rp2);
51 |
52 |   printf("\n\nlen primer 1 fwd: %d\trev: %d\nlen primer 2 fwd: %d\trev: %d\n", f1, r1, f2, r2);
53 |   reverse_string (rp1, flipr1, r1);
54 |   reverse_string (rp2, flipr2, r2);
55 |   reverse_string (fp1, flipf1, f1);
56 |   reverse_string (fp2, flipf2, f2);
57 |
58 |   printf("\n\ncomplement of primer 1 fwd: %s\trev: %s\ncomplement of primer 2 fwd: %s\trev: %s\n", flipf1, flipr1, flipf2, flipr2);
59 |
60 |   // each test pairs a primer of pair 1 with the reversed primer of pair 2;
61 |   // only the first combination that reports a dimer is printed
62 |   if (check_uneven_dimer (fp1, flipf2, f1, f2))
63 |     printf ("\nprimer 1 forward (%s) makes dimer with primer 2 forward (%s)\n", fp1, fp2);
64 |   else if (check_uneven_dimer (rp1, flipr2, r1, r2))
65 |     printf ("\nprimer 1 reverse (%s) makes dimer with primer 2 reverse (%s)\n", rp1, rp2);
66 |   else if (check_uneven_dimer (fp1, flipr2, f1, r2))
67 |     printf ("\nprimer 1 forward (%s) makes dimer with primer 2 reverse (%s)\n", fp1, rp2);
68 |   else if (check_uneven_dimer (rp1, flipf2, r1, f2))
69 |     printf ("\nprimer 1 reverse (%s) makes dimer with primer 2 forward (%s)\n", rp1, fp2);
70 |   else
71 |     printf("\nprimer pair 1 and 2 seem compatable.\n");
72 |
73 |   return 0;
74 | }
75 |
--------------------------------------------------------------------------------