├── VERSION ├── docker ├── .gitignore ├── Dockerfile └── Makefile ├── make.sh ├── clean.sh ├── src ├── inc │ ├── libbam.a │ ├── libz.so │ ├── libbam_1.a │ ├── nuc.h │ ├── check.h │ ├── stdinc.h │ ├── extfunc2.h │ ├── types.h │ ├── darray.h │ ├── dfibHeap.h │ ├── fibHeap.h │ ├── sam_header.h │ ├── stack.h │ ├── def2.h │ ├── sam_view.h │ ├── dfib.h │ ├── fib.h │ ├── kstring.h │ ├── knetfile.h │ ├── glf.h │ ├── dfibpriv.h │ ├── fibpriv.h │ ├── global.h │ ├── faidx.h │ ├── sam.h │ ├── extvab.h │ ├── newhash.h │ ├── bgzf.h │ ├── razf.h │ └── def.h ├── cutTip_graph.c ├── readInterval.c ├── check.c ├── fibHeap.c ├── darray.c ├── dfibHeap.c ├── Makefile ├── output_pregraph.c ├── output_scaffold.c ├── mem_manager.c ├── compactEdge.c ├── seq.c ├── loadReadPath.c ├── stack.c ├── ReadTrace.c ├── map.c ├── hashFunction.c ├── scaffold.c ├── connect.c ├── pregraph.c ├── loadPath.c ├── arc.c ├── searchPath.c ├── concatenateEdge.c ├── read2scaf.c ├── output_contig.c ├── contig.c └── sortContig.c ├── update.log └── INSTALL /VERSION: -------------------------------------------------------------------------------- 1 | 1.04 2 | -------------------------------------------------------------------------------- /docker/.gitignore: -------------------------------------------------------------------------------- 1 | docker-image -------------------------------------------------------------------------------- /make.sh: -------------------------------------------------------------------------------- 1 | cd src/ 2 | make 3 | make 127mer=1 4 | -------------------------------------------------------------------------------- /clean.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | make clean 3 | make clean 127mer=1 4 | 5 | -------------------------------------------------------------------------------- /src/inc/libbam.a: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aquaskyline/SOAPdenovo-Trans/HEAD/src/inc/libbam.a -------------------------------------------------------------------------------- /src/inc/libz.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aquaskyline/SOAPdenovo-Trans/HEAD/src/inc/libz.so -------------------------------------------------------------------------------- /src/cutTip_graph.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aquaskyline/SOAPdenovo-Trans/HEAD/src/cutTip_graph.c -------------------------------------------------------------------------------- /src/inc/libbam_1.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aquaskyline/SOAPdenovo-Trans/HEAD/src/inc/libbam_1.a -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | RUN yum update -y \ 3 | && yum groupinstall -y "Development Tools" \ 4 | && yum install -y zlib-static glibc-static -------------------------------------------------------------------------------- /update.log: -------------------------------------------------------------------------------- 1 | 1.04 | 2014-04-22 15:00:00 +0800 (Tue, 22 Apr 2014) Fixes a number of 'segmentation fault' errors on different kinds of data. 2 | (Thanks to Chris Boursnell (twitter: @chrisboursnell) for fixing the bugs.) 3 | 4 | 1.03 | 2013-07-19 12:00:00 +0800 (Fri, 19 Jul 2013) 5 | Add the function: calculate RPKM (Reads per Kilobase of assembled transcripts per Million mapped reads). 
6 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | 2 | Installation of SOAPdenovo-Trans 1.04 3 | 4 | ******************************************************************** 5 | 6 | The pre-configured Makefile is designed to handle most circumstances. 7 | 8 | ************ 9 | MAKE 10 | ************ 11 | 12 | Type "sh make.sh" at root of unpacked folder. 13 | 14 | ************ 15 | CLEAN 16 | ************ 17 | 18 | Type "sh clean.sh" at root of unpacked folder. 19 | -------------------------------------------------------------------------------- /src/inc/nuc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inc/Nuc.h 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | int total_nuc = 16; 24 | char na_name[17] = {'g', 'a', 't', 'c', 25 | 'n', 'r', 'y', 'w', 's', 'm', 'k', 'h', 'b', 'v', 'd', 'x' 26 | }; 27 | -------------------------------------------------------------------------------- /src/inc/check.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inc/Check.h 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 
5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | 24 | extern void * ckalloc ( unsigned long long amount ); 25 | extern void * ckrealloc ( void * p, size_t new_size, size_t old_size ); 26 | extern FILE * ckopen ( char * name, char * mode ); 27 | 28 | -------------------------------------------------------------------------------- /docker/Makefile: -------------------------------------------------------------------------------- 1 | DOCKER_IMAGE_NAME := soapdenovo-trans 2 | DOCKER_IMAGE_TAG ?= latest 3 | 4 | help: 5 | @ echo "build : build $(DOCKER_IMAGE_NAME) binaries with a docker container" 6 | @ echo "clean : clean the binaries" 7 | 8 | .PHONY: docker-version 9 | docker-version: 10 | docker version 11 | 12 | .PHONY: docker-image-check 13 | docker-image-check: 14 | @if [ -f docker-image -a -z "$$(docker images -q $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG))" ]; then \ 15 | echo "docker-image file exists and docker image does not, removing check file"; \ 16 | rm docker-image; \ 17 | fi 18 | 19 | docker-image: Dockerfile 20 | docker build -t $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG) . 
21 | @ touch $@ 22 | 23 | build: docker-version docker-image-check docker-image 24 | @echo "Executing 'make.sh' in docker" 25 | docker run --rm \ 26 | -v $$(pwd)/..:/app \ 27 | -w /app \ 28 | $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG) \ 29 | bash make.sh 30 | 31 | clean: docker-version docker-image-check docker-image 32 | @echo "Executing 'clean.sh' in docker" 33 | docker run --rm \ 34 | -v $$(pwd)/..:/app \ 35 | -w /app \ 36 | $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG) \ 37 | bash clean.sh -------------------------------------------------------------------------------- /src/inc/stdinc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inc/Stdinc.h 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include "def.h" 32 | #include "types.h" 33 | #include 34 | #include 35 | -------------------------------------------------------------------------------- /src/inc/extfunc2.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inc/Extfunc2.h 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 
5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | #ifndef _MEM_MANAGER 24 | #define _MEM_MANAGER 25 | extern MEM_MANAGER * createMem_manager ( int num_items, size_t unit_size ); 26 | extern void * getItem ( MEM_MANAGER * mem_Manager ); 27 | extern void returnItem ( MEM_MANAGER * mem_Manager, void * ); 28 | extern void freeMem_manager ( MEM_MANAGER * mem_Manager ); 29 | #endif 30 | -------------------------------------------------------------------------------- /src/inc/types.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inc/Types.h 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 
17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | #ifndef __TYPES_RJ 24 | #define __TYPES_RJ 25 | 26 | typedef unsigned long long ubyte8; 27 | typedef unsigned int ubyte4; 28 | typedef unsigned short ubyte2; 29 | typedef unsigned char ubyte; 30 | 31 | typedef long long byte8; 32 | typedef int byte4; 33 | typedef short byte2; 34 | typedef char byte; 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /src/inc/darray.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inc/Darray.h 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 
20 | * 21 | */ 22 | 23 | #ifndef __DARRAY__ 24 | #define __DARRAY__ 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | typedef struct dynamic_array 31 | { 32 | void * array; 33 | long long array_size; 34 | size_t item_size; 35 | long long item_c; 36 | } DARRAY; 37 | 38 | void * darrayPut ( DARRAY * darray, long long index ); 39 | void * darrayGet ( DARRAY * darray, long long index ); 40 | DARRAY * createDarray ( int num_items, size_t unit_size ); 41 | void freeDarray ( DARRAY * darray ); 42 | void emptyDarray ( DARRAY * darray ); 43 | 44 | #endif 45 | 46 | -------------------------------------------------------------------------------- /src/inc/dfibHeap.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inc/dfibHeap.h 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 
20 | * 21 | */ 22 | 23 | #ifndef _DFIBHEAP_H_ 24 | #define _DFIBHEAP_H_ 25 | 26 | DFibHeap * newDFibHeap(); 27 | 28 | DFibHeapNode * insertNodeIntoDHeap ( DFibHeap * heap, Time key, unsigned int node ); 29 | 30 | Time replaceKeyInDHeap ( DFibHeap * heap, DFibHeapNode * node, Time newKey ); 31 | 32 | unsigned int removeNextNodeFromDHeap ( DFibHeap * heap ); 33 | 34 | void destroyDHeap ( DFibHeap * heap ); 35 | 36 | boolean HasMin ( DFibHeap * h ); 37 | 38 | void replaceValueInDHeap ( DFibHeapNode * node, unsigned int newValue ); 39 | 40 | void * destroyNodeInDHeap ( DFibHeapNode * node, DFibHeap * heap ); 41 | 42 | IDnum getDFibHeapSize ( DFibHeap * heap ); 43 | 44 | Time getKey ( DFibHeapNode * node ); 45 | #endif 46 | -------------------------------------------------------------------------------- /src/inc/fibHeap.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inc/fibHeap.h 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 
20 | * 21 | */ 22 | 23 | #ifndef _FIBHEAP_H_ 24 | #define _FIBHEAP_H_ 25 | 26 | FibHeap * newFibHeap(); 27 | 28 | FibHeapNode * insertNodeIntoHeap ( FibHeap * heap, Coordinate key, 29 | unsigned int node ); 30 | 31 | Coordinate minKeyOfHeap ( FibHeap * heap ); 32 | 33 | Coordinate replaceKeyInHeap ( FibHeap * heap, FibHeapNode * node, 34 | Coordinate newKey ); 35 | 36 | void replaceValueInHeap ( FibHeapNode * node, unsigned int newValue ); 37 | 38 | unsigned int removeNextNodeFromHeap ( FibHeap * heap ); 39 | 40 | void * destroyNodeInHeap ( FibHeapNode * node, FibHeap * heap ); 41 | 42 | void destroyHeap ( FibHeap * heap ); 43 | 44 | boolean IsHeapEmpty ( FibHeap * heap ); 45 | #endif 46 | -------------------------------------------------------------------------------- /src/inc/sam_header.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inc/Sam_header.h 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 
20 | * 21 | */ 22 | #ifndef __SAM_HEADER_H__ 23 | #define __SAM_HEADER_H__ 24 | 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | 29 | void *sam_header_parse2(const char *headerText); 30 | void *sam_header_merge(int n, const void **dicts); 31 | void sam_header_free(void *header); 32 | char *sam_header_write(const void *headerDict); // returns a newly allocated string 33 | 34 | char **sam_header2list(const void *_dict, char type[2], char key_tag[2], int *_n); 35 | 36 | void *sam_header2tbl(const void *dict, char type[2], char key_tag[2], char value_tag[2]); 37 | const char *sam_tbl_get(void *h, const char *key); 38 | int sam_tbl_size(void *h); 39 | void sam_tbl_destroy(void *h); 40 | 41 | #ifdef __cplusplus 42 | } 43 | #endif 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /src/inc/stack.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inc/Stack.h 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 
20 | * 21 | */ 22 | 23 | #ifndef __STACK__ 24 | #define __STACK__ 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | typedef struct block_starter 31 | { 32 | struct block_starter * prev; 33 | struct block_starter * next; 34 | } BLOCK_STARTER; 35 | 36 | typedef struct stack 37 | { 38 | BLOCK_STARTER * block_list; 39 | int index_in_block; 40 | int items_per_block; 41 | int item_c; 42 | size_t item_size; 43 | BLOCK_STARTER * block_backup; 44 | int index_backup; 45 | int item_c_backup; 46 | } STACK; 47 | 48 | void stackBackup ( STACK * astack ); 49 | void stackRecover ( STACK * astack ); 50 | void * stackPush ( STACK * astack ); 51 | void * stackPop ( STACK * astack ); 52 | void freeStack ( STACK * astack ); 53 | void emptyStack ( STACK * astack ); 54 | STACK * createStack ( int num_items, size_t unit_size ); 55 | 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /src/readInterval.c: -------------------------------------------------------------------------------- 1 | /* 2 | * readInterval.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 
20 | * 21 | */ 22 | 23 | #include "stdinc.h" 24 | #include "newhash.h" 25 | #include "extfunc.h" 26 | #include "extvab.h" 27 | 28 | #define RVBLOCKSIZE 1000 29 | 30 | void destroyReadIntervMem () 31 | { 32 | freeMem_manager (rv_mem_manager); 33 | rv_mem_manager = NULL; 34 | } 35 | 36 | READINTERVAL *allocateRV (int readid, int edgeid) 37 | { 38 | READINTERVAL *newRV; 39 | 40 | newRV = (READINTERVAL *) getItem (rv_mem_manager); 41 | newRV->readid = readid; 42 | newRV->edgeid = edgeid; 43 | newRV->nextInRead = NULL; 44 | newRV->prevInRead = NULL; 45 | newRV->nextOnEdge = NULL; 46 | newRV->prevOnEdge = NULL; 47 | return newRV; 48 | } 49 | 50 | void dismissRV (READINTERVAL * rv) 51 | { 52 | returnItem (rv_mem_manager, rv); 53 | } 54 | 55 | void createRVmemo () 56 | { 57 | if (!rv_mem_manager) 58 | { 59 | rv_mem_manager = createMem_manager (RVBLOCKSIZE, sizeof (READINTERVAL)); 60 | } 61 | else 62 | { 63 | printf ("Warning from createRVmemo: rv_mem_manager is an active pointer\n"); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/inc/def2.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inc/Def2.h 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 
20 | * 21 | */ 22 | 23 | #ifndef _DEF2 24 | #define _DEF2 25 | typedef char boolean; 26 | typedef long long IDnum; 27 | typedef double Time; 28 | typedef long long Coordinate; 29 | // Fibonacci heaps used mainly in Tour Bus 30 | typedef struct fibheap FibHeap; 31 | typedef struct fibheap_el FibHeapNode; 32 | typedef struct dfibheap DFibHeap; 33 | typedef struct dfibheap_el DFibHeapNode; 34 | //Memory manager 35 | typedef struct block_start 36 | { 37 | struct block_start * next; 38 | } BLOCK_START; 39 | 40 | typedef struct recycle_mark 41 | { 42 | struct recycle_mark * next; 43 | } RECYCLE_MARK; 44 | 45 | typedef struct mem_manager 46 | { 47 | BLOCK_START * block_list; 48 | int index_in_block; 49 | int items_per_block; 50 | size_t item_size; 51 | RECYCLE_MARK * recycle_list; 52 | unsigned long long counter; 53 | } MEM_MANAGER; 54 | 55 | struct dfibheap_el 56 | { 57 | int dfhe_degree; 58 | boolean dfhe_mark; 59 | DFibHeapNode * dfhe_p; 60 | DFibHeapNode * dfhe_child; 61 | DFibHeapNode * dfhe_left; 62 | DFibHeapNode * dfhe_right; 63 | Time dfhe_key; 64 | unsigned int dfhe_data;//void *dfhe_data; 65 | }; 66 | #endif 67 | -------------------------------------------------------------------------------- /src/inc/sam_view.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inc/Sam_view.h 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 
17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | #ifndef SAM_VIEW_H 24 | #define SAM_VIEW_H 25 | 26 | 27 | static int g_min_mapQ = 0, g_flag_on = 0, g_flag_off = 0; 28 | static char *g_library, *g_rg; 29 | static int g_sol2sanger_tbl[128]; 30 | 31 | static void sol2sanger(bam1_t *b) 32 | { 33 | int l; 34 | uint8_t *qual = bam1_qual(b); 35 | if (g_sol2sanger_tbl[30] == 0) { 36 | for (l = 0; l != 128; ++l) { 37 | g_sol2sanger_tbl[l] = (int)(10.0 * log(1.0 + pow(10.0, (l - 64 + 33) / 10.0)) / log(10.0) + .499); 38 | if (g_sol2sanger_tbl[l] >= 93) g_sol2sanger_tbl[l] = 93; 39 | } 40 | } 41 | for (l = 0; l < b->core.l_qseq; ++l) { 42 | int q = qual[l]; 43 | if (q > 127) q = 127; 44 | qual[l] = g_sol2sanger_tbl[q]; 45 | } 46 | } 47 | 48 | static inline int __g_skip_aln(const bam_header_t *h, const bam1_t *b) 49 | { 50 | if (b->core.qual < g_min_mapQ || ((b->core.flag & g_flag_on) != g_flag_on) || (b->core.flag & g_flag_off)) 51 | return 1; 52 | if (g_rg) { 53 | uint8_t *s = bam_aux_get(b, "RG"); 54 | if (s && strcmp(g_rg, (char*)(s + 1)) == 0) return 0; 55 | } 56 | if (g_library) { 57 | const char *p = bam_get_library((bam_header_t*)h, b); 58 | return (p && strcmp(p, g_library) == 0)? 0 : 1; 59 | } 60 | return 0; 61 | } 62 | 63 | 64 | #endif 65 | -------------------------------------------------------------------------------- /src/check.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Check.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 
12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | #include 24 | void *ckalloc (unsigned long long amount); 25 | FILE *ckopen (char *name, char *mode); /* ckopen - open `name` with fopen; print a message and exit (-1) if it cannot be opened */ 26 | FILE *ckopen (char *name, char *mode) 27 | { 28 | FILE *fp; 29 | 30 | if ((fp = fopen (name, mode)) == NULL) 31 | { 32 | printf ("Cannot open %s. Now exit to system...\n", name); 33 | exit (-1); 34 | } 35 | return (fp); 36 | } 37 | 38 | /* ckalloc - allocate `amount` zero-filled bytes with calloc; print a diagnostic and exit (-1) on failure */ 39 | void *ckalloc (unsigned long long amount) 40 | { 41 | void *p; 42 | 43 | if ((p = (void *) calloc (1, (unsigned long long) amount)) == NULL && amount != 0) 44 | { 45 | printf ("Ran out of memory while applying %llubytes\n", amount); /* amount is unsigned: %llu, not %lld */ 46 | printf ("There may be errors as follows:\n"); 47 | printf ("1) Not enough memory.\n"); 48 | printf ("2) The ARRAY may be overrode.\n"); 49 | printf ("3) The wild pointers.\n"); 50 | fflush (stdout); 51 | exit (-1); 52 | } 53 | return (p); 54 | } 55 | 56 | /* ckrealloc - resize `p` to new_size bytes; if realloc fails, fall back to a fresh ckalloc block, copy the old contents into it and free `p` */ 57 | void *ckrealloc (void *p, size_t new_size, size_t old_size) 58 | { 59 | void *q; 60 | 61 | q = realloc ((void *) p, new_size); 62 | if (new_size == 0 || q != (void *) 0) 63 | { 64 | return q; 65 | } 66 | /* manually reallocate space */ 67 | q = ckalloc (new_size); 68 | 69 | /* move old memory to new space, never copying more than the new block can hold */ 70 | bcopy (p, q, old_size < new_size ? old_size : new_size); 71 | free (p); 72 | return q; 73 | } 74 | -------------------------------------------------------------------------------- /src/fibHeap.c: -------------------------------------------------------------------------------- 1 | /* 2 | * fibHeap.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 
5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | #include "fib.h" 24 | 25 | // Constructor 26 | // Memory allocated 27 | FibHeap *newFibHeap () 28 | { 29 | return fh_makekeyheap (); 30 | } 31 | 32 | // Add new node into heap with a key, and a pointer to the specified node 33 | FibHeapNode *insertNodeIntoHeap (FibHeap * heap, Coordinate key, unsigned int node) 34 | { 35 | return fh_insertkey (heap, key, node); 36 | } 37 | 38 | // Returns smallest key in heap 39 | Coordinate minKeyOfHeap (FibHeap * heap) 40 | { 41 | return fh_minkey (heap); 42 | } 43 | 44 | // Replaces the key for a given node 45 | Coordinate replaceKeyInHeap (FibHeap * heap, FibHeapNode * node, Coordinate newKey) 46 | { 47 | return fh_replacekey (heap, node, newKey); 48 | } 49 | 50 | // Removes the node with the shortest key, then returns it. 
51 | unsigned int removeNextNodeFromHeap (FibHeap * heap) 52 | { 53 | return (unsigned int) fh_extractmin (heap); 54 | } 55 | 56 | boolean IsHeapEmpty (FibHeap * heap) 57 | { 58 | return fh_isempty (heap); 59 | } 60 | 61 | // Destructor 62 | void destroyHeap (FibHeap * heap) 63 | { 64 | fh_deleteheap (heap); 65 | } 66 | 67 | // Replace the node pointed to by a heap node 68 | void replaceValueInHeap (FibHeapNode * node, unsigned int newValue) 69 | { 70 | fh_replacedata (node, newValue); 71 | } 72 | 73 | // Remove unwanted node 74 | void destroyNodeInHeap (FibHeapNode * node, FibHeap * heap) 75 | { 76 | fh_delete (heap, node); 77 | } 78 | -------------------------------------------------------------------------------- /src/inc/dfib.h: -------------------------------------------------------------------------------- 1 | /*- 2 | * Copyright 1997, 1998-2003 John-Mark Gurney. 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 | * SUCH DAMAGE. 25 | * 26 | * $Id: dfib.h,v 1.8 2007/04/24 12:16:41 zerbino Exp $ 27 | * 28 | */ 29 | 30 | #ifndef _DFIB_H_ 31 | #define _DFIB_H_ 32 | 33 | #include 34 | #include "def2.h" //#include "globals.h" 35 | 36 | /* functions for key heaps */ 37 | DFibHeap * dfh_makekeyheap ( void ); 38 | DFibHeapNode * dfh_insertkey ( DFibHeap *, Time, unsigned int ); 39 | Time dfh_replacekey ( DFibHeap *, DFibHeapNode *, Time ); 40 | unsigned int dfh_replacekeydata ( DFibHeap *, DFibHeapNode *, Time, unsigned int ); 41 | 42 | unsigned int dfh_extractmin ( DFibHeap * ); 43 | unsigned int dfh_replacedata ( DFibHeapNode *, unsigned int ); 44 | unsigned int dfh_delete ( DFibHeap *, DFibHeapNode * ); 45 | void dfh_deleteheap ( DFibHeap * ); 46 | 47 | // DEBUG 48 | IDnum dfibheap_getSize ( DFibHeap * ); 49 | Time dfibheap_el_getKey ( DFibHeapNode * ); 50 | // END DEBUG 51 | 52 | #endif /* _FIB_H_ */ 53 | -------------------------------------------------------------------------------- /src/darray.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Darray.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 
7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | #include "darray.h" 24 | #include "check.h" 25 | 26 | DARRAY *createDarray (int num_items, size_t unit_size) 27 | { 28 | DARRAY *newDarray = (DARRAY *) malloc (1 * sizeof (DARRAY)); 29 | 30 | newDarray->array_size = num_items; 31 | newDarray->item_size = unit_size; 32 | newDarray->item_c = 0; 33 | newDarray->array = (void *) ckalloc (num_items * unit_size); 34 | return newDarray; 35 | } 36 | 37 | void *darrayPut (DARRAY * darray, long long index) 38 | { 39 | int i = 2; 40 | 41 | if (index + 1 > darray->item_c) 42 | { 43 | darray->item_c = index + 1; 44 | } 45 | 46 | if (index < darray->array_size) 47 | { 48 | return darray->array + darray->item_size * index; 49 | } 50 | 51 | while (index > i * darray->array_size) 52 | { 53 | i++; 54 | } 55 | 56 | darray->array = (void *) ckrealloc (darray->array, i * darray->array_size * darray->item_size, darray->array_size * darray->item_size); 57 | darray->array_size *= i; 58 | return (void *) ((void *) darray->array + darray->item_size * index); 59 | } 60 | 61 | void *darrayGet (DARRAY * darray, long long index) 62 | { 63 | if (index < darray->array_size) 64 | { 65 | return (void *) ((void *) darray->array + darray->item_size * index); 66 | } 67 | 68 | printf ("array read index %lld out of range %lld\n", index, darray->array_size); 69 | 
return NULL; 70 | } 71 | 72 | void emptyDarray (DARRAY * darray) 73 | { 74 | darray->item_c = 0; 75 | } 76 | 77 | void freeDarray (DARRAY * darray) 78 | { 79 | if (!darray) 80 | { 81 | return; 82 | } 83 | 84 | if (darray->array) 85 | { 86 | free ((void *) darray->array); 87 | } 88 | 89 | free ((void *) darray); 90 | } 91 | -------------------------------------------------------------------------------- /src/dfibHeap.c: -------------------------------------------------------------------------------- 1 | /* 2 | * dfibHeap.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 
20 | * 21 | */ 22 | 23 | #include 24 | #include 25 | 26 | #include "def2.h" 27 | #include "dfib.h" 28 | 29 | // Return number of elements stored in heap 30 | IDnum getDFibHeapSize (DFibHeap * heap) 31 | { 32 | return dfibheap_getSize (heap); 33 | } 34 | 35 | // Constructor 36 | // Memory allocated 37 | DFibHeap *newDFibHeap () 38 | { 39 | return dfh_makekeyheap (); 40 | } 41 | 42 | // Add new node into heap with a key, and a pointer to the specified node 43 | DFibHeapNode *insertNodeIntoDHeap (DFibHeap * heap, Time key, unsigned int node) 44 | { 45 | DFibHeapNode *res; 46 | 47 | res = dfh_insertkey (heap, key, node); 48 | return res; 49 | } 50 | 51 | // Replaces the key for a given node 52 | Time replaceKeyInDHeap (DFibHeap * heap, DFibHeapNode * node, Time newKey) 53 | { 54 | Time res; 55 | 56 | res = dfh_replacekey (heap, node, newKey); 57 | return res; 58 | } 59 | 60 | // Removes the node with the shortest key, then returns it. 61 | unsigned int removeNextNodeFromDHeap (DFibHeap * heap) 62 | { 63 | unsigned int node; 64 | 65 | node = (unsigned int) dfh_extractmin (heap); 66 | return node; 67 | } 68 | 69 | // Destructor 70 | void destroyDHeap (DFibHeap * heap) 71 | { 72 | dfh_deleteheap (heap); 73 | } 74 | 75 | // Replace the node pointed to by a heap node 76 | void replaceValueInDHeap (DFibHeapNode * node, unsigned int newValue) 77 | { 78 | dfh_replacedata (node, newValue); 79 | } 80 | 81 | // Remove unwanted node 82 | void destroyNodeInDHeap (DFibHeapNode * node, DFibHeap * heap) 83 | { 84 | dfh_delete (heap, node); 85 | } 86 | 87 | Time getKey (DFibHeapNode * node) 88 | { 89 | return dfibheap_el_getKey (node); 90 | } 91 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | CC= gcc 2 | CFLAGS= -O3 -fomit-frame-pointer -static 3 | #CFLAGS= -O3 -g -D_DEBUG 4 | DFLAGS= 5 | 6 | OBJS= arc.o attachPEinfo.o bubble.o check.o compactEdge.o \ 7 
| concatenateEdge.o connect.o contig.o cutTipPreGraph.o cutTip_graph.o \ 8 | darray.o dfib.o dfibHeap.o fib.o fibHeap.o \ 9 | hashFunction.o kmer.o lib.o loadGraph.o loadPath.o \ 10 | loadPreGraph.o localAsm.o main.o map.o mem_manager.o \ 11 | newhash.o node2edge.o orderContig.o output_contig.o output_pregraph.o \ 12 | output_scaffold.o pregraph.o prlHashCtg.o prlHashReads.o prlRead2Ctg.o \ 13 | prlRead2path.o prlReadFillGap.o read2scaf.o readInterval.o stack.o\ 14 | readseq1by1.o ReadTrace.o scaffold.o searchPath.o seq.o splitReps.o #sortContig.o 15 | PROG= ../SOAPdenovo-Trans-31kmer 16 | #CFLAGS += -DMER31 17 | ifdef 127mer 18 | CFLAGS += -DMER127 19 | PROG = ../SOAPdenovo-Trans-127mer 20 | else 21 | ifdef 63mer 22 | CFLAGS += -DMER63 23 | PROG = ../SOAPdenovo-Trans-63mer 24 | else 25 | CFLAGS += -DMER31 26 | PROG = ../SOAPdenovo-Trans-31mer 27 | endif 28 | endif 29 | ifdef debug 30 | CFLAGS += -DDEBUG 31 | endif 32 | INCLUDES= -Iinc 33 | SUBDIRS= . 34 | LIBPATH= 35 | LIBS= -pthread -lm -lrt -lbam -lz -L./inc 36 | EXTRA_FLAGS= 37 | 38 | BIT_ERR = 0 39 | ifeq (,$(findstring $(shell uname -m), x86_64 ppc64 ia64)) 40 | BIT_ERR = 1 41 | endif 42 | 43 | 44 | LINUX = 0 45 | ifneq (,$(findstring Linux,$(shell uname))) 46 | LINUX = 1 47 | EXTRA_FLAGS += -Wl,--hash-style=both 48 | endif 49 | 50 | ifneq (,$(findstring $(shell uname -m), x86_64)) 51 | CFLAGS += -m64 52 | endif 53 | 54 | ifneq (,$(findstring $(shell uname -m), ia64)) 55 | CFLAGS += 56 | endif 57 | 58 | ifneq (,$(findstring $(shell uname -m), ppc64)) 59 | CFLAGS += -mpowerpc64 60 | endif 61 | 62 | .SUFFIXES:.c .o 63 | 64 | .c.o: 65 | @printf "Compiling $<... 
\r"; \ 66 | $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< || echo "Error in command: $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $<" 67 | 68 | all: clean SOAPdenovo 69 | 70 | .PHONY:all clean install 71 | 72 | envTest: 73 | @test $(BIT_ERR) != 1 || sh -c 'echo "Fatal: 64bit CPU and Operating System required!";false;' 74 | 75 | SOAPdenovo: envTest $(OBJS) 76 | @$(CC) $(CFLAGS) -o $(PROG) $(OBJS) $(LIBPATH) $(LIBS) $(ENTRAFLAGS) 77 | @printf "Linking...\r" 78 | @printf "$(PROG) compilation done.\n"; 79 | 80 | clean: 81 | @rm -fr gmon.out *.o a.out *.exe *.dSYM $(PROG) *~ *.a *.so.* *.so *.dylib 82 | @printf "$(PROG) cleaning done.\n"; 83 | 84 | install: 85 | @cp $(PROG) ../bin/ 86 | @printf "$(PROG) installed at ../bin/$(PROG)\n" 87 | -------------------------------------------------------------------------------- /src/inc/fib.h: -------------------------------------------------------------------------------- 1 | /*- 2 | * Copyright 1997, 1998-2003 John-Mark Gurney. 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 | * SUCH DAMAGE. 25 | * 26 | */ 27 | 28 | #ifndef _FIB_H_ 29 | #define _FIB_H_ 30 | 31 | //#include "globals.h" 32 | #include 33 | #include "def2.h" 34 | 35 | typedef Coordinate ( *voidcmp ) ( unsigned int , unsigned int ); 36 | 37 | /* functions for key heaps */ 38 | boolean fh_isempty ( FibHeap * ); 39 | FibHeap * fh_makekeyheap ( void ); 40 | FibHeapNode * fh_insertkey ( FibHeap *, Coordinate, unsigned int ); 41 | Coordinate fh_minkey ( FibHeap * ); 42 | Coordinate fh_replacekey ( FibHeap *, FibHeapNode *, Coordinate ); 43 | unsigned int fh_replacekeydata ( FibHeap *, FibHeapNode *, Coordinate, unsigned int ); 44 | 45 | /* functions for unsigned int * heaps */ 46 | FibHeap * fh_makeheap ( void ); 47 | voidcmp fh_setcmp ( FibHeap *, voidcmp ); 48 | unsigned int fh_setneginf ( FibHeap *, unsigned int ); 49 | FibHeapNode * fh_insert ( FibHeap *, unsigned int ); 50 | 51 | /* shared functions */ 52 | unsigned int fh_extractmin ( FibHeap * ); 53 | unsigned int fh_min ( FibHeap * ); 54 | unsigned int fh_replacedata ( FibHeapNode *, unsigned int ); 55 | unsigned int fh_delete ( FibHeap *, FibHeapNode * ); 56 | void fh_deleteheap ( FibHeap * ); 57 | FibHeap * fh_union ( FibHeap *, FibHeap * ); 58 | 59 | #endif /* _FIB_H_ */ 60 | -------------------------------------------------------------------------------- /src/inc/kstring.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inc/Kstring.h 3 | * 4 | 
* Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | #ifndef KSTRING_H 23 | #define KSTRING_H 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #ifndef kroundup32 30 | #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) 31 | #endif 32 | 33 | #ifndef KSTRING_T 34 | #define KSTRING_T kstring_t 35 | typedef struct __kstring_t { 36 | size_t l, m; 37 | char *s; 38 | } kstring_t; 39 | #endif 40 | 41 | int ksprintf(kstring_t *s, const char *fmt, ...); 42 | int ksplit_core(char *s, int delimiter, int *_max, int **_offsets); 43 | 44 | // calculate the auxiliary array, allocated by calloc() 45 | int *ksBM_prep(const uint8_t *pat, int m); 46 | 47 | /* Search pat in str and returned the list of matches. The size of the 48 | * list is returned as n_matches. _prep is the array returned by 49 | * ksBM_prep(). If it is a NULL pointer, ksBM_prep() will be called. 
*/ 50 | int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches); 51 | 52 | static inline int kputsn(const char *p, int l, kstring_t *s) 53 | { 54 | if (s->l + l + 1 >= s->m) { 55 | s->m = s->l + l + 2; 56 | kroundup32(s->m); 57 | s->s = (char*)realloc(s->s, s->m); 58 | } 59 | strncpy(s->s + s->l, p, l); 60 | s->l += l; 61 | s->s[s->l] = 0; 62 | return l; 63 | } 64 | 65 | static inline int kputs(const char *p, kstring_t *s) 66 | { 67 | return kputsn(p, strlen(p), s); 68 | } 69 | 70 | static inline int kputc(int c, kstring_t *s) 71 | { 72 | if (s->l + 1 >= s->m) { 73 | s->m = s->l + 2; 74 | kroundup32(s->m); 75 | s->s = (char*)realloc(s->s, s->m); 76 | } 77 | s->s[s->l++] = c; 78 | s->s[s->l] = 0; 79 | return c; 80 | } 81 | 82 | static inline int *ksplit(kstring_t *s, int delimiter, int *n) 83 | { 84 | int max = 0, *offsets = 0; 85 | *n = ksplit_core(s->s, delimiter, &max, &offsets); 86 | return offsets; 87 | } 88 | 89 | #endif 90 | -------------------------------------------------------------------------------- /src/inc/knetfile.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inc/Knetfile.h 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 
20 | * 21 | */ 22 | #ifndef KNETFILE_H 23 | #define KNETFILE_H 24 | 25 | #include 26 | #include 27 | 28 | #ifndef _WIN32 29 | #define netread(fd, ptr, len) read(fd, ptr, len) 30 | #define netwrite(fd, ptr, len) write(fd, ptr, len) 31 | #define netclose(fd) close(fd) 32 | #else 33 | #include 34 | #define netread(fd, ptr, len) recv(fd, ptr, len, 0) 35 | #define netwrite(fd, ptr, len) send(fd, ptr, len, 0) 36 | #define netclose(fd) closesocket(fd) 37 | #endif 38 | 39 | // FIXME: currently I/O is unbuffered 40 | 41 | #define KNF_TYPE_LOCAL 1 42 | #define KNF_TYPE_FTP 2 43 | #define KNF_TYPE_HTTP 3 44 | 45 | typedef struct knetFile_s { 46 | int type, fd; 47 | int64_t offset; 48 | char *host, *port; 49 | 50 | // the following are for FTP only 51 | int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready; 52 | char *response, *retr, *size_cmd; 53 | int64_t seek_offset; // for lazy seek 54 | int64_t file_size; 55 | 56 | // the following are for HTTP only 57 | char *path, *http_host; 58 | } knetFile; 59 | 60 | #define knet_tell(fp) ((fp)->offset) 61 | #define knet_fileno(fp) ((fp)->fd) 62 | 63 | #ifdef __cplusplus 64 | extern "C" { 65 | #endif 66 | 67 | #ifdef _WIN32 68 | int knet_win32_init(); 69 | void knet_win32_destroy(); 70 | #endif 71 | 72 | knetFile *knet_open(const char *fn, const char *mode); 73 | 74 | /* 75 | This only works with local files. 76 | */ 77 | knetFile *knet_dopen(int fd, const char *mode); 78 | 79 | /* 80 | If ->is_ready==0, this routine updates ->fd; otherwise, it simply 81 | reads from ->fd. 82 | */ 83 | off_t knet_read(knetFile *fp, void *buf, off_t len); 84 | 85 | /* 86 | This routine only sets ->offset and ->is_ready=0. It does not 87 | communicate with the FTP server. 
	 */
	off_t knet_seek(knetFile *fp, int64_t off, int whence);
	/* Counterpart of knet_open()/knet_dopen(). */
	int knet_close(knetFile *fp);

#ifdef __cplusplus
}
#endif

#endif

--------------------------------------------------------------------------------
/src/inc/glf.h:
--------------------------------------------------------------------------------
/*
 * inc/Glf.h
 *
 * Copyright (c) 2011-2013 BGI-Shenzhen .
 *
 * This file is part of SOAPdenovo-Trans.
 *
 * SOAPdenovo-Trans is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SOAPdenovo-Trans is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with SOAPdenovo-Trans. If not, see <http://www.gnu.org/licenses/>.
20 | * 21 | */ 22 | #ifndef GLF_H_ 23 | #define GLF_H_ 24 | 25 | typedef struct { 26 | unsigned char ref_base:4, dummy:4; /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */ 27 | unsigned char max_mapQ; /** maximum mapping quality */ 28 | unsigned char lk[10]; /** log likelihood ratio, capped at 255 */ 29 | unsigned min_lk:8, depth:24; /** minimum lk capped at 255, and the number of mapped reads */ 30 | } glf1_t; 31 | 32 | #include 33 | #include "bgzf.h" 34 | typedef BGZF *glfFile; 35 | 36 | #define GLF3_RTYPE_END 0 37 | #define GLF3_RTYPE_SUB 1 38 | #define GLF3_RTYPE_INDEL 2 39 | 40 | typedef struct { 41 | uint8_t ref_base:4, rtype:4; /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */ 42 | uint8_t rms_mapQ; /** RMS mapping quality */ 43 | uint8_t lk[10]; /** log likelihood ratio, capped at 255 */ 44 | uint32_t min_lk:8, depth:24; /** minimum lk capped at 255, and the number of mapped reads */ 45 | int32_t offset; /** the first base in a chromosome has offset zero. */ 46 | // for indel (lkHom1, lkHom2 and lkHet are the first three elements in lk[10]) 47 | int16_t indel_len[2]; 48 | int32_t max_len; // maximum indel len; will be modified by glf3_read1() 49 | char *indel_seq[2]; 50 | } glf3_t; 51 | 52 | typedef struct { 53 | int32_t l_text; 54 | uint8_t *text; 55 | } glf3_header_t; 56 | 57 | #ifdef __cplusplus 58 | extern "C" { 59 | #endif 60 | 61 | #define glf3_init1() ((glf3_t*)calloc(1, sizeof(glf3_t))) 62 | #define glf3_destroy1(g3) do { free((g3)->indel_seq[0]); free((g3)->indel_seq[1]); free(g3); } while (0) 63 | 64 | glf3_header_t *glf3_header_init(); 65 | glf3_header_t *glf3_header_read(glfFile fp); 66 | void glf3_header_write(glfFile fp, const glf3_header_t *h); 67 | void glf3_header_destroy(glf3_header_t *h); 68 | char *glf3_ref_read(glfFile fp, int *len); 69 | void glf3_ref_write(glfFile fp, const char *name, int len); 70 | int glf3_write1(glfFile fp, const glf3_t *g3); 71 | int glf3_read1(glfFile fp, glf3_t *g3); 72 | 73 | #ifdef 
__cplusplus 74 | } 75 | #endif 76 | 77 | #endif 78 | -------------------------------------------------------------------------------- /src/output_pregraph.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Output_pregraph.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 
20 | * 21 | */ 22 | #include 23 | #include "newhash.h" 24 | #include 25 | #include 26 | static int outvCounter = 0; 27 | 28 | //after this LINKFLAGFILTER in the Kmer is destroyed 29 | static void output1vt (kmer_t * node1, FILE * fp) 30 | { 31 | if (!node1) 32 | { 33 | return; 34 | } 35 | if (!(node1->linear) && !(node1->deleted)) 36 | { 37 | outvCounter++; 38 | print_kmer (fp, node1->seq, ' '); 39 | if (outvCounter % 8 == 0) 40 | 41 | { 42 | fprintf (fp, "\n"); 43 | } 44 | } 45 | } 46 | 47 | void output_vertex (char *outfile) 48 | { 49 | char temp[256]; 50 | 51 | FILE * fp; 52 | int i; 53 | 54 | kmer_t * node; 55 | KmerSet * set; 56 | sprintf (temp, "%s.vertex", outfile); 57 | fp = ckopen (temp, "w"); 58 | for (i = 0; i < thrd_num; i++) 59 | { 60 | set = KmerSets[i]; 61 | set->iter_ptr = 0; 62 | while (set->iter_ptr < set->size) 63 | { 64 | if (!is_kmer_entity_null (set->flags, set->iter_ptr)) 65 | { 66 | node = set->array + set->iter_ptr; 67 | output1vt (node, fp); 68 | } 69 | set->iter_ptr++; 70 | } 71 | } 72 | fprintf (fp, "\n"); 73 | printf ("%d vertex outputed\n", outvCounter); 74 | fclose (fp); 75 | sprintf (temp, "%s.preGraphBasic", outfile); 76 | fp = ckopen (temp, "w"); 77 | fprintf (fp, "VERTEX %d K %d\n", outvCounter, overlaplen); 78 | fprintf (fp, "\nEDGEs %d\n", num_ed); 79 | fprintf (fp, "\nMaxReadLen %d MinReadLen %d MaxNameLen %d\n", maxReadLen4all, minReadLen, maxNameLen); 80 | fclose (fp); 81 | } 82 | 83 | void output_1edge (preEDGE * edge, gzFile * fp) 84 | { 85 | int i; 86 | 87 | gzprintf (fp, ">length %d,", edge->length); 88 | print_kmer_gz (fp, edge->from_node, ','); 89 | print_kmer_gz (fp, edge->to_node, ','); 90 | gzprintf (fp, "cvg %d, %d\n", edge->cvg, edge->bal_edge); 91 | for (i = 0; i < edge->length; i++) 92 | { 93 | gzprintf (fp, "%c", int2base ((int) edge->seq[i])); 94 | if ((i + 1) % 100 == 0) 95 | { 96 | gzprintf (fp, "\n"); 97 | } 98 | } 99 | gzprintf (fp, "\n"); 100 | } 101 | 102 | 103 | 
-------------------------------------------------------------------------------- /src/output_scaffold.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Output_scaffold.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | #include "stdinc.h" 24 | #include "newhash.h" 25 | #include "extfunc.h" 26 | #include "extvab.h" 27 | 28 | void output_contig_graph (char *outfile) 29 | { 30 | char name[256]; 31 | FILE *fp; 32 | unsigned int i; 33 | 34 | sprintf (name, "%s.contig.gvz", outfile); 35 | fp = ckopen (name, "w"); 36 | fprintf (fp, "digraph G{\n"); 37 | fprintf (fp, "\tsize=\"512,512\";\n"); 38 | 39 | for (i = num_ctg; i > 0; i--) 40 | { 41 | fprintf (fp, "\tV%d -> V%d[label =\"%d(%d)\"];\n", contig_array[i].from_vt, contig_array[i].to_vt, i, contig_array[i].length); 42 | } 43 | 44 | fprintf (fp, "}\n"); 45 | fclose (fp); 46 | } 47 | void output_scaf (char *outfile) 48 | { 49 | char name[256]; 50 | FILE *fp; 51 | unsigned int i; 52 | CONNECT *connect; 53 | boolean flag; 54 | 55 | sprintf (name, "%s.scaffold.gvz", outfile); 56 | fp = ckopen (name, "w"); 57 | fprintf (fp, "digraph G{\n"); 58 | fprintf (fp, "\tsize=\"512,512\";\n"); 59 | 60 | for (i = num_ctg; i > 0; i--) 61 | { 62 | 
//if(contig_array[i].mask||!contig_array[i].downwardConnect) 63 | if (!contig_array[i].downwardConnect) 64 | { 65 | continue; 66 | } 67 | 68 | connect = contig_array[i].downwardConnect; 69 | 70 | while (connect) 71 | { 72 | //if(connect->mask||connect->deleted){ 73 | if (connect->deleted) 74 | { 75 | connect = connect->next; 76 | continue; 77 | } 78 | 79 | if (connect->prevInScaf || connect->nextInScaf) 80 | { 81 | flag = 1; 82 | } 83 | else 84 | { 85 | flag = 0; 86 | } 87 | 88 | if (!connect->mask) 89 | fprintf (fp, "\tC%d_%d -> C%d_%d [label = \"%d(%d_%d)\"];\n", i, contig_array[i].length, connect->contigID, contig_array[connect->contigID].length, 90 | connect->gapLen, flag, connect->weight); 91 | else 92 | fprintf (fp, "\tC%d_%d -> C%d_%d [label = \"%d(%d_%d)\", color = red];\n", i, contig_array[i].length, connect->contigID, contig_array[connect->contigID].length, 93 | connect->gapLen, flag, connect->weight); 94 | 95 | connect = connect->next; 96 | } 97 | } 98 | 99 | fprintf (fp, "}\n"); 100 | fclose (fp); 101 | } 102 | -------------------------------------------------------------------------------- /src/inc/dfibpriv.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1997, 1999-2003 John-Mark Gurney. 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: dfibpriv.h,v 1.8 2007/10/09 09:56:46 zerbino Exp $
 *
 */

#ifndef _DFIBPRIV_H_
#define _DFIBPRIV_H_

//#include "globals.h"
#include "def2.h"

/*
 * specific node operations
 * (all declared static: each translation unit including this header is
 * expected to provide its own definitions)
 */

static DFibHeapNode * dfhe_newelem ( DFibHeap * );
static void dfhe_insertafter ( DFibHeapNode * a, DFibHeapNode * b );
static inline void dfhe_insertbefore ( DFibHeapNode * a, DFibHeapNode * b );
static DFibHeapNode * dfhe_remove ( DFibHeapNode * a );

/*
 * global heap operations
 */
struct dfibheap
{
	MEM_MANAGER * nodeMemory;	/* presumably the pool heap nodes are drawn from -- confirm in dfib.c */
	IDnum dfh_n;
	IDnum dfh_Dl;
	DFibHeapNode ** dfh_cons;
	DFibHeapNode * dfh_min;
	DFibHeapNode * dfh_root;
};

static void dfh_insertrootlist ( DFibHeap *, DFibHeapNode * );
static void dfh_removerootlist ( DFibHeap *, DFibHeapNode * );
static void dfh_consolidate ( DFibHeap * );
static void dfh_heaplink ( DFibHeap * h, DFibHeapNode * y, DFibHeapNode * x );
static void dfh_cut ( DFibHeap *, DFibHeapNode *, DFibHeapNode * );
static void dfh_cascading_cut ( DFibHeap *, DFibHeapNode * );
static DFibHeapNode * dfh_extractminel ( DFibHeap * );
static void dfh_checkcons ( DFibHeap * h );
static int dfh_compare ( DFibHeap * h, DFibHeapNode * a, DFibHeapNode * b );
static int dfh_comparedata ( DFibHeap * h, Time key,
                             unsigned int data, DFibHeapNode * b );
static void dfh_insertel ( DFibHeap * h, DFibHeapNode * x );


/*
 * general functions
 */
static inline IDnum ceillog2 ( IDnum a );

#endif /* _DFIBPRIV_H_ */

--------------------------------------------------------------------------------
/src/mem_manager.c:
--------------------------------------------------------------------------------
/*
 * Mem_manager.c
 *
 * Copyright (c) 2011-2013 BGI-Shenzhen .
 *
 * This file is part of SOAPdenovo-Trans.
 *
 * SOAPdenovo-Trans is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SOAPdenovo-Trans is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with SOAPdenovo-Trans. If not, see <http://www.gnu.org/licenses/>.
20 | * 21 | */ 22 | 23 | #include "stdinc.h" 24 | #include "newhash.h" 25 | #include "extfunc.h" 26 | #include "extvab.h" 27 | 28 | MEM_MANAGER *createMem_manager (int num_items, size_t unit_size) 29 | { 30 | MEM_MANAGER *mem_Manager = (MEM_MANAGER *) ckalloc (1 * sizeof (MEM_MANAGER)); 31 | 32 | mem_Manager->block_list = NULL; 33 | mem_Manager->items_per_block = num_items; 34 | mem_Manager->item_size = unit_size; 35 | mem_Manager->recycle_list = NULL; 36 | mem_Manager->counter = 0; 37 | return mem_Manager; 38 | } 39 | 40 | void freeMem_manager (MEM_MANAGER * mem_Manager) 41 | { 42 | BLOCK_START *ite_block, *temp_block; 43 | 44 | if (!mem_Manager) 45 | { 46 | return; 47 | } 48 | 49 | ite_block = mem_Manager->block_list; 50 | 51 | while (ite_block) 52 | { 53 | temp_block = ite_block; 54 | ite_block = ite_block->next; 55 | free ((void *) temp_block); 56 | } 57 | 58 | free ((void *) mem_Manager); 59 | } 60 | 61 | void *getItem (MEM_MANAGER * mem_Manager) 62 | { 63 | RECYCLE_MARK *mark; //this is the type of return value 64 | BLOCK_START *block; 65 | 66 | if (!mem_Manager) 67 | { 68 | return NULL; 69 | } 70 | 71 | if (mem_Manager->recycle_list) 72 | { 73 | mark = mem_Manager->recycle_list; 74 | mem_Manager->recycle_list = mark->next; 75 | return mark; 76 | } 77 | 78 | mem_Manager->counter++; 79 | 80 | if (!mem_Manager->block_list || mem_Manager->index_in_block == mem_Manager->items_per_block) 81 | { 82 | //pthread_mutex_lock(&gmutex); 83 | block = ckalloc (sizeof (BLOCK_START) + mem_Manager->items_per_block * mem_Manager->item_size); 84 | //mem_Manager->counter += sizeof(BLOCK_START)+mem_Manager->items_per_block*mem_Manager->item_size; 85 | //pthread_mutex_unlock(&gmutex); 86 | block->next = mem_Manager->block_list; 87 | mem_Manager->block_list = block; 88 | mem_Manager->index_in_block = 1; 89 | return (RECYCLE_MARK *) ((void *) block + sizeof (BLOCK_START)); 90 | } 91 | 92 | block = mem_Manager->block_list; 93 | return (RECYCLE_MARK *) ((void *) block + sizeof 
(BLOCK_START) + mem_Manager->item_size * (mem_Manager->index_in_block++)); 94 | } 95 | 96 | void returnItem (MEM_MANAGER * mem_Manager, void *item) 97 | { 98 | RECYCLE_MARK *mark; 99 | 100 | mark = item; 101 | mark->next = mem_Manager->recycle_list; 102 | mem_Manager->recycle_list = mark; 103 | } 104 | -------------------------------------------------------------------------------- /src/compactEdge.c: -------------------------------------------------------------------------------- 1 | /* 2 | * compactEdge.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 
20 | * 21 | */ 22 | 23 | #include "stdinc.h" 24 | #include "newhash.h" 25 | #include "extfunc.h" 26 | #include "extvab.h" 27 | 28 | void copyEdge (unsigned int source, unsigned int target) 29 | { 30 | edge_array[target].from_vt = edge_array[source].from_vt; 31 | edge_array[target].to_vt = edge_array[source].to_vt; 32 | edge_array[target].length = edge_array[source].length; 33 | edge_array[target].cvg = edge_array[source].cvg; 34 | edge_array[target].multi = edge_array[source].multi; 35 | 36 | if (edge_array[target].seq) 37 | { 38 | free ((void *) edge_array[target].seq); 39 | } 40 | 41 | edge_array[target].seq = edge_array[source].seq; 42 | edge_array[source].seq = NULL; 43 | edge_array[target].arcs = edge_array[source].arcs; 44 | edge_array[source].arcs = NULL; 45 | edge_array[target].deleted = edge_array[source].deleted; 46 | } 47 | 48 | //move edge from source to target 49 | void edgeMove (unsigned int source, unsigned int target) 50 | { 51 | unsigned int bal_source, bal_target; 52 | ARC *arc; 53 | 54 | copyEdge (source, target); 55 | bal_source = getTwinEdge (source); 56 | 57 | //bal_edge 58 | if (bal_source != source) 59 | { 60 | bal_target = target + 1; 61 | copyEdge (bal_source, bal_target); 62 | edge_array[target].bal_edge = 2; 63 | edge_array[bal_target].bal_edge = 0; 64 | } 65 | else 66 | { 67 | edge_array[target].bal_edge = 1; 68 | bal_target = target; 69 | } 70 | 71 | //take care of the arcs 72 | arc = edge_array[target].arcs; 73 | 74 | while (arc) 75 | { 76 | arc->bal_arc->to_ed = bal_target; 77 | arc = arc->next; 78 | } 79 | 80 | if (bal_target == target) 81 | { 82 | return; 83 | } 84 | 85 | arc = edge_array[bal_target].arcs; 86 | 87 | while (arc) 88 | { 89 | arc->bal_arc->to_ed = target; 90 | arc = arc->next; 91 | } 92 | } 93 | 94 | void compactEdgeArray () 95 | { 96 | unsigned int i; 97 | unsigned int validCounter = 0; 98 | unsigned int bal_ed; 99 | 100 | printf ("there're %d edges\n", num_ed); 101 | 102 | for (i = 1; i <= num_ed; i++) 103 | { 104 | 
if (edge_array[i].deleted) 105 | { 106 | continue; 107 | } 108 | 109 | validCounter++; 110 | 111 | if (i == validCounter) 112 | { 113 | continue; 114 | } 115 | 116 | bal_ed = getTwinEdge (i); 117 | edgeMove (i, validCounter); 118 | 119 | if (bal_ed != i) 120 | { 121 | i++; 122 | validCounter++; 123 | } 124 | } 125 | 126 | num_ed = validCounter; 127 | printf ("after compacting %d edges left\n", num_ed); 128 | } 129 | -------------------------------------------------------------------------------- /src/inc/fibpriv.h: -------------------------------------------------------------------------------- 1 | /*- 2 | * Copyright 1997, 1999-2003 John-Mark Gurney. 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | 7 | * are met: 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 | * SUCH DAMAGE. 
 *
 *	$Id: fibpriv.h,v 1.10 2007/10/09 09:56:46 zerbino Exp $
 *
 */

#ifndef _FIBPRIV_H_
#define _FIBPRIV_H_

#include "def2.h"

/*
 * specific node operations
 */

/*
 * One heap node.
 * NOTE(review): FibHeapNode is presumably a typedef of this struct made
 * elsewhere (def2.h?) -- confirm.  The field roles below are inferred
 * from the names and the usual Fibonacci-heap layout; verify them
 * against fibHeap.c before relying on them.
 */
struct fibheap_el
{
	int fhe_degree;			/* presumably the number of children */
	boolean fhe_mark;		/* presumably the "lost a child" mark used by cascading cuts */
	FibHeapNode * fhe_p;		/* presumably the parent node */
	FibHeapNode * fhe_child;	/* presumably one of the children */
	FibHeapNode * fhe_left;		/* sibling links; presumably a circular doubly linked list */
	FibHeapNode * fhe_right;
	Coordinate fhe_key;		/* ordering key */
	unsigned int fhe_data;		/* payload carried by the node */
};

/*
 * The helpers below are declared static, so every translation unit that
 * includes this header must provide its own definitions for the ones it
 * uses.
 */
static FibHeapNode * fhe_newelem ( struct fibheap * );
static void fhe_initelem ( FibHeapNode * );
static void fhe_insertafter ( FibHeapNode * a, FibHeapNode * b );
static inline void fhe_insertbefore ( FibHeapNode * a, FibHeapNode * b );
static FibHeapNode * fhe_remove ( FibHeapNode * a );

/*
 * global heap operations
 */

/*
 * The heap itself.
 * NOTE(review): field roles other than the types are inferred from the
 * names -- confirm against fibHeap.c.
 */
struct fibheap
{
	Coordinate ( *fh_cmp_fnct ) ( unsigned int, unsigned int );	/* comparison callback on two data values */
	MEM_MANAGER * nodeMemory;	/* pool manager; presumably the allocator for the nodes */
	IDnum fh_n;			/* presumably the number of nodes in the heap */
	IDnum fh_Dl;			/* presumably the size bound of the fh_cons array */
	FibHeapNode ** fh_cons;		/* presumably scratch array used while consolidating */
	FibHeapNode * fh_min;		/* presumably the current minimum node */
	FibHeapNode * fh_root;		/* presumably an entry point into the root list */
	unsigned int fh_neginf;
	boolean fh_keys: 1;		/* one-bit flag; presumably "order by key" rather than by callback */
};

static void fh_initheap ( FibHeap * );
static void fh_insertrootlist ( FibHeap *, FibHeapNode * );
static void fh_removerootlist ( FibHeap *, FibHeapNode * );
static void fh_consolidate ( FibHeap * );
static void fh_heaplink ( FibHeap * h, FibHeapNode * y, FibHeapNode * x );
static void fh_cut ( FibHeap *, FibHeapNode *, FibHeapNode * );
static void fh_cascading_cut ( FibHeap *, FibHeapNode * );
static FibHeapNode * fh_extractminel ( FibHeap * );
static void fh_checkcons ( FibHeap * h );
static void fh_destroyheap ( FibHeap * h );
static int fh_compare ( FibHeap * h, FibHeapNode * a, FibHeapNode * b );
static int fh_comparedata ( FibHeap * h, Coordinate key,
                            unsigned int data, FibHeapNode * b );
static void fh_insertel ( FibHeap * h, FibHeapNode * x );

/*
 * general functions
 */
static inline IDnum ceillog2 ( IDnum a );

#endif /* _FIBPRIV_H_ */

--------------------------------------------------------------------------------
/src/inc/global.h:
--------------------------------------------------------------------------------
/*
 * inc/Global.h
 *
 * Copyright (c) 2011-2013 BGI-Shenzhen .
 *
 * This file is part of SOAPdenovo-Trans.
 *
 * SOAPdenovo-Trans is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SOAPdenovo-Trans is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with SOAPdenovo-Trans. If not, see <http://www.gnu.org/licenses/>.
 *
 */

/*
 * Definitions (not declarations) of the program-wide variables, with
 * their start-up values.  Because these are definitions and the file has
 * no include guard, it must be included by exactly one translation unit;
 * every other source file needs extern declarations instead.
 */
int * contig_index_array = NULL;
int scaffNum=0;
int gapNum=1;
boolean fill=0;
int overlaplen = 23;
int inGraph;
long long n_ban;
long long n_solexa = 0;
long long prevNum = 0;
int ins_size_var = 20;
PE_INFO * pes = NULL;
MEM_MANAGER * rv_mem_manager = NULL;
MEM_MANAGER * cn_mem_manager = NULL;
MEM_MANAGER * arc_mem_manager = NULL;
unsigned int num_vt = 0;
unsigned long long new_num_vt=0;
unsigned int ** found_routes = NULL;
unsigned int * so_far = NULL;
int max_n_routes = 10;
int num_trace;
Kmer WORDFILTER;
unsigned int num_ed = 0;
unsigned int num_ctg = 0;
unsigned int num_ed_limit;
unsigned int extraEdgeNum;
EDGE * edge_array = NULL;
VERTEX * vt_array = NULL;
unsigned int * index_array = NULL;
CONTIG * contig_array = NULL;
int lineLen;
int len_bar = 100;
int weakPE = 3;
int fillGap = 0;
boolean globalFlag;
long long arcCounter;
MEM_MANAGER * prearc_mem_manager = NULL;
MEM_MANAGER ** preArc_mem_managers = NULL;
int maxReadLen = 0;
int maxReadLen4all = 0;
int minReadLen = 0;
int maxNameLen = 0;
ARC ** arcLookupTable = NULL;
long long * markersArray = NULL;
boolean deLowKmer = 0;
boolean deLowEdge = 1;
long long newCntCounter;
boolean repsTie = 0;
CONNECT ** cntLookupTable = NULL;
int num_libs = 0;
LIB_INFO * lib_array = NULL;
int libNo = 0;
long long readNumBack;
int gradsCounter;
unsigned int ctg_short = 0;
int thrd_num = 8;
int cvgAvg = 0;
KmerSet ** KmerSets = NULL;
KmerSet ** KmerSetsPatch = NULL;
DARRAY * readSeqInGap = NULL;
DARRAY * gapSeqDarray = NULL;
DARRAY ** darrayBuf;
boolean orig2new;
int maxSteps;
boolean maskRep = 1;
int GLDiff = 50;
int initKmerSetSize = 0;
int RPKM = 0;	//Tang 2013-3-30
long known_genome_size = 0;
int smallKmer = 0;
int deltaKmer = 0;
//mao
CONTIG_PATH *contigpath=NULL;
READ_PATH *readpath=NULL;
int *pool=NULL;
int num_pool=0;
unsigned int* flag_array =NULL;	//lzy 0602
CONTIG_PATHID * contig_path_array=NULL;
PATH_CONTIGID * path_contig_array=NULL;
LOCUS * component=NULL;
int cut_length=48;
int max_num=5;
int max_cnt=0;
int ctg_mask=100;

//mao 9-21
int dk = 5;
int de = 20;
int dd = 5 ;//mao 2011 10 21
int da = 5;
int dA =2;
int delowArc =200;
int SupportNum=2;
int N_kmer=0;//mao 2011-10-13
int read_trace=0;//mao 2011-10-21

/* declared only; the definition of kmerZero is not in this header */
extern Kmer kmerZero;

--------------------------------------------------------------------------------
/src/inc/faidx.h:
--------------------------------------------------------------------------------
/* The MIT License

   Copyright (c) 2008 Genome Research Ltd (GRL).

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
*/

/* Contact: Heng Li */

#ifndef FAIDX_H
#define FAIDX_H

/*!
  @header

  Index FASTA files and extract subsequences.

  @copyright The Wellcome Trust Sanger Institute.
 */

/* Opaque index handle: only the incomplete type is exposed here. */
struct __faidx_t;
typedef struct __faidx_t faidx_t;

#ifdef __cplusplus
extern "C" {
#endif

	/*!
	  @abstract   Build index for a FASTA or razip compressed FASTA file.
	  @param  fn  FASTA file name
	  @return     0 on success; or -1 on failure
	  @discussion File "fn.fai" will be generated.
	 */
	int fai_build(const char *fn);

	/*!
	  @abstract    Destroy a faidx_t struct.
	  @param  fai  Pointer to the struct to be destroyed
	 */
	void fai_destroy(faidx_t *fai);

	/*!
	  @abstract   Load index from "fn.fai".
	  @param  fn  File name of the FASTA file
	 */
	faidx_t *fai_load(const char *fn);

	/*!
	  @abstract    Fetch the sequence in a region.
	  @param  fai  Pointer to the faidx_t struct
	  @param  reg  Region in the format "chr2:20,000-30,000"
	  @param  len  Length of the region (passed by pointer; presumably
	               written by the callee -- TODO confirm)
	  @return      Pointer to the sequence; null on failure

	  @discussion The returned sequence is allocated by malloc family
	  and should be destroyed by end users by calling free() on it.
	 */
	char *fai_fetch(const faidx_t *fai, const char *reg, int *len);

	/*!
	  @abstract    Fetch the number of sequences.
	  @param  fai  Pointer to the faidx_t struct
	  @return      The number of sequences
	 */
	int faidx_fetch_nseq(const faidx_t *fai);

	/*!
	  @abstract    Fetch the sequence in a region given by name and
	               explicit coordinates.
	  @param  fai  Pointer to the faidx_t struct
	  @param  c_name Region name
	  @param  p_beg_i  Beginning position number (zero-based)
	  @param  p_end_i  End position number (zero-based)
	  @param  len  Length of the region (passed by pointer; presumably
	               written by the callee -- TODO confirm)
	  @return      Pointer to the sequence; null on failure

	  @discussion The returned sequence is allocated by malloc family
	  and should be destroyed by end users by calling free() on it.
	*/
	char *faidx_fetch_seq(const faidx_t *fai, char *c_name, int p_beg_i, int p_end_i, int *len);

#ifdef __cplusplus
}
#endif

#endif

--------------------------------------------------------------------------------
/src/seq.c:
--------------------------------------------------------------------------------
/*
 * Seq.c
 *
 * Copyright (c) 2011-2013 BGI-Shenzhen .
 *
 * This file is part of SOAPdenovo-Trans.
 *
 * SOAPdenovo-Trans is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SOAPdenovo-Trans is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with SOAPdenovo-Trans. If not, see <http://www.gnu.org/licenses/>.
20 | * 21 | */ 22 | 23 | #include "stdinc.h" 24 | #include "newhash.h" 25 | #include "extfunc.h" 26 | #include "extvab.h" 27 | /* 28 | void print_kmer(FILE *fp,Kmer kmer,char c) 29 | { 30 | fprintf(fp,"%llx %llx %llx %llx",kmer.high1,kmer.low1,kmer.high2,kmer.low2); 31 | fprintf(fp,"%c",c); 32 | 33 | }*/ 34 | void printTightString (char *tightSeq, int len) 35 | { 36 | int i; 37 | 38 | for (i = 0; i < len; i++) 39 | { 40 | printf ("%c", int2base ((int) getCharInTightString (tightSeq, i))); 41 | if ((i + 1) % 100 == 0) 42 | { 43 | printf ("\n"); 44 | } 45 | } 46 | printf ("\n"); 47 | } 48 | 49 | void writeChar2tightString (char nt, char *tightSeq, int pos) 50 | { 51 | char *byte = tightSeq + pos / 4; 52 | 53 | switch (pos % 4) 54 | { 55 | case 0: 56 | *byte &= 63; 57 | *byte += nt << 6; 58 | return; 59 | case 1: 60 | *byte &= 207; 61 | *byte += nt << 4; 62 | return; 63 | case 2: 64 | *byte &= 243; 65 | *byte += nt << 2; 66 | return; 67 | case 3: 68 | *byte &= 252; 69 | *byte += nt; 70 | return; 71 | } 72 | } 73 | 74 | char getCharInTightString (char *tightSeq, int pos) 75 | { 76 | char *byte = tightSeq + pos / 4; 77 | 78 | switch (pos % 4) 79 | { 80 | case 3: 81 | return (*byte & 3); 82 | case 2: 83 | return (*byte & 12) >> 2; 84 | case 1: 85 | return (*byte & 48) >> 4; 86 | case 0: 87 | return (*byte & 192) >> 6; 88 | } 89 | return 0; 90 | } 91 | 92 | // complement of sequence denoted 0, 1, 2, 3 93 | void reverseComplementSeq (char *seq, int len, char *bal_seq) 94 | { 95 | int i, index = 0; 96 | 97 | if (len < 1) 98 | { 99 | return; 100 | } 101 | for (i = len - 1; i >= 0; i--) 102 | { 103 | //mao 2011 10 8 104 | // if(seq[i] >= 4) 105 | // bal_seq[index++]=seq[i]; 106 | // else 107 | bal_seq[index++] = int_comp (seq[i]); 108 | } 109 | return; 110 | } 111 | 112 | // complement of sequence denoted 0, 1, 2, 3 113 | char *compl_int_seq (char *seq, int len) 114 | { 115 | char *bal_seq = NULL, c, bal_c; 116 | int i, index; 117 | 118 | if (len < 1) 119 | { 120 | return 
bal_seq; 121 | } 122 | bal_seq = (char *) ckalloc (len * sizeof (char)); 123 | index = 0; 124 | for (i = len - 1; i >= 0; i--) 125 | { 126 | c = seq[i]; 127 | if (c < 4) 128 | { 129 | bal_c = int_comp (c); 130 | } //3-c; 131 | else 132 | { 133 | bal_c = c; 134 | } 135 | bal_seq[index++] = bal_c; 136 | } 137 | return bal_seq; 138 | } 139 | 140 | long long trans_seq (char *seq, int len) 141 | { 142 | int i; 143 | long long res; 144 | 145 | res = 0; 146 | for (i = 0; i < len; i++) 147 | { 148 | res = res * 4 + seq[i]; 149 | } 150 | return (res); 151 | } 152 | 153 | /* 154 | char *kmer2seq(Kmer word) 155 | { 156 | int i; 157 | char *seq; 158 | Kmer charMask = 3; 159 | 160 | seq = (char *)ckalloc(overlaplen*sizeof(char)); 161 | for(i=overlaplen-1;i>=0;i--){ 162 | seq[i] = charMask&word; 163 | word >>= 2; 164 | } 165 | return seq; 166 | } 167 | */ 168 | -------------------------------------------------------------------------------- /src/loadReadPath.c: -------------------------------------------------------------------------------- 1 | /* 2 | * loadReadPath.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 
20 | * 21 | */ 22 | #include "stdinc.h" 23 | #include "newhash.h" 24 | #include "extfunc.h" 25 | #include "extvab.h" 26 | #include "zlib.h" 27 | #include 28 | 29 | 30 | static int split(unsigned int *arr, char *str,int contig_count)//, const char *del) 31 | { 32 | char *s =NULL; 33 | char *del = "\t"; 34 | int index=0; 35 | unsigned int contigID; 36 | unsigned int pre_contigID; 37 | s=strtok(str,del); 38 | while(s != NULL) 39 | { 40 | sscanf (s ,"%d", &contigID); 41 | if(contigID== pre_contigID && index == contig_count) 42 | break; 43 | arr[index++] = contigID; 44 | pre_contigID=contigID; 45 | s = strtok(NULL,del); 46 | } 47 | /* 48 | int i; 49 | for(i=0;i') 91 | { 92 | sscanf (line ,"%s %d %d ", &name, &contig_count,&cov); 93 | // printf("check:\t%s\t%d\t%d\n",name,contig_count,path_count); 94 | path_contig_array[path_index].contigID = (unsigned int *)ckalloc (contig_count* sizeof(unsigned int )); 95 | path_contig_array[path_index].contig_count = contig_count; 96 | path_contig_array[path_index].coverage=cov; 97 | } 98 | else 99 | { 100 | contig_count = split(&(path_contig_array[path_index].contigID[0]) , &(line[0]),path_contig_array[path_index].contig_count); 101 | if(contig_count >path_contig_array[path_index].contig_count) 102 | { 103 | printf("ERROR:\tinput %d\tfault:\t%d\n",contig_count,path_contig_array[path_index].contig_count); 104 | exit(0); 105 | } 106 | else 107 | { 108 | for(i=0;i. 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with SOAPdenovo-Trans. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef BAM_SAM_H
#define BAM_SAM_H

#include "bam.h"

/*!
  @header

  This file provides higher level of I/O routines and unifies the APIs
  for SAM and BAM formats. These APIs are more convenient and
  recommended.

  @copyright Genome Research Ltd.
 */

/*! @typedef
  @abstract SAM/BAM file handler
  @field  type    type of the handler; bit 1 for BAM, 2 for reading and bit 3-4 for flag format
  @field  x       the underlying stream; which union member is valid depends on `type`
  @field  x.bam   BAM file handler; valid if (type&1) == 1
  @field  x.tamr  SAM file handler for reading; valid if type == 2
  @field  x.tamw  SAM file handler for writing; valid if type == 0
  @field  header  header struct
 */
typedef struct {
	int type;
	union {
		tamFile tamr;
		bamFile bam;
		FILE *tamw;
	} x;
	bam_header_t *header;
} samfile_t;

#ifdef __cplusplus
extern "C" {
#endif

	/*!
	  @abstract     Open a SAM/BAM file

	  @param fn SAM/BAM file name; "-" is recognized as stdin (for
	  reading) or stdout (for writing).

	  @param mode open mode /[rw](b?)(u?)(h?)([xX]?)/: 'r' for reading,
	  'w' for writing, 'b' for BAM I/O, 'u' for uncompressed BAM output,
	  'h' for outputting header in SAM, 'x' for HEX flag and 'X' for
	  string flag. If 'b' present, it must immediately follow 'r' or
	  'w'. Valid modes are "r", "w", "wh", "wx", "whx", "wX", "whX",
	  "rb", "wb" and "wbu" exclusively.

	  @param aux auxiliary data; if mode[0]=='w', aux points to
	  bam_header_t; if strcmp(mode, "rb")!=0 and @SQ header lines in SAM
	  are absent, aux points to the file name of the list of the reference;
	  aux is not used otherwise. If @SQ header lines are present in SAM,
	  aux is not used, either.

	  @return       SAM/BAM file handler
	 */
	samfile_t *samopen(const char *fn, const char *mode, const void *aux);

	/*!
	  @abstract     Close a SAM/BAM handler
	  @param  fp    file handler to be closed
	 */
	void samclose(samfile_t *fp);

	/*!
	  @abstract     Read one alignment
	  @param  fp    file handler
	  @param  b     alignment
	  @return       bytes read
	 */
	int samread(samfile_t *fp, bam1_t *b);

	/*!
	  @abstract     Write one alignment
	  @param  fp    file handler
	  @param  b     alignment
	  @return       bytes written
	 */
	int samwrite(samfile_t *fp, const bam1_t *b);

	/*!
	  @abstract     Get the pileup for a whole alignment file
	  @param  fp    file handler
	  @param  mask  mask transferred to bam_plbuf_set_mask()
	  @param  func  user defined function called in the pileup process
	  @param  data  user provided data for func()
	 */
	int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *data);

	/*
	 * NOTE(review): undocumented upstream.  Judging by the name it
	 * returns the path of the .fai index belonging to fn_ref; whether the
	 * caller owns the returned string is not visible here -- confirm.
	 */
	char *samfaipath(const char *fn_ref);

#ifdef __cplusplus
}
#endif

#endif

--------------------------------------------------------------------------------
/src/inc/extvab.h:
--------------------------------------------------------------------------------
/*
 * inc/Extvab.h
 *
 * Copyright (c) 2011-2013 BGI-Shenzhen .
 *
 * This file is part of SOAPdenovo-Trans.
 *
 * SOAPdenovo-Trans is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SOAPdenovo-Trans is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with SOAPdenovo-Trans. If not, see <http://www.gnu.org/licenses/>.
 *
 */

/*
 * extern declarations of the program-wide variables, grouped by the
 * stage that mainly uses them.  This header only declares; each name
 * needs exactly one definition elsewhere in the program.  The types used
 * below (boolean, Kmer, EDGE, ...) must already be declared by whatever
 * the including file pulled in before this header.
 */

/*** global variables ****/
extern boolean fill;
extern int overlaplen;
extern int inGraph;
extern long long n_ban;
extern Kmer WORDFILTER;
extern boolean globalFlag;
extern int thrd_num;
/**** reads info *****/
extern long long n_solexa;
extern long long prevNum;
extern int ins_size_var;
extern PE_INFO * pes;
extern int maxReadLen;
extern int maxReadLen4all;
extern int minReadLen;
extern int maxNameLen;
extern int num_libs;
extern LIB_INFO * lib_array;
extern int libNo;
extern long long readNumBack;
extern int gradsCounter;
/*** used for pregraph *****/
extern MEM_MANAGER * prearc_mem_manager;	//also used in scaffolding
extern MEM_MANAGER ** preArc_mem_managers;
extern boolean deLowKmer;
extern boolean deLowEdge;
extern KmerSet ** KmerSets;	// also used in mapping
extern KmerSet ** KmerSetsPatch;
/**** used for contig construction ****/
extern boolean repsTie;
extern long long arcCounter;
extern unsigned int num_ed;
extern unsigned int num_ed_limit;
extern unsigned int extraEdgeNum;
extern EDGE * edge_array;
extern VERTEX * vt_array;
extern MEM_MANAGER * rv_mem_manager;
extern MEM_MANAGER * arc_mem_manager;
extern unsigned int num_vt;
extern unsigned long long new_num_vt;
extern int len_bar;
extern ARC ** arcLookupTable;
extern long long * markersArray;
/***** used for scaffolding *****/
extern MEM_MANAGER * cn_mem_manager;
extern unsigned int num_ctg;
extern unsigned int * index_array;
extern CONTIG * contig_array;
extern int lineLen;
extern int weakPE;
extern long long newCntCounter;
extern CONNECT ** cntLookupTable;
extern unsigned int ctg_short;
extern int cvgAvg;
extern boolean orig2new;
/**** used for gapFilling ****/
extern DARRAY * readSeqInGap;
extern DARRAY * gapSeqDarray;
extern DARRAY ** darrayBuf;
extern int fillGap;
/**** used for searchPath *****/
extern int maxSteps;
extern int num_trace;
extern unsigned int ** found_routes;
extern unsigned int * so_far;
extern int max_n_routes;
extern boolean maskRep;
extern int GLDiff;
extern int initKmerSetSize;
extern int RPKM;	//Tang 2013-3-30
extern long known_genome_size;
extern int smallKmer;
extern int deltaKmer;
extern int gapNum;
extern int scaffNum;
extern int *contig_index_array;
extern int max_num;
extern int max_cnt;
extern int ctg_mask;

//mao:
extern CONTIG_PATH *contigpath;
extern READ_PATH *readpath;
extern int *pool;
extern int num_pool;
extern CONTIG_PATHID * contig_path_array;
extern PATH_CONTIGID * path_contig_array;
extern LOCUS * component;
extern int cut_length;
extern int dk;
extern int de;
extern int dd;//mao 2011 10 21
extern int da;
extern int dA;
extern int delowArc;
extern int SupportNum;
extern int N_kmer;//mao 2011-10-13
extern int read_trace;//mao 2011-10-21
extern Kmer kmerZero;

--------------------------------------------------------------------------------
/src/inc/newhash.h:
--------------------------------------------------------------------------------
/*
 * inc/Newhash.h
 *
 * Copyright (c) 2011-2013 BGI-Shenzhen .
 *
 * This file is part of SOAPdenovo-Trans.
 *
 * SOAPdenovo-Trans is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | #ifndef __NEW_HASH_RJ 24 | #define __NEW_HASH_RJ 25 | 26 | #ifndef K_LOAD_FACTOR 27 | #define K_LOAD_FACTOR 0.75 28 | #endif 29 | 30 | #define MAX_KMER_COV 63 31 | #define EDGE_BIT_SIZE 6 32 | #define EDGE_XOR_MASK 0x3FU 33 | #define LINKS_BITS 0x00FFFFFFU 34 | 35 | #define get_kmer_seq(mer) ((mer).seq) 36 | #define set_kmer_seq(mer, val) ((mer).seq = val) 37 | 38 | #define get_kmer_left_cov(mer, idx) (((mer).l_links>>((idx)*EDGE_BIT_SIZE))&EDGE_XOR_MASK) 39 | #define set_kmer_left_cov(mer, idx, val) ((mer).l_links = ((mer).l_links&(~(EDGE_XOR_MASK<<((idx)*EDGE_BIT_SIZE)))) | (((val)&EDGE_XOR_MASK)<<((idx)*EDGE_BIT_SIZE)) ) 40 | #define get_kmer_left_covs(mer) (get_kmer_left_cov(mer, 0) + get_kmer_left_cov(mer, 1) + get_kmer_left_cov(mer, 2) + get_kmer_left_cov(mer, 3)) 41 | 42 | #define get_kmer_right_cov(mer, idx) (((mer).r_links>>((idx)*EDGE_BIT_SIZE))&EDGE_XOR_MASK) 43 | #define set_kmer_right_cov(mer, idx, val) ((mer).r_links = ((mer).r_links&(~(EDGE_XOR_MASK<<((idx)*EDGE_BIT_SIZE)))) | (((val)&EDGE_XOR_MASK)<<((idx)*EDGE_BIT_SIZE)) ) 44 | #define get_kmer_right_covs(mer) (get_kmer_right_cov(mer, 0) + get_kmer_right_cov(mer, 1) + get_kmer_right_cov(mer, 2) + get_kmer_right_cov(mer, 3)) 45 | 46 | 47 | #define is_kmer_entity_null(flags, idx) ((flags)[(idx)>>4]>>(((idx)&0x0f)<<1)&0x01) 48 | #define is_kmer_entity_del(flags, idx) ((flags)[(idx)>>4]>>(((idx)&0x0f)<<1)&0x02) 49 | #define set_kmer_entity_null(flags, idx) ((flags)[(idx)>>4] |= (0x01u<<(((idx)&0x0f)<<1))) 50 | #define set_kmer_entity_del(flags, idx) ((flags)[(idx)>>4] |= 
(0x02u<<(((idx)&0x0f)<<1))) 51 | #define clear_kmer_entity_null(flags, idx) ((flags)[(idx)>>4] &= ~(0x01u<<(((idx)&0x0f)<<1))) 52 | #define clear_kmer_entity_del(flags, idx) ((flags)[(idx)>>4] &= ~(0x02u<<(((idx)&0x0f)<<1))) 53 | #define exists_kmer_entity(flags, idx) (!((flags)[(idx)>>4]>>(((idx)&0x0f)<<1)&0x03)) 54 | 55 | //mao 127->31mer 56 | /* 57 | typedef __uint128_t u128b; 58 | typedef struct u256b 59 | { 60 | u128b low; 61 | u128b high; 62 | } U256b; 63 | */ 64 | 65 | typedef struct kmer_st 66 | { 67 | Kmer seq; 68 | ubyte4 l_links; // sever as edgeID since make_edge 69 | ubyte4 r_links: 4 * EDGE_BIT_SIZE; 70 | ubyte4 linear: 1; 71 | ubyte4 deleted: 1; 72 | ubyte4 checked: 1; 73 | ubyte4 single: 1; 74 | ubyte4 twin: 2; 75 | ubyte4 inEdge: 2; 76 | ubyte4 count; 77 | } kmer_t; 78 | 79 | typedef struct kmerSet_st 80 | { 81 | kmer_t * array; 82 | ubyte4 * flags; 83 | ubyte8 size; 84 | ubyte8 count; 85 | ubyte8 max; 86 | double load_factor; 87 | ubyte8 iter_ptr; 88 | } KmerSet; 89 | 90 | typedef struct kmer_pt 91 | { 92 | kmer_t * node; 93 | Kmer kmer; 94 | boolean isSmaller; 95 | struct kmer_pt * next; 96 | } KMER_PT; 97 | 98 | extern KmerSet * init_kmerset ( ubyte8 init_size, float load_factor ); 99 | extern int search_kmerset ( KmerSet * set, Kmer seq, kmer_t ** rs ); 100 | extern int put_kmerset ( KmerSet * set, Kmer seq, ubyte left, ubyte right, kmer_t ** kmer_p ); 101 | extern byte8 count_kmerset ( KmerSet * set ); 102 | extern void free_Sets ( KmerSet ** KmerSets, int num ); 103 | extern void free_kmerset ( KmerSet * set ); 104 | extern void dislink2nextUncertain ( kmer_t * node, char ch, boolean smaller ); 105 | extern void dislink2prevUncertain ( kmer_t * node, char ch, boolean smaller ); 106 | 107 | extern int count_branch2prev ( kmer_t * node ); 108 | extern int count_branch2next ( kmer_t * node ); 109 | extern char firstCharInKmer ( Kmer kmer ); 110 | 111 | #endif 112 | -------------------------------------------------------------------------------- 
/src/stack.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Stack.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | #include "stack.h" 24 | 25 | STACK *createStack (int num_items, size_t unit_size) 26 | { 27 | STACK *newStack = (STACK *) malloc (1 * sizeof (STACK)); 28 | 29 | newStack->block_list = NULL; 30 | newStack->items_per_block = num_items; 31 | newStack->item_size = unit_size; 32 | newStack->item_c = 0; 33 | return newStack; 34 | } 35 | 36 | void emptyStack (STACK * astack) 37 | { 38 | BLOCK_STARTER *block; 39 | 40 | if (!astack || !astack->block_list) 41 | { 42 | return; 43 | } 44 | 45 | block = astack->block_list; 46 | 47 | if (block->next) 48 | { 49 | block = block->next; 50 | } 51 | 52 | astack->block_list = block; 53 | astack->item_c = 0; 54 | astack->index_in_block = 0; 55 | } 56 | 57 | void freeStack (STACK * astack) 58 | { 59 | BLOCK_STARTER *ite_block, *temp_block; 60 | 61 | if (!astack) 62 | { 63 | return; 64 | } 65 | 66 | ite_block = astack->block_list; 67 | 68 | if (ite_block) 69 | { 70 | while (ite_block->next) 71 | { 72 | ite_block = ite_block->next; 73 | } 74 | } 75 | 76 | while (ite_block) 77 | { 78 | temp_block = ite_block; 79 | ite_block = 
ite_block->prev; 80 | free ((void *) temp_block); 81 | } 82 | 83 | free ((void *) astack); 84 | } 85 | 86 | void stackBackup (STACK * astack) 87 | { 88 | astack->block_backup = astack->block_list; 89 | astack->index_backup = astack->index_in_block; 90 | astack->item_c_backup = astack->item_c; 91 | } 92 | 93 | void stackRecover (STACK * astack) 94 | { 95 | astack->block_list = astack->block_backup; 96 | astack->index_in_block = astack->index_backup; 97 | astack->item_c = astack->item_c_backup; 98 | } 99 | 100 | void *stackPop (STACK * astack) 101 | { 102 | BLOCK_STARTER *block; 103 | 104 | if (!astack || !astack->block_list || !astack->item_c) 105 | { 106 | return NULL; 107 | } 108 | 109 | astack->item_c--; 110 | block = astack->block_list; 111 | 112 | if (astack->index_in_block == 1) 113 | { 114 | if (block->next) 115 | { 116 | astack->block_list = block->next; 117 | astack->index_in_block = astack->items_per_block; 118 | } 119 | else 120 | { 121 | astack->index_in_block = 0; 122 | astack->item_c = 0; 123 | } 124 | 125 | return (void *) ((void *) block + sizeof (BLOCK_STARTER)); 126 | } 127 | 128 | return (void *) ((void *) block + sizeof (BLOCK_STARTER) + astack->item_size * (--astack->index_in_block)); 129 | } 130 | 131 | void *stackPush (STACK * astack) 132 | { 133 | BLOCK_STARTER *block; 134 | 135 | if (!astack) 136 | { 137 | return NULL; 138 | } 139 | 140 | astack->item_c++; 141 | 142 | if (!astack->block_list || (astack->index_in_block == astack->items_per_block && !astack->block_list->prev)) 143 | { 144 | block = malloc (sizeof (BLOCK_STARTER) + astack->items_per_block * astack->item_size); 145 | block->prev = NULL; 146 | 147 | if (astack->block_list) 148 | { 149 | astack->block_list->prev = block; 150 | } 151 | 152 | block->next = astack->block_list; 153 | astack->block_list = block; 154 | astack->index_in_block = 1; 155 | return (void *) ((void *) block + sizeof (BLOCK_STARTER)); 156 | } 157 | else if (astack->index_in_block == astack->items_per_block && 
astack->block_list->prev) 158 | { 159 | astack->block_list = astack->block_list->prev; 160 | astack->index_in_block = 1; 161 | return (void *) ((void *) astack->block_list + sizeof (BLOCK_STARTER)); 162 | } 163 | 164 | block = astack->block_list; 165 | return (void *) ((void *) block + sizeof (BLOCK_STARTER) + astack->item_size * astack->index_in_block++); 166 | } 167 | -------------------------------------------------------------------------------- /src/inc/bgzf.h: -------------------------------------------------------------------------------- 1 | /* The MIT License 2 | 3 | Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | */ 23 | 24 | #ifndef __BGZF_H 25 | #define __BGZF_H 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #ifdef _USE_KNETFILE 32 | #include "knetfile.h" 33 | #endif 34 | 35 | //typedef int8_t bool; 36 | 37 | typedef struct { 38 | int file_descriptor; 39 | char open_mode; // 'r' or 'w' 40 | bool owned_file, is_uncompressed; 41 | #ifdef _USE_KNETFILE 42 | union { 43 | knetFile *fpr; 44 | FILE *fpw; 45 | } x; 46 | #else 47 | FILE* file; 48 | #endif 49 | int uncompressed_block_size; 50 | int compressed_block_size; 51 | void* uncompressed_block; 52 | void* compressed_block; 53 | int64_t block_address; 54 | int block_length; 55 | int block_offset; 56 | int cache_size; 57 | const char* error; 58 | void *cache; // a pointer to a hash table 59 | } BGZF; 60 | 61 | #ifdef __cplusplus 62 | extern "C" { 63 | #endif 64 | 65 | /* 66 | * Open an existing file descriptor for reading or writing. 67 | * Mode must be either "r" or "w". 68 | * A subsequent bgzf_close will not close the file descriptor. 69 | * Returns null on error. 70 | */ 71 | BGZF* bgzf_fdopen(int fd, const char* __restrict mode); 72 | 73 | /* 74 | * Open the specified file for reading or writing. 75 | * Mode must be either "r" or "w". 76 | * Returns null on error. 77 | */ 78 | BGZF* bgzf_open(const char* path, const char* __restrict mode); 79 | 80 | /* 81 | * Close the BGZ file and free all associated resources. 82 | * Does not close the underlying file descriptor if created with bgzf_fdopen. 83 | * Returns zero on success, -1 on error. 84 | */ 85 | int bgzf_close(BGZF* fp); 86 | 87 | /* 88 | * Read up to length bytes from the file storing into data. 89 | * Returns the number of bytes actually read. 90 | * Returns zero on end of file. 91 | * Returns -1 on error. 92 | */ 93 | int bgzf_read(BGZF* fp, void* data, int length); 94 | 95 | /* 96 | * Write length bytes from data to the file. 97 | * Returns the number of bytes written. 98 | * Returns -1 on error. 
99 | */ 100 | int bgzf_write(BGZF* fp, const void* data, int length); 101 | 102 | /* 103 | * Return a virtual file pointer to the current location in the file. 104 | * No interpretation of the value should be made, other than a subsequent 105 | * call to bgzf_seek can be used to position the file at the same point. 106 | * Return value is non-negative on success. 107 | * Returns -1 on error. 108 | */ 109 | int64_t bgzf_tell(BGZF* fp); 110 | 111 | /* 112 | * Set the file to read from the location specified by pos, which must 113 | * be a value previously returned by bgzf_tell for this file (but not 114 | * necessarily one returned by this file handle). 115 | * The where argument must be SEEK_SET. 116 | * Seeking on a file opened for write is not supported. 117 | * Returns zero on success, -1 on error. 118 | */ 119 | int64_t bgzf_seek(BGZF* fp, int64_t pos, int where); 120 | 121 | /* 122 | * Set the cache size. Zero to disable. By default, caching is 123 | * disabled. The recommended cache size for frequent random access is 124 | * about 8M bytes. 125 | */ 126 | void bgzf_set_cache_size(BGZF *fp, int cache_size); 127 | 128 | int bgzf_check_EOF(BGZF *fp); 129 | 130 | #ifdef __cplusplus 131 | } 132 | #endif 133 | 134 | #endif 135 | -------------------------------------------------------------------------------- /src/inc/razf.h: -------------------------------------------------------------------------------- 1 | /*- 2 | * RAZF : Random Access compressed(Z) File 3 | * Version: 1.0 4 | * Release Date: 2008-10-27 5 | * 6 | * Copyright 2008, Jue Ruan , Heng Li 7 | * 8 | * All rights reserved. 9 | * 10 | * Redistribution and use in source and binary forms, with or without 11 | * modification, are permitted provided that the following conditions 12 | * are met: 13 | * 1. Redistributions of source code must retain the above copyright 14 | * notice, this list of conditions and the following disclaimer. 15 | * 2. 
Redistributions in binary form must reproduce the above copyright 16 | * notice, this list of conditions and the following disclaimer in the 17 | * documentation and/or other materials provided with the distribution. 18 | * 19 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 | * SUCH DAMAGE. 
30 | */ 31 | 32 | 33 | #ifndef __RAZF_RJ_H 34 | #define __RAZF_RJ_H 35 | 36 | #include 37 | #include 38 | #include "zlib.h" 39 | 40 | #ifdef _USE_KNETFILE 41 | #include "knetfile.h" 42 | #endif 43 | 44 | #if ZLIB_VERNUM < 0x1221 45 | #define _RZ_READONLY 46 | struct _gz_header_s; 47 | typedef struct _gz_header_s _gz_header; 48 | #define gz_header _gz_header 49 | #endif 50 | 51 | #define WINDOW_BITS 15 52 | 53 | #ifndef RZ_BLOCK_SIZE 54 | #define RZ_BLOCK_SIZE (1<mode from HEAD to TYPE after call inflateReset */ 104 | int buf_off, buf_len; 105 | int z_err, z_eof; 106 | int seekable; 107 | /* Indice where the source is seekable */ 108 | int load_index; 109 | /* set has_index to 0 in mode 'w', then index will be discarded */ 110 | } RAZF; 111 | 112 | #ifdef __cplusplus 113 | extern "C" { 114 | #endif 115 | 116 | RAZF* razf_dopen(int data_fd, const char *mode); 117 | RAZF *razf_open(const char *fn, const char *mode); 118 | int razf_write(RAZF* rz, const void *data, int size); 119 | int razf_read(RAZF* rz, void *data, int size); 120 | int64_t razf_seek(RAZF* rz, int64_t pos, int where); 121 | void razf_close(RAZF* rz); 122 | 123 | #define razf_tell(rz) ((rz)->out) 124 | 125 | RAZF* razf_open2(const char *filename, const char *mode); 126 | RAZF* razf_dopen2(int fd, const char *mode); 127 | uint64_t razf_tell2(RAZF *rz); 128 | int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where); 129 | 130 | #ifdef __cplusplus 131 | } 132 | #endif 133 | 134 | #endif 135 | -------------------------------------------------------------------------------- /src/ReadTrace.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ReadTrace.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 
7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | #include "stdinc.h" 23 | #include "unistd.h" 24 | #include "stdlib.h" 25 | #include "newhash.h" 26 | #include "extfunc.h" 27 | #include "extvab.h" 28 | #include "dfibHeap.h" 29 | #include "fibHeap.h" 30 | #include "darray.h" 31 | 32 | 33 | typedef struct readOnContig 34 | { 35 | unsigned long long readID; 36 | int read_pos,contig_pos; 37 | int alignLength; 38 | char orign; 39 | struct readOnContig * next; 40 | }READONCONTIG; 41 | void getReadOnScaf(char *file) 42 | { 43 | char name[1024]; 44 | char line[1024]; 45 | FILE * fp1=NULL,*fp2=NULL,*output=NULL; 46 | 47 | sprintf(name,"%s.readInformation",file); 48 | fp1=ckopen(name, "r"); 49 | sprintf(name,"%s.contigPosInscaff",file); 50 | fp2=ckopen(name, "r"); 51 | sprintf(name,"%s.readOnScaf",file); 52 | output=ckopen(name,"w"); 53 | 54 | unsigned long long readID; 55 | unsigned int contigID; 56 | int read_pos,contig_pos; 57 | int alignLength; 58 | char orign; 59 | 60 | READONCONTIG ** contig2read = (READONCONTIG **) ckalloc ( sizeof(READONCONTIG *) * (num_ctg + 1)); 61 | READONCONTIG *temp=NULL; 62 | unsigned int index; 63 | for(index=0;index<=num_ctg;index++) 64 | { 65 | contig2read[index]=NULL; 66 | contig_array[index].flag=0; 67 | } 68 | while(fgets(line,sizeof(line),fp1)) 69 | { 70 | sscanf(line,"%llu %d %d %d %d 
%c",&readID,&read_pos,&contigID,&contig_pos,&alignLength,&orign); 71 | temp=(READONCONTIG *) ckalloc ( sizeof(READONCONTIG )); 72 | temp->next=NULL; 73 | temp->readID=readID; 74 | temp->read_pos=read_pos; 75 | temp->contig_pos=contig_pos; 76 | temp->alignLength=alignLength; 77 | temp->orign=orign; 78 | 79 | temp->next = contig2read[contigID]; 80 | contig2read[contigID] = temp; 81 | } 82 | int isFirst=0; 83 | int scafPos,scafAlignLength; 84 | char scafOrign; 85 | while(fgets(line,sizeof(line),fp2)) 86 | { 87 | if(line[0]=='>') 88 | { 89 | fprintf(output,"%s",line); 90 | isFirst=1; 91 | } 92 | else 93 | { 94 | sscanf(line ,"%d %d %c %d",&contigID,&contig_pos,&orign,&alignLength); 95 | 96 | temp = contig2read[contigID] ; 97 | contig_array[contigID].flag=1; 98 | contig_array[getTwinCtg(contigID)].flag=1; 99 | while(temp) 100 | { 101 | if(isFirst) 102 | { 103 | scafPos=contig_pos + temp->contig_pos; 104 | scafAlignLength = temp->alignLength; 105 | } 106 | else 107 | { 108 | scafPos=contig_pos + temp->contig_pos - overlaplen; 109 | if( temp->contig_pos < overlaplen) 110 | scafAlignLength = temp->alignLength - overlaplen + temp->contig_pos; 111 | else 112 | scafAlignLength = temp->alignLength; 113 | } 114 | 115 | if(orign == temp->orign) 116 | { 117 | scafOrign = '+'; 118 | } 119 | else 120 | { 121 | scafOrign = '-'; 122 | } 123 | fprintf(output , "%llu\t%d\t%d\t%c\t%d\n" , temp->readID,temp->read_pos,scafPos,scafOrign,scafAlignLength); 124 | temp = temp->next; 125 | } 126 | isFirst=0; 127 | } 128 | } 129 | for(index=0;index<=num_ctg;index++) 130 | { 131 | if ((contig_array[index].length + overlaplen) < 100 || contig_array[index].flag) 132 | { 133 | continue; 134 | } 135 | fprintf(output,">C%d\n", index); 136 | contig_array[index].flag=1; 137 | contig_array[getTwinCtg(index)].flag=1; 138 | temp = contig2read[index] ; 139 | while(temp) 140 | { 141 | fprintf(output , "%llu\t%d\t%d\t%c\t%d\n" , temp->readID,temp->read_pos,temp->contig_pos,temp->orign,temp->alignLength); 142 
| temp=temp->next; 143 | } 144 | } 145 | fclose(fp1); 146 | fclose(fp2); 147 | fclose(output); 148 | 149 | for(index=0;index<=num_ctg;index++) 150 | { 151 | while(contig2read[index]) 152 | { 153 | temp=contig2read[index]->next; 154 | contig2read[index]->next = NULL; 155 | free(contig2read[index]); 156 | contig2read[index]=temp; 157 | } 158 | } 159 | free(contig2read); 160 | } 161 | -------------------------------------------------------------------------------- /src/map.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Map.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 
20 | * 21 | */ 22 | #include "stdinc.h" 23 | #include "newhash.h" 24 | #include "extfunc.h" 25 | #include "extvab.h" 26 | 27 | static void initenv (int argc, char **argv); 28 | static char shortrdsfile[256]; 29 | static char graphfile[256]; 30 | 31 | static void display_map_usage (); 32 | 33 | static int getMinOverlap (char *gfile) 34 | { 35 | char name[256], ch; 36 | FILE *fp; 37 | int num_kmer, overlaplen = 23; 38 | char line[1024]; 39 | 40 | sprintf (name, "%s.preGraphBasic", gfile); 41 | fp = fopen (name, "r"); 42 | 43 | if (!fp) 44 | { 45 | return overlaplen; 46 | } 47 | 48 | while (fgets (line, sizeof (line), fp) != NULL) 49 | { 50 | if (line[0] == 'V') 51 | { 52 | sscanf (line + 6, "%d %c %d", &num_kmer, &ch, &overlaplen); 53 | } 54 | else if (line[0] == 'M') 55 | { 56 | sscanf (line, "MaxReadLen %d MinReadLen %d MaxNameLen %d", &maxReadLen, &minReadLen, &maxNameLen); 57 | } 58 | } 59 | 60 | fclose (fp); 61 | return overlaplen; 62 | } 63 | 64 | int call_align (int argc, char **argv) 65 | { 66 | time_t start_t, stop_t, time_bef, time_aft; 67 | N_kmer=0; 68 | time (&start_t); 69 | if(strlen(graphfile) == 0) 70 | { 71 | initenv (argc, argv); 72 | } 73 | overlaplen = getMinOverlap (graphfile); 74 | initenv (argc, argv); 75 | 76 | printf ("K = %d\n", overlaplen); 77 | time (&time_bef); 78 | ctg_short = overlaplen + 2; 79 | printf ("contig len cutoff: %d\n", ctg_short); 80 | prlContig2nodes (graphfile, ctg_short); 81 | time (&time_aft); 82 | printf ("time spent on De bruijn graph construction: %ds\n\n", (int) (time_aft - time_bef)); 83 | 84 | //map long read to edge one by one 85 | time (&time_bef); 86 | // prlLongRead2Ctg (shortrdsfile, graphfile); 87 | time (&time_aft); 88 | // printf ("time spent on mapping long reads: %ds\n\n", (int) (time_aft - time_bef)); 89 | 90 | //map read to edge one by one 91 | time (&time_bef); 92 | prlRead2Ctg (shortrdsfile, graphfile); 93 | time (&time_aft); 94 | printf ("time spent on mapping reads: %ds\n\n", (int) (time_aft - 
time_bef)); 95 | free_Sets (KmerSets, thrd_num); 96 | time (&stop_t); 97 | printf ("overall time for alignment: %dm\n\n", (int) (stop_t - start_t) / 60); 98 | return 0; 99 | } 100 | 101 | /***************************************************************************** 102 | * Parse command line switches 103 | *****************************************************************************/ 104 | 105 | void initenv (int argc, char **argv) 106 | { 107 | int copt; 108 | int inpseq, outseq; 109 | extern char *optarg; 110 | char temp[100]; 111 | 112 | optind = 1; 113 | inpseq = outseq = 0; 114 | 115 | while ((copt = getopt (argc, argv, "s:g:K:p:rfR")) != EOF) 116 | { 117 | //printf("get option\n"); 118 | switch (copt) 119 | { 120 | case 's': 121 | inpseq = 1; 122 | sscanf (optarg, "%s", shortrdsfile); 123 | break; 124 | case 'g': 125 | outseq = 1; 126 | sscanf (optarg, "%s", graphfile); // 127 | break; 128 | case 'K': 129 | sscanf (optarg, "%s", temp); // 130 | overlaplen = atoi (temp); 131 | break; 132 | case 'p': 133 | sscanf (optarg, "%s", temp); // 134 | thrd_num = atoi (temp); 135 | break; 136 | case 'r': 137 | read_trace=1; 138 | break; 139 | case 'R': 140 | RPKM=1; 141 | read_trace= 1; 142 | break; 143 | case 'f': 144 | fill=1; 145 | break; 146 | default: 147 | if (inpseq == 0 || outseq == 0) // 148 | { 149 | display_map_usage (); 150 | exit (1); 151 | } 152 | } 153 | } 154 | 155 | if (inpseq == 0 || outseq == 0) // 156 | { 157 | //printf("need more\n"); 158 | display_map_usage (); 159 | exit (1); 160 | } 161 | } 162 | 163 | static void display_map_usage () 164 | { 165 | printf ("\nmap -s configFile -g inputGraph [-f -R] [-p n_cpu]\n"); 166 | printf (" -s\t\tconfigFile: the config file of reads\n"); 167 | printf (" -g\t\tinputGraph: prefix of input graph file name\n"); 168 | // printf (" -r\t(optional)\toutput the information between read and contig, [NO]\n"); 169 | printf (" -f\t(optional)\toutput gap related reads for SRkgf to fill gap, [NO]\n"); 170 | printf (" 
-R\t(optional)\toutput assembly RPKM statistics, [NO]\n"); 171 | printf (" -p\t\t\tn_cpu: number of cpu for use, [8]\n"); 172 | } 173 | -------------------------------------------------------------------------------- /src/hashFunction.c: -------------------------------------------------------------------------------- 1 | /* 2 | * hashFunction.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 
20 | * 21 | */ 22 | 23 | #include 24 | 25 | #define KMER_HASH_MASK 0x0000000000ffffffL 26 | #define KMER_HASH_BUCKETS 16777216 // 4^12 27 | 28 | static int crc_table[256] = { 29 | 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 30 | 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 31 | 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 32 | 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 33 | 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, 34 | 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, 35 | 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 36 | 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 37 | 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 38 | 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, 39 | 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 40 | 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 41 | 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 42 | 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 43 | 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, 44 | 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, 45 | 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 46 | 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, 47 | 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 48 | 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 49 | 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 50 | 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, 51 | 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, 52 | 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, 53 | 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 54 | 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 55 | 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, 56 | 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 57 | 
0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, 58 | 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, 59 | 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 60 | 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 61 | 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 62 | 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, 63 | 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, 64 | 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 65 | 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 66 | 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 67 | 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, 68 | 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, 69 | 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, 70 | 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, 71 | 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 72 | 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 73 | 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, 74 | 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, 75 | 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 76 | 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, 77 | 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 78 | 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 79 | 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 80 | 0x2d02ef8d 81 | }; 82 | 83 | static int crc32 (int crc, const char *buf, int len) 84 | { 85 | if (buf == NULL) 86 | { 87 | return 0; 88 | } 89 | 90 | crc = crc ^ 0xffffffff; 91 | 92 | while (len--) 93 | { 94 | crc = crc_table[((int) crc ^ (*buf++)) & 0xff] ^ (crc >> 8); 95 | } 96 | 97 | return crc ^ 0xffffffff; 98 | } 99 | /*127kmer -> 31kmer 100 | ubyte8 hash_kmer (Kmer kmer) 101 | { 102 | ubyte8 hash = kmer.low2; 103 | 104 | hash = crc32 (0, (char *) &kmer, sizeof (Kmer)); 105 | hash &= 
KMER_HASH_MASK; 106 | return hash; 107 | }*/ 108 | ubyte8 hash_kmer(Kmer kmer) 109 | { 110 | #ifdef MER127 111 | ubyte8 hash = kmer.low2; 112 | #endif 113 | #ifdef MER63 114 | ubyte8 hash = kmer.low; 115 | #endif 116 | #ifdef MER31 117 | ubyte8 hash = kmer; 118 | #endif 119 | hash = crc32(0, (char *) &kmer, sizeof(Kmer)); 120 | hash &= KMER_HASH_MASK; 121 | return hash; 122 | } 123 | 124 | -------------------------------------------------------------------------------- /src/scaffold.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Scaffold.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 
20 | * 21 | */ 22 | 23 | #include "stdinc.h" 24 | #include "newhash.h" 25 | #include "extfunc.h" 26 | #include "extvab.h" 27 | #include "transcriptome.c" 28 | 29 | static void initenv (int argc, char **argv); 30 | static void display_scaff_usage (); 31 | 32 | static boolean LINK, SCAFF; 33 | static char graphfile[256]; 34 | 35 | int call_scaffold (int argc, char **argv) 36 | { 37 | time_t start_t, stop_t, time_bef, time_aft; 38 | 39 | time (&start_t); 40 | initenv (argc, argv); 41 | loadPEgrads (graphfile); 42 | time (&time_bef); 43 | loadUpdatedEdges (graphfile); 44 | time (&time_aft); 45 | printf ("time spent on loading edges %ds\n", (int) (time_aft - time_bef)); 46 | 47 | if (!SCAFF) 48 | { 49 | time (&time_bef); 50 | PE2Links (graphfile); 51 | time (&time_aft); 52 | printf ("time spent on loading pair end info %ds\n\n", (int) (time_aft - time_bef)); 53 | 54 | time (&time_bef); 55 | Links2Scaf (graphfile); 56 | time (&time_aft); 57 | printf ("time spent on creating scaffolds %ds\n\n", (int) (time_aft - time_bef)); 58 | 59 | time(&time_bef); 60 | transcriptome(graphfile); 61 | time(&time_aft); 62 | printf("time spent on creating transcriptome %ds\n",(int)(time_aft-time_bef)); 63 | // scaffolding (100, graphfile); 64 | } 65 | 66 | prlReadsCloseGap (graphfile); 67 | // locateReadOnScaf(graphfile); 68 | ScafStat (100, graphfile); 69 | if(read_trace) 70 | { 71 | getReadOnScaf(graphfile); 72 | if(RPKM) //Must add '-R' parameter RPKMStat(graphfile); 73 | RPKMStat(graphfile); 74 | } 75 | 76 | free_pe_mem (); 77 | 78 | if (index_array) 79 | { 80 | free ((void *) index_array); 81 | } 82 | 83 | freeContig_array (); 84 | destroyPreArcMem (); 85 | destroyConnectMem (); 86 | deleteCntLookupTable (); 87 | time (&stop_t); 88 | printf ("time elapsed: %dm\n", (int) (stop_t - start_t) / 60); 89 | return 0; 90 | } 91 | 92 | /***************************************************************************** 93 | * Parse command line switches 94 | 
*****************************************************************************/ 95 | 96 | void initenv (int argc, char **argv) 97 | { 98 | int copt; 99 | int inpseq; 100 | extern char *optarg; 101 | char temp[256]; 102 | 103 | inpseq = 0; 104 | LINK = 0; 105 | SCAFF = 0; 106 | optind = 1; 107 | 108 | while ((copt = getopt (argc, argv, "g:L:p:G:N:FuSt:c:rR")) != EOF) 109 | { 110 | switch (copt) 111 | { 112 | case 'g': 113 | inGraph = 1; 114 | sscanf (optarg, "%s", graphfile); // 115 | break; 116 | case 'G': 117 | sscanf (optarg, "%s", temp); // 118 | GLDiff = atoi (temp); 119 | break; 120 | case 'L': 121 | sscanf (optarg, "%s", temp); 122 | ctg_mask = atoi (temp); 123 | break; 124 | case 'N': 125 | sscanf ( optarg, "%s", temp ); 126 | known_genome_size = atoi ( temp ); 127 | break; 128 | case 'F': 129 | fillGap = 1; 130 | break; 131 | case 'S': 132 | SCAFF = 1; 133 | break; 134 | case 'u': 135 | maskRep = 0; 136 | break; 137 | case 'p': 138 | sscanf (optarg, "%s", temp); // 139 | thrd_num = atoi (temp); 140 | break; 141 | case 't': 142 | sscanf (optarg, "%s", temp); // 143 | max_num = atoi (temp)>0? atoi (temp):5; 144 | break; 145 | case 'c': 146 | sscanf (optarg, "%s", temp); // 147 | max_cnt = atoi (temp)>=0? 
atoi (temp):0; 148 | break; 149 | case 'r': 150 | read_trace= 1; 151 | break; 152 | case 'R': 153 | RPKM=1; 154 | read_trace= 1; 155 | break; 156 | default: 157 | if (inGraph == 0) // 158 | { 159 | display_scaff_usage (); 160 | exit (-1); 161 | } 162 | } 163 | } 164 | 165 | if (inGraph == 0) // 166 | { 167 | display_scaff_usage (); 168 | exit (-1); 169 | } 170 | } 171 | 172 | static void display_scaff_usage () 173 | { 174 | printf ("\nscaff -g inputGraph [-R -S -F] [-p n_cpu -L minContigLen -t locusMaxOutput -G gapLenDiff]\n"); 175 | printf (" -g\t\tinputGraph: prefix of input graph file name\n"); 176 | // printf (" -r\t(optional)\toutput the information between read and scaffold, [NO]\n"); 177 | printf (" -R\t\t(optional) output assembly RPKM statistics, [NO]\n"); 178 | printf (" -S\t(optional)\tscaffold structure exists, [NO]\n"); 179 | printf (" -F\t(optional)\tfill gaps in scaffolds, [NO]\n"); 180 | printf (" -p\t\t\tn_cpu: number of cpu for use, [8]\n"); 181 | printf (" -L\t\t\tminContigLen: shortest contig for scaffolding, [100]\n"); 182 | printf (" -t\t\t\tlocusMaxOutput: output the number of transcripts no more than locusMaxOutput in one locus, [5]\n"); 183 | printf (" -G\t\t\tgapLenDiff: allowed length difference between estimated and filled gap, [50]\n"); 184 | } 185 | -------------------------------------------------------------------------------- /src/connect.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Connect.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 
12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | #include "stdinc.h" 24 | #include "newhash.h" 25 | #include "extfunc.h" 26 | #include "extvab.h" 27 | 28 | #define CNBLOCKSIZE 100000 29 | 30 | void createCntMemManager () 31 | { 32 | if (!cn_mem_manager) 33 | { 34 | cn_mem_manager = createMem_manager (CNBLOCKSIZE, sizeof (CONNECT)); 35 | } 36 | else 37 | { 38 | printf ("cn_mem_manger was created\n"); 39 | } 40 | } 41 | 42 | void destroyConnectMem () 43 | { 44 | freeMem_manager (cn_mem_manager); 45 | cn_mem_manager = NULL; 46 | } 47 | 48 | CONNECT *allocateCN (unsigned int contigId, int gap) 49 | { 50 | CONNECT *newCN; 51 | 52 | newCN = (CONNECT *) getItem (cn_mem_manager); 53 | newCN->contigID = contigId; 54 | newCN->gapLen = gap; 55 | newCN->minGap = 0; 56 | newCN->maxGap = 0; 57 | newCN->bySmall = 0; 58 | newCN->weakPoint = 0; 59 | newCN->weight = 1; 60 | newCN->weightNotInherit = 0; 61 | newCN->mask = 0; 62 | newCN->used = 0; 63 | newCN->checking = 0; 64 | newCN->deleted = 0; 65 | newCN->prevInScaf = 0; 66 | newCN->inherit = 0; 67 | newCN->singleInScaf = 0; 68 | newCN->nextInScaf = NULL; 69 | return newCN; 70 | } 71 | 72 | void output_cntGVZ (char *outfile) 73 | { 74 | char name[256]; 75 | FILE *fp; 76 | unsigned int i; 77 | CONNECT *connect; 78 | boolean flag; 79 | 80 | sprintf (name, "%s.scaffold.gvz", outfile); 81 | fp = ckopen (name, "w"); 82 | fprintf (fp, "digraph G{\n"); 83 | fprintf (fp, "\tsize=\"512,512\";\n"); 84 | 85 | for (i = num_ctg; i > 0; i--) 86 | { 87 | //if(contig_array[i].mask||!contig_array[i].downwardConnect) 88 | if (!contig_array[i].downwardConnect) 89 | { 90 | 
continue; 91 | } 92 | 93 | connect = contig_array[i].downwardConnect; 94 | 95 | while (connect) 96 | { 97 | //if(connect->mask||connect->deleted){ 98 | if (connect->deleted) 99 | { 100 | connect = connect->next; 101 | continue; 102 | } 103 | 104 | if (connect->prevInScaf || connect->nextInScaf) 105 | { 106 | flag = 1; 107 | } 108 | else 109 | { 110 | flag = 0; 111 | } 112 | 113 | if (!connect->mask) 114 | fprintf (fp, "\tC%d_%d -> C%d_%d [label = \"%d(%d_%d)\"];\n", i, contig_array[i].length, connect->contigID, contig_array[connect->contigID].length, 115 | connect->gapLen, flag, connect->weight); 116 | else 117 | fprintf (fp, "\tC%d_%d -> C%d_%d [label = \"%d(%d_%d)\", color = red];\n", i, contig_array[i].length, connect->contigID, contig_array[connect->contigID].length, 118 | connect->gapLen, flag, connect->weight); 119 | 120 | connect = connect->next; 121 | } 122 | } 123 | 124 | fprintf (fp, "}\n"); 125 | fclose (fp); 126 | } 127 | 128 | /***************** below this line all codes are about lookup table *****************/ 129 | 130 | void createCntLookupTable () 131 | { 132 | if (!cntLookupTable) 133 | { 134 | cntLookupTable = (CONNECT **) ckalloc ((3 * num_ctg + 1) * sizeof (CONNECT *)); 135 | } 136 | } 137 | 138 | void deleteCntLookupTable () 139 | { 140 | if (cntLookupTable) 141 | { 142 | free ((void *) cntLookupTable); 143 | cntLookupTable = NULL; 144 | } 145 | } 146 | 147 | void putCnt2LookupTable (unsigned int from_c, CONNECT * cnt) 148 | { 149 | if (!cnt || !cntLookupTable) 150 | { 151 | return; 152 | } 153 | 154 | unsigned int index = 2 * from_c + cnt->contigID; 155 | 156 | cnt->nextInLookupTable = cntLookupTable[index]; 157 | cntLookupTable[index] = cnt; 158 | } 159 | 160 | static CONNECT *getCntInLookupTable (unsigned int from_c, unsigned int to_c) 161 | { 162 | unsigned int index = 2 * from_c + to_c; 163 | CONNECT *ite_cnt = cntLookupTable[index]; 164 | 165 | while (ite_cnt) 166 | { 167 | if (ite_cnt->contigID == to_c) 168 | { 169 | return ite_cnt; 
170 | } 171 | 172 | ite_cnt = ite_cnt->nextInLookupTable; 173 | } 174 | 175 | return NULL; 176 | } 177 | 178 | CONNECT *getCntBetween (unsigned int from_c, unsigned int to_c) 179 | { 180 | CONNECT *pcnt; 181 | 182 | if (cntLookupTable) 183 | { 184 | pcnt = getCntInLookupTable (from_c, to_c); 185 | return pcnt; 186 | } 187 | 188 | pcnt = contig_array[from_c].downwardConnect; 189 | 190 | while (pcnt) 191 | { 192 | if (pcnt->contigID == to_c) 193 | { 194 | return pcnt; 195 | } 196 | 197 | pcnt = pcnt->next; 198 | } 199 | 200 | return pcnt; 201 | } 202 | 203 | /* 204 | void removeCntInLookupTable(unsigned int from_c,unsigned int to_c) 205 | { 206 | unsigned int index = 2*from_c + to_c; 207 | CONNECT *ite_cnt = cntLookupTable[index]; 208 | CONNECT *cnt; 209 | 210 | if(!ite_cnt){ 211 | printf("removeCntInLookupTable: not found A\n"); 212 | return; 213 | } 214 | if(ite_cnt->contigID==to_c){ 215 | cntLookupTable[index] = ite_cnt->nextInLookupTable; 216 | return; 217 | } 218 | 219 | while(ite_cnt->nextInLookupTable&&ite_cnt->nextInLookupTable->contigID!=to_c) 220 | ite_cnt = ite_cnt->nextInLookupTable; 221 | 222 | if(ite_cnt->nextInLookupTable){ 223 | cnt = ite_cnt->nextInLookupTable; 224 | ite_cnt->nextInLookupTable = cnt->nextInLookupTable; 225 | return; 226 | } 227 | printf("removeCntInLookupTable: not found B\n"); 228 | return; 229 | } 230 | */ 231 | -------------------------------------------------------------------------------- /src/pregraph.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Pregraph.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 
12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | #include "stdinc.h" 24 | #include "newhash.h" 25 | #include "extfunc.h" 26 | #include "extvab.h" 27 | 28 | static void initenv (int argc, char **argv); 29 | static char shortrdsfile[256]; 30 | static char graphfile[256]; 31 | static int cutTips = 1; 32 | static void display_pregraph_usage (); 33 | int call_pregraph (int argc, char **argv) 34 | { 35 | time_t start_t, stop_t, time_bef, time_aft; 36 | time (&start_t); 37 | initenv (argc, argv); 38 | if (overlaplen % 2 == 0) 39 | { 40 | overlaplen++; 41 | printf ("K should be an odd number\n"); 42 | } 43 | if (overlaplen < 13) 44 | { 45 | overlaplen = 13; 46 | printf ("K should not be less than 13\n"); 47 | } 48 | #ifdef MER127 49 | else if (overlaplen > 127) 50 | overlaplen = 127; 51 | #endif 52 | #ifdef MER63 53 | else if(overlaplen > 63) 54 | overlaplen=63; 55 | #endif 56 | #ifdef MER31 57 | else if(overlaplen>31) 58 | overlaplen = 31; 59 | #endif 60 | 61 | time (&time_bef); 62 | 63 | prlRead2HashTable (shortrdsfile, graphfile); 64 | time (&time_aft); 65 | printf ("time spent on pre-graph construction: %ds\n\n", (int) (time_aft - time_bef)); 66 | printf ("deLowKmer %d, deLowEdge %d\n", deLowKmer, deLowEdge); 67 | 68 | time (&time_bef); 69 | removeMinorOut(); 70 | time (&time_aft); 71 | printf ("time spent on cut kmer: %ds\n\n", (int) (time_aft - time_bef)); 72 | 73 | //analyzeTips(hash_table, graphfile); 74 | 75 | if (!deLowKmer && cutTips) 76 | { 77 | time (&time_bef); 78 | removeSingleTips (); 79 | removeMinorTips (); 80 | time (&time_aft); 81 | printf ("time spent on cutTipe: %ds\n\n", (int) 
(time_aft - time_bef)); 82 | } 83 | else 84 | { 85 | time (&time_bef); 86 | removeMinorTips (); 87 | time (&time_aft); 88 | printf ("time spent on cutTipe: %ds\n\n", (int) (time_aft - time_bef)); 89 | } 90 | 91 | 92 | initKmerSetSize = 0; 93 | 94 | 95 | //combine each linear part to an edge 96 | time (&time_bef); 97 | kmer2edges (graphfile); 98 | time (&time_aft); 99 | printf ("time spent on making edges: %ds\n\n", (int) (time_aft - time_bef)); 100 | 101 | //map read to edge one by one 102 | time (&time_bef); 103 | prlRead2edge (shortrdsfile, graphfile); 104 | time (&time_aft); 105 | printf ("time spent on mapping reads: %ds\n\n", (int) (time_aft - time_bef)); 106 | output_vertex (graphfile); 107 | free_Sets (KmerSets, thrd_num); 108 | free_Sets (KmerSetsPatch, thrd_num); 109 | time (&stop_t); 110 | printf ("overall time for lightgraph: %dm\n\n", (int) (stop_t - start_t) / 60); 111 | return 0; 112 | } 113 | 114 | 115 | /***************************************************************************** 116 | * Parse command line switches 117 | *****************************************************************************/ 118 | void initenv (int argc, char **argv) 119 | { 120 | int copt; 121 | int inpseq, outseq; 122 | extern char *optarg; 123 | char temp[100]; 124 | 125 | optind = 1; 126 | inpseq = outseq = 0; 127 | while ((copt = getopt (argc, argv, "a:s:o:K:p:d:Di:n")) != EOF) 128 | { 129 | 130 | //printf("get option\n"); 131 | switch (copt) 132 | { 133 | case 's': 134 | inpseq = 1; 135 | sscanf (optarg, "%s", shortrdsfile); 136 | break; 137 | case 'o': 138 | outseq = 1; 139 | sscanf (optarg, "%s", graphfile); // 140 | break; 141 | case 'K': 142 | sscanf (optarg, "%s", temp); // 143 | overlaplen = atoi (temp); 144 | break; 145 | case 'p': 146 | sscanf (optarg, "%s", temp); // 147 | thrd_num = atoi (temp); 148 | break; 149 | /* case 'R': 150 | repsTie = 1; 151 | break;*/ 152 | case 'd': 153 | sscanf (optarg, "%s", temp); // 154 | deLowKmer = atoi (temp) >= 0 ? 
atoi (temp) : 0; 155 | break; 156 | /* case 'D': 157 | deLowEdge = 1; 158 | break; 159 | */ 160 | case 'a': 161 | initKmerSetSize = atoi (optarg); 162 | break; 163 | case 'i'://mao 9-21 164 | sscanf (optarg, "%s", temp); 165 | dd=atoi (temp) >= 0 ? atoi (temp) : 0; 166 | break; 167 | case 'n'://mao 2011-10-13 168 | N_kmer=1; 169 | break; 170 | default: 171 | if (inpseq == 0 || outseq == 0) // 172 | { 173 | display_pregraph_usage (); 174 | exit (-1); 175 | } 176 | } 177 | } 178 | if (inpseq == 0 || outseq == 0) // 179 | { 180 | 181 | //printf("need more\n"); 182 | display_pregraph_usage (); 183 | exit (-1); 184 | } 185 | } 186 | 187 | static void display_pregraph_usage () 188 | { 189 | printf ("\npregraph -s configFile -o outputGraph [-K kmer -p n_cpu -d kmerFreqCutoff]\n"); 190 | printf (" -s\t\tconfigFile: the config file of reads\n"); 191 | printf (" -o\t\toutputGraph: prefix of output graph file name\n"); 192 | #ifdef MER127 193 | printf (" -K\t\t\tkmer(min 13, max 127): kmer size, [23]\n"); 194 | #endif 195 | #ifdef MER63 196 | printf (" -K\t\t\tkmer(min 13, max 63): kmer size, [23]\n"); 197 | #endif 198 | #ifdef MER31 199 | printf (" -K\t\t\tkmer(min 13, max 31): kmer size, [23]\n"); 200 | #endif 201 | printf (" -p\t\t\tn_cpu: number of cpu for use, [8]\n"); 202 | printf (" -d\t\t\tkmerFreqCutoff: kmers with frequency no larger than KmerFreqCutoff will be deleted, [0]\n"); 203 | 204 | } 205 | -------------------------------------------------------------------------------- /src/loadPath.c: -------------------------------------------------------------------------------- 1 | /* 2 | * loadPath.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 
7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | #include "stdinc.h" 24 | #include "newhash.h" 25 | #include "extfunc.h" 26 | #include "extvab.h" 27 | 28 | static void add1marker2edge (unsigned int edgeno, long long readid) 29 | { 30 | if (edge_array[edgeno].multi == 255) 31 | { 32 | return; 33 | } 34 | 35 | unsigned int bal_ed = getTwinEdge (edgeno); 36 | unsigned char counter = edge_array[edgeno].multi++; 37 | 38 | edge_array[edgeno].markers[counter] = readid; 39 | counter = edge_array[bal_ed].multi++; 40 | edge_array[bal_ed].markers[counter] = -readid; 41 | } 42 | 43 | boolean loadPath (char *graphfile) 44 | { 45 | FILE *fp; 46 | char name[256], line[1024]; 47 | unsigned int i, bal_ed, num1, edgeno, num2; 48 | long long markCounter = 0, readid = 0; 49 | char *seg; 50 | 51 | sprintf (name, "%s.markOnEdge", graphfile); 52 | fp = fopen (name, "r"); 53 | 54 | if (!fp) 55 | { 56 | return 0; 57 | } 58 | 59 | for (i = 1; i <= num_ed; i++) 60 | { 61 | edge_array[i].multi = 0; 62 | } 63 | 64 | for (i = 1; i <= num_ed; i++) 65 | { 66 | fscanf (fp, "%d", &num1); 67 | 68 | if (EdSmallerThanTwin (i)) 69 | { 70 | fscanf (fp, "%d", &num2); 71 | bal_ed = getTwinEdge (i); 72 | 73 | if (num1 + num2 >= 255) 74 | { 75 | edge_array[i].multi = 255; 76 | edge_array[bal_ed].multi = 255; 77 | } 78 | else 79 | { 80 | edge_array[i].multi = num1 + 
num2; 81 | edge_array[bal_ed].multi = num1 + num2; 82 | markCounter += 2 * (num1 + num2); 83 | } 84 | 85 | i++; 86 | } 87 | else 88 | { 89 | if (2 * num1 >= 255) 90 | { 91 | edge_array[i].multi = 255; 92 | } 93 | else 94 | { 95 | edge_array[i].multi = 2 * num1; 96 | markCounter += 2 * num1; 97 | } 98 | } 99 | } 100 | 101 | fclose (fp); 102 | printf ("%lld markers overall\n", markCounter); 103 | markersArray = (long long *) ckalloc (markCounter * sizeof (long long)); 104 | markCounter = 0; 105 | 106 | for (i = 1; i <= num_ed; i++) 107 | { 108 | if (edge_array[i].multi == 255) 109 | { 110 | continue; 111 | } 112 | 113 | edge_array[i].markers = markersArray + markCounter; 114 | markCounter += edge_array[i].multi; 115 | edge_array[i].multi = 0; 116 | } 117 | 118 | sprintf (name, "%s.path", graphfile); 119 | fp = fopen (name, "r"); 120 | 121 | if (!fp) 122 | { 123 | return 0; 124 | } 125 | 126 | while (fgets (line, sizeof (line), fp) != NULL) 127 | { 128 | //printf("%s",line); 129 | readid++; 130 | seg = strtok (line, " "); 131 | 132 | while (seg) 133 | { 134 | edgeno = atoi (seg); 135 | //printf("%s, %d\n",seg,edgeno); 136 | add1marker2edge (edgeno, readid); 137 | seg = strtok (NULL, " "); 138 | } 139 | } 140 | 141 | fclose (fp); 142 | markCounter = 0; 143 | 144 | for (i = 1; i <= num_ed; i++) 145 | { 146 | if (edge_array[i].multi == 255) 147 | { 148 | continue; 149 | } 150 | 151 | markCounter += edge_array[i].multi; 152 | } 153 | 154 | printf ("%lld marks loaded\n", markCounter); 155 | return 1; 156 | } 157 | 158 | boolean loadPathBin (char *graphfile) 159 | { 160 | FILE *fp; 161 | char name[256]; 162 | unsigned int i, bal_ed, num1, num2; 163 | long long markCounter = 0, readid = 0; 164 | unsigned char seg, ch; 165 | unsigned int *freadBuf; 166 | 167 | sprintf (name, "%s.markOnEdge", graphfile); 168 | fp = fopen (name, "r"); 169 | 170 | if (!fp) 171 | { 172 | return 0; 173 | } 174 | 175 | for (i = 1; i <= num_ed; i++) 176 | { 177 | edge_array[i].multi = 0; 178 | 
edge_array[i].markers = NULL; 179 | } 180 | 181 | for (i = 1; i <= num_ed; i++) 182 | { 183 | fscanf (fp, "%d", &num1); 184 | 185 | if (EdSmallerThanTwin (i)) 186 | { 187 | fscanf (fp, "%d", &num2); 188 | bal_ed = getTwinEdge (i); 189 | 190 | if (num1 + num2 >= 255) 191 | { 192 | edge_array[i].multi = 255; 193 | edge_array[bal_ed].multi = 255; 194 | } 195 | else 196 | { 197 | edge_array[i].multi = num1 + num2; 198 | edge_array[bal_ed].multi = num1 + num2; 199 | markCounter += 2 * (num1 + num2); 200 | } 201 | 202 | i++; 203 | } 204 | else 205 | { 206 | if (2 * num1 >= 255) 207 | { 208 | edge_array[i].multi = 255; 209 | } 210 | else 211 | { 212 | edge_array[i].multi = 2 * num1; 213 | markCounter += 2 * num1; 214 | } 215 | } 216 | } 217 | 218 | fclose (fp); 219 | printf ("%lld markers overall\n", markCounter); 220 | markersArray = (long long *) ckalloc (markCounter * sizeof (long long)); 221 | markCounter = 0; 222 | 223 | for (i = 1; i <= num_ed; i++) 224 | { 225 | if (edge_array[i].multi == 255) 226 | { 227 | continue; 228 | } 229 | 230 | edge_array[i].markers = markersArray + markCounter; 231 | markCounter += edge_array[i].multi; 232 | edge_array[i].multi = 0; 233 | } 234 | 235 | sprintf (name, "%s.path", graphfile); 236 | fp = fopen (name, "rb"); 237 | 238 | if (!fp) 239 | { 240 | return 0; 241 | } 242 | 243 | freadBuf = (unsigned int *) ckalloc ((maxReadLen - overlaplen + 1) * sizeof (unsigned int)); 244 | 245 | while (fread (&ch, sizeof (char), 1, fp) == 1) 246 | { 247 | //printf("%s",line); 248 | if (fread (freadBuf, sizeof (unsigned int), ch, fp) != ch) 249 | { 250 | break; 251 | } 252 | 253 | readid++; 254 | 255 | for (seg = 0; seg < ch; seg++) 256 | { 257 | add1marker2edge (freadBuf[seg], readid); 258 | } 259 | } 260 | 261 | fclose (fp); 262 | markCounter = 0; 263 | 264 | for (i = 1; i <= num_ed; i++) 265 | { 266 | if (edge_array[i].multi == 255) 267 | { 268 | continue; 269 | } 270 | 271 | markCounter += edge_array[i].multi; 272 | } 273 | 274 | printf ("%lld 
markers loaded\n", markCounter); 275 | free ((void *) freadBuf); 276 | return 1; 277 | } 278 | -------------------------------------------------------------------------------- /src/arc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Arc.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 
20 | * 21 | */ 22 | 23 | #include "stdinc.h" 24 | #include "newhash.h" 25 | #include "extfunc.h" 26 | #include "extvab.h" 27 | 28 | #define preARCBLOCKSIZE 100000 29 | 30 | void createPreArcMemManager () 31 | { 32 | prearc_mem_manager = createMem_manager (preARCBLOCKSIZE, sizeof (preARC)); 33 | } 34 | 35 | void prlDestroyPreArcMem () 36 | { 37 | if (!preArc_mem_managers) 38 | { 39 | return; 40 | } 41 | 42 | int i; 43 | 44 | for (i = 0; i < thrd_num; i++) 45 | { 46 | freeMem_manager (preArc_mem_managers[i]); 47 | } 48 | 49 | free ((void *) preArc_mem_managers); 50 | preArc_mem_managers = NULL; 51 | } 52 | 53 | void destroyPreArcMem () 54 | { 55 | freeMem_manager (prearc_mem_manager); 56 | prearc_mem_manager = NULL; 57 | } 58 | 59 | preARC *prlAllocatePreArc (unsigned int edgeid, MEM_MANAGER * manager) 60 | { 61 | preARC *newArc; 62 | 63 | newArc = (preARC *) getItem (manager); 64 | newArc->to_ed = edgeid; 65 | newArc->multiplicity = 1; 66 | newArc->next = NULL; 67 | return newArc; 68 | } 69 | 70 | preARC *allocatePreArc (unsigned int edgeid) 71 | { 72 | arcCounter++; 73 | preARC *newArc; 74 | 75 | newArc = (preARC *) getItem (prearc_mem_manager); 76 | newArc->to_ed = edgeid; 77 | newArc->multiplicity = 1; 78 | newArc->next = NULL; 79 | return newArc; 80 | } 81 | 82 | void output_arcGVZ (char *outfile, boolean IsContig) 83 | { 84 | ARC *pArc; 85 | preARC *pPreArc; 86 | char name[256]; 87 | FILE *fp; 88 | unsigned int i; 89 | 90 | sprintf (name, "%s.arc.gvz", outfile); 91 | fp = ckopen (name, "w"); 92 | fprintf (fp, "digraph G{\n"); 93 | fprintf (fp, "\tsize=\"512,512\";\n"); 94 | 95 | for (i = 1; i <= num_ed; i++) 96 | { 97 | if (IsContig) 98 | { 99 | pPreArc = contig_array[i].arcs; 100 | 101 | while (pPreArc) 102 | { 103 | fprintf (fp, "\tC%d -> C%d[label =\"%d\"];\n", i, pPreArc->to_ed, pPreArc->multiplicity); 104 | pPreArc = pPreArc->next; 105 | } 106 | } 107 | else 108 | { 109 | pArc = edge_array[i].arcs; 110 | 111 | while (pArc) 112 | { 113 | fprintf (fp, "\tC%d 
-> C%d[label =\"%d\"];\n", i, pArc->to_ed, pArc->multiplicity); 114 | pArc = pArc->next; 115 | } 116 | } 117 | } 118 | 119 | fprintf (fp, "}\n"); 120 | fclose (fp); 121 | } 122 | 123 | /**************** below this line all codes are about ARC ****************/ 124 | #define ARCBLOCKSIZE 100000 125 | void createArcMemo () 126 | { 127 | if (!arc_mem_manager) 128 | { 129 | arc_mem_manager = createMem_manager (ARCBLOCKSIZE, sizeof (ARC)); 130 | } 131 | else 132 | { 133 | printf ("Warning from createArcMemo: arc_mem_manager is active pointer\n"); 134 | } 135 | } 136 | 137 | void destroyArcMem () 138 | { 139 | freeMem_manager (arc_mem_manager); 140 | arc_mem_manager = NULL; 141 | } 142 | 143 | ARC *allocateArc (unsigned int edgeid) 144 | { 145 | arcCounter++; 146 | ARC *newArc; 147 | 148 | newArc = (ARC *) getItem (arc_mem_manager); 149 | newArc->to_ed = edgeid; 150 | newArc->multiplicity = 1; 151 | newArc->prev = NULL; 152 | newArc->next = NULL; 153 | return newArc; 154 | } 155 | 156 | void dismissArc (ARC * arc) 157 | { 158 | returnItem (arc_mem_manager, arc); 159 | } 160 | 161 | /***************** below this line all codes are about lookup table *****************/ 162 | 163 | void createArcLookupTable () 164 | { 165 | if (!arcLookupTable) 166 | { 167 | arcLookupTable = (ARC **) ckalloc ((3 * num_ed + 1) * sizeof (ARC *)); 168 | } 169 | } 170 | 171 | void deleteArcLookupTable () 172 | { 173 | if (arcLookupTable) 174 | { 175 | free ((void *) arcLookupTable); 176 | arcLookupTable = NULL; 177 | } 178 | } 179 | 180 | void putArc2LookupTable (unsigned int from_ed, ARC * arc) 181 | { 182 | if (!arc || !arcLookupTable) 183 | { 184 | return; 185 | } 186 | 187 | unsigned int index = 2 * from_ed + arc->to_ed; 188 | 189 | arc->nextInLookupTable = arcLookupTable[index]; 190 | arcLookupTable[index] = arc; 191 | } 192 | 193 | static ARC *getArcInLookupTable (unsigned int from_ed, unsigned int to_ed) 194 | { 195 | unsigned int index = 2 * from_ed + to_ed; 196 | ARC *ite_arc = 
arcLookupTable[index]; 197 | 198 | while (ite_arc) 199 | { 200 | if (ite_arc->to_ed == to_ed) 201 | { 202 | return ite_arc; 203 | } 204 | 205 | ite_arc = ite_arc->nextInLookupTable; 206 | } 207 | 208 | return NULL; 209 | } 210 | 211 | void removeArcInLookupTable (unsigned int from_ed, unsigned int to_ed) 212 | { 213 | unsigned int index = 2 * from_ed + to_ed; 214 | ARC *ite_arc = arcLookupTable[index]; 215 | ARC *arc; 216 | 217 | if (!ite_arc) 218 | { 219 | printf ("removeArcInLookupTable: not found A\n"); 220 | return; 221 | } 222 | 223 | if (ite_arc->to_ed == to_ed) 224 | { 225 | arcLookupTable[index] = ite_arc->nextInLookupTable; 226 | return; 227 | } 228 | 229 | while (ite_arc->nextInLookupTable && ite_arc->nextInLookupTable->to_ed != to_ed) 230 | { 231 | ite_arc = ite_arc->nextInLookupTable; 232 | } 233 | 234 | if (ite_arc->nextInLookupTable) 235 | { 236 | arc = ite_arc->nextInLookupTable; 237 | ite_arc->nextInLookupTable = arc->nextInLookupTable; 238 | return; 239 | } 240 | 241 | printf ("removeArcInLookupTable: not found B\n"); 242 | return; 243 | } 244 | 245 | void recordArcsInLookupTable () 246 | { 247 | unsigned int i; 248 | ARC *ite_arc; 249 | 250 | for (i = 1; i <= num_ed; i++) 251 | { 252 | ite_arc = edge_array[i].arcs; 253 | 254 | while (ite_arc) 255 | { 256 | putArc2LookupTable (i, ite_arc); 257 | ite_arc = ite_arc->next; 258 | } 259 | } 260 | } 261 | 262 | ARC *getArcBetween (unsigned int from_ed, unsigned int to_ed) 263 | { 264 | ARC *parc; 265 | 266 | if (arcLookupTable) 267 | { 268 | parc = getArcInLookupTable (from_ed, to_ed); 269 | return parc; 270 | } 271 | 272 | parc = edge_array[from_ed].arcs; 273 | 274 | while (parc) 275 | { 276 | if (parc->to_ed == to_ed) 277 | { 278 | return parc; 279 | } 280 | 281 | parc = parc->next; 282 | } 283 | 284 | return parc; 285 | } 286 | -------------------------------------------------------------------------------- /src/searchPath.c: 
/*
 * Depth-first search for routes from the starting contig to `destE` that
 * follow only masked, non-deleted connections.
 *
 * destE     - contig the route has to end at
 * currE     - contig visited by this call
 * max_steps - maximum number of contigs on a route, destination included
 * min, max  - accepted range of the route length
 * index     - recursion depth; 0 for the starting contig
 * len       - length accumulated before currE (gap lengths included)
 * num_route - in/out: number of routes stored in found_routes so far
 *
 * The route length counts every contig after the start (so it includes
 * the destination contig) plus the gaps in between.  Each accepted route
 * is copied from so_far into found_routes[*num_route]; a 0 entry ends it
 * when it is shorter than max_steps.  The search goes on after a hit.
 *
 * The global num_trace counts the calls of one search; once it exceeds
 * trace_limit, or max_n_routes routes have been found, calls return
 * immediately.
 */
void traceAlongMaskedCnt (unsigned int destE, unsigned int currE, int max_steps, int min, int max, int index, int len, int *num_route)
{
	num_trace++;

	// give up when the call budget is spent or the route table is full
	if (num_trace > trace_limit || *num_route >= max_n_routes)
	{
		return;
	}

	unsigned int *array;
	int num, i, length;
	CONNECT *ite_cnt;

	// the starting contig (index 0) does not count towards the length
	if (index > 0)
	{
		length = len + contig_array[currE].length;
	}
	else
	{
		length = 0;
	}

	// prune: route too deep or already longer than allowed
	if (index > max_steps || length > max)
	{
		return;
	}

	// so_far holds at most max_steps contigs, destination included;
	// the starting contig is not stored
	if (index > 0)
	{
		so_far[index - 1] = currE;
	}

	if (currE == destE && index == 0)
	{
		printf ("traceAlongMaskedCnt: start and destination are the same\n");
		return;
	}

	if (currE == destE && length >= min && length <= max)
	{
		num = *num_route;
		array = found_routes[num];

		for (i = 0; i < index; i++)
		{
			array[i] = so_far[i];
		}

		// a 0 entry marks the end of a route shorter than max_steps
		if (index < max_steps)
		{
			array[index] = 0;
		}

		*num_route = ++num;
	}			// one route is extracted, but the search does not stop here

	ite_cnt = contig_array[currE].downwardConnect;

	while (ite_cnt)
	{
		// only masked connections that are not deleted are followed
		if (!ite_cnt->mask || ite_cnt->deleted)
		{
			ite_cnt = ite_cnt->next;
			continue;
		}

		traceAlongMaskedCnt (destE, ite_cnt->contigID, max_steps, min, max, index + 1, length + ite_cnt->gapLen, num_route);
		ite_cnt = ite_cnt->next;
	}
}
same\n"); 136 | return; 137 | } 138 | 139 | so_far[index - 1] = currE; // there're at most max_steps edges stored in this array including the destination edge 140 | 141 | if (currE == destE && length >= min && length <= max) 142 | { 143 | num = *num_route; 144 | array = found_routes[num]; 145 | 146 | for (i = 0; i < index; i++) 147 | { 148 | array[i] = so_far[i]; 149 | } 150 | 151 | if (index < max_steps) 152 | { 153 | array[index] = 0; 154 | } //indicate the end of the route 155 | 156 | *num_route = ++num; 157 | } // one route is extrated, but we don't terminate searching 158 | 159 | if (currCNT->nextInScaf) 160 | { 161 | traceAlongConnect (destE, currCNT->nextInScaf, max_steps, min, max, index + 1, length, num_route); 162 | return; 163 | } 164 | 165 | ite_cnt = contig_array[currE].downwardConnect; 166 | 167 | while (ite_cnt) 168 | { 169 | if (ite_cnt->mask || ite_cnt->deleted) 170 | { 171 | ite_cnt = ite_cnt->next; 172 | continue; 173 | } 174 | 175 | traceAlongConnect (destE, ite_cnt, max_steps, min, max, index + 1, length, num_route); 176 | ite_cnt = ite_cnt->next; 177 | } 178 | } 179 | 180 | //find paths in the graph from currE to destE, its length does not include length of both end contigs 181 | void traceAlongArc (unsigned int destE, unsigned int currE, int max_steps, int min, int max, int index, int len, int *num_route) 182 | { 183 | num_trace++; 184 | 185 | if (num_trace > trace_limit || *num_route >= 4)//max_n_routes-1) 186 | { 187 | return; 188 | } 189 | 190 | unsigned int *array, out_ed, vt; 191 | int num, i, pos, length; 192 | preARC *parc; 193 | 194 | pos = index; 195 | 196 | if (pos > max_steps || len > max) 197 | { 198 | return; 199 | } // this is the only situation we stop 200 | 201 | if (currE == destE && pos == 0) 202 | { 203 | printf ("traceAlongArc: start and destination are the same\n"); 204 | return; 205 | } 206 | 207 | if (pos > 0) // pos starts with 0 for the starting edge 208 | { 209 | so_far[pos - 1] = currE; 210 | } // there're at most 
max_steps edges stored in this array including the destination edge 211 | 212 | if (currE == destE && len >= min) 213 | { 214 | num = *num_route; 215 | array = found_routes[num]; 216 | 217 | for (i = 0; i < pos; i++) 218 | { 219 | array[i] = so_far[i]; 220 | } 221 | 222 | if (pos < max_steps) 223 | { 224 | array[pos] = 0; 225 | } //indicate the end of the route 226 | 227 | *num_route = ++num; 228 | } // one route is extrated, but we don't terminate searching 229 | 230 | if (pos == max_steps || len == max) 231 | { 232 | return; 233 | } 234 | 235 | if (pos++ > 0) //not the starting edge 236 | { 237 | length = len + contig_array[currE].length; 238 | } 239 | else 240 | { 241 | length = len; 242 | } 243 | 244 | vt = contig_array[currE].to_vt; 245 | parc = contig_array[currE].arcs; 246 | 247 | while (parc) 248 | { 249 | out_ed = parc->to_ed; 250 | traceAlongArc (destE, out_ed, max_steps, min, max, pos, length, num_route); 251 | parc = parc->next; 252 | } 253 | } 254 | -------------------------------------------------------------------------------- /src/concatenateEdge.c: -------------------------------------------------------------------------------- 1 | /* 2 | * concatenateEdge.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 
20 | * 21 | */ 22 | 23 | #include "stdinc.h" 24 | #include "newhash.h" 25 | #include "extfunc.h" 26 | #include "extvab.h" 27 | 28 | void copySeq (char *targetS, char *sourceS, int pos, int length) 29 | { 30 | char ch; 31 | int i, index; 32 | 33 | index = pos; 34 | 35 | for (i = 0; i < length; i++) 36 | { 37 | ch = getCharInTightString (sourceS, i); 38 | writeChar2tightString (ch, targetS, index++); 39 | } 40 | } 41 | 42 | //a path from e1 to e2 is merged int to e1(indicate=0) or e2(indicate=1), update graph topology 43 | void linearUpdateConnection (unsigned int e1, unsigned int e2, int indicate) 44 | { 45 | unsigned int bal_ed; 46 | ARC *parc; 47 | 48 | if (!indicate) 49 | { 50 | edge_array[e1].to_vt = edge_array[e2].to_vt; 51 | bal_ed = getTwinEdge (e1); 52 | parc = edge_array[e2].arcs; 53 | 54 | while (parc) 55 | { 56 | parc->bal_arc->to_ed = bal_ed; 57 | parc = parc->next; 58 | } 59 | 60 | edge_array[e1].arcs = edge_array[e2].arcs; 61 | edge_array[e2].arcs = NULL; 62 | 63 | if (edge_array[e1].length || edge_array[e2].length) 64 | edge_array[e1].cvg = (edge_array[e1].cvg * edge_array[e1].length + edge_array[e2].cvg * edge_array[e2].length) / (edge_array[e1].length + edge_array[e2].length); 65 | 66 | edge_array[e2].deleted = 1; 67 | } 68 | else 69 | { 70 | //all the arcs pointing to e1 switch to e2 71 | parc = edge_array[getTwinEdge (e1)].arcs; 72 | 73 | while (parc) 74 | { 75 | parc->bal_arc->to_ed = e2; 76 | parc = parc->next; 77 | } 78 | 79 | edge_array[e1].arcs = NULL; 80 | edge_array[e2].from_vt = edge_array[e1].from_vt; 81 | 82 | if (edge_array[e1].length || edge_array[e2].length) 83 | edge_array[e2].cvg = (edge_array[e1].cvg * edge_array[e1].length + edge_array[e2].cvg * edge_array[e2].length) / (edge_array[e1].length + edge_array[e2].length); 84 | 85 | edge_array[e1].deleted = 1; 86 | } 87 | } 88 | 89 | static void printEdgeSeq (FILE * fp, char *tightSeq, int len) 90 | { 91 | int i; 92 | 93 | for (i = 0; i < len; i++) 94 | { 95 | fprintf (fp, "%c", 
int2base ((int) getCharInTightString (tightSeq, i))); 96 | 97 | if ((i + overlaplen + 1) % 100 == 0) 98 | { 99 | fprintf (fp, "\n"); 100 | } 101 | } 102 | 103 | fprintf (fp, "\n"); 104 | } 105 | void allpathUpdateEdge (unsigned int e1, unsigned int e2, int indicate) 106 | { 107 | int tightLen; 108 | char *tightSeq = NULL; 109 | 110 | if (edge_array[e1].cvg == 0) 111 | { 112 | edge_array[e1].cvg = edge_array[e2].cvg; 113 | } 114 | 115 | if (edge_array[e2].cvg == 0) 116 | { 117 | edge_array[e2].cvg = edge_array[e1].cvg; 118 | } 119 | 120 | /* 121 | if(edge_array[e1].length&&edge_array[e2].length){ 122 | fprintf(stderr,">e1\n"); 123 | printEdgeSeq(stderr,edge_array[e1].seq,edge_array[e1].length); 124 | fprintf(stderr,">e2\n"); 125 | printEdgeSeq(stderr,edge_array[e2].seq,edge_array[e2].length); 126 | } */ 127 | unsigned int cvgsum = edge_array[e1].cvg * edge_array[e1].length + edge_array[e2].cvg * edge_array[e2].length; 128 | 129 | tightLen = edge_array[e1].length + edge_array[e2].length; 130 | 131 | if (tightLen) 132 | { 133 | tightSeq = (char *) ckalloc ((tightLen / 4 + 1) * sizeof (char)); 134 | } 135 | 136 | tightLen = 0; 137 | 138 | if (edge_array[e1].length) 139 | { 140 | copySeq (tightSeq, edge_array[e1].seq, 0, edge_array[e1].length); 141 | tightLen = edge_array[e1].length; 142 | 143 | if (edge_array[e1].seq) 144 | { 145 | free ((void *) edge_array[e1].seq); 146 | edge_array[e1].seq = NULL; 147 | } 148 | else 149 | { 150 | printf ("allpathUpdateEdge: edge %d with length %d, but without seq\n", e1, edge_array[e1].length); 151 | } 152 | } 153 | 154 | if (edge_array[e2].length) 155 | { 156 | copySeq (tightSeq, edge_array[e2].seq, tightLen, edge_array[e2].length); 157 | tightLen += edge_array[e2].length; 158 | 159 | if (edge_array[e2].seq) 160 | { 161 | free ((void *) edge_array[e2].seq); 162 | edge_array[e2].seq = NULL; 163 | } 164 | else 165 | { 166 | printf ("allpathUpdateEdge: edge %d with length %d, but without seq\n", e2, edge_array[e2].length); 167 | } 168 
| } 169 | 170 | /* 171 | if(edge_array[e1].length&&edge_array[e2].length){ 172 | fprintf(stderr,">e1+e2\n"); 173 | printEdgeSeq(stderr,tightSeq,tightLen); 174 | } 175 | */ 176 | //edge_array[e2].extend_len = tightLen-edge_array[e2].length; 177 | //the sequence of e1 is to be updated 178 | if (!indicate) 179 | { 180 | edge_array[e2].length = 0; //e1 is removed from the graph 181 | edge_array[e1].to_vt = edge_array[e2].to_vt; //e2 is part of e1 now 182 | edge_array[e1].length = tightLen; 183 | edge_array[e1].seq = tightSeq; 184 | 185 | if (tightLen) 186 | { 187 | edge_array[e1].cvg = cvgsum / tightLen; 188 | } 189 | 190 | edge_array[e1].cvg = edge_array[e1].cvg > 0 ? edge_array[e1].cvg : 1; 191 | } 192 | else 193 | { 194 | edge_array[e1].length = 0; //e1 is removed from the graph 195 | edge_array[e2].from_vt = edge_array[e1].from_vt; //e1 is part of e2 now 196 | edge_array[e2].length = tightLen; 197 | edge_array[e2].seq = tightSeq; 198 | 199 | if (tightLen) 200 | { 201 | edge_array[e2].cvg = cvgsum / tightLen; 202 | } 203 | 204 | edge_array[e2].cvg = edge_array[e2].cvg > 0 ? 
edge_array[e2].cvg : 1; 205 | } 206 | } 207 | 208 | static void debugging (unsigned int i) 209 | { 210 | ARC *parc; 211 | 212 | parc = edge_array[i].arcs; 213 | 214 | if (!parc) 215 | { 216 | printf ("no downward connection for %d\n", i); 217 | } 218 | 219 | while (parc) 220 | { 221 | printf ("%d -> %d\n", i, parc->to_ed); 222 | parc = parc->next; 223 | } 224 | } 225 | 226 | //concatenate two edges if they are linearly linked 227 | void linearConcatenate () 228 | { 229 | unsigned int i; 230 | int conc_c = 1; 231 | int counter; 232 | unsigned int from_ed, to_ed, bal_ed; 233 | ARC *parc, *parc2; 234 | unsigned int bal_fe; 235 | 236 | //debugging(30514); 237 | while (conc_c) 238 | { 239 | conc_c = 0; 240 | counter = 0; 241 | 242 | for (i = 1; i <= num_ed; i++) //num_ed 243 | { 244 | if (edge_array[i].deleted || EdSameAsTwin (i)) 245 | { 246 | continue; 247 | } 248 | 249 | if (edge_array[i].length > 0) 250 | { 251 | counter++; 252 | } 253 | 254 | parc = edge_array[i].arcs; 255 | 256 | if (!parc || parc->next) 257 | { 258 | continue; 259 | } 260 | 261 | to_ed = parc->to_ed; 262 | bal_ed = getTwinEdge (to_ed); 263 | parc2 = edge_array[bal_ed].arcs; 264 | 265 | if (bal_ed == to_ed || !parc2 || parc2->next) 266 | { 267 | continue; 268 | } 269 | 270 | from_ed = i; 271 | 272 | if (from_ed == to_ed || from_ed == bal_ed) 273 | { 274 | continue; 275 | } 276 | 277 | //linear connection found 278 | conc_c++; 279 | linearUpdateConnection (from_ed, to_ed, 0); 280 | allpathUpdateEdge (from_ed, to_ed, 0); 281 | bal_fe = getTwinEdge (from_ed); 282 | linearUpdateConnection (bal_ed, bal_fe, 1); 283 | allpathUpdateEdge (bal_ed, bal_fe, 1); 284 | /* 285 | if(from_ed==6589||to_ed==6589) 286 | printf("%d <- %d (%d)\n",from_ed,to_ed,i); 287 | if(bal_fe==6589||bal_ed==6589) 288 | printf("%d <- %d (%d)\n",bal_fe,bal_ed,i); 289 | */ 290 | } 291 | 292 | printf ("a linear concatenation lap, %d concatenated\n", conc_c); 293 | } 294 | 295 | printf ("%d edges in graph\n", counter); 296 | } 297 | 
-------------------------------------------------------------------------------- /src/read2scaf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Read2scaf.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | #include "stdinc.h" 24 | #include "newhash.h" 25 | #include "extfunc.h" 26 | #include "extvab.h" 27 | 28 | static int Ncounter; 29 | static int allGaps; 30 | 31 | // for multi threads 32 | static int scafBufSize = 100; 33 | static STACK **ctgStackBuffer; 34 | static int scafCounter; 35 | static int scafInBuf; 36 | 37 | static void convertIndex () 38 | { 39 | int *length_array = (int *) ckalloc ((num_ctg + 1) * sizeof (int)); 40 | unsigned int i; 41 | 42 | for (i = 1; i <= num_ctg; i++) 43 | { 44 | length_array[i] = 0; 45 | } 46 | 47 | for (i = 1; i <= num_ctg; i++) 48 | { 49 | if (index_array[i] > 0) 50 | { 51 | length_array[index_array[i]] = i; 52 | } 53 | } 54 | 55 | for (i = 1; i <= num_ctg; i++) 56 | { 57 | index_array[i] = length_array[i]; 58 | } //contig i with new index: index_array[i] 59 | 60 | free ((void *) length_array); 61 | } 62 | 63 | static void reverseStack (STACK * dStack, STACK * sStack) 64 | { 65 | CTGinSCAF *actg, *ctgPt; 66 | 67 | emptyStack (dStack); 68 
| 69 | while ((actg = (CTGinSCAF *) stackPop (sStack)) != NULL) 70 | { 71 | ctgPt = (CTGinSCAF *) stackPush (dStack); 72 | ctgPt->ctgID = actg->ctgID; 73 | ctgPt->start = actg->start; 74 | ctgPt->end = actg->end; 75 | } 76 | 77 | stackBackup (dStack); 78 | } 79 | 80 | static void initStackBuf (STACK ** ctgStackBuffer, int scafBufSize) 81 | { 82 | int i; 83 | 84 | for (i = 0; i < scafBufSize; i++) 85 | { 86 | ctgStackBuffer[i] = (STACK *) createStack (100, sizeof (CTGinSCAF)); 87 | } 88 | } 89 | static void freeStackBuf (STACK ** ctgStackBuffer, int scafBufSize) 90 | { 91 | int i; 92 | 93 | for (i = 0; i < scafBufSize; i++) 94 | { 95 | freeStack (ctgStackBuffer[i]); 96 | } 97 | } 98 | 99 | static void mapCtg2Scaf (int scafInBuf) 100 | { 101 | int i, scafID; 102 | CTGinSCAF *actg; 103 | STACK *ctgsStack; 104 | unsigned int ctg, bal_ctg; 105 | 106 | for (i = 0; i < scafInBuf; i++) 107 | { 108 | scafID = scafCounter + i + 1; 109 | ctgsStack = ctgStackBuffer[i]; 110 | 111 | while ((actg = stackPop (ctgsStack)) != NULL) 112 | { 113 | ctg = actg->ctgID; 114 | bal_ctg = getTwinCtg (ctg); 115 | 116 | if (contig_array[ctg].from_vt != 0) 117 | { 118 | contig_array[ctg].multi = 1; 119 | contig_array[bal_ctg].multi = 1; 120 | continue; 121 | } 122 | 123 | contig_array[ctg].from_vt = scafID; 124 | contig_array[ctg].to_vt = actg->start; 125 | contig_array[ctg].flag = 0; //ctg and scaf on the same strand 126 | contig_array[bal_ctg].from_vt = scafID; 127 | contig_array[bal_ctg].to_vt = actg->start; 128 | contig_array[bal_ctg].flag = 1; 129 | } 130 | } 131 | } 132 | 133 | static void locateContigOnscaff (char *graphfile) 134 | { 135 | FILE *fp; 136 | char line[1024]; 137 | CTGinSCAF *actg; 138 | STACK *ctgStack, *aStack; 139 | int index = 0, counter, overallLen; 140 | int starter, prev_start, gapN, scafLen; 141 | unsigned int ctg, prev_ctg = 0; 142 | 143 | for (ctg = 1; ctg <= num_ctg; ctg++) 144 | { 145 | contig_array[ctg].from_vt = 0; 146 | contig_array[ctg].multi = 0; 147 | } 148 
| 149 | ctgStack = (STACK *) createStack (1000, sizeof (CTGinSCAF)); 150 | sprintf (line, "%s.scaf_gap", graphfile); 151 | fp = ckopen (line, "r"); 152 | ctgStackBuffer = (STACK **) ckalloc (scafBufSize * sizeof (STACK *)); 153 | initStackBuf (ctgStackBuffer, scafBufSize); 154 | Ncounter = scafCounter = scafInBuf = allGaps = 0; 155 | 156 | while (fgets (line, sizeof (line), fp) != NULL) 157 | { 158 | if (line[0] == '>') 159 | { 160 | if (index) 161 | { 162 | aStack = ctgStackBuffer[scafInBuf++]; 163 | reverseStack (aStack, ctgStack); 164 | 165 | if (scafInBuf == scafBufSize) 166 | { 167 | mapCtg2Scaf (scafInBuf); 168 | scafCounter += scafInBuf; 169 | scafInBuf = 0; 170 | } 171 | 172 | if (index % 1000 == 0) 173 | { 174 | printf ("Processed %d scaffolds\n", index); 175 | } 176 | } 177 | 178 | //read next scaff 179 | scafLen = prev_ctg = 0; 180 | emptyStack (ctgStack); 181 | sscanf (line + 9, "%d %d %d", &index, &counter, &overallLen); 182 | //fprintf(stderr,">%d\n",index); 183 | continue; 184 | } 185 | 186 | if (line[0] == 'G') // gap appears 187 | { 188 | continue; 189 | } 190 | 191 | if (line[0] >= '0' && line[0] <= '9') // a contig line 192 | { 193 | sscanf (line, "%d %d", &ctg, &starter); 194 | actg = (CTGinSCAF *) stackPush (ctgStack); 195 | actg->ctgID = ctg; 196 | 197 | if (!prev_ctg) 198 | { 199 | actg->start = scafLen; 200 | actg->end = actg->start + overlaplen + contig_array[ctg].length - 1; 201 | } 202 | else 203 | { 204 | gapN = starter - prev_start - (int) contig_array[prev_ctg].length; 205 | gapN = gapN < 1 ? 
1 : gapN; 206 | actg->start = scafLen + gapN; 207 | actg->end = actg->start + contig_array[ctg].length - 1; 208 | } 209 | 210 | //fprintf(stderr,"%d\t%d\n",actg->start,actg->end); 211 | scafLen = actg->end + 1; 212 | prev_ctg = ctg; 213 | prev_start = starter; 214 | } 215 | } 216 | 217 | if (index) 218 | { 219 | aStack = ctgStackBuffer[scafInBuf++]; 220 | reverseStack (aStack, ctgStack); 221 | mapCtg2Scaf (scafInBuf); 222 | } 223 | 224 | gapN = 0; 225 | 226 | for (ctg = 1; ctg <= num_ctg; ctg++) 227 | { 228 | if (contig_array[ctg].from_vt == 0 || contig_array[ctg].multi == 1) 229 | { 230 | continue; 231 | } 232 | 233 | gapN++; 234 | } 235 | 236 | printf ("\nDone with %d scaffolds, %d contigs in Scaffolld\n", index, gapN); 237 | /* 238 | if(readSeqInGap) 239 | freeDarray(readSeqInGap); 240 | */ 241 | fclose (fp); 242 | freeStack (ctgStack); 243 | freeStackBuf (ctgStackBuffer, scafBufSize); 244 | free ((void *) ctgStackBuffer); 245 | } 246 | 247 | static boolean contigElligible (unsigned int contigno) 248 | { 249 | unsigned int ctg = index_array[contigno]; 250 | 251 | if (contig_array[ctg].from_vt == 0 || contig_array[ctg].multi == 1) 252 | { 253 | return 0; 254 | } 255 | else 256 | { 257 | return 1; 258 | } 259 | } 260 | static void output1read (FILE * fo, long long readno, unsigned int contigno, int pos) 261 | { 262 | unsigned int ctg = index_array[contigno]; 263 | int posOnScaf; 264 | char orien; 265 | 266 | pos = pos < 0 ? 
0 : pos; 267 | 268 | if (contig_array[ctg].flag == 0) 269 | { 270 | posOnScaf = contig_array[ctg].to_vt + pos - overlaplen; 271 | orien = '+'; 272 | } 273 | else 274 | { 275 | posOnScaf = contig_array[ctg].to_vt + contig_array[ctg].length - pos; 276 | orien = '-'; 277 | } 278 | 279 | /* 280 | if(readno==676) 281 | printf("Read %lld in region from %d, extend %d, pos %d, orien %c\n", 282 | readno,contig_array[ctg].to_vt,contig_array[ctg].length,posOnScaf,orien); 283 | */ 284 | fprintf (fo, "%lld\t%d\t%d\t%c\n", readno, contig_array[ctg].from_vt, posOnScaf, orien); 285 | } 286 | 287 | void locateReadOnScaf (char *graphfile) 288 | { 289 | char name[1024], line[1024]; 290 | FILE *fp, *fo; 291 | long long readno, counter = 0, pre_readno = 0; 292 | unsigned int contigno, pre_contigno; 293 | int pre_pos, pos; 294 | 295 | locateContigOnscaff (graphfile); 296 | sprintf (name, "%s.readOnContig", graphfile); 297 | fp = ckopen (name, "r"); 298 | sprintf (name, "%s.readOnScaf", graphfile); 299 | fo = ckopen (name, "w"); 300 | 301 | if (!orig2new) 302 | { 303 | convertIndex (); 304 | orig2new = 1; 305 | } 306 | 307 | fgets (line, 1024, fp); 308 | 309 | while (fgets (line, 1024, fp) != NULL) 310 | { 311 | sscanf (line, "%lld %d %d", &readno, &contigno, &pos); 312 | 313 | if ((readno % 2 == 0) && (pre_readno == readno - 1) // they are a pair of reads 314 | && contigElligible (pre_contigno) && contigElligible (contigno)) 315 | { 316 | output1read (fo, pre_readno, pre_contigno, pre_pos); 317 | output1read (fo, readno, contigno, pos); 318 | counter++; 319 | } 320 | 321 | pre_readno = readno; 322 | pre_contigno = contigno; 323 | pre_pos = pos; 324 | } 325 | 326 | printf ("%lld pairs on contig\n", counter); 327 | fclose (fp); 328 | fclose (fo); 329 | } 330 | -------------------------------------------------------------------------------- /src/output_contig.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Output_contig.c 3 | * 4 | * 
Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | #include "stdinc.h" 24 | #include "newhash.h" 25 | #include "extfunc.h" 26 | #include "extvab.h" 27 | static char *kmerSeq; 28 | static unsigned int* flag_array; 29 | void output_graph (char *outfile) 30 | { 31 | char name[256]; 32 | 33 | FILE * fp; 34 | unsigned int i, bal_i; 35 | 36 | sprintf (name, "%s.edge.gvz", outfile); 37 | fp = ckopen (name, "w"); 38 | fprintf (fp, "digraph G{\n"); 39 | fprintf (fp, "\tsize=\"512,512\";\n"); 40 | for (i = num_ed; i > 0; i--) 41 | 42 | { 43 | if (edge_array[i].deleted) 44 | 45 | { 46 | continue; 47 | } 48 | 49 | bal_i = getTwinEdge (i); 50 | 51 | fprintf (fp, "\tV%d -> V%d[label =\"%d(%d)\"];\n", edge_array[i].from_vt, edge_array[i].to_vt, i, edge_array[i].length); 52 | } 53 | fprintf (fp, "}\n"); 54 | fclose (fp); 55 | } 56 | 57 | void output_1contig (int id, EDGE * edge, FILE * fp, boolean tip) 58 | { 59 | int i; 60 | Kmer kmer; 61 | 62 | fprintf (fp, ">%d length %d cvg_%.1f_tip_%d\n", id, edge->length + overlaplen, (double) edge->cvg / 10, tip); 63 | 64 | kmer = vt_array[edge->from_vt].kmer; 65 | printKmerSeq ( fp, kmer ); 66 | 67 | for (i = 0; i < edge->length; i++) 68 | { 69 | fprintf (fp, "%c", int2base ((int) getCharInTightString 
(edge->seq, i))); 70 | if ((i + overlaplen + 1) % 100 == 0) 71 | { 72 | fprintf (fp, "\n"); 73 | } 74 | } 75 | fprintf (fp, "\n"); 76 | } 77 | 78 | int cmp_int (const void *a, const void *b) 79 | { 80 | int *A, *B; 81 | 82 | A = (int *) a; 83 | B = (int *) b; 84 | if (*A > *B) 85 | { 86 | return 1; 87 | } 88 | 89 | else if (*A == *B) 90 | { 91 | return 0; 92 | } 93 | 94 | else 95 | { 96 | return -1; 97 | } 98 | } 99 | 100 | int cmp_edge (const void *a, const void *b) 101 | { 102 | EDGE * A, *B; 103 | A = (EDGE *) a; 104 | B = (EDGE *) b; 105 | if (A->length > B->length) 106 | { 107 | return 1; 108 | } 109 | 110 | else if (A->length == B->length) 111 | { 112 | return 0; 113 | } 114 | 115 | else 116 | { 117 | return -1; 118 | } 119 | } 120 | void output_contig (EDGE * ed_array, unsigned int ed_num, char *outfile, int cut_len) 121 | { 122 | char temp[256]; 123 | 124 | FILE * fp, *fp_contig; 125 | int flag, count, len_c; 126 | int signI; 127 | unsigned int i, j, diff_len=0; 128 | long long sum = 0, N90, N50; 129 | unsigned int *length_array; 130 | 131 | boolean tip; 132 | sprintf (temp, "%s.contig", outfile); 133 | fp = ckopen (temp, "w"); 134 | 135 | index_array = (unsigned int *)ckalloc((ed_num+1)*sizeof(unsigned int)); 136 | unsigned int * all_length_arr = (unsigned int*) ckalloc((ed_num+1)*sizeof(unsigned int)); 137 | flag_array = (unsigned int*)ckalloc((ed_num+1)*sizeof(unsigned int)); 138 | 139 | for (i=1; i<=ed_num; ++i) 140 | { 141 | index_array[i] = ed_array[i].length; 142 | all_length_arr[i] = ed_array[i].length; 143 | } 144 | 145 | qsort(&all_length_arr[1], ed_num, sizeof(all_length_arr[0]), cmp_int); 146 | 147 | for (i=1; i<=ed_num; ++i) 148 | { 149 | for (j=i+1; j<=ed_num; ++j) 150 | { 151 | if (all_length_arr[i] != all_length_arr[j]) 152 | break; 153 | } 154 | all_length_arr[++diff_len] = all_length_arr[i]; 155 | flag_array[diff_len] = i; 156 | i = j-1; 157 | 158 | } 159 | 160 | for (i=1; i<=ed_num; ++i) 161 | { 162 | index_array[i] = 
uniqueLenSearch(all_length_arr, flag_array, diff_len, index_array[i]); 163 | } 164 | 165 | for (i=1; i<=ed_num; ++i) 166 | { 167 | flag_array[index_array[i]] = i; 168 | } 169 | 170 | free((void*)all_length_arr); 171 | 172 | length_array = (unsigned int *) ckalloc (ed_num * sizeof (unsigned int)); 173 | kmerSeq = (char *) ckalloc (overlaplen * sizeof (char)); 174 | 175 | count = len_c = 0; 176 | for (i = 1; i <= ed_num; i++) 177 | { 178 | if ((ed_array[i].length + overlaplen) >= len_bar) 179 | { 180 | length_array[len_c++] = ed_array[i].length + overlaplen; 181 | } 182 | if (ed_array[i].length < 1 || ed_array[i].deleted) 183 | { 184 | continue; 185 | } 186 | count++; 187 | if (EdSmallerThanTwin (i)) 188 | { 189 | i++; 190 | } 191 | } 192 | sum = 0; 193 | for (signI = len_c - 1; signI >= 0; signI--) 194 | { 195 | sum += length_array[signI]; 196 | } 197 | 198 | qsort ( length_array, len_c, sizeof ( length_array[0] ), cmp_int ); 199 | 200 | if ( len_c > 0 ) 201 | { 202 | printf ( "%d ctgs longer than %d, sum up %lldbp, with average length %lld\n", len_c, len_bar, sum, sum / len_c ); 203 | printf ( "the longest is %dbp, ", length_array[len_c - 1] ); 204 | } 205 | 206 | N50 = sum * 0.5; 207 | N90 = sum * 0.9; 208 | sum = flag = 0; 209 | for (signI = len_c - 1; signI >= 0; signI--) 210 | { 211 | sum += length_array[signI]; 212 | if (!flag && sum >= N50) 213 | { 214 | printf ("contig N50 is %d bp,", length_array[signI]); 215 | flag = 1; 216 | } 217 | if (sum >= N90) 218 | { 219 | printf ("contig N90 is %d bp\n", length_array[signI]); 220 | break; 221 | } 222 | } 223 | 224 | for (i = 1; i <= ed_num; i++) 225 | { 226 | j = flag_array[i]; 227 | if (ed_array[j].deleted || ed_array[j].length < 1) 228 | { 229 | continue; 230 | } 231 | if (ed_array[j].arcs && ed_array[getTwinEdge (j)].arcs) 232 | { 233 | tip = 0; 234 | } 235 | 236 | else 237 | { 238 | tip = 1; 239 | } 240 | output_1contig (i, &(ed_array[j]), fp, tip); 241 | if (EdSmallerThanTwin (j)) 242 | { 243 | i++; 244 | } 
245 | } 246 | 247 | fclose (fp); 248 | free ((void *) kmerSeq); 249 | free ((void *) length_array); 250 | printf ("%d contigs longer than %d output\n", count, cut_len); 251 | sprintf (temp, "%s.ContigIndex", outfile); 252 | fp_contig = ckopen (temp, "w"); 253 | fprintf (fp_contig, "Edge_num %d %d\n", ed_num, count); 254 | fprintf (fp_contig, "index\tlength\treverseComplement\n"); 255 | 256 | for (i = 1; i <= num_ed; i++) 257 | { 258 | j = flag_array[i]; 259 | fprintf (fp_contig, "%d\t%d\t", i, edge_array[j].length + overlaplen); 260 | if (EdSmallerThanTwin (j)) 261 | { 262 | fprintf (fp_contig, "1\n"); 263 | i++; 264 | } 265 | 266 | else if (EdLargerThanTwin (j)) 267 | { 268 | fprintf (fp_contig, "-1\n"); 269 | } 270 | 271 | else 272 | { 273 | fprintf (fp_contig, "0\n"); 274 | } 275 | } 276 | fclose (fp_contig); 277 | } 278 | void output_pool(char * fileName) 279 | { 280 | FILE * pool_file = ckopen(fileName,"w"); 281 | unsigned int j,index; 282 | for(index=1;index<=num_ed;index++) 283 | { 284 | j = flag_array[index]; 285 | fprintf(pool_file,"%d\t%d\n", index,pool[j]); 286 | } 287 | fclose(pool_file); 288 | } 289 | void output_updated_edges (char *outfile) 290 | { 291 | FILE * fp; 292 | char name[256]; 293 | unsigned int i, j, validCounter = 0; 294 | 295 | EDGE * edge; 296 | sprintf (name, "%s.updated.edge", outfile); 297 | fp = ckopen (name, "w"); 298 | for (i = 1; i <= num_ed; i++) 299 | { 300 | validCounter++; 301 | } 302 | fprintf (fp, "EDGEs %d\n", validCounter); 303 | validCounter = 0; 304 | 305 | for (i = 1; i <= num_ed; i++) 306 | { 307 | j = flag_array[i]; 308 | edge = &edge_array[j]; 309 | if(edge->length != 0) 310 | fprintf (fp, ">length %d,", edge->length + overlaplen); 311 | else fprintf (fp, ">length %d,", edge->length); 312 | //print_kmer (fp, vt_array[edge->from_vt].kmer, ','); 313 | //print_kmer (fp, vt_array[edge->to_vt].kmer, ','); 314 | if (EdSmallerThanTwin (j)) 315 | { 316 | fprintf (fp, "1,"); 317 | } 318 | 319 | else if (EdLargerThanTwin (j)) 
320 | { 321 | fprintf (fp, "-1,"); 322 | } 323 | 324 | else 325 | { 326 | fprintf (fp, "0,"); 327 | } 328 | //fprintf (fp, "%d\n", edge->cvg); 329 | fprintf ( fp, "%d ", edge->cvg ); 330 | print_kmer ( fp, vt_array[edge->from_vt].kmer, ',' ); 331 | print_kmer ( fp, vt_array[edge->to_vt].kmer, ',' ); 332 | fprintf ( fp, "\n" ); 333 | } 334 | fclose (fp); 335 | } 336 | void output_heavyArcs (char *outfile) 337 | { 338 | unsigned int i, j; 339 | char name[256]; 340 | 341 | FILE * outfp; 342 | ARC * parc; 343 | sprintf (name, "%s.Arc", outfile); 344 | outfp = ckopen (name, "w"); 345 | for (i = 1; i <= num_ed; i++) 346 | { 347 | if(edge_array[flag_array[i]].deleted==1 && edge_array[flag_array[i]].length <1) 348 | continue; 349 | parc = edge_array[flag_array[i]].arcs; 350 | if (!parc) 351 | { 352 | continue; 353 | } 354 | j = 0; 355 | fprintf (outfp, "%u", i); 356 | 357 | while (parc) 358 | { 359 | if(edge_array[parc->to_ed].deleted==1 && edge_array[parc->to_ed].length <1) 360 | { 361 | parc = parc->next; 362 | continue; 363 | } 364 | fprintf (outfp, " %u %u", index_array[parc->to_ed], parc->multiplicity); 365 | if ((++j) % 10 == 0) 366 | { 367 | fprintf (outfp, "\n%u", i); 368 | } 369 | parc = parc->next; 370 | } 371 | fprintf (outfp, "\n"); 372 | } 373 | fclose (outfp); 374 | free ((void*) index_array); 375 | free ((void*) flag_array); 376 | } 377 | 378 | 379 | -------------------------------------------------------------------------------- /src/inc/def.h: -------------------------------------------------------------------------------- 1 | /* 2 | * inc/Def.h 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 
12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | /* this file provides some datatype definition */ 24 | #ifndef _DEF 25 | #define _DEF 26 | 27 | #include "def2.h" 28 | #include "types.h" 29 | #include "stack.h" 30 | #include "darray.h" 31 | #include "sam.h" //support the samfile_t struct 32 | 33 | #define EDGE_BIT_SIZE 6 34 | #define word_len 12 35 | #define taskMask 0xf //the last 7 bits 36 | 37 | #define MaxEdgeCov 16000 38 | 39 | #define base2int(base) (char)(((base)&0x06)>>1) 40 | #define int2base(seq) "ACTG"[seq] 41 | #define int2compbase(seq) "TGAC"[seq] 42 | #define int_comp(seq) (char)(seq^0x02) //(char)((0x4E>>((seq)<<1))&0x03) 43 | 44 | 45 | #ifdef MER127 46 | typedef struct kmer 47 | { 48 | unsigned long long high1, low1, high2, low2; 49 | } Kmer; 50 | #endif 51 | #ifdef MER63 52 | typedef struct kmer 53 | { 54 | unsigned long long high,low; 55 | }Kmer; 56 | #endif 57 | #ifdef MER31 58 | typedef unsigned long long Kmer; 59 | #endif 60 | 61 | typedef struct preedge 62 | { 63 | Kmer from_node; 64 | Kmer to_node; 65 | char * seq; 66 | int length; 67 | unsigned short cvg: 14; 68 | unsigned bal_edge: 2; //indicate whether it's bal_edge is the previous edge, next edge or itself 69 | } preEDGE; 70 | 71 | typedef struct readinterval 72 | { 73 | int readid; 74 | unsigned int edgeid; 75 | int start; 76 | struct readinterval * bal_rv; 77 | struct readinterval * nextOnEdge; 78 | struct readinterval * prevOnEdge; 79 | struct readinterval * nextInRead; 80 | struct readinterval * prevInRead; 81 | } READINTERVAL; 82 | 83 | struct arc; 84 | typedef struct edge 85 | { 86 | unsigned int from_vt; 87 | 
unsigned int to_vt; 88 | int length; 89 | unsigned short cvg: 14; 90 | unsigned short bal_edge: 2; 91 | unsigned short multi: 14; 92 | unsigned short deleted : 1; 93 | unsigned short flag : 1; 94 | char * seq; 95 | READINTERVAL * rv; 96 | struct arc * arcs; 97 | long long * markers; 98 | } EDGE; 99 | 100 | typedef struct edge_pt 101 | { 102 | EDGE * edge; 103 | struct edge_pt * next; 104 | } EDGE_PT; 105 | 106 | typedef struct vertex 107 | { 108 | Kmer kmer; 109 | } VERTEX; 110 | /* 111 | typedef struct connection 112 | { 113 | unsigned int contigID; 114 | int gapLen; 115 | 116 | short maxGap; 117 | unsigned char minGap; 118 | unsigned char bySmall:1; 119 | unsigned char weakPoint:1; 120 | 121 | unsigned char weightNotInherit; 122 | unsigned char weight; 123 | unsigned char maxSingleWeight; 124 | unsigned char mask : 1; 125 | unsigned char used : 1; 126 | unsigned char weak : 1; 127 | unsigned char deleted : 1; 128 | unsigned char prevInScaf : 1; 129 | unsigned char inherit : 1; 130 | unsigned char checking : 1; 131 | unsigned char singleInScaf : 1; 132 | struct connection *nextInScaf; 133 | struct connection *next; 134 | struct connection *nextInLookupTable; 135 | }CONNECT; 136 | */ 137 | typedef struct connection 138 | { 139 | unsigned int contigID; 140 | int gapLen; 141 | 142 | unsigned short maxGap; 143 | unsigned char minGap; 144 | unsigned char bySmall: 1; 145 | unsigned char weakPoint: 1; 146 | 147 | unsigned char weightNotInherit; 148 | unsigned char weight; 149 | unsigned char maxSingleWeight; 150 | unsigned char mask : 1; 151 | unsigned char used : 1; 152 | unsigned char weak : 1; 153 | unsigned char deleted : 1; 154 | unsigned char prevInScaf : 1; 155 | unsigned char inherit : 1; 156 | unsigned char checking : 1; 157 | unsigned char singleInScaf : 1; 158 | 159 | unsigned short SECount; 160 | unsigned short PECount; 161 | // unsigned char usable; 162 | 163 | struct connection * nextInScaf; 164 | struct connection * next; 165 | struct connection * 
nextInLookupTable; 166 | } CONNECT; 167 | 168 | typedef struct prearc 169 | { 170 | unsigned int to_ed; 171 | unsigned int multiplicity; 172 | struct prearc * next; 173 | } preARC; 174 | 175 | typedef struct read_path 176 | { 177 | unsigned int *contigID; 178 | int contig_count; 179 | unsigned int coverage; 180 | struct read_path * next; 181 | }READ_PATH; 182 | 183 | typedef struct contig_path{ 184 | struct read_path *pathHead; 185 | int path_count; 186 | }CONTIG_PATH; 187 | 188 | typedef struct contig_pathID 189 | { 190 | int path_count; 191 | unsigned int * pathID; 192 | }CONTIG_PATHID; 193 | 194 | typedef struct path_contigID 195 | { 196 | int contig_count; 197 | int coverage; 198 | unsigned int * contigID; 199 | }PATH_CONTIGID; 200 | 201 | typedef struct Path 202 | { 203 | unsigned int * contigID; 204 | int contig_count; 205 | struct Path * next; 206 | }PATH; 207 | 208 | typedef struct contig 209 | { 210 | unsigned int from_vt; 211 | unsigned int to_vt; 212 | unsigned int length; 213 | unsigned short indexInScaf; 214 | unsigned char cvg; 215 | unsigned char bal_edge: 2; // 0, 1 or 2 216 | unsigned char mask : 1; 217 | unsigned char flag : 1; 218 | unsigned char multi: 1; 219 | unsigned char inSubGraph: 1; 220 | unsigned char unique: 1; 221 | // unsigned short time; 222 | // unsigned int pre_contigID; 223 | char * seq; 224 | CONNECT * downwardConnect; 225 | preARC * arcs; 226 | STACK * closeReads; 227 | } CONTIG; 228 | 229 | typedef struct read_nearby 230 | { 231 | int len; 232 | int dis; // dis to nearby contig or scaffold's start position 233 | long long seqStarter; //sequence start position in dynamic array 234 | } READNEARBY; 235 | 236 | typedef struct annotation 237 | { 238 | unsigned long long readID; 239 | unsigned int contigID; 240 | int pos; 241 | } ANNOTATION; 242 | 243 | typedef struct parameter 244 | { 245 | unsigned char threadID; 246 | void ** hash_table; 247 | unsigned char * mainSignal; 248 | unsigned char * selfSignal; 249 | } PARAMETER; 250 | 
251 | typedef struct lightannot 252 | { 253 | int contigID; 254 | int pos; 255 | } LIGHTANNOT; 256 | 257 | typedef struct edgepatch 258 | { 259 | Kmer from_kmer, to_kmer; 260 | unsigned int length; 261 | char bal_edge; 262 | } EDGEPATCH; 263 | 264 | typedef struct lightctg 265 | { 266 | unsigned int index; 267 | int length; 268 | char * seq; 269 | } LIGHTCTG; 270 | 271 | 272 | typedef struct arc 273 | { 274 | unsigned int to_ed; 275 | unsigned int multiplicity; 276 | struct arc * prev; 277 | struct arc * next; 278 | struct arc * bal_arc; 279 | struct arc * nextInLookupTable; 280 | } ARC; 281 | 282 | typedef struct arcexist 283 | { 284 | Kmer kmer; 285 | struct arcexist * left; 286 | struct arcexist * right; 287 | } ARCEXIST; 288 | typedef struct locus 289 | { 290 | unsigned int contigCount; 291 | unsigned int * contigID; 292 | unsigned int transcriptomeCount; 293 | unsigned int ** transcriptomeContigID; 294 | unsigned int ** gap; 295 | unsigned int repeatMark; 296 | }LOCUS; 297 | typedef struct ReadSet 298 | { 299 | unsigned int contigID; 300 | int readOffset; 301 | int contigOffset; 302 | int alignLength; 303 | char orien; 304 | }READSET; 305 | 306 | typedef struct lib_info 307 | { 308 | int min_ins; 309 | int max_ins; 310 | int avg_ins; 311 | int rd_len_cutoff; 312 | int reverse; 313 | int asm_flag; 314 | int map_len; 315 | int pair_num_cut; 316 | int rank; 317 | //indicate which file is next to be read 318 | int curr_type; 319 | int curr_index; 320 | 321 | //file handlers to opened files 322 | FILE * fp1; 323 | FILE * fp2; 324 | boolean f1_start; 325 | boolean f2_start; 326 | //whether last read is read1 in pair 327 | int paired; // 0 -- single; 1 -- read1; 2 -- read2; 328 | 329 | //type1 330 | char ** a1_fname; 331 | char ** a2_fname; 332 | int num_a1_file; 333 | int num_a2_file; 334 | 335 | //type2 336 | char ** q1_fname; 337 | char ** q2_fname; 338 | int num_q1_file; 339 | int num_q2_file; 340 | 341 | //type3 342 | char ** p_fname; 343 | int num_p_file; 
//fasta only 344 | 345 | //type4 &5 346 | char ** s_a_fname; 347 | int num_s_a_file; 348 | char ** s_q_fname; 349 | int num_s_q_file; 350 | 351 | samfile_t *fp3; //the file handle to read bam file 352 | char **b_fname; //the name of the bam file 353 | int num_b_file; //the number of the bam file 354 | 355 | } LIB_INFO; 356 | 357 | typedef struct ctg4heap 358 | { 359 | unsigned int ctgID; 360 | int dis; 361 | unsigned char ds_shut4dheap: 1; // ignore downstream connections 362 | unsigned char us_shut4dheap: 1; // ignore upstream connections 363 | unsigned char ds_shut4uheap: 1; // ignore downstream connections 364 | unsigned char us_shut4uheap: 1; // ignore upstream connections 365 | } CTGinHEAP; 366 | 367 | typedef struct ctg4scaf 368 | { 369 | unsigned int ctgID; 370 | int start; 371 | int end; //position in scaff 372 | unsigned int cutHead : 8; // 373 | unsigned int cutTail : 7; // 374 | unsigned int scaftig_start : 1; //is it a scaftig starter 375 | unsigned int mask : 1; // is it masked for further operations 376 | unsigned int gapSeqLen: 15; 377 | int gapSeqOffset; 378 | } CTGinSCAF; 379 | 380 | typedef struct pe_info 381 | { 382 | int insertS; 383 | long long PE_bound; 384 | int rank; 385 | int pair_num_cut; 386 | } PE_INFO; 387 | #endif 388 | -------------------------------------------------------------------------------- /src/contig.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Contig.c 3 | * 4 | * Copyright (c) 2011-2013 BGI-Shenzhen . 5 | * 6 | * This file is part of SOAPdenovo-Trans. 7 | * 8 | * SOAPdenovo-Trans is free software: you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation, either version 3 of the License, or 11 | * (at your option) any later version. 
12 | * 13 | * SOAPdenovo-Trans is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with SOAPdenovo-Trans. If not, see . 20 | * 21 | */ 22 | 23 | #include "stdinc.h" 24 | #include "newhash.h" 25 | #include "extfunc.h" 26 | #include "extvab.h" 27 | 28 | static void initenv (int argc, char **argv); 29 | static void display_contig_usage (); 30 | char shortrdsfile[256], graphfile[256]; 31 | static boolean repeatSolve; 32 | static int M = 1; 33 | 34 | FILE * edge_info=NULL; 35 | int file_num=0; 36 | void output_edge_info(unsigned int edgeid) 37 | { 38 | unsigned int in_cov=0,out_cov=0; 39 | ARC * arc; 40 | unsigned int twin = getTwinEdge(edgeid); 41 | 42 | arc=edge_array[edgeid].arcs; 43 | int in_time=0,out_time=0; 44 | while(arc) 45 | { 46 | out_time++; 47 | out_cov += edge_array[arc->to_ed].cvg; 48 | arc = arc->next; 49 | } 50 | 51 | arc=edge_array[twin].arcs; 52 | while(arc) 53 | { 54 | in_time++; 55 | in_cov += edge_array[arc->to_ed].cvg; 56 | arc = arc->next; 57 | } 58 | 59 | fprintf(edge_info,"%d\t%d\t%d\t%d\t%d\n",in_cov,in_time,edge_array[edgeid].cvg,out_cov,out_time); 60 | 61 | } 62 | void find_edge(unsigned int edgeid,int poolid,double avg_cov) 63 | { 64 | if(pool[edgeid] !=0)// || (edge_array[edgeid].length<=48 && edge_array[edgeid].cvg <=avg_cov)) 65 | return ; 66 | if(edge_array[edgeid].length<=48 && edge_array[edgeid].cvg <=avg_cov) 67 | { 68 | if(edge_info != NULL) 69 | output_edge_info(edgeid); 70 | return ; 71 | } 72 | pool[edgeid]=poolid; 73 | unsigned int twin_ed=getTwinEdge(edgeid); 74 | pool[twin_ed]=poolid; 75 | 76 | ARC * arc; 77 | arc=edge_array[edgeid].arcs; 78 | while(arc) 79 | { 80 | find_edge(arc->to_ed,poolid,avg_cov); 81 | arc=arc->next; 82 | } 83 | 84 | if(twin_ed 
== edgeid) 85 | return ; 86 | 87 | arc=edge_array[twin_ed].arcs; 88 | while(arc) 89 | { 90 | find_edge(arc->to_ed,poolid,avg_cov); 91 | arc=arc->next; 92 | } 93 | } 94 | 95 | void resetZeroEdge() 96 | { 97 | unsigned int index; 98 | for(index=1;index<=num_ed;index++) 99 | { 100 | if(edge_array[index].cvg > 0) 101 | continue; 102 | unsigned short min_cov=0; 103 | ARC * arc; 104 | 105 | arc = edge_array[index].arcs; 106 | while(arc) 107 | { 108 | if(min_cov > edge_array[arc->to_ed].cvg && edge_array[arc->to_ed].cvg >0) 109 | min_cov=edge_array[arc->to_ed].cvg; 110 | if(min_cov == 0) 111 | min_cov=edge_array[arc->to_ed].cvg; 112 | arc=arc->next; 113 | } 114 | 115 | arc = edge_array[getTwinEdge(index)].arcs; 116 | while(arc) 117 | { 118 | if(min_cov > edge_array[arc->to_ed].cvg && edge_array[arc->to_ed].cvg>0) 119 | min_cov=edge_array[arc->to_ed].cvg; 120 | if(min_cov == 0) 121 | min_cov=edge_array[arc->to_ed].cvg; 122 | arc=arc->next; 123 | } 124 | edge_array[index].cvg=min_cov; 125 | edge_array[getTwinEdge(index)].cvg=min_cov; 126 | if(index == getTwinEdge(index)) 127 | index++; 128 | } 129 | } 130 | void divideComponent(char * outfile) 131 | { 132 | pool=(int*)malloc(sizeof(int )*(num_ed+1)); 133 | unsigned int index,twin_ed; 134 | int pool_id=1; 135 | 136 | for(index=1;index<=num_ed;index++) 137 | { 138 | pool[index]=0; 139 | } 140 | for(index=1;index<=num_ed;index++) 141 | { 142 | if(pool[index] != 0 ) 143 | continue; 144 | 145 | twin_ed=getTwinEdge(index); 146 | 147 | find_edge(index,pool_id,0.0); 148 | if(index != twin_ed) 149 | { 150 | find_edge(twin_ed,pool_id,0.0); 151 | index++; 152 | } 153 | pool_id++; 154 | } 155 | int pool_count=pool_id-1; 156 | int *contigCount=(int*)malloc((1+pool_count)*sizeof(int)); 157 | int *length=( int* ) malloc ((1+pool_count)*sizeof( int )); 158 | double * avg_cov=( double* ) malloc ((1+pool_count)*sizeof( double )); 159 | int * pool_backup = (int*)malloc(sizeof(int )*(num_ed+1)); 160 | 
for(pool_id=1;pool_id<=pool_count;pool_id++) 161 | { 162 | length[pool_id]=0; 163 | contigCount[pool_id]=0; 164 | avg_cov[pool_id]=0.0; 165 | } 166 | unsigned int twin; 167 | for(index=1;index<=num_ed;index++) 168 | { 169 | twin=getTwinEdge(index); 170 | contigCount[pool[index]]++; 171 | length[pool[index]]+=edge_array[index].length; 172 | avg_cov[pool[index]]+=edge_array[index].cvg*edge_array[index].length; 173 | if(twin!=index) 174 | index++; 175 | } 176 | 177 | for(pool_id=1;pool_id<=pool_count;pool_id++) 178 | { 179 | avg_cov[pool_id] /= length[pool_id]; 180 | } 181 | for(index=1;index<=num_ed;index++) 182 | { 183 | pool_backup[index]=pool[index]; 184 | pool[index]=0; 185 | } 186 | char fileName[1024]; 187 | sprintf (fileName, "%s.edgeInfo_%d", graphfile,file_num); 188 | edge_info = fopen(fileName,"w"); 189 | fprintf(edge_info,"in\tin_time\tself\tout\tout_time\n"); 190 | pool_id=1; 191 | for(index=1;index<=num_ed;index++) 192 | { 193 | if(pool[index] != 0 )//|| (edge_array[index].length<=48 && edge_array[index].cvg <= avg_cov[pool_backup[index]])) 194 | continue; 195 | if(edge_array[index].length<=48 && edge_array[index].cvg <= avg_cov[pool_backup[index]]) 196 | { 197 | if(edge_info != NULL) 198 | output_edge_info(index); 199 | continue; 200 | } 201 | 202 | twin_ed=getTwinEdge(index); 203 | 204 | find_edge(index,pool_id,avg_cov[pool_backup[index]]); 205 | if(index != twin_ed) 206 | { 207 | find_edge(twin_ed,pool_id,avg_cov[pool_backup[index]]); 208 | index++; 209 | } 210 | pool_id++; 211 | } 212 | fclose(edge_info); 213 | free(avg_cov); 214 | free(length); 215 | free(contigCount); 216 | free(pool_backup); 217 | 218 | sprintf(fileName,"%s.poolid",graphfile); 219 | 220 | output_pool(fileName); 221 | 222 | free(pool); 223 | file_num++; 224 | } 225 | int call_heavygraph (int argc, char **argv) 226 | { 227 | time_t start_t, stop_t, time_bef, time_aft; 228 | time (&start_t); 229 | boolean ret; 230 | initenv (argc, argv); 231 | loadVertex (graphfile); 232 | loadEdge 
(graphfile); 233 | 234 | char name[256]; 235 | 236 | // swapedge(); 237 | // sortedge(); 238 | // freshArc(); 239 | 240 | //0531 241 | if (M > 0) 242 | { 243 | time (&time_bef); 244 | bubblePinch (0.90, graphfile, M); 245 | time (&time_aft); 246 | printf ("time spent on bubblePinch: %ds\n\n", (int) (time_aft - time_bef)); 247 | } 248 | deleteWeakEdge(de); 249 | 250 | cutTipsInGraph (0, 0); 251 | 252 | // resetCov(); 253 | deleteUnlikeArc(); 254 | delowHighArc(delowArc); 255 | int change=1; 256 | int changed_time=1; 257 | while(change){ 258 | printf("%d time to delete light arcs \n",changed_time++); 259 | deleteSimpleLoop(); 260 | change=deleteLightArc(); 261 | if(change) 262 | { 263 | linearConcatenate(); 264 | compactEdgeArray (); 265 | } 266 | printf("\n"); 267 | } 268 | //output_graph(graphfile); 269 | // deleteLightContig(); 270 | deleteShortContig(cut_length); 271 | linearConcatenate(); 272 | compactEdgeArray (); 273 | printf("\n"); 274 | 275 | output_contig (edge_array, num_ed, graphfile, overlaplen + 1); 276 | 277 | // sprintf(name,"%s.AllComponent_back",graphfile); 278 | // divideComponent(name); 279 | 280 | output_updated_edges (graphfile); 281 | output_heavyArcs (graphfile); 282 | if (vt_array) 283 | { 284 | free ((void *) vt_array); 285 | vt_array = NULL; 286 | } 287 | if (edge_array) 288 | { 289 | free_edge_array (edge_array, num_ed_limit); 290 | edge_array = NULL; 291 | } 292 | destroyArcMem (); 293 | time (&stop_t); 294 | printf ("time elapsed: %dm\n\n", (int) (stop_t - start_t) / 60); 295 | return 0; 296 | } 297 | 298 | 299 | /***************************************************************************** 300 | * Parse command line switches 301 | *****************************************************************************/ 302 | void initenv (int argc, char **argv) 303 | { 304 | int copt; 305 | int inpseq, outseq; 306 | extern char *optarg; 307 | char temp[100]; 308 | 309 | inpseq = outseq = repeatSolve = 0; 310 | optind = 1; 311 | while ((copt = 
getopt (argc, argv, "g:e:q:Q:H:S:M:")) != EOF) 312 | { 313 | switch (copt) 314 | { 315 | 316 | case 'M': 317 | sscanf (optarg, "%s", temp); // 318 | M = atoi (temp); 319 | break; 320 | /* case 'D': 321 | sscanf (optarg, "%s", temp); 322 | de= atoi (temp) >= 0 ? atoi (temp) : 0; 323 | de *=10; 324 | break;*/ 325 | case 'g': 326 | inGraph = 1; 327 | sscanf (optarg, "%s", graphfile); // 328 | break; 329 | /* case 'R': 330 | repeatSolve = 1; 331 | break; 332 | */ 333 | case 'S': 334 | sscanf (optarg, "%s", temp); 335 | cut_length= atoi (temp) >= 0 ? atoi (temp) : 0; 336 | break; 337 | case 'e': 338 | sscanf (optarg, "%s", temp); 339 | de= atoi (temp) >= 0 ? atoi (temp) : 0; 340 | de *=10; 341 | break; 342 | case 'q': 343 | sscanf (optarg, "%s", temp); 344 | da= atoi (temp) >= 0 ? atoi (temp) : 0; 345 | break; 346 | case 'Q': 347 | sscanf (optarg, "%s", temp); 348 | dA= atoi (temp) >= 0 ? atoi (temp) : 0; 349 | break; 350 | case 'H': 351 | sscanf (optarg, "%s", temp); 352 | delowArc= atoi (temp) >= 100 ? 
atoi (temp) : 200; 353 | break; 354 | default: 355 | if (inGraph == 0) // 356 | { 357 | display_contig_usage (); 358 | exit (-1); 359 | } 360 | } 361 | } 362 | if (inGraph == 0) // 363 | { 364 | display_contig_usage (); 365 | exit (-1); 366 | } 367 | } 368 | static void display_contig_usage () 369 | { 370 | printf ("\ncontig -g inputGraph [-e EdgeCovCutoff -M mergeLevel\n"); 371 | printf (" -g\t\tinputGraph: prefix of input graph file name\n"); 372 | printf (" -e\t\t\tEdgeCovCutoff: edges with coverage no larger than EdgeCovCutoff will be deleted, [2]\n"); 373 | printf (" -M\t\t\tmergeLevel(min 0, max 3): the strength of merging similar sequences during contiging, [1]\n"); 374 | } 375 | -------------------------------------------------------------------------------- /src/sortContig.c: -------------------------------------------------------------------------------- 1 | #include "stdinc.h" 2 | #include "newhash.h" 3 | #include "extfunc.h" 4 | #include "extvab.h" 5 | 6 | ARC * sort_arc ( ARC * list ) 7 | { 8 | if ( !list ) 9 | { return list; } 10 | 11 | ARC * head = ( ARC * ) malloc ( sizeof ( ARC ) ); 12 | head->next = list; 13 | list->prev = head; 14 | ARC * curr = list; 15 | ARC * temp = list; 16 | ARC * temp1 = NULL; 17 | 18 | while ( curr ) 19 | { 20 | temp = curr; 21 | 22 | if ( temp ) 23 | { 24 | temp1 = temp->next; 25 | 26 | while ( temp1 ) 27 | { 28 | if ( temp->to_ed > temp1->to_ed ) 29 | { temp = temp1; } 30 | 31 | temp1 = temp1->next; 32 | } 33 | } 34 | 35 | if ( temp && temp != curr ) 36 | { 37 | if ( temp->next ) 38 | { 39 | temp->prev->next = temp->next; 40 | temp->next->prev = temp->prev; 41 | } 42 | else 43 | { 44 | temp->prev->next = NULL; 45 | } 46 | 47 | temp->next = curr; 48 | temp->prev = curr->prev; 49 | curr->prev->next = temp; 50 | curr->prev = temp; 51 | } 52 | else 53 | { 54 | curr = curr->next; 55 | } 56 | } 57 | 58 | list = head->next; 59 | list->prev = NULL; 60 | head->next = NULL; 61 | free ( head ); 62 | return list; 63 | }; 64 | static 
void copyOneEdge ( EDGE * target , EDGE * source ) 65 | { 66 | target->from_vt = source->from_vt; 67 | target->to_vt = source->to_vt; 68 | target->length = source->length; 69 | target->cvg = source->cvg; 70 | target->multi = source->multi; 71 | target->flag = source->flag; 72 | target->bal_edge = source->bal_edge; 73 | target->seq = source->seq; 74 | source->seq = NULL; 75 | target->arcs = source->arcs; 76 | source->arcs = NULL ; 77 | target->markers = source->markers; 78 | source->markers = NULL; 79 | target->deleted = source->deleted; 80 | }; 81 | static void updateArcToEd ( unsigned int ed_index ) 82 | { 83 | ARC * arc = edge_array[ed_index].arcs; 84 | 85 | while ( arc ) 86 | { 87 | arc->to_ed = index_array[arc->to_ed]; 88 | arc = arc->next; 89 | } 90 | }; 91 | inline void delete1Edge ( unsigned int index ) 92 | { 93 | edge_array[index].deleted = 1; 94 | }; 95 | //Copy edge from source to target. 96 | void copy1Edge ( EDGE * source, EDGE * target ) 97 | { 98 | target->from_vt = source->from_vt; 99 | target->to_vt = source->to_vt; 100 | target->length = source->length; 101 | target->cvg = source->cvg; 102 | target->multi = source->multi; 103 | 104 | if ( target->seq ) 105 | { 106 | free ( ( void * ) target->seq ); 107 | } 108 | 109 | target->seq = source->seq; 110 | source->seq = NULL; 111 | target->arcs = source->arcs; 112 | source->arcs = NULL; 113 | target->deleted = source->deleted; 114 | }; 115 | 116 | //Check whether two bases are equal. 
117 | int BaseEqual ( char ch1, char ch2 ) 118 | { 119 | if ( ch1 == ch2 ) 120 | { return 0; } 121 | else if ( ch1 > ch2 ) 122 | { return 1; } 123 | else 124 | { return -1; } 125 | }; 126 | 127 | int EdgeEqual ( unsigned int prev, unsigned int next ) 128 | { 129 | int i = 0; 130 | int length = edge_array[prev].length; 131 | char ch1, ch2; 132 | int equal = 0; 133 | 134 | for ( i = 0; i < length; ++i ) 135 | { 136 | ch1 = int2base ( ( int ) getCharInTightString ( edge_array[prev].seq, i ) ); 137 | ch2 = int2base ( ( int ) getCharInTightString ( edge_array[next].seq, i ) ); 138 | 139 | if ( ( equal = BaseEqual ( ch1, ch2 ) ) ) 140 | { 141 | return equal; 142 | } 143 | } 144 | 145 | return 0; 146 | }; 147 | void swapedge() 148 | { 149 | unsigned int i; 150 | ARC * arc, *bal_arc, *temp_arc; 151 | int count_swap = 0, count_equal = 0; 152 | 153 | for ( i = 1; i <= num_ed; ++i ) 154 | { 155 | if ( edge_array[i].deleted || EdSameAsTwin ( i ) ) 156 | { continue; } 157 | 158 | if ( EdSmallerThanTwin ( i ) ) 159 | { 160 | if ( KmerLarger ( vt_array[edge_array[i].from_vt].kmer, vt_array[edge_array[i + 1].from_vt].kmer ) ) 161 | { 162 | count_swap++; 163 | copyEdge ( i, num_ed + 1 + 1 ); 164 | copyEdge ( i + 1, num_ed + 1 ); 165 | copyEdge ( num_ed + 1, i ); 166 | copyEdge ( num_ed + 1 + 1, i + 1 ); 167 | edge_array[i].bal_edge = 2; 168 | edge_array[i + 1].bal_edge = 0; 169 | //take care of the arcs 170 | arc = edge_array[i].arcs; 171 | 172 | while ( arc ) 173 | { 174 | arc->bal_arc->to_ed = i + 1; 175 | arc = arc->next; 176 | } 177 | 178 | arc = edge_array[i + 1].arcs; 179 | 180 | while ( arc ) 181 | { 182 | arc->bal_arc->to_ed = i; 183 | arc = arc->next; 184 | } 185 | } 186 | else if ( KmerEqual ( vt_array[edge_array[i].from_vt].kmer, vt_array[edge_array[i + 1].from_vt].kmer ) ) 187 | { 188 | int temp = EdgeEqual ( i, i + 1 ); 189 | 190 | if ( temp == 0 ) 191 | { 192 | count_equal++; 193 | edge_array[i].bal_edge = 1; 194 | delete1Edge ( i + 1 ); 195 | //take care of the arcs 
196 | arc = edge_array[i].arcs; 197 | 198 | while ( arc ) 199 | { 200 | arc->bal_arc->to_ed = i; 201 | arc = arc->next; 202 | } 203 | 204 | bal_arc = edge_array[i + 1].arcs; 205 | edge_array[i + 1].arcs = NULL; 206 | 207 | while ( bal_arc ) 208 | { 209 | temp_arc = bal_arc; 210 | bal_arc = bal_arc->next; 211 | 212 | if ( edge_array[i].arcs ) 213 | { edge_array[i].arcs->prev = temp_arc; } 214 | 215 | temp_arc->next = edge_array[i].arcs; 216 | edge_array[i].arcs = temp_arc; 217 | } 218 | } 219 | else if ( temp > 0 ) 220 | { 221 | count_swap++; 222 | copyEdge ( i, num_ed + 1 + 1 ); 223 | copyEdge ( i + 1, num_ed + 1 ); 224 | copyEdge ( num_ed + 1, i ); 225 | copyEdge ( num_ed + 1 + 1, i + 1 ); 226 | edge_array[i].bal_edge = 2; 227 | edge_array[i + 1].bal_edge = 0; 228 | //take care of the arcs 229 | arc = edge_array[i].arcs; 230 | 231 | while ( arc ) 232 | { 233 | arc->bal_arc->to_ed = i + 1; 234 | arc = arc->next; 235 | } 236 | 237 | arc = edge_array[i + 1].arcs; 238 | 239 | while ( arc ) 240 | { 241 | arc->bal_arc->to_ed = i; 242 | arc = arc->next; 243 | } 244 | } 245 | } 246 | 247 | ++i; 248 | } 249 | else 250 | { 251 | delete1Edge ( i ); 252 | printf( "Warning : Front edge %d is larger than %d.\n", i, i + 1 ); 253 | } 254 | } 255 | 256 | printf( "%d none-palindrome edge(s) swapped, %d palindrome edge(s) processed.\n", count_swap, count_equal ); 257 | }; 258 | static int cmp_seq ( const void * a, const void * b ) 259 | { 260 | EDGE * A, *B; 261 | A = ( EDGE * ) a; 262 | B = ( EDGE * ) b; 263 | 264 | if ( KmerLarger ( vt_array[A->from_vt].kmer, vt_array[B->from_vt].kmer ) ) 265 | { 266 | return 1; 267 | } 268 | else if ( KmerSmaller ( vt_array[A->from_vt].kmer , vt_array[B->from_vt].kmer ) ) 269 | { 270 | return -1; 271 | } 272 | else 273 | { 274 | if ( A->seq[0] > B->seq[0] ) 275 | { 276 | return 1; 277 | } 278 | else if ( A->seq[0] == B->seq[0] ) 279 | { 280 | int i = 0; 281 | 282 | for ( i = 1; i < A->length && i < B->length; i++ ) 283 | { 284 | if ( 
getCharInTightString ( A->seq, i ) > getCharInTightString ( B->seq, i ) ) 285 | { return 1; } 286 | else if ( getCharInTightString ( A->seq, i ) < getCharInTightString ( B->seq, i ) ) 287 | { return -1; } 288 | } 289 | 290 | if ( i == A->length && i < B->length ) 291 | { return -1; } 292 | else if ( i < A->length && i == B->length ) 293 | { return 1; } 294 | else 295 | { 296 | printKmerSeq ( stderr , vt_array[A->from_vt].kmer ); 297 | fprintf ( stderr , "\n" ); 298 | printKmerSeq ( stderr , vt_array[B->from_vt].kmer ); 299 | fprintf ( stderr , "\n" ); 300 | 301 | for ( i = 0; i < A->length; i++ ) 302 | { 303 | printf( "%c", int2base ( ( int ) getCharInTightString ( A->seq, i ) ) ); 304 | } 305 | 306 | printf( "\n" ); 307 | 308 | for ( i = 0; i < B->length; i++ ) 309 | { 310 | printf( "%c", int2base ( ( int ) getCharInTightString ( B->seq, i ) ) ); 311 | } 312 | 313 | printf( "\n" ); 314 | printf( "cmp_seq:\terr\n" ); 315 | exit ( 0 ); 316 | return 0; 317 | } 318 | } 319 | else 320 | { 321 | return -1; 322 | } 323 | } 324 | }; 325 | void sortedge() 326 | { 327 | unsigned int index ; 328 | EDGE * sort_edge , * backup_edge ; 329 | sort_edge = ( EDGE * ) ckalloc ( sizeof ( EDGE ) * ( num_ed + 1 ) ); 330 | backup_edge = ( EDGE * ) ckalloc ( sizeof ( EDGE ) * ( num_ed + 1 ) ); 331 | unsigned int i = 1; 332 | 333 | for ( index = 1 ; index <= num_ed ; index ++ ) 334 | { 335 | sort_edge[i].from_vt = edge_array[index].from_vt; 336 | sort_edge[i].seq = edge_array[index].seq; 337 | sort_edge[i].to_vt = index; // record old id 338 | sort_edge[i].length = edge_array[index].length; 339 | i++; 340 | copyOneEdge ( & ( backup_edge[index] ) , & ( edge_array[index] ) ); 341 | 342 | if ( !EdSameAsTwin ( index ) ) 343 | { 344 | index++; 345 | copyOneEdge ( & ( backup_edge[index] ) , & ( edge_array[index] ) ); 346 | } 347 | } 348 | 349 | qsort ( & ( sort_edge[1] ), i - 1, sizeof ( sort_edge[1] ), cmp_seq ); 350 | index_array = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * ( 
num_ed + 1 ) ); // used to record new id 351 | unsigned int new_index = 1, old_index; 352 | 353 | for ( index = 1; index <= i - 1; index++ ) 354 | { 355 | old_index = sort_edge[index].to_vt; // old id 356 | sort_edge[index].seq = NULL; 357 | index_array[old_index] = new_index++;// old id -> new id 358 | 359 | if ( !EdSameAsTwin ( old_index ) ) 360 | { 361 | index_array[old_index + 1] = new_index++; // old id -> new id 362 | } 363 | } 364 | 365 | for ( index = 1; index <= num_ed; index++ ) 366 | { 367 | new_index = index_array[index]; 368 | copyOneEdge ( & ( edge_array[new_index] ), & ( backup_edge[index] ) ); 369 | updateArcToEd ( new_index ); 370 | } 371 | 372 | free ( index_array ); 373 | free ( sort_edge ); 374 | free ( backup_edge ); 375 | }; 376 | void freshArc() 377 | { 378 | unsigned int i; 379 | ARC * arc_temp, *parc; 380 | 381 | for ( i = 1; i <= num_ed; ++i ) 382 | { 383 | if ( edge_array[i].deleted ) 384 | { continue; } 385 | 386 | edge_array[i].arcs = sort_arc ( edge_array[i].arcs ); 387 | } 388 | }; 389 | --------------------------------------------------------------------------------