├── .github
    └── workflows
    │   ├── README.md
    │   ├── create_release.yml
    │   ├── val.bat
    │   ├── val.sh
    │   └── validate.yml
├── .gitignore
├── .gitmodules
├── LICENSE
├── Makefile
├── README.md
├── images
    └── graph.png
├── include
    ├── gfastats-global.h
    ├── input.h
    ├── main.h
    └── validate.h
├── instructions
    └── README.md
├── scaffolding
    └── README.md
├── scripts
    ├── gfastats_stats.sh
    ├── plot_runtime.R
    └── submit_gfastats_stats.sh
├── src
    ├── generate-random-fasta.cpp
    ├── generate-tests.cpp
    ├── input.cpp
    ├── main.cpp
    └── validate.cpp
├── testFiles
    ├── random1.agp
    ├── random1.comment.sak
    ├── random1.fasta
    ├── random1.fasta.1.bed
    ├── random1.fasta.2.bed
    ├── random1.fasta.3.bed
    ├── random1.fasta.4.bed
    ├── random1.fasta.5.bed
    ├── random1.fasta.gz
    ├── random1.fastq
    ├── random1.fastq.gz
    ├── random1.gfa2
    ├── random1.gfa2.instructions.sak
    ├── random1.hc.sak
    ├── random1.hdc.sak
    ├── random1.instructions.sak
    ├── random1.mask.sak
    ├── random1.rename.sak
    ├── random1.rvcp.sak
    ├── random2.gfa
    ├── random2.gfa.gz
    ├── random2.gfa2
    ├── random2.gfa2.agp
    ├── random2.gfa2.gz
    ├── random2.noseq.gfa
    ├── random3.sorting.fasta
    ├── random4.fasta
    ├── random5.findovl.gfa
    └── random6.circular.gfa
└── validateFiles
    ├── README.md
    ├── random1.fasta.10.tst
    ├── random1.fasta.11.tst
    ├── random1.fasta.3.tst
    ├── random1.fasta.4.tst
    ├── random1.fasta.5.tst
    ├── random1.fasta.6.tst
    ├── random1.fasta.7.tst
    ├── random1.fasta.78.tst
    ├── random1.fasta.79.tst
    ├── random1.fasta.8.tst
    ├── random1.fasta.80.tst
    ├── random1.fasta.81.tst
    ├── random1.fasta.82.tst
    ├── random1.fasta.83.tst
    ├── random1.fasta.84.tst
    ├── random1.fasta.85.tst
    ├── random1.fasta.86.tst
    ├── random1.fasta.87.tst
    ├── random1.fasta.88.tst
    ├── random1.fasta.89.tst
    ├── random1.fasta.9.tst
    ├── random1.fasta.90.tst
    ├── random1.fasta.91.tst
    ├── random1.fasta.92.tst
    ├── random1.fasta.93.tst
    ├── random1.fasta.94.tst
    ├── random1.fasta.95.tst
    ├── random1.fasta.96.tst
    ├── random1.fasta.gz.100.tst
    ├── random1.fasta.gz.69.tst
    ├── random1.fasta.gz.70.tst
    ├── random1.fasta.gz.71.tst
    ├── random1.fasta.gz.72.tst
    ├── random1.fasta.gz.73.tst
    ├── random1.fasta.gz.74.tst
    ├── random1.fasta.gz.75.tst
    ├── random1.fasta.gz.76.tst
    ├── random1.fasta.gz.77.tst
    ├── random1.fasta.gz.97.tst
    ├── random1.fasta.gz.98.tst
    ├── random1.fasta.gz.99.tst
    ├── random1.fastq.101.tst
    ├── random1.fastq.102.tst
    ├── random1.fastq.103.tst
    ├── random1.fastq.104.tst
    ├── random1.fastq.54.tst
    ├── random1.fastq.55.tst
    ├── random1.fastq.56.tst
    ├── random1.fastq.57.tst
    ├── random1.fastq.58.tst
    ├── random1.fastq.59.tst
    ├── random1.fastq.60.tst
    ├── random1.fastq.61.tst
    ├── random1.fastq.62.tst
    ├── random1.fastq.gz.105.tst
    ├── random1.fastq.gz.106.tst
    ├── random1.fastq.gz.107.tst
    ├── random1.fastq.gz.108.tst
    ├── random1.fastq.gz.33.tst
    ├── random1.fastq.gz.34.tst
    ├── random1.fastq.gz.35.tst
    ├── random1.fastq.gz.36.tst
    ├── random1.fastq.gz.37.tst
    ├── random1.fastq.gz.38.tst
    ├── random1.fastq.gz.39.tst
    ├── random1.fastq.gz.40.tst
    ├── random1.fastq.gz.41.tst
    ├── random1.gfa2.109.tst
    ├── random1.gfa2.110.tst
    ├── random1.gfa2.111.tst
    ├── random1.gfa2.112.tst
    ├── random1.gfa2.113.tst
    ├── random1.gfa2.12.tst
    ├── random1.gfa2.13.tst
    ├── random1.gfa2.14.tst
    ├── random2.gfa.42.tst
    ├── random2.gfa.43.tst
    ├── random2.gfa.44.tst
    ├── random2.gfa.gz.45.tst
    ├── random2.gfa.gz.46.tst
    ├── random2.gfa.gz.47.tst
    ├── random2.gfa2.63.tst
    ├── random2.gfa2.64.tst
    ├── random2.gfa2.65.tst
    ├── random2.gfa2.gz.51.tst
    ├── random2.gfa2.gz.52.tst
    ├── random2.gfa2.gz.53.tst
    ├── random2.noseq.gfa.114.tst
    ├── random2.noseq.gfa.48.tst
    ├── random2.noseq.gfa.49.tst
    ├── random2.noseq.gfa.50.tst
    ├── random3.sorting.fasta.24.tst
    ├── random3.sorting.fasta.25.tst
    ├── random3.sorting.fasta.26.tst
    ├── random3.sorting.fasta.27.tst
    ├── random3.sorting.fasta.28.tst
    ├── random3.sorting.fasta.29.tst
    ├── random3.sorting.fasta.30.tst
    ├── random3.sorting.fasta.31.tst
    ├── random3.sorting.fasta.32.tst
    ├── random4.fasta.115.tst
    ├── random4.fasta.15.tst
    ├── random4.fasta.16.tst
    ├── random4.fasta.17.tst
    ├── random4.fasta.18.tst
    ├── random4.fasta.19.tst
    ├── random4.fasta.20.tst
    ├── random4.fasta.21.tst
    ├── random4.fasta.22.tst
    ├── random4.fasta.23.tst
    ├── random5.findovl.gfa.116.tst
    ├── random5.findovl.gfa.66.tst
    ├── random5.findovl.gfa.67.tst
    ├── random5.findovl.gfa.68.tst
    ├── random6.circular.gfa.0.tst
    ├── random6.circular.gfa.1.tst
    ├── random6.circular.gfa.117.tst
    └── random6.circular.gfa.2.tst


/.github/workflows/README.md:
--------------------------------------------------------------------------------
 1 | validate is automatically run on pushes to any branch, or pull requests to main
 2 | 
 3 | to automatically create a new release and automatically upload mac, ubuntu, and windows builds run:
 4 | `git tag v*`
 5 | `git push origin v*`
 6 | where * is the version number.
 7 | 
 8 | Example:
 9 | `git tag v1.2.1`
10 | `git push origin v1.2.1`
11 | 
12 | 


--------------------------------------------------------------------------------
/.github/workflows/create_release.yml:
--------------------------------------------------------------------------------
# Release workflow: runs when a tag matching v* is pushed.
name: Create Release

on:
  push:
    tags:
      - 'v*' # Trigger on version tags

jobs:
  # Creates the GitHub release for the pushed tag and publishes the release's
  # upload URL as a job output so the jobs that declare `needs: create_release`
  # can attach assets to it.
  create_release:
    name: Create Release
    permissions: write-all
    runs-on: ubuntu-latest
    steps:
    - name: Checkout code
      uses: actions/checkout@v2
      with:
        submodules: recursive
    - name: Create Release
      id: create_release
      uses: actions/create-release@v1
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      with:
        # github.ref is the full ref of the pushed tag (refs/tags/<tag>)
        tag_name: ${{ github.ref }}
        release_name: gfastats ${{ github.ref }}
        body: |
          Changes in this Release
        draft: false
        prerelease: false
    outputs:
      # Read by the other jobs as needs.create_release.outputs.upload_url
      upload_url: ${{ steps.create_release.outputs.upload_url }}
 32 | 
 33 |   add_resources:
 34 |     needs: create_release
 35 |     name: Add Resources
 36 |     strategy:
 37 |       matrix:
 38 |         OS: [macos-13, ubuntu-latest, windows-2019]
 39 |         include:
 40 |         - OS: macos-13
 41 |           OS_NAME: macOS
 42 |         - OS: ubuntu-latest
 43 |           OS_NAME: linux
 44 |         - OS: windows-latest
 45 |           OS_NAME: win
 46 |     runs-on: ${{ matrix.OS }}
 47 |     steps:
 48 |       - name: Checkout code
 49 |         uses: actions/checkout@v2
 50 |         with:
 51 |             submodules: recursive
 52 |       - name: Build
 53 |         run: make -j
 54 |         
 55 |       - name: Make binary executable (Linux & macOS)
 56 |         if: matrix.OS_NAME != 'win'
 57 |         run: chmod +x build/bin/gfastats
 58 | 
 59 |       - name: Zip (Windows)
 60 |         if: matrix.OS_NAME == 'win'
 61 |         uses: papeloto/action-zip@v1
 62 |         with:
 63 |           files: build/bin/gfastats.exe
 64 |           dest: result.zip
 65 | 
 66 |       - name: Tar (Linux & macOS)
 67 |         if: matrix.OS_NAME != 'win'
 68 |         run: tar -czvf result.tar.gz -C build/bin gfastats
 69 | 
 70 |       - name: Add binaries & Upload tarball (Linux & macOS)
 71 |         if: matrix.OS_NAME != 'win'
 72 |         uses: actions/upload-release-asset@v1
 73 |         env:
 74 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 75 |         with:
 76 |           upload_url: ${{ needs.create_release.outputs.upload_url }}
 77 |           asset_path: result.tar.gz
 78 |           asset_name: gfastats.${{ github.ref_name }}-${{matrix.OS_NAME}}.tar.gz
 79 |           asset_content_type: application/tar
 80 |           
 81 |       - name: Add binaries & Upload tarball (Windows)
 82 |         if: matrix.OS_NAME == 'win'
 83 |         uses: actions/upload-release-asset@v1
 84 |         env:
 85 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 86 |         with:
 87 |           upload_url: ${{ needs.create_release.outputs.upload_url }}
 88 |           asset_path: result.zip
 89 |           asset_name: gfastats.${{ github.ref_name }}-${{matrix.OS_NAME}}.zip
 90 |           asset_content_type: application/zip
 91 | 
  # Zips the checked-out working tree (checked out with submodules enabled) and
  # attaches it to the release as gfastats.<tag>-with_submodules.zip.
  add_submodules:
    needs: create_release
    name: Source with submodules
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v2
        with:
            submodules: true
      - name: Zip
        uses: papeloto/action-zip@v1
        with:
          # "." = everything in the workspace after checkout
          files: .
          dest: result.zip
      - name: Add files
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ needs.create_release.outputs.upload_url }}
          asset_path: result.zip
          asset_name: gfastats.${{ github.ref_name }}-with_submodules.zip
          asset_content_type: application/zip
115 | 


--------------------------------------------------------------------------------
/.github/workflows/val.bat:
--------------------------------------------------------------------------------
rem Windows entry point used by validate.yml: runs the validator executable on the validateFiles directory.
"build/bin/gfastats-validate.exe" validateFiles
2 | 


--------------------------------------------------------------------------------
/.github/workflows/val.sh:
--------------------------------------------------------------------------------
1 | build/bin/gfastats-validate validateFiles
2 | 


--------------------------------------------------------------------------------
/.github/workflows/validate.yml:
--------------------------------------------------------------------------------
# Validation workflow: builds every target and runs the validator on each OS.
name: Validate

on:
  # Every push on any branch, plus pull requests that target main
  push:
  pull_request:
    branches: [ main ]

jobs:
  validate:
    name: Validate
    strategy:
      matrix:
        # One entry per OS; `command` is the wrapper script executed in the
        # Validate step below.
        include:
          - os: macos-latest
            command: ".github/workflows/val.sh"
          - os: ubuntu-latest
            command: ".github/workflows/val.sh"
            # NOTE(review): `chmod` is not referenced by any step in this file
            chmod: true
          - os: windows-latest
            command: ".github/workflows/val.bat"
      # Let the remaining OS jobs finish even if one of them fails
      fail-fast: false
    runs-on: ${{ matrix.os }}
    steps:
    - name: Checkout code
      uses: actions/checkout@v3
      with:
            submodules: true
    - name: Build
      run: make all -j
    # The chmod line runs for every matrix entry, including the Windows one
    - name: Validate
      run: |
        chmod +x .github/workflows/val.sh
        ${{ matrix.command }}
34 | 
35 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Created by https://www.gitignore.io/api/swift,xcode
 2 | 
 3 | ### Swift ###
 4 | # Xcode
 5 | #
 6 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore
 7 | 
 8 | ## Build generated
 9 | build/
10 | DerivedData/
11 | 
12 | ## Various settings
13 | *.pbxuser
14 | !default.pbxuser
15 | *.mode1v3
16 | !default.mode1v3
17 | *.mode2v3
18 | !default.mode2v3
19 | *.perspectivev3
20 | !default.perspectivev3
21 | xcuserdata/
22 | 
23 | ## Other
24 | *.moved-aside
25 | *.xccheckout
26 | *.xcscmblueprint
27 | 
28 | ## Obj-C/Swift specific
29 | *.hmap
30 | *.ipa
31 | *.dSYM.zip
32 | *.dSYM
33 | 
34 | ## Playgrounds
35 | timeline.xctimeline
36 | playground.xcworkspace
37 | 
38 | # Swift Package Manager
39 | #
40 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies.
41 | # Packages/
42 | # Package.pins
43 | .build/
44 | 
45 | # CocoaPods - Refactored to standalone file
46 | 
47 | # Carthage - Refactored to standalone file
48 | 
49 | # fastlane
50 | #
51 | # It is recommended to not store the screenshots in the git repo. Instead, use fastlane to re-generate the
52 | # screenshots whenever they are needed.
53 | # For more information about the recommended setup visit:
54 | # https://docs.fastlane.tools/best-practices/source-control/#source-control
55 | 
56 | fastlane/report.xml
57 | fastlane/Preview.html
58 | fastlane/screenshots
59 | fastlane/test_output
60 | 
61 | ### Xcode ###
62 | # Xcode
63 | #
64 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore
65 | 
66 | ## Build generated
67 | 
68 | ## Various settings
69 | 
70 | ## Other
71 | 
72 | ### Xcode Patch ###
73 | *.xcodeproj
74 | *.xcodeproj/*
75 | !*.xcodeproj/project.pbxproj
76 | *.xcodeproj/xcshareddata/
77 | !*.xcodeproj/xcuserdata/
78 | !*.xcworkspace/contents.xcworkspacedata
79 | /*.gcno
80 | 
81 | .DS_Store
82 | *.pbxproj
83 | *.xcworkspacedata
84 | *.plist
85 | 
86 | 
87 | # End of https://www.gitignore.io/api/swift,xcode,vscode
88 | 
89 | tmp.txt
90 | err.txt
91 | out
92 | 
93 | *.o
94 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
[submodule "gfalibs"]
	path = gfalibs
	url = https://github.com/vgl-hub/gfalibs.git
	# A single value is kept: when the key is repeated the last one takes
	# effect, and "dirty" already covers untracked files in the submodule.
	ignore = dirty
6 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Giulio Formenti
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | CXX ?= g++
 2 | INCLUDE_DIR ?= -I./include -Igfalibs/include
 3 | WARNINGS = -Wall -Wextra
 4 | 
 5 | CXXFLAGS = -g -std=gnu++14 -O3 $(INCLUDE_DIR) $(WARNINGS) $(CFLAGS)
 6 | 
 7 | TARGET = gfastats
 8 | TEST_TARGET = validate
 9 | GENERATE_TARGET = generate-tests
10 | RANDOM_FASTA_TARGET = generate-random-fasta
11 | BUILD = build/bin
12 | SOURCE = src
13 | INCLUDE = include
14 | BINDIR := $(BUILD)/.o
15 | 
16 | LIBS = -lz
17 | LDFLAGS= -pthread
18 | 
19 | #gfalibs
20 | GFALIBS_DIR := $(CURDIR)/gfalibs
21 | 
22 | OBJS := main input
23 | BINS := $(addprefix $(BINDIR)/, $(OBJS))
24 | 
25 | head: $(BINS) gfalibs | $(BUILD)
26 | 	$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $(BUILD)/$(TARGET) $(wildcard $(BINDIR)/*) $(GFALIBS_DIR)/*.o $(LIBS)
27 | 	
28 | debug: CXXFLAGS += -DDEBUG
29 | debug: CCFLAGS += -DDEBUG
30 | debug: head
31 | 
32 | all: head validate regenerate random_fasta
33 | 
34 | $(OBJS): %: $(BINDIR)/%
35 | 	@
36 | $(BINDIR)%: $(SOURCE)/%.cpp $(INCLUDE)/%.h | $(BINDIR)
37 | 	$(CXX) $(CXXFLAGS) $(LDFLAGS) -c $(SOURCE)/$(notdir $@).cpp -o $@
38 | 	
39 | .PHONY: gfalibs
40 | gfalibs:
41 | 	$(MAKE) -j -C $(GFALIBS_DIR) CXXFLAGS="$(CXXFLAGS)"
42 | 
43 | validate: | $(BUILD)
44 | 	$(CXX) $(CXXFLAGS) -o $(BUILD)/$(TARGET)-$(TEST_TARGET) $(SOURCE)/$(TEST_TARGET).cpp $(LIBS)
45 | 	
46 | regenerate: | $(BUILD)
47 | 	$(CXX) $(CXXFLAGS) -o $(BUILD)/$(TARGET)-$(GENERATE_TARGET) $(SOURCE)/$(GENERATE_TARGET).cpp $(LIBS)
48 | 
49 | random_fasta: | $(BUILD)
50 | 	$(CXX) $(CXXFLAGS) -o $(BUILD)/$(TARGET)-$(RANDOM_FASTA_TARGET) $(SOURCE)/$(RANDOM_FASTA_TARGET).cpp $(LIBS)
51 | 
52 | $(BUILD):
53 | 	-mkdir -p $@
54 | 
55 | $(BINDIR):
56 | 	-mkdir -p $@
57 | 	
58 | debug: CXXFLAGS += -DDEBUG -O0
59 | debug: head
60 | 
61 | clean:
62 | 	$(RM) -r build
63 | 	$(MAKE) -C $(GFALIBS_DIR) clean
64 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # gfastats
 2 | 
 3 | The swiss army knife for genome assembly.
 4 | 
 5 | **gfastats** is a single fast and exhaustive tool for **summary statistics** and simultaneous \*fa\* (fasta, fastq, gfa [.gz]) genome assembly file **manipulation**. **gfastats** also allows seamless fasta<>fastq<>gfa[.gz] conversion. It has been tested in genomes even >100Gbp.
 6 | 
 7 | Typical fast\* metrics include:
 8 | 
 9 | - scaffold, contig and gap size
10 | - number of scaffolds, contigs and gaps
11 | - total length of scaffolds, contigs and gaps
12 | - scaffold, contig, gap N50 and statistics (full N\*/NG\* statistics with the `--nstar-report` flag)
13 | - area under the curve (AuN/AuNG) values for scaffolds, contigs and gaps
14 | - average scaffold, contig, gap size
15 | - largest scaffold, contig and gap
16 | - base composition and GC content
17 | - soft-masked base counts (lower case bases)
18 | 
19 | Typical gfa metrics include (see also note below when evaluating gfa):
20 | 
21 | - Number of nodes and edges
22 | - Average degree
 23 | - Number of connected components, and length of the largest connected component
24 | - Number of dead ends
25 | - Number of disconnected components, and their total length
26 | - Number of bubbles
27 | 
28 | Metrics for each scaffold/contig can be generated with the `--seq-report` flag.
29 | 
30 | `Bed` coordinates and sizes of scaffolds, contigs and gaps can be outputted with the options `--out-coord` and `--out-size`. By default, `--out-coord` produces a full representation of the assembly in `agp` format.
31 | 
32 | Additionally, input can be filtered using scaffold lists or `bed` coordinate files with the options `--include-bed` and `--exclude-bed`.
33 | 
34 | Importantly, the filtered input can be outputted in any \*fa\* (fasta, fastq, gfa [.gz]) format.
35 | 
36 | ## Installation
37 | 
38 | Either download one of the releases or `git clone https://github.com/vgl-hub/gfastats.git --recursive` and `make -j` in `gfastats` folder.
39 | 
40 | ## Usage
41 | 
42 | `gfastats input.[fasta|fastq|gfa][.gz] [expected genome size] [header[:start-end]]`
43 | 
44 | To check out all options and flags use `gfastats -h`.
45 | 
 46 | **Note**: if you have a GFA without paths defined (e.g. as output from hifiasm) you will need to add the `--discover-paths` option in order to generate statistics for contigs and scaffolds. This is an attempt to clearly distinguish contigs from segments.
47 | 
48 | You can test some typical usage with the files in the `testFiles` folder, e.g.:
49 | 
50 | ```
51 | gfastats testFiles/random1.fasta -o gfa // converts fasta to gfa
 52 | gfastats testFiles/random2.gfa -o fa // converts gfa to fasta
53 | ```
54 | 
55 | ## Assembly manipulation
56 | 
57 | **gfastats** allows extensive assembly manipulation at the sequence level. Manipulation is achieved using a set of _instructions_ provided as an ordered list in a file to the option `-k` / `--swiss-army-knife`:
58 | 
59 | ```
60 | gfastats testFiles/random1.fasta -k testFiles/random1.instructions.sak -o gfa // reads fasta applies a set of instructions and outputs gfa
61 | ```
62 | 
63 | The _instructions_ are sequentially processed to generate the final output. Examples of _instructions_ are:
64 | 
65 | ```
 66 | JOIN contig1+ contig2+ 50 [gap1] [scaffold1] [this is a new scaffold] // introduces a new gap of 50 bp between scaffold1 and scaffold2 with optional id gap1, effectively joining the two sequences into a new sequence named scaffold1 with an optional comment
67 | SPLIT contig1+ contig2+ // splits the scaffold containing contig1 and contig2, effectively removing the existing gap between them
68 | ```
69 | 
70 | The _instructions_ directly provide the list of edits that were introduced. The _instructions_ could be from an automated tool or from manual annotation.
71 | 
72 | A prime example of manipulations using input from an automated tool is overlaying AGP coordinates on top of the graph to generate new scaffolds, which can be achieved with:
73 | ```
74 | gfastats input.fasta|input.gfa -a input.agp -o output.fasta|output.gfa
75 | ```
76 | 
77 | See the <a href="instructions/">instruction wiki</a> for a full list of _instructions_.
78 | 
79 | ## Description
80 | 
 81 | Please refer to the **gfastats** paper for a complete description. Briefly, **gfastats** reads and stores any fasta<>fastq<>gfa[.gz] in gfa format. **gfastats** then builds a bidirected graph representation of the assembly using adjacency lists, where each node is a segment, and each edge is a gap (see figure below). The original sequence can be directly manipulated from the graph. Finally, walking the graph makes it possible to generate different kinds of outputs, including manipulated assemblies and feature coordinates.
82 | 
83 | <p align="center">
84 |     <img src="images/graph.png" alt="alt gfastats assembly graph" width="70%" />
85 | </p>
86 | 
87 | ## How to cite
88 | 
89 | If you use **gfastats** in your work, please cite:
90 | 
91 | Gfastats: conversion, evaluation and manipulation of genome sequences using assembly graphs
92 | 
93 | Giulio Formenti, Linelle Abueg, Angelo Brajuka, Nadolina Brajuka, Cristo Gallardo, Alice Giani, Olivier Fedrigo, Erich D. Jarvis
94 | 
95 | doi: https://doi.org/10.1093/bioinformatics/btac460
96 | 


--------------------------------------------------------------------------------
/images/graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vgl-hub/gfastats/cedd755227fe0d7b6daf3fee932e325af54f3b21/images/graph.png


--------------------------------------------------------------------------------
/include/gfastats-global.h:
--------------------------------------------------------------------------------
 1 | #ifndef GLOBAL_H
 2 | #define GLOBAL_H
 3 | 
 4 | #include <mutex>
 5 | #include <chrono>
 6 | #include <queue>
 7 | #include <thread>
 8 | #include <functional>
 9 | 
10 | #include "log.h"
11 | #include "threadpool.h"
12 | 
13 | extern UserInput userInput;
14 | 
15 | #endif /* GLOBAL_H */
16 | 


--------------------------------------------------------------------------------
/include/input.h:
--------------------------------------------------------------------------------
 1 | #ifndef INPUT_H
 2 | #define INPUT_H
 3 | 
 4 | struct UserInputGfastats : UserInput {
 5 |     
 6 |     std::vector<std::string> outFiles; // output files
 7 |     int segmentReport_flag = 0;
 8 |     int pathReport_flag = 0;
 9 |     int outSequence_flag = 0;
10 |     int nstarReport_flag = 0;
11 |     int outSize_flag = 0;
12 |     int outCoord_flag = 0;
13 |     int outFile_flag = 0;
14 |     int outBubbles_flag = 0;
15 |     int cmd_flag = 0;
16 |     int rmGaps_flag = 0;
17 |     int extractContigs_flag = 0;
18 |     int terminalOvlLen = 0;
19 | 
20 | };
21 | 
22 | class Input {
23 |     
24 |     UserInputGfastats userInput;
25 |     // stream read variable definition
26 |     std::string firstLine;
27 |     unsigned int seqPos = 0; // to keep track of the original sequence order
28 |     
29 |     std::string newLine, seqHeader, seqComment, line, bedHeader;
30 |     
31 |     std::shared_ptr<std::istream> stream;
32 |     
33 |     std::vector<Instruction> instructions;
34 |     
35 | public:
36 |     
37 |     void load(UserInputGfastats userInput);
38 |     
39 |     void read(InSequences& inSequence);
40 |     
41 | };
42 | 
43 | #endif /* INPUT_H */
44 | 


--------------------------------------------------------------------------------
/include/main.h:
--------------------------------------------------------------------------------
 1 | #ifndef MAIN_H
 2 | #define MAIN_H
 3 | 
 4 | #include <stdio.h>
 5 | #include <stdlib.h>
 6 | 
 7 | #include <iostream>
 8 | #include <fstream>
 9 | #include <sstream>
10 | 
11 | #include <unistd.h>
12 | #include <getopt.h>
13 | 
14 | #include <vector>  //required for zstream
15 | #include <stack>
16 | #include <queue>
17 | #include <string.h>
18 | #include <algorithm> //required for zstream
19 | #include <cstring> //required for zstream
20 | #include <tuple> // for graph manipulation
21 | #include <cctype> // toupper()
22 | #include <iomanip>
23 | 
24 | #include <chrono>
25 | #include <memory>
26 | 
27 | #include <thread>
28 | #include <mutex>
29 | #include <condition_variable>
30 | 
31 | #include "log.h"
32 | 
33 | #include "uid-generator.h"
34 | 
35 | #include "bed.h"
36 | 
37 | #include "global.h" // global variables
38 | #include "struct.h"
39 | #include "functions.h" // global functions
40 | 
41 | #include "threadpool.h"
42 | 
43 | #include <parallel-hashmap/phmap.h>
44 | 
45 | #include "zlib.h"
46 | #include <zstream/zstream_common.hpp>
47 | #include <zstream/ozstream.hpp>
48 | #include <zstream/ozstream_impl.hpp>
49 | 
50 | #include "gfa-lines.h"
51 | 
52 | #include "gfa.h" // gfa classes
53 | #include "sak.h" // swiss army knife classes
54 | 
55 | #include "stream-obj.h"
56 | 
57 | #include "output.h" // output classes
58 | #include "input.h"
59 | 
60 | #endif /* MAIN_H */
61 | 


--------------------------------------------------------------------------------
/include/validate.h:
--------------------------------------------------------------------------------
  1 | #ifndef GFASTATS_VALIDATE_H
  2 | #define GFASTATS_VALIDATE_H
  3 | 
#include <algorithm>
#include <cstdlib>
#include <dirent.h>
#include <fstream>
#include <iostream>
#include <set>
#include <string>
#include <vector>
 12 | 
 13 | std::string getExePath(const std::string &argv0) {
 14 |     std::string exePath = argv0.substr(0, argv0.find_last_of("/\\")+1);
 15 |     std::replace(exePath.begin(), exePath.end(), '\\', '/');
 16 | #ifdef _WIN32
 17 |     exePath += "gfastats.exe";
 18 | #else
 19 |     exePath += "gfastats";
 20 | #endif
 21 |     return exePath;
 22 | }
 23 | 
 24 | std::string rmFileExt(const std::string path) { // utility to strip file extension from file
 25 |     if (path == "." || path == "..")
 26 |         return path;
 27 | 
 28 |     size_t pos = path.find_last_of("\\/.");
 29 |     if (pos != std::string::npos && path[pos] == '.')
 30 |         return path.substr(0, pos);
 31 | 
 32 |     return path;
 33 | }
 34 | 
 35 | std::string getFileExt(std::string fileName) // utility to get file extension
 36 | {
 37 |     if(fileName.find_last_of(".") != std::string::npos) {
 38 |         
 39 |         if(fileName.substr(fileName.find_last_of(".")+1) == "gz") {
 40 |             
 41 |             fileName = rmFileExt(fileName);
 42 |             
 43 |             return getFileExt(fileName) + ".gz";
 44 |             
 45 |         }
 46 |         
 47 |         return fileName.substr(fileName.find_last_of(".")+1);
 48 |     }
 49 |     return "";
 50 | }
 51 | 
 52 | std::vector<std::string> list_dir(const char *path) {
 53 |     std::vector<std::string> list;
 54 |     struct dirent *entry;
 55 |     DIR *dir = opendir(path);
 56 | 
 57 |     if (dir == NULL) {
 58 |         std::cerr << "error: unable to access " << path << std::endl;
 59 |         exit(0);
 60 |     }
 61 |     while ((entry = readdir(dir)) != NULL) {
 62 |         DIR *f = opendir((std::string(path)+"/"+entry->d_name).c_str());
 63 |         if(f == NULL) /*not a directory*/ list.push_back(std::string(entry->d_name));
 64 |         else closedir(f);
 65 |     }
 66 |     closedir(dir);
 67 |     return list;
 68 | }
 69 | 
 70 | void get_recursive(const std::string &path, std::set<std::string> &paths) {
 71 |     if(getFileExt(path) == "tst") {
 72 |         paths.insert(path);
 73 |     } else {
 74 |         DIR *dir = opendir(path.c_str());
 75 |         if(dir != NULL) {
 76 |             for(const auto &file : list_dir(path.c_str())) {
 77 |                 get_recursive((path+"/"+file).c_str(), paths);
 78 |             }
 79 |             closedir(dir);
 80 |         }
 81 |     }
 82 | }
 83 | 
 84 | int i = 0;
 85 | 
 86 | void genTest(std::string exePath, const std::string &file, const std::string &args){
 87 |     std::string tstFile = "validateFiles/"+file+"."+std::to_string(i)+".tst";
 88 |     std::cout << "generating: " << tstFile << std::endl;
 89 |     std::ofstream ostream;
 90 |     ostream.open(tstFile);
 91 |     ostream << "testFiles/" << file << " " << args << "\nembedded" << std::endl;
 92 |     ostream.close();
 93 | #ifdef _WIN32
 94 |     std::string cmd = "\"\""+exePath+"\" testFiles/"+file+" "+args+" >> "+tstFile+"\"";
 95 | #else
 96 |     std::string cmd = "\""+exePath+"\" testFiles/"+file+" "+args+" >> "+tstFile;
 97 | #endif
 98 |     int exit = system(cmd.c_str());
 99 |     if (exit == EXIT_SUCCESS) {
100 |         ostream << cmd << std::endl;
101 |         ostream << "Command executed.";
102 |     }
103 |     ++i;
104 | };
105 | 
106 | #endif // #ifndef GFASTATS_VALIDATE_H
107 | 


--------------------------------------------------------------------------------
/instructions/README.md:
--------------------------------------------------------------------------------
  1 | # Instruction wiki
  2 | 
  3 | Instructions are sequentially executed and each instruction is described by tab-separated columns.
  4 | 
  5 | ## JOIN
  6 | 
  7 | The JOIN instruction introduces a new gap of 50 bp between `scaffold1` and `scaffold2` (two paths) with id `gap1`, effectively joining the two sequences into a new sequence with id `new_scaffold` and an optional comment.
  8 | 
  9 | ```
 10 | JOIN    scaffold1+    scaffold2+    50  gap1    new_scaffold
 11 | JOIN    scaffold1(1:100)+    scaffold2(1:100)+    50  gap1    new_scaffold // optional subsetting
 12 | ```
 13 | 
 14 | ## SPLIT
 15 | 
 16 | The SPLIT instruction splits the scaffold containing `segment1` and `segment2`, effectively removing the existing gap between them. Two optional comments can be provided.
 17 | 
 18 | ```
 19 | SPLIT   segment1+    segment2+    scaffold1   scaffold2 [this is a new scaffold1] [this is a new scaffold2]
 20 | ```
 21 | 
 22 | ## EXCISE
 23 | 
 24 | The EXCISE instruction removes segment1 from its scaffold, leaving it unplaced and adding a gap of 50bp with id `gap1` between the original sequences
 25 | 
 26 | ```
 27 | EXCISE  segment1  50  gap1
 28 | ```
 29 | 
 30 | ## REMOVE
 31 | 
 32 | The REMOVE instruction removes the paths involving the specified segment.
 33 | 
 34 | ```
 35 | REMOVE  segment1
 36 | ```
 37 | 
 38 | ## EXCLUDE
 39 | 
 40 | The EXCLUDE instruction removes the specified path and all its components.
 41 | 
 42 | ```
 43 | EXCLUDE  path1
 44 | ```
 45 | 
 46 | ## ERASE
 47 | 
 48 | The ERASE instruction trims off the sequence range specified from the given segment.
 49 | 
 50 | ```
 51 | ERASE   segment1:10-100 // deletes segment1 sequence between the coordinates provided (in bed format)
 52 | ```
 53 | 
 54 | ## RVCP
 55 | 
 56 | The RVCP instruction reverse-complements the sequence of `path1` or `segment1` in place.
 57 | 
 58 | ```
 59 | RVCP    path1/segment1
 60 | ```
 61 | 
 62 | ## INVERT
 63 | 
 64 | The INVERT instruction inverts the sequence of `segment1` in place.
 65 | 
 66 | ```
 67 | INVERT  segment1
 68 | ```
 69 | 
 70 | ## RESIZE
 71 | 
 72 | The RESIZE instruction sets the size of `gap1` to 50 bp.
 73 | 
 74 | ```
 75 | RESIZE  gap1    50
 76 | ```
 77 | 
 78 | ## MASK
 79 | 
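 80 | The MASK instruction masks a portion of a path with Ns (in the example, the 50 bp between coordinates 10 and 60 of `path1`), effectively adding a gap in the corresponding segment. The optional last column sets the size of the resulting gap (5 in the example); if it is not provided, the masked size is used.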
 81 | 
 82 | ```
 83 | MASK  path1 10  60  [5]
 84 | ```
 85 | 
 86 | ## CLEAVE
 87 | 
 88 | The CLEAVE instruction breaks the specified segment at the given position, generating `segment2` and `segment3`, optionally connected by an edge.
 89 | 
 90 | ```
 91 | CLEAVE  segment1 50  segment2 segment3 [edge1]
 92 | ```
 93 | 
 94 | ## RENAME
 95 | 
 96 | The RENAME instruction renames a path. It can be used to rename FASTA headers.
 97 | 
 98 | ```
 99 | RENAME  path1   new_path
100 | ```
101 | 
102 | ## COMMENT
103 | 
104 | The COMMENT instruction adds or replaces the comment associated with a specific path/header.
105 | 
106 | ```
107 | COMMENT  path1   comment
108 | ```
109 | 
110 | ## Yet to be implemented
111 | 
112 | ```
113 | ADD contig3 contig1+ 50 contig2+ 50 ACGT // introduces a new contig named contig3 with sequence ACGT between contig1 and contig2 leaving 50bp gaps on each side
114 | REPLACE contig1:20-24 ACGT // replaces the sequence at coordinates contig1:20-24 with ACGT
115 | ```
116 | 


--------------------------------------------------------------------------------
/scaffolding/README.md:
--------------------------------------------------------------------------------
 1 | ### example data: bTaeGut2 Hifiasm (HiC) assembly
 2 | right click -> download link
 3 | - [hap1 contigs as GFA](https://genomeark.s3.amazonaws.com/species/Taeniopygia_guttata/bTaeGut2/assembly_vgp_hic_2.0/intermediates/hifiasm/bTaeGut2.trim.HiC.hic.hap1.p_ctg.gfa)
 4 | - [hap1 s1 AGP](https://genomeark.s3.amazonaws.com/species/Taeniopygia_guttata/bTaeGut2/assembly_vgp_hic_2.0/intermediates/bionano_hap1/agp_fasta/bTaeGut2_Saphyr_DLE1_3172351_bppAdjust_cmap_bTaeGut2_trim_HiC_hic_hap1_p_ctg_fasta_NGScontigs_HYBRID_SCAFFOLD.agp)
 5 | - [hap1 s2 AGP](https://genomeark.s3.amazonaws.com/species/Taeniopygia_guttata/bTaeGut2/assembly_vgp_hic_2.0/intermediates/salsa_hap1/bTaeGut2_hap1_s1.gfastats.rename_salsa/scaffolds_FINAL.original-coordinates.agp)
 6 | - [hap1 s2 final fasta (to check your results)](https://genomeark.s3.amazonaws.com/species/Taeniopygia_guttata/bTaeGut2/assembly_vgp_hic_2.0/bTaeGut2.hic.hap1.s2.fasta)
 7 | 
 8 | The starting files from hifiasm-HiC workflow are the hap1 & hap2 GFAs:
 9 | 
10 | `bTaeGut2.hap1.gfa` and `bTaeGut2.hap2.gfa`
11 | 
12 | Convert GFA -> FASTA, then run Bionano to obtain the s1 AGPs: feeding `bTaeGut2.hap1.fasta` into Bionano produces `bTaeGut2.hap1.s1.agp`, and likewise for hap2.
13 | 
14 | NOTE: if Bionano is cutting, fix the subseq lines with the `sed` command below. Bionano is not cutting in Galaxy, so you do not need to run the `sed` command on Galaxy assemblies.
15 | ````bash
16 | # THIS IS NOT NEEDED FOR GALAXY ASSEMBLIES
17 | cat bTaeGut2_hap1_s1.agp | sed 's/W\t\(.*\)_subseq_\([0-9]*\):\([0-9]*\)\t[0-9]*\t[0-9]*\t\(.\)/W\t\1\t\2\t\3\t\4/g' | sed 's/subseq_\([0-9]*\):\([0-9]*\)/subseq_\1_\2/g' > bTaeGut2_hap1_s1.edit.agp
18 | ````
19 | 
20 | ##### UPDATE: MAY 3, 2022
21 | Newer versions of gfastats append `_path` to path names, so the Bionano AGP must be processed accordingly. **This needs to happen even if Bionano is not cutting -- i.e. this needs to happen for Galaxy assemblies!**
22 | 
23 | An example of fixing the Bionano AGP so that contig names carry the `_path` suffix:
24 | ````bash
25 | awk '{OFS = "\t"}{if ($0 ~ /^#/) print $0 }{if ($6 ~ /h1*/) print $1,$2,$3,$4,$5,$6"_path",$7,$8,$9; if ($6 ~ /^[0-9]/) print $0}' bTaeGut2.hap1.s1.edit.agp > bTaeGut2.hap1.s1.edit.path.agp
26 | ````
27 | 
28 | Overlap s1 AGP onto c1/p1 GFA. `--discover` is so gfastats finds the paths in the GFA
29 | ````bash
30 | gfastats bTaeGut2.trim.HiC.hic.hap1.p_ctg.gfa --discover -o bTaeGut2.hap1.discover.gfa
31 | gfastats bTaeGut2.hap1.discover.gfa --discover -a bTaeGut2.hap1.s1.edit.path.agp -o bTaeGut2.hap1.s1.gfa
32 | ````
33 | 
34 | Convert s1 GFA -> s1 FASTA, run salsa to obtain s2 AGP.
35 | ````bash
36 | gfastats bTaeGut2.hap1.s1.gfa -o bTaeGut2.hap1.s1.gfastats.fasta
37 | ````
38 | NOTE: IF Bionano is cutting, then subseq lines have colons in the names, so you need to remove those before SALSA
39 | ````bash
40 | ## Removing colons from bionano scaff names, because salsa doesn't like it
41 | # THIS IS NOT NEEDED FOR GALAXY ASSEMBLIES
42 | sed 's/:/_/g' bTaeGut2.hap1.s1.gfastats.fasta > bTaeGut2.hap1.s1.gfastats.nocolon.fasta
43 | ````
44 | 
45 | `bTaeGut2.hap1.s1.gfastats.fasta` into SALSA produces `bTaeGut2.hap1.s2.agp`
46 | 
47 | Overlap s2 AGP onto s1 GFA to create s2 GFA
48 | ````bash
49 | cp <salsa_results_directory>/scaffolds_FINAL.original-coordinates.agp ./bTaeGut2.hap1.s2.originalcoords.agp
50 | gfastats bTaeGut2.hap1.s1.gfa -a bTaeGut2.hap1.s2.originalcoords.agp -o bTaeGut2.hap1.s2.gfa
51 | ````
52 | If you want to convert this s2 GFA to s2 FASTA:
53 | ````bash
54 | gfastats bTaeGut2.hap1.s2.gfa -o bTaeGut2.hap1.s2.gfastats.fasta
55 | ````
56 | 


--------------------------------------------------------------------------------
/scripts/gfastats_stats.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | 
 4 | pathlist=$1
 5 | LINE=$(sed -n "$SLURM_ARRAY_TASK_ID"p $pathlist)
 6 | 
 7 | echo "SCRIPT START FOR $LINE ----------------------------"
 8 | printf "Path to genomeark file:  $LINE \n\n"
 9 | 
10 | IFS='/' read -r -a array <<< $LINE 
11 | fastaname=${array[4]} ##make more robust - basename 
12 | echo "Local filename: $LINE"
13 | 
14 | printf "Path to genomeark file:  $LINE \n\n"
15 | 
16 | statsname=$LINE.gfastats
17 | 
18 | aws s3 cp s3://genomeark/${LINE} ./${fastaname}
19 | 
20 | ziptime=$(wc -c $fastaname)
21 | echo "Time on compressed $fastaname"
22 | TIMEFORMAT=%R
23 | ziptime=$(time (gfastats $fastaname > ${fastaname}_temp_out.txt) 2>&1)
24 | 
25 | ziplength=$(grep "Total scaffold length" ${fastaname}_temp_out.txt | grep -Eo "[0-9]+") 
26 | echo "Compressed length: $ziplength"
27 | printf "$LINE\t $ziplength\t $ziptime\t gzip\n"  >> gfastats_stats_out.txt
28 | 
29 | uncomp=$(echo $fastaname | sed 's/.gz//g') 
30 | echo "Decompressing $fastaname \n\n"
31 | gunzip $fastaname 
32 | 
33 | echo "Time on uncompressed fasta: $uncomp"
34 | unziptime=$(time (gfastats $uncomp > ${fastaname}_temp_out_2.txt) 2>&1)
35 | 
36 | unziplength=$(grep "Total scaffold length" ${fastaname}_temp_out_2.txt | grep -Eo "[0-9]+")
37 | echo "Decompressed length: $unziplength"
38 | 
39 | printf "$LINE\t $unziplength\t $unziptime\t plain text\n" >> gfastats_stats_out.txt
40 | 
41 | rm $uncomp
42 | rm ${fastaname}_temp_out.txt 
43 | rm ${fastaname}_temp_out_2.txt 
44 | 
45 | echo "SCRIPT COMPLETE FOR $LINE --------------------"
46 | 
47 | 


--------------------------------------------------------------------------------
/scripts/plot_runtime.R:
--------------------------------------------------------------------------------
 1 | # Plot gfastats runtime against genome size, one series per input format.
 2 | #
 3 | # Reads the tab-separated table "data.txt" (columns used: size, time, format)
 4 | # from the directory that contains this script and writes "Fig 1c.png" there.
 5 | 
 6 | # Work next to the script; this call goes through the RStudio API.
 7 | setwd(dirname(rstudioapi::getSourceEditorContext()$path))
 8 | 
 9 | library(ggplot2)
10 | 
11 | df<-read.csv("data.txt", header = TRUE, sep = "\t")
12 | 
13 | runtime_plot <- ggplot(df, aes(x=size, y=time, group=format)) +
14 |   geom_point(aes(color=format), size = 3)+
15 |   scale_color_grey() + theme_classic() +
16 |   geom_smooth(aes(color=format)) +
17 |   theme(
18 |     text = element_text(size = 60),
19 |     legend.title = element_blank(),
20 |     legend.key.size = unit(3,"cm"),
21 |     axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
22 |   ) +
23 |   xlab("Genome size (Gbp)") + ylab("Time (s)") +
24 |   guides(color=guide_legend(override.aes=list(fill=NA)))
25 | 
26 | png(file="Fig 1c.png",
27 |     width=2000, height=1000)
28 | 
29 | # A ggplot object is only drawn when printed. Top-level auto-printing does not
30 | # happen under source(), which would leave the PNG empty, so print explicitly.
31 | print(runtime_plot)
32 | 
33 | dev.off()


--------------------------------------------------------------------------------
/scripts/submit_gfastats_stats.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | pathlist=$1 
 4 | linecount=$(wc -l $pathlist | awk '{print $1}')
 5 | echo $linecount
 6 | 
 7 | log=logs/slurm_%A.log 
 8 | sbatch -p hpc,vgl,vgl_bigmem -c 1 --error=$log --output=$log --array=1-$linecount gfastats_stats.sh $pathlist
 9 | 
10 | 
11 | 


--------------------------------------------------------------------------------
/src/generate-random-fasta.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <fstream>
 3 | #include <ctime>
 4 | #include <cstdlib>
 5 | 
 6 | typedef unsigned long long ull;
 7 | 
 8 | // random unsigned long long assembled from several std::rand() draws placed at different bit offsets
 9 | ull rndull() {
10 | #if RAND_MAX == 2147483647 // can get 32 bits of randomness from std::rand()
11 |     return (((ull)std::rand()) << 32) | (((ull)std::rand())); // shift (not compare) the first draw into the high word
12 | #else // only guaranteed 16 bits of randomness from std::rand()
13 |     return (((ull)std::rand()) << 48) | (((ull)std::rand()) << 32) | (((ull)std::rand()) << 16) | (((ull)std::rand()));
14 | #endif
15 | }
16 | 
17 | ull rnd(ull min, ull max) { // random value in [min, max); returns min when the range is empty
18 |     return max > min ? rndull()%(max-min)+min : min; // max <= min would otherwise divide by zero or wrap around
19 | }
20 | 
21 | char rndACGT() { // one of 'A', 'C', 'G', 'T', chosen by a single rand() draw
22 |     const int pick = rand()%4;
23 |     return "ACGT"[pick];
24 | }
25 | 
26 | // Writes a random FASTA file: argv[1] is the output path, argv[2..9] are the bounds named in the usage string.
27 | // Returns EXIT_FAILURE on a wrong argument count or when the output file cannot be opened.
28 | int main(int argc, char **argv) {
29 |     if(argc != 10) {
30 |         std::cout << "usage: generate-random-fasta <output_file> <contig_min_size> <contig_max_size> <gap_min_size> <gap_max_size> <min_num_contigs> <max_num_contigs> <min_num_headers> <max_num_headers>" << std::endl;
31 |         return EXIT_FAILURE; // argv[1..9] are read unconditionally below, so stop here
32 |     }
33 | 
34 |     ull contig_min_size     = std::stoull(argv[2]),
35 |         contig_max_size     = std::stoull(argv[3]),
36 |         gap_min_size        = std::stoull(argv[4]),
37 |         gap_max_size        = std::stoull(argv[5]),
38 |         min_num_contigs     = std::stoull(argv[6]),
39 |         max_num_contigs     = std::stoull(argv[7]),
40 |         min_num_headers     = std::stoull(argv[8]),
41 |         max_num_headers     = std::stoull(argv[9]);
42 | 
43 |     std::srand(std::time(nullptr)); // seed from the clock: every run produces a different file
44 | 
45 |     std::ofstream output_file;
46 |     output_file.open(argv[1]);
47 |     if(!output_file.is_open()) {
48 |         std::cerr << "couldn't open the specified file: <" << argv[1] << ">" << std::endl;
49 |         return EXIT_FAILURE;
50 |     }
51 |     ull num_headers = rnd(min_num_headers, max_num_headers); // rnd() excludes its upper bound
52 |     for(ull h=0; h<num_headers; ++h) {
53 |         output_file << ">Header" << h+1 << std::endl;
54 | 
55 |         if(std::rand()%2 == 1) {
56 |             ull gap_size = rnd(gap_min_size, gap_max_size);
57 |             for(ull i=0; i<gap_size; ++i) {
58 |                 output_file << 'N';
59 |             }
60 |         } // leading gap
61 | 
62 |         ull num_contigs = rnd(min_num_contigs, max_num_contigs);
63 |         for(ull c=0; c<num_contigs; ++c) {
64 |             ull contig_size = rnd(contig_min_size, contig_max_size);
65 |             for(ull i=0; i<contig_size; ++i) {
66 |                 output_file << rndACGT();
67 |             }
68 | 
69 |             // NOTE(review): Ns are only written after the last contig, so contigs within a record are not separated by gaps
70 |             if(c == num_contigs-1 && std::rand()%2 == 1) { // trailing gap
71 |                 ull gap_size = rnd(gap_min_size, gap_max_size);
72 |                 for(ull i=0; i<gap_size; ++i) {
73 |                     output_file << 'N';
74 |                 }
75 |             }
76 |         }
77 | 
78 |         output_file << std::endl; // the whole record is written as a single sequence line
79 |     }
80 | 
81 |     return EXIT_SUCCESS;
82 | }


--------------------------------------------------------------------------------
/src/generate-tests.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | #include <map>
 3 | #include <cstdio>
 4 | 
 5 | #include "validate.h"
 6 | 
 7 | int main(int, char **argv) { // Deletes the .tst files in validateFiles/ and regenerates them through genTest(); asks for confirmation first
 8 |     std::cout << "WARNING: only run this program if gfastats is in a working state" << std::endl;
 9 |     std::cout << "WARNING: previous validate files will be deleted" << std::endl;
10 |     std::cout << "continue? (Y/N) ";
11 |     std::string input;
12 |     std::cin >> input;
13 |     if(input != "Y" && input != "y") { // anything other than Y/y cancels
14 |         std::cout << "validate generation cancelled" << std::endl;
15 |         std::exit(0);
16 |     }
17 |     std::cout << "deleting old validate files..." << std::endl;
18 | 
19 |     for(auto &file : list_dir("validateFiles")) {
20 |         if(getFileExt(file) != "tst") continue; // dont delete README
21 |         file = "validateFiles/"+file; // list_dir entries are used as bare names, so prefix the directory
22 |         if(remove(file.c_str()) != 0) {
23 |             std::cerr << "error deleting <" << file << ">" << std::endl;
24 |             return -1; // abort on the first file that cannot be removed
25 |         }
26 |     }
27 | 
28 |     std::cout << "generating new validate files..." << std::endl;
29 | 
30 |     std::string exePath = getExePath(argv[0]); // presumably the path of the gfastats executable derived from argv[0]; confirm in validate.h
31 | 
32 |     const std::map<std::set<std::string>, std::vector<std::string>> ext_args = { // argument sets applied to every test file with a matching extension
33 |         {{"fasta", "fasta.gz", "fastq", "fastq.gz"}, {"", "-s s", "-s c", "-s g", "-b a", "-b s", "-b c", "-b g", "--homopolymer-compress 1 -ofa"}},
34 |         {{"gfa", "gfa.gz", "gfa2", "gfa2.gz"}, {"-o gfa2", "-o gfa", "-o fasta"}}
35 |     //  {{set of test file extensions}, {list of command line args to run with}}
36 |     };
37 | 
38 |     const std::map<std::set<std::string>, std::vector<std::string>> file_args = { // argument sets applied only to the named test files
39 |         {{"random1.fasta", "random1.fasta.gz", "random1.fastq", "random1.fastq.gz", "random1.gfa2"}, {"-a testFiles/random1.agp --stats", "-a testFiles/random1.agp -ofa", "-k testFiles/random1.rvcp.sak", "-k testFiles/random1.mask.sak"}},
40 |         {{"random1.fasta"}, {"-k testFiles/random1.instructions.sak", "-ofa -k testFiles/random1.instructions.sak", "-ofa -k testFiles/random1.hc.sak", "-ofa -k testFiles/random1.hdc.sak", "Header2", "-ofa -e testFiles/random1.fasta.1.bed", "-ofa -e testFiles/random1.fasta.2.bed", "-ofa -e testFiles/random1.fasta.3.bed", "-ofa -e testFiles/random1.fasta.4.bed", "-ofa -e testFiles/random1.fasta.5.bed", "-ofa -i testFiles/random1.fasta.1.bed", "-ofa -i testFiles/random1.fasta.2.bed", "-ofa -i testFiles/random1.fasta.3.bed", "-ofa -i testFiles/random1.fasta.4.bed", "-ofa -i testFiles/random1.fasta.5.bed"}},
41 |         {{"random2.noseq.gfa"}, {""}},
42 |         {{"random1.gfa2"}, {"-k testFiles/random1.gfa2.instructions.sak"}},
43 |         {{"random4.fasta"}, {""}},
44 |         {{"random5.findovl.gfa"}, {"--discover-terminal-overlaps 3 -ogfa"}},
45 |         {{"random6.circular.gfa"}, {""}}
46 |         
47 |     //  {{set of test file paths}, {list of command line args to run with}}
48 |     };
49 | 
50 |     const std::set<std::string> exclude {"agp", "sak"}; // extensions that are never run as inputs
51 | 
52 |     for(const std::string &file : list_dir("testFiles")) { // pass 1: extension-based tests
53 |         std::string ext = getFileExt(file);
54 |         if(exclude.count(ext)) continue;
55 |         for(auto pair : ext_args) {
56 |             if(!pair.first.count(ext)) continue; // this argument set does not apply to the file's extension
57 |             for(auto args : pair.second) {
58 |                 genTest(exePath, file, args); // NOTE(review): genTest advances a counter on each call, so call order likely determines which test gets which number; confirm in validate.h
59 |             }
60 |         }
61 |     }
62 | 
63 |     std::fstream fstream;
64 |     for(const auto &pair : file_args) { // pass 2: file-specific tests
65 |         for(const std::string &file : pair.first) {
66 |             fstream.open("testFiles/"+file); // opened only to check that the test file exists
67 |             if(!fstream) continue; // NOTE(review): the failed stream state is not cleared before the next open(); confirm later files are still detected
68 |             fstream.close();
69 |             for(const std::string &args : pair.second) {
70 |                 genTest(exePath, file, args);
71 |             }
72 |         }
73 |     }
74 | 
75 |     std::exit(EXIT_SUCCESS);
76 | }
77 | 


--------------------------------------------------------------------------------
/src/input.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <string>
  3 | 
  4 | #include <istream>
  5 | #include <fstream>
  6 | #include <sstream>
  7 | 
  8 | #include <parallel-hashmap/phmap.h>
  9 | 
 10 | #include "log.h"
 11 | #include "global.h"
 12 | #include "uid-generator.h"
 13 | 
 14 | #include "bed.h"
 15 | #include "struct.h"
 16 | #include "functions.h"
 17 | 
 18 | #include "gfa-lines.h"
 19 | #include "gfa.h"
 20 | #include "sak.h"
 21 | 
 22 | #include "stream-obj.h"
 23 | 
 24 | #include "input-agp.h"
 25 | #include "input-filters.h"
 26 | #include "input-gfa.h"
 27 | #include "input.h"
 28 | 
 29 | void Input::load(UserInputGfastats userInput) { // Stores a copy of the user options in this object; read() works from that copy
 30 |     
 31 |     this->userInput = userInput;
 32 |     
 33 | }
 34 |     
 35 | void Input::read(InSequences& inSequences) { // Reads the configured inputs (SAK, BED lists, sequences, AGP) into inSequences, then post-processes and sorts it
 36 |     
 37 |     if (userInput.inSequence.empty()) {return;} // nothing is read unless an input sequence path is set
 38 |     
 39 |     threadPool.init(maxThreads); // initialize threadpool
 40 | 
 41 |     if (!userInput.inSak.empty() || userInput.pipeType == 'k') { // SAK instructions given as a file or through the pipe
 42 |         
 43 |         StreamObj streamObj;
 44 |         
 45 |         stream = streamObj.openStream(userInput, 'k');
 46 |         
 47 |         SAK sak; // create a new swiss army knife
 48 |         
 49 |         while (getline(*stream, line)) { // one instruction per line
 50 |             
 51 |             std::istringstream iss(line); // NOTE(review): iss is never read in this loop
 52 |             
 53 |             instructions.push_back(sak.readInstruction(line)); // use the swiss army knife to read the instruction
 54 |             
 55 |         }
 56 |         
 57 |         lg.verbose("Finished reading SAK instructions");
 58 |         
 59 |     }
 60 |     
 61 |     if (!userInput.inBedInclude.empty() || userInput.pipeType == 'i') { // BED include list, stored in userInput
 62 |         
 63 |         StreamObj streamObj;
 64 |         stream = streamObj.openStream(userInput, 'i');
 65 |         
 66 |         while (getline(*stream, line)) {
 67 |             
 68 |             uint64_t begin = 0, end = 0;
 69 |             std::istringstream iss(line);
 70 |             iss >> bedHeader >> begin >> end; // columns: header, begin, end
 71 |             userInput.bedIncludeList.pushCoordinates(bedHeader, begin, end);
 72 |         }
 73 |         lg.verbose("Finished reading BED include list");
 74 |     }
 75 |     
 76 |     BedCoordinates bedExcludeList; // unlike the include list, the exclude list is local to this call
 77 |     
 78 |     if (!userInput.inBedExclude.empty() || userInput.pipeType == 'e') { // BED exclude list
 79 |         
 80 |         StreamObj streamObj;
 81 |         stream = streamObj.openStream(userInput, 'e');
 82 |         
 83 |         while (getline(*stream, line)) {
 84 |             
 85 |             uint64_t begin = 0, end = 0;
 86 |             std::istringstream iss(line);
 87 |             iss >> bedHeader >> begin >> end; // same column layout as the include list
 88 |             
 89 |             bedExcludeList.pushCoordinates(bedHeader, begin, end);
 90 |         }
 91 |         lg.verbose("Finished reading BED exclude list");
 92 |     }
 93 |     
 94 |     if (!userInput.inSequence.empty() || userInput.pipeType == 'f') { // NOTE(review): inSequence is always non-empty here because of the early return at the top
 95 |         
 96 |         StreamObj streamObj;
 97 |         
 98 |         stream = streamObj.openStream(userInput, 'f');
 99 |         
100 |         if (stream) {
101 |             
102 |             switch (stream->peek()) { // the first character selects the parser: '>' FASTA records, '@' four-line records with quality, anything else goes to readGFA
103 |                     
104 |                 case '>': {
105 |                     
106 |                     stream->get(); // consume the leading '>' so the first getline starts at the header text
107 |                     
108 |                     while (getline(*stream, newLine)) { // newLine holds the header line of the next record
109 |                         
110 |                         if(userInput.bedIncludeList.size() - bedExcludeList.size() != 0 && userInput.bedIncludeList.size() - bedExcludeList.size() == inSequences.getPathN()) { // we have all the sequences needed
111 |                             lg.verbose("Found all sequences, stop streaming input");
112 |                             break;
113 |                         }
114 |                         size_t spacePos = newLine.find(" "); // header is the text before the first space, the rest is the comment
115 |                         seqHeader = newLine.substr(0, spacePos);
116 |                         if (spacePos != std::string::npos)
117 |                             seqComment = newLine.substr(spacePos + 1);
118 |                         else
119 |                             seqComment.clear();
120 |                         
121 |                         std::string* inSequence = new std::string; // NOTE(review): raw allocation handed to includeExcludeSeq; ownership is not visible here
122 |                         getline(*stream, *inSequence, '>'); // read up to the next '>' (or end of stream); line breaks are kept in the string
123 |                         lg.verbose("Individual fasta sequence read");
124 |                         
125 |                         Sequence* sequence = includeExcludeSeq(seqHeader, seqComment, inSequence, userInput.bedIncludeList, bedExcludeList);
126 |                         
127 |                         if (sequence != NULL) { // a NULL result is not appended
128 |                             sequence->seqPos = seqPos; // remember the order
129 |                             inSequences.appendSequence(sequence, userInput.hc_cutoff);
130 |                             seqPos++;
131 |                         }
132 |                     }
133 |                     break;
134 |                 }
135 |                 case '@': {
136 |                     
137 |                     while (getline(*stream, newLine)) { // file input
138 |                         
139 |                         if(userInput.bedIncludeList.size() - bedExcludeList.size() != 0 && userInput.bedIncludeList.size() - bedExcludeList.size() == inSequences.getPathN()) { // we have all the sequences needed
140 |                             lg.verbose("Found all sequences, stop streaming input");
141 |                             break;
142 |                         
143 |                         }
144 |                         newLine.erase(0, 1); // drop the first character of the header line (the '@')
145 |                         size_t spacePos = newLine.find(" ");
146 |                         seqHeader = newLine.substr(0, spacePos);
147 |                         if (spacePos != std::string::npos)
148 |                             seqComment = newLine.substr(spacePos + 1);
149 |                         else
150 |                             seqComment.clear();
151 |                         
152 |                         std::string* inSequence = new std::string;
153 |                         getline(*stream, *inSequence); // second line of the record: the sequence
154 |                         
155 |                         getline(*stream, newLine); // third line of the record is read and discarded
156 |                         
157 |                         std::string* inSequenceQuality = new std::string;
158 |                         getline(*stream, *inSequenceQuality); // fourth line of the record: the quality string
159 | 
160 |                         Sequence* sequence = includeExcludeSeq(seqHeader, seqComment, inSequence, userInput.bedIncludeList, bedExcludeList, inSequenceQuality);
161 |                         
162 |                         if (sequence != NULL) {
163 |                             
164 |                             sequence->seqPos = seqPos; // remember the order
165 |                         
166 |                             inSequences.appendSequence(sequence, userInput.hc_cutoff);
167 |                             
168 |                             seqPos++;
169 |                             
170 |                         }
171 |                         
172 |                     }
173 |                     
174 |                     break;
175 |                     
176 |                 }
177 |                 default: {
178 |                     
179 |                     readGFA(inSequences, userInput, stream, &bedExcludeList); // every other first character is handed to the GFA reader
180 |                     
181 |                 }
182 |                 
183 |             }
184 |             
185 |             lg.verbose("End of file");
186 |                 
187 |         }else{
188 | 
189 |             fprintf(stderr, "Stream not successful: %s", userInput.inSequence.c_str()); // NOTE(review): the message has no trailing newline
190 |             exit(1);
191 | 
192 |         }
193 |         
194 |     }
195 |     
196 |     jobWait(threadPool); // presumably blocks until the jobs queued on the pool have finished; confirm in jobWait
197 |     
198 |     inSequences.sortSegmentsByOriginal();
199 |     
200 |     if (userInput.rmGaps_flag)
201 |         inSequences.removeTerminalGaps();
202 |     
203 |     if (userInput.extractContigs_flag) { // drop gaps and paths; paths are rediscovered just below
204 |         
205 |         inSequences.clearGaps();
206 |         inSequences.clearPaths();
207 |         
208 |     }
209 |     
210 |     if (userInput.extractContigs_flag || userInput.discoverPaths_flag)
211 |         inSequences.discoverPaths();
212 |     
213 |     if (userInput.terminalOvlLen != 0) // 0 disables terminal overlap discovery
214 |         inSequences.discoverTerminalOverlaps(userInput.terminalOvlLen);
215 |     
216 |     if (!instructions.empty()) { // run the SAK instructions collected at the top, in input order
217 |         
218 |         lg.verbose("\nStarted instruction execution");
219 |     
220 |         SAK sak; // create a new swiss army knife
221 |         
222 |         for (Instruction instruction : instructions) { // execute swiss army knife instructions
223 |             
224 |             sak.executeInstruction(inSequences, instruction);
225 |             lg.verbose(instruction.action + " instruction executed");
226 |             
227 |         }
228 |     
229 |     }
230 |     
231 |     if (!userInput.inAgp.empty() || userInput.pipeType == 'a') // the AGP is applied after the SAK instructions
232 |         readAgp(inSequences, userInput);
233 |     
234 |     if (userInput.sortType == "ascending") { // sortType is either a keyword or the path of a file listing headers
235 |         inSequences.sortPathsByNameAscending();
236 |     }else if (userInput.sortType == "descending") {
237 |         inSequences.sortPathsByNameDescending();
238 |     }else if (userInput.sortType == "largest") {
239 |         inSequences.sortPathsBySize(0);
240 |     }else if (userInput.sortType == "smallest") {
241 |         inSequences.sortPathsBySize(1);
242 |     }else if (userInput.sortType != "none" && ifFileExists(userInput.sortType.c_str())){ // any other value is tried as a file name
243 |             
244 |         stream = std::make_unique<std::ifstream>(std::ifstream(userInput.sortType));
245 |         
246 |         std::string header;
247 |         std::vector<std::string> headerList;
248 |         
249 |         while (getline(*stream, line)) { // read the file to vector
250 |             
251 |             std::istringstream iss(line);
252 |             iss >> header; // only the first whitespace-delimited token of each line is used
253 |             
254 |             headerList.push_back(header);
255 |             
256 |         }
257 |         
258 |         inSequences.sortPathsByList(headerList);
259 |         
260 |     }else if(userInput.inAgp.empty() && !(userInput.pipeType == 'a')){ // fallback when no sort option matched and no AGP was supplied
261 |         inSequences.sortPathsByOriginal();
262 |     }
263 |     
264 |     inSequences.updateStats();
265 |     
266 |     threadPool.join();
267 |     
268 | }
269 | 


--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
  1 | //
  2 | //fastats.cpp
  3 | //
  4 | //Created by Giulio Formenti on 12/17/21.
  5 | //
  6 | 
  7 | #include "main.h"
  8 | 
  9 | std::string version = "1.3.11"; // program version string
 10 | 
 11 | //global
 12 | std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now(); // immediately start the clock when the program is run
 13 | 
 14 | int verbose_flag; // set to 1 by the --verbose long option
 15 | Log lg; // logger object; the sources call lg.verbose() on it
 16 | std::vector<Log> logs;
 17 | int tabular_flag; // presumably set by -t/--tabular; the handler is outside this view
 18 | 
 19 | int maxThreads = 0; // passed to threadPool.init() in Input::read
 20 | std::mutex mtx;
 21 | ThreadPool<std::function<bool()>> threadPool; // pool of jobs that return bool
 22 | 
 23 | UserInputGfastats userInput; // initialize input object
 24 | 
 25 | int main(int argc, char **argv) {
 26 |     
 27 |     short int c; // optarg
 28 |     short unsigned int pos_op = 1; // optional arguments
 29 |     unsigned long long int gSize = 0; // expected genome size, with 0 NG/LG* statistics are not computed
 30 |     
 31 |     char bedOutType = 'a'; // default output type with bed flag (agp)
 32 |     bool isPipe = false; // to check if input is from pipe
 33 |     
 34 |     if (argc == 1) { // gfastats with no arguments
 35 |         printf("gfastats input.[fasta|fastq|gfa][.gz] [expected genome size] [header[:start-end]]\n-h for additional help.\n");
 36 |         exit(EXIT_SUCCESS);
 37 |     }
 38 |     static struct option long_options[] = { // struct mapping long options
 39 |         {"input-sequence", required_argument, 0, 'f'},
 40 |         
 41 |         {"threads", required_argument, 0, 'j'},
 42 |         
 43 |         {"agp-to-path", required_argument, 0, 'a'}, // agp to path conversion
 44 |         {"swiss-army-knife", required_argument, 0, 'k'}, // the swiss army knife
 45 |         {"remove-terminal-gaps", no_argument, &userInput.rmGaps_flag, 1}, // this remove all gap edges at the end of sequences
 46 |         {"homopolymer-compress", required_argument, 0, 0},
 47 |         {"discover-paths", no_argument, &userInput.discoverPaths_flag, 1},
 48 |         {"discover-terminal-overlaps", optional_argument, 0, 0},
 49 |         {"sort", required_argument, 0, 0},
 50 |         {"extract-contigs", no_argument, &userInput.extractContigs_flag, 1},
 51 |         
 52 |         {"include-bed", required_argument, 0, 'i'},
 53 |         {"exclude-bed", required_argument, 0, 'e'},
 54 |  
 55 |         {"out-format", required_argument, 0, 'o'},
 56 |         {"line-length", required_argument, 0, 0},
 57 |         {"no-sequence", no_argument, &userInput.noSequence, 1},
 58 |         {"out-sequence", no_argument, &userInput.outSequence_flag, 1},
 59 |         {"out-size", required_argument, 0, 's'},
 60 |         {"out-coord", required_argument, 0, 'b'},
 61 |         {"out-bubbles", no_argument, &userInput.outBubbles_flag, 1},
 62 |         
 63 |         {"stats", no_argument, &userInput.stats_flag, 1},
 64 |         {"segment-report", no_argument, &userInput.segmentReport_flag, 1},
 65 |         {"path-report", no_argument, &userInput.pathReport_flag, 1},
 66 |         {"nstar-report", no_argument, &userInput.nstarReport_flag, 1},
 67 |         {"tabular", no_argument, 0, 't'},
 68 |         {"locale", required_argument, 0, 0},
 69 |         
 70 |         {"verbose", no_argument, &verbose_flag, 1},
 71 |         {"cmd", no_argument, &userInput.cmd_flag, 1},
 72 |         {"version", no_argument, 0, 'v'},
 73 |         {"help", no_argument, 0, 'h'},
 74 |         
 75 |         {0, 0, 0, 0}
 76 |     };
 77 |     
 78 |     while (1) { // loop through argv
 79 |         
 80 |         int option_index = 0;
 81 |         int curind = optind;
 82 |         c = getopt_long(argc, argv, "-:a:b:e:f:i:j:k:o:s:tvh",
 83 |                         long_options, &option_index);
 84 | 
 85 |         if (optind < argc && !isPipe) // if pipe wasn't assigned already
 86 |             isPipe = isDash(argv[optind]) ? true : false; // check if the argument to the option is a '-' and set it as pipe input
 87 |         
 88 |         if (optarg != nullptr && !isPipe) // case where pipe input is given as positional argument (input sequence file)
 89 |             isPipe = isDash(optarg) ? true : false;
 90 | 
 91 |         if (c == -1) // exit the loop if run out of options
 92 |             break;
 93 |         
 94 |         switch (c) {
 95 |             case '?':
 96 |                 if (optopt)
 97 |                     printf("Unrecognized short option (%c).\n", optopt);
 98 |                 else
 99 |                     printf("Unrecognized long option (%s).\n", argv[curind]);
100 |                 exit(EXIT_FAILURE);
101 |             case ':': // handle options without arguments
102 |                 switch (optopt) { // the command line option last matched
103 |                     case 'b':
104 |                         bedOutType = 'a'; // default bed output is agp is -b option is given without argument
105 |                         userInput.outCoord_flag = 1;
106 |                         break;
107 |                         
108 |                     case 's':
109 |                         bedOutType = 's'; // default size output is scaffold is -s option is given without argument
110 |                         userInput.outSize_flag = 1;
111 |                         break;
112 |                         
113 |                     case 'o':
114 |                         userInput.outFiles.push_back("fasta"); // default output is fasta if -o option is given without argument
115 |                         userInput.outFile_flag = 1;
116 |                         break;
117 |                         
118 |                     default:
119 |                         fprintf(stderr, "option -%c is missing a required argument\n", optopt);
120 |                         return EXIT_FAILURE;
121 |                 }
122 |                 break;
123 |             default: // handle positional arguments
124 |                 if (pos_op == 1) { // only one positional argument given
125 |                     
126 |                     if (isPipe && userInput.pipeType == 'n') // check whether input is from pipe and that pipe input was not already set
127 |                         userInput.pipeType = 'f'; // pipe input is a sequence
128 |                     else{ // input is a regular file
129 |                         ifFileExists(optarg);
130 |                         userInput.inSequence = optarg;
131 |                     }
132 |                     pos_op++;
133 |                     
134 |                 }else if (pos_op == 2 || pos_op == 3) { // if >1 positional argument, check what additional positional arguments are present
135 |                     
136 |                     if (isInt(optarg)) { // if the positional argument is a number, it is likely the expected genome size
137 |                         
138 |                         gSize = atoll(optarg); pos_op++;
139 |                         
140 |                     }else{ // else it is an include argument
141 |                         
142 |                         std::tuple<std::string, uint64_t, uint64_t> coordinate = parseCoordinate(std::string(optarg));
143 |                         userInput.bedIncludeList.pushCoordinates(std::get<0>(coordinate), std::get<1>(coordinate), std::get<2>(coordinate));
144 |                         pos_op++;
145 |                         
146 |                     }
147 |                 }else{
148 |                     printf("Error: too many positional arguments (%s).\n",optarg);
149 |                     exit(EXIT_FAILURE);
150 |                 }
151 |                 break;
152 |             case 0: // case for long options without short options
153 |                 
154 |                 if (strcmp(long_options[option_index].name,"discover-terminal-overlaps") == 0) {
155 |                     
156 |                     if (optarg == NULL && optind < argc && argv[optind][0] != '-')
157 |                         optarg = argv[optind++];
158 |                     
159 |                     if (optarg != NULL)
160 |                         userInput.terminalOvlLen = atoi(optarg);
161 |                     else
162 |                         userInput.terminalOvlLen = 1000;
163 |                 }
164 |                 if (strcmp(long_options[option_index].name,"line-length") == 0)
165 |                     userInput.splitLength = atoi(optarg);
166 |                 
167 |                 if (strcmp(long_options[option_index].name,"sort") == 0) {
168 | 
169 |                     std::vector<std::string> options {"none", "ascending", "descending", "largest", "smallest"};
170 |                             
171 |                     if (std::find(options.begin(), options.end(), optarg) != options.end() || ifFileExists(optarg))
172 |                         userInput.sortType = optarg;
173 |                     else{
174 |                         printf("Error: unrecognized sorting option (%s).\n", optarg);
175 |                         exit(1);
176 |                     }
177 |                 }
178 |                 if(strcmp(long_options[option_index].name,"homopolymer-compress") == 0) {
179 |                     userInput.hc_cutoff = atoi(optarg);
180 |                     userInput.stats_flag = 1;
181 |                 }
182 |                 if (strcmp(long_options[option_index].name,"locale") == 0) {
183 |                     
184 |                     setlocale(LC_ALL, optarg);
185 |                     std::cout.imbue(std::locale(optarg));
186 |                     std::locale::global(std::locale(optarg));
187 |                     userInput.stats_flag = 1;
188 |                 }
189 |                 break;
190 |             case 'a': // agp to paths
191 |                 
192 |                 if (isPipe && userInput.pipeType == 'n') // check whether input is from pipe and that pipe input was not already set
193 |                     userInput.pipeType = 'a'; // pipe input is agp
194 |                 else{ // input is a regular file
195 |                     ifFileExists(optarg);
196 |                     userInput.inAgp = optarg;
197 |                 }
198 |                 userInput.stats_flag = 1;
199 |                 break;
200 |             case 'b': // output bed type (agp, contig, gaps)
201 |                 bedOutType = *optarg;
202 |                 userInput.outCoord_flag = 1;
203 |                 break;
204 |             case 'e': // bed exclude
205 |                 
206 |                 if (isPipe && userInput.pipeType == 'n') // check whether input is from pipe and that pipe input was not already set
207 |                     userInput.pipeType = 'e'; // pipe input is an exclude bed
208 |                 else{ // input is a regular file
209 |                     ifFileExists(optarg);
210 |                     userInput.inBedExclude = optarg;
211 |                 }
212 |                 userInput.stats_flag = 1;
213 |                 break;
214 |             case 'f': // input sequence
215 |                 
216 |                 if (isPipe && userInput.pipeType == 'n') // check whether input is from pipe and that pipe input was not already set
217 |                     userInput.pipeType = 'f'; // pipe input is a sequence
218 |                 else{ // input is a regular file
219 |                     
220 |                     ifFileExists(optarg);
221 |                     userInput.inSequence = optarg;
222 |                     userInput.stats_flag = 1;
223 |                 }
224 |                 break;
225 |             case 'i': // bed include
226 |                 
227 |                 if (isPipe && userInput.pipeType == 'n') // check whether input is from pipe and that pipe input was not already set
228 |                     userInput.pipeType = 'i'; // pipe input is an include bed
229 |                 else{ // input is a regular file
230 |                     ifFileExists(optarg);
231 |                     userInput.inBedInclude = optarg;
232 |                 }
233 |                 userInput.stats_flag = 1;
234 |                 break;
235 |             case 'j': // max threads
236 |                 maxThreads = atoi(optarg);
237 |                 userInput.stats_flag = 1;
238 |                 break;
239 |             case 'k': // the swiss army knife
240 |                 
241 |                 if (isPipe && userInput.pipeType == 'n') // check whether input is from pipe and that pipe input was not already set
242 |                     userInput.pipeType = 'k'; // pipe input is a set of instructions for the swiss army knife
243 |                 else{ // input is a regular file
244 |                     ifFileExists(optarg);
245 |                     userInput.inSak = optarg;
246 |                 }
247 |                 userInput.stats_flag = 1;
248 |                 break;
249 |             case 'o': // handle output (file or stdout)
250 |                 
251 |                 userInput.outFile_flag = 1;
252 | 
253 |                 if (isPipe && userInput.pipeType == 'n') // check whether input is from pipe and that pipe input was not already set
254 |                     userInput.pipeType = 'r'; // pipe input is a sequence
255 |                 else{ // outputs are regular files
256 |                     
257 |                     optind--;
258 |                     
259 |                     std::string file;
260 |                     uint8_t i = 0;
261 | 
262 |                     for( ;optind < argc && !isInt(argv[optind]); optind++) {
263 |                         
264 |                         if (i > 0 && *argv[optind] == '-')
265 |                             break;
266 |                         
267 |                         file = argv[optind];
268 |                         
269 |                         if (file.find("-o") != std::string::npos)
270 |                             file.erase(0, 2); // handle file name attached to option
271 |                         
272 |                         userInput.outFiles.push_back(file);
273 |                         ++i;
274 |                     }
275 |                     userInput.stats_flag = 1;
276 |                 }
277 |                 break;
278 |             case 's': // output size of features
279 |                 bedOutType = *optarg;
280 |                 userInput.outSize_flag = 1;
281 |                 break;
282 |             case 't': // tabular output
283 |                 tabular_flag = 1;
284 |                 break;
285 |             case 'v': // software version
286 |                 printf("gfastats v%s\n", version.c_str());
287 |                 printf("Giulio Formenti giulio.formenti@gmail.com\n");
288 |                 exit(EXIT_SUCCESS);
289 |             case 'h': // help
290 |                 printf("gfastats input.[fasta|fastq|gfa][.gz] [expected genome size] [header[:start-end]]\n");
291 |                 printf("genome size: estimated genome size for NG* statistics (optional).\n");
292 |                 printf("header: target specific sequence by header, optionally with coordinates (optional).\n");
293 |                 printf("\nOptions:\n");
294 |                 printf("\t-a --agp-to-path <file> converts input agp to path and replaces existing paths.\n");
295 |                 printf("\t-b --out-coord a|s|c|g generates bed coordinates of given feature (agp|scaffolds|contigs|gaps default:agp).\n");
296 |                 printf("\t-e --exclude-bed <file> opposite of --include-bed. They can be combined (no coordinates).\n");
297 |                 printf("\t-f --input-sequence <file> input file (fasta, fastq, gfa [.gz]). Also as first positional argument.\n");
298 |                 printf("\t-h --help print help and exit.\n");
299 |                 printf("\t-i --include-bed <file> generates output on a subset list of headers or coordinates in 0-based bed format.\n");
300 |                 printf("\t-k --swiss-army-knife <file> set of instructions provided as an ordered list.\n");
301 |                 printf("\t-j --threads <n> numbers of threads (default: max).\n");
302 |                 printf("\t-o --out-format fasta|fastq|gfa[.gz] outputs selected sequences. If more than the extension is provided the output is written to the specified file (e.g. out.fasta.gz). Multiple file outputs can be given at once.\n");
303 |                 printf("\t-s --out-size s|c|g  generates size list of given feature (scaffolds|contigs|gaps default:scaffolds).\n");
304 |                 printf("\t-t --tabular output in tabular format.\n");
305 |                 printf("\t-v --version software version.\n\n");
306 |                 printf("\t--cmd print $0 to stdout.\n");
307 |                 printf("\t--remove-terminal-gaps removes leading/trailing Ns from scaffolds.\n");
308 |                 printf("\t--discover-paths prototype to induce paths from input.\n");
309 |                 printf("\t--discover-terminal-overlaps <n> append perfect terminal overlaps of minimum length n (default: 1000).\n");
310 |                 printf("\t--homopolymer-compress <n> compress all the homopolymers longer than n in the input.\n");
311 |                 printf("\t--line-length <n> specifies line length in when output format is fasta. Default has no line breaks.\n");
312 |                 printf("\t--nstar-report generates full N* and L* statistics.\n");
313 |                 printf("\t--no-sequence do not output the sequence (eg. in gfa).\n");
314 |                 printf("\t--out-sequence reports also the actual sequence (in combination with --seq-report).\n");
315 |                 printf("\t--out-bubbles outputs a potential list of bubbles in the graph.\n");
316 |                 printf("\t--segment-report report statistics for each segment/contig.\n");
317 |                 printf("\t--path-report report statistics for each path/scaffold.\n");
318 |                 printf("\t--sort ascending|descending|largest|smallest|file sort sequences according to input. Ascending/descending used the sequence/path header.\n");
319 |                 printf("\t--stats report summary statistics (default).\n");
320 |                 printf("\t--verbose verbose output.\n");
321 |                 printf("\t--locale set a different locale, for instance to use , for thousand separators use en_US.UTF-8.\n");
322 |                 printf("\nAll input files can be piped from stdin using '-'.\n");
323 |                 exit(EXIT_SUCCESS);
324 |         }
325 |         if    (argc == 2 || // handle various cases in which the output should include summary stats
326 |               (argc == 3 && pos_op == 2) ||
327 |               (argc == 4 && pos_op == 3) ||
328 |                userInput.nstarReport_flag ||
329 |                userInput.discoverPaths_flag) {
330 |             
331 |             userInput.stats_flag = 1; // default mode 'stats'
332 |         }
333 |     }
334 |     lg.verbose("Input variables assigned");
335 |     
336 |     if (userInput.cmd_flag) { // print command line
337 |         for (unsigned short int arg_counter = 0; arg_counter < argc; arg_counter++) {
338 |             printf("%s ", argv[arg_counter]);
339 |         }
340 |         printf("\n");
341 |     }
342 | 
343 |     Input in;
344 |     
345 |     in.load(userInput); // load user input
346 |     lg.verbose("Loaded user input");
347 |     
348 |     InSequences inSequences; // initialize sequence collection object
349 |     lg.verbose("Sequence object generated");
350 |     
351 |     in.read(inSequences); // read input content to inSequences container
352 |     lg.verbose("Finished reading input files");
353 |     if(verbose_flag) {std::cerr<<"\n";};
354 |     
355 |     Report report;
356 |     
357 |     if (userInput.segmentReport_flag || userInput.outSequence_flag) { // report results for each sequence
358 |         userInput.stats_flag = 0;
359 |         report.segmentReport(inSequences, userInput.outSequence_flag);
360 |     }
361 |     if (userInput.pathReport_flag) { // report results for each sequence
362 |         userInput.stats_flag = 0;
363 |         report.pathReport(inSequences);
364 |     }
365 |     if (userInput.outFile_flag) { // output sequences to file or stdout
366 |         userInput.stats_flag = 0;
367 |         for (std::string file : userInput.outFiles)
368 |             report.writeToStream(inSequences, file, userInput);
369 |     }
370 |     if (userInput.outCoord_flag || userInput.outSize_flag) { // output coordinates
371 |         userInput.stats_flag = 0;
372 |         report.outCoord(inSequences, bedOutType, userInput.outSize_flag);
373 |     }
374 |     if (userInput.stats_flag) { // output summary statistics
375 |         report.reportStats(inSequences, gSize, userInput.outBubbles_flag);
376 |     }
377 |     if (userInput.nstarReport_flag) { // output full N/L* statistics
378 |         report.nstarReport(inSequences, gSize);
379 |     }
380 |     lg.verbose("Generated output");
381 |     exit(EXIT_SUCCESS);
382 | }
383 | 


--------------------------------------------------------------------------------
/src/validate.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | USAGE:
  3 | test <path to test folder or files>
  4 | 
  5 | EXAMPLE:
  6 | build/bin/gfastats-validate validateFiles
  7 | build/bin/gfastats-validate validateFiles/random1.fasta0.tst
  8 | 
  9 | 
 10 | */
 11 | 
 12 | #include <algorithm>
 13 | #include <fstream>
 14 | #include <string>
 15 | #include <dirent.h>
 16 | #include <vector>
 17 | #include <unistd.h>
 18 | #include <limits.h>
 19 | #include <map>
 20 | #include <set>
 21 | #include <regex>
 22 | 
 23 | #include <validate.h>
 24 | 
 25 | bool printCommand = false;
 26 | const std::string tmp = "tmp.txt";
 27 | const std::string err = "err.txt";
 28 | bool pass = true;
 29 | 
 30 | void printFAIL(const char *m1="", const char *m2="", const char *m3="", const char *m4="") {
 31 |     pass = false;
 32 |     std::cout << "\033[0;31mFAIL\033[0m " << m1 << " " << m2 << " " << m3 << " " << m4 << std::endl;
 33 | }
 34 | 
 35 | void printPASS(const char *m1="", const char *m2="", const char *m3="", const char *m4="") {
 36 |     std::cout << "\033[0;32mPASS\033[0m " << m1 << " " << m2 << " " << m3 << " " << m4 << std::endl;
 37 | }
 38 | 
 39 | int main(int argc, char **argv) {
 40 |     if (argc == 1) { // test with no arguments
 41 |         std::cout << "gfastats-validate <path to test folder and/or files>" << std::endl;
 42 |         exit(EXIT_SUCCESS);
 43 |     }
 44 | 
 45 |     int opt;
 46 |     while((opt = getopt(argc, argv, "c")) != -1) 
 47 |     {
 48 |         switch(opt) 
 49 |         {
 50 |         case 'c':
 51 |             printCommand = true;
 52 |             break;
 53 |         }
 54 |     }
 55 | 
 56 |     std::set<std::string> input_files;
 57 | 
 58 |     for(int i=1; i<argc; ++i) {
 59 |         get_recursive(argv[i], input_files);
 60 |     }
 61 | 
 62 |     std::string exePath = getExePath(argv[0]);
 63 | 
 64 |     std::string line;
 65 |     std::ifstream istream, exp, actOutput, *expOutput;
 66 |     for(const auto &input_file : input_files) {
 67 | //#ifdef _WIN32
 68 | //        if(input_file.find(".gz") != std::string::npos)
 69 | //            continue;
 70 | //#endif
 71 |         istream.open(input_file);
 72 |         if(!istream) {
 73 |             printFAIL(input_file.c_str(), "couldn't open test file");
 74 |             continue;
 75 |         }
 76 |         std::getline(istream, line);
 77 |         line.erase(remove(line.begin(), line.end(), '\r'), line.end());
 78 |         line.erase(remove(line.begin(), line.end(), '\n'), line.end());
 79 | #ifdef _WIN32
 80 |         std::string cmd = "\"\""+exePath+"\""+" "+line+" > "+tmp+" 2>"+err+"\"";
 81 | #else
 82 |         std::string cmd = "\""+exePath+"\""+" "+line+" > "+tmp+" 2>"+err;
 83 | #endif
 84 |         if(printCommand) std::cout << cmd << std::endl;
 85 | 
 86 |         if(system(cmd.c_str()) != EXIT_SUCCESS) {
 87 |             printFAIL(input_file.c_str(), "runtime error");
 88 |             istream.close();
 89 |             std::ifstream errfstream;
 90 |             errfstream.open(err);
 91 |             if(!errfstream) {
 92 |                 std::cout << "    error: couldn't open err.txt" << std::endl;
 93 |                 continue;
 94 |             }
 95 |             for(std::string line; std::getline(errfstream, line);) {
 96 |                 std::cout << "    " << line.c_str() << std::endl;
 97 |             }
 98 |             errfstream.close();
 99 |             continue;
100 |         }
101 | 
102 | 
103 |         std::getline(istream, line);
104 |         exp.open(line);
105 |         if(exp) {
106 |             expOutput = &exp; // seperate expected output file
107 |         } else if(line == "embedded") {
108 |             expOutput = &istream;
109 |         } else {
110 |             printFAIL("couldn't open expected output");
111 |             continue;
112 |         }
113 | 
114 |         actOutput.open(tmp);
115 |         std::string line;
116 |         std::getline(*expOutput, line);
117 |         if(line == "+++Summary+++: ") {
118 |             std::getline(actOutput, line);
119 |             std::set<std::string> exp_summary, act_summary;
120 |             while(!actOutput.eof()) {
121 |                 std::getline(actOutput, line);
122 |                 act_summary.insert(line);
123 |             }
124 |             while(!expOutput->eof()) {
125 |                 std::getline(*expOutput, line);
126 |                 exp_summary.insert(line);
127 |             }
128 |             std::set<std::string> additions, missings;
129 |             for(const auto &entry : exp_summary) {
130 |                 if(act_summary.count(entry) == 0) {
131 |                     missings.insert(entry);
132 |                 }
133 |             }
134 |             for(const auto &entry : act_summary) {
135 |                 if(exp_summary.count(entry) == 0) {
136 |                     additions.insert(entry);
137 |                 }
138 |             }
139 | 
140 |             actOutput.close();
141 |             exp.close();
142 |             istream.close();
143 | 
144 |             if(additions.size() > 0 || missings.size() > 0) {
145 |                 printFAIL(input_file.c_str(), "expected output did not match actual output");
146 |                 std::cout << "additions:" << std::endl;
147 |                 for(const auto &addition : additions) {
148 |                     std::cout << addition << std::endl;
149 |                 }
150 |                 std::cout << "missing:" << std::endl;
151 |                 for(const auto &missing : missings) {
152 |                     std::cout << missing << std::endl;
153 |                 }
154 | 
155 |                 continue; // to next validate file
156 |             }
157 |         }
158 |         else {
159 |             std::vector<std::pair<std::string, std::string>> diffs;
160 | 
161 |             std::string l1, l2;
162 |             std::getline(actOutput, l1);
163 |             l2 = line;
164 |             if(l1 != l2) diffs.push_back(std::pair<std::string, std::string>(l1, l2));
165 | 
166 |             while(!actOutput.eof() || !expOutput->eof()) {
167 |                 std::getline(actOutput, l1);
168 |                 std::getline(*expOutput, l2);
169 |                 if(l1 != l2) diffs.push_back(std::pair<std::string, std::string>(l1, l2));
170 |             }
171 | 
172 |             actOutput.close();
173 |             exp.close();
174 |             istream.close();
175 | 
176 |             if(diffs.size() > 0) {
177 |                 printFAIL(input_file.c_str(), "expected output did not match actual output");
178 |                 for(const auto &pair : diffs) {
179 |                     std::cout << "    expected: " << pair.second.c_str() << std::endl << "      actual: " << pair.first.c_str() << std::endl;
180 |                 }
181 |                 continue;
182 |             }
183 |         }
184 | 
185 |         printPASS(input_file.c_str());
186 |     }
187 | 
188 |     if(input_files.size() != 0 && remove(tmp.c_str()) != 0) {
189 |         std::cerr << "error deleting temp file " << tmp.c_str() << std::endl;
190 |     }
191 | 
192 |     exit(pass ? EXIT_SUCCESS : EXIT_FAILURE);
193 | }
194 | 


--------------------------------------------------------------------------------
/testFiles/random1.agp:
--------------------------------------------------------------------------------
 1 | newpath1	1	10	1	N	10	scaffold	yes
 2 | newpath1	11	15	2	W	Header1	2	5	+
 3 | newpath1	16	20	3	N	5	scaffold	yes
 4 | newpath1	21	23	4	W	Header2	1	3	-
 5 | newpath1	24	28	5	N	5	scaffold	yes
 6 | newpath1	29	34	6	W	Header3	4	8	+
 7 | newpath1	35	40	7	N	5	scaffold	yes
 8 | newpath2	1	10	1	N	10	scaffold	yes
 9 | newpath2	11	15	2	W	Header5	3	7	-
10 | newpath2	16	20	3	N	5	scaffold	yes
11 | newpath2	21	35	4	W	Header4	1	15	+
12 | 


--------------------------------------------------------------------------------
/testFiles/random1.comment.sak:
--------------------------------------------------------------------------------
1 | COMMENT	Header3	This is header 3
2 | COMMENT	Header2	This is header 2
3 | COMMENT	Header5	This is header 5
4 | COMMENT	Header4	This is header 4
5 | COMMENT	Header1	This is header 1
6 | 


--------------------------------------------------------------------------------
/testFiles/random1.fasta:
--------------------------------------------------------------------------------
 1 | >Header1 5bp sequence with no gaps and 2 lowercase bases
 2 | CGa
 3 | cT
 4 | >Header2 5bp sequence with internal 1bp non-canonical gap
 5 | CG
 6 | AXT
 7 | >Header3 10bp sequence with internal 4bp and 1bp terminal canonical gap
 8 | TGANA
 9 | TNCTN
10 | >Header4 15bp sequence with start 3bp canonical gap and 3 lowercase bases
11 | NNNTTCC
12 | TcgCACtC
13 | >Header5 15bp sequence with terminal 3bp canonical gap
14 | AACTCGAT
15 | CACGNNN
16 | 


--------------------------------------------------------------------------------
/testFiles/random1.fasta.1.bed:
--------------------------------------------------------------------------------
1 | Header1	0	5
2 | Header2	0	3
3 | Header2	4	5
4 | Header3	0	3
5 | Header3	4	6
6 | Header3	7	9
7 | Header4	2	13
8 | Header5	3	14
9 | 


--------------------------------------------------------------------------------
/testFiles/random1.fasta.2.bed:
--------------------------------------------------------------------------------
1 | Header3
2 | Header4
3 | 


--------------------------------------------------------------------------------
/testFiles/random1.fasta.3.bed:
--------------------------------------------------------------------------------
1 | Header1	4	5
2 | Header4
3 | 


--------------------------------------------------------------------------------
/testFiles/random1.fasta.4.bed:
--------------------------------------------------------------------------------
1 | Header3
2 | Header4	4	6
3 | 


--------------------------------------------------------------------------------
/testFiles/random1.fasta.5.bed:
--------------------------------------------------------------------------------
1 | Header3	4	6	
2 | Header4
3 | 


--------------------------------------------------------------------------------
/testFiles/random1.fasta.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vgl-hub/gfastats/cedd755227fe0d7b6daf3fee932e325af54f3b21/testFiles/random1.fasta.gz


--------------------------------------------------------------------------------
/testFiles/random1.fastq:
--------------------------------------------------------------------------------
 1 | @Header1 5bp sequence with no gaps
 2 | CGacT
 3 | +
 4 | 12345
 5 | @Header2 5bp sequence with internal 1bp non-canonical gap
 6 | CGAXT
 7 | +
 8 | 56789
 9 | @Header3 10bp sequence with internal 4bp and 1bp terminal canonical gap
10 | TGANATNCTN
11 | +
12 | 56789:;<=>
13 | @Header4 15bp sequence with start 3bp canonical gap
14 | NNNTTCCTcgCACtC
15 | +
16 | !!!45:;<=>?5678
17 | @Header5 15bp sequence with terminal 3bp canonical gap
18 | AACTCGATCACGNNN
19 | +
20 | 98765:;<=>?5678
21 | 


--------------------------------------------------------------------------------
/testFiles/random1.fastq.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vgl-hub/gfastats/cedd755227fe0d7b6daf3fee932e325af54f3b21/testFiles/random1.fastq.gz


--------------------------------------------------------------------------------
/testFiles/random1.gfa2:
--------------------------------------------------------------------------------
 1 | H	VN:Z:2.0
 2 | S	Header1.1	5	CGacT
 3 | S	Header2.1	3	CGA
 4 | S	Header2.3	1	T
 5 | S	Header3.1	3	TGA
 6 | S	Header3.3	2	AT
 7 | S	Header3.5	2	CT
 8 | S	Header4.2	12	TTCCTcgCACtC
 9 | S	Header5.1	12	AACTCGATCACG
10 | G	Header2.2	Header2.1+	Header2.3+	1
11 | G	Header3.2	Header3.1+	Header3.3+	1
12 | G	Header3.4	Header3.3+	Header3.5+	1
13 | G	Header3.6	Header3.5+	Header3.5-	1
14 | G	Header4.1	Header4.2+	Header4.2+	3
15 | G	Header5.2	Header5.1+	Header5.1-	3
16 | O	Header1	Header1.1+	5bp sequence with no gaps and 2 lowercase bases
17 | O	Header2	Header2.1+ Header2.2 Header2.3+	5bp sequence with internal 1bp non-canonical gap
18 | O	Header3	Header3.1+ Header3.2 Header3.3+ Header3.4 Header3.5+ Header3.6	10bp sequence with internal 4bp and 1bp terminal canonical gap
19 | O	Header4	Header4.1 Header4.2+	15bp sequence with start 3bp canonical gap and 3 lowercase bases
20 | O	Header5	Header5.1+ Header5.2	15bp sequence with terminal 3bp canonical gap
21 | 


--------------------------------------------------------------------------------
/testFiles/random1.gfa2.instructions.sak:
--------------------------------------------------------------------------------
1 | EXCLUDE	Header3
2 | 


--------------------------------------------------------------------------------
/testFiles/random1.hc.sak:
--------------------------------------------------------------------------------
1 | COMPRESS	Header4.2	1
2 | COMPRESS	Header5.1	1
3 | 


--------------------------------------------------------------------------------
/testFiles/random1.hdc.sak:
--------------------------------------------------------------------------------
1 | COMPRESS	Header4.2	1
2 | DECOMPRESS	Header4.2
3 | COMPRESS	Header5.1	1
4 | DECOMPRESS	Header5.1
5 | 


--------------------------------------------------------------------------------
/testFiles/random1.instructions.sak:
--------------------------------------------------------------------------------
1 | JOIN	Header1+	Header2+	5	newGap1	Scaffold1
2 | JOIN	Header4+	Header5+	5	newGap2	Scaffold2
3 | JOIN	Scaffold1+	Header3+	10	newGap3	FinalScaffold
4 | SPLIT	Header2.1	Header2.3	Scaffold3	Scaffold4
5 | EXCISE	Header3.3	3	newGap4
6 | INVERT	Header5.1
7 | REMOVE	Header1.1
8 | RESIZE	newGap2	10
9 | 


--------------------------------------------------------------------------------
/testFiles/random1.mask.sak:
--------------------------------------------------------------------------------
1 | MASK	Header5	1	3	5
2 | 


--------------------------------------------------------------------------------
/testFiles/random1.rename.sak:
--------------------------------------------------------------------------------
1 | RENAME	Header4	newHeader4
2 | RENAME	Header1	newHeader1
3 | RENAME	Header5	newHeader5
4 | RENAME	Header2	newHeader2
5 | RENAME	Header3	newHeader3
6 | 


--------------------------------------------------------------------------------
/testFiles/random1.rvcp.sak:
--------------------------------------------------------------------------------
1 | RVCP	Header4
2 | RVCP	Header3
3 | 


--------------------------------------------------------------------------------
/testFiles/random2.gfa:
--------------------------------------------------------------------------------
 1 | H	VN:Z:1.2
 2 | S	11	ACCTT	LN:i:5	QL:Z:?@97?
 3 | S	12	TCAAGG	LN:i:6	QL:Z:@6?84@
 4 | S	13	CTTgaTT	LN:i:7	QL:Z:>=?@877
 5 | L	11	+	12	-	4M
 6 | L	12	-	13	+	5M
 7 | L	11	+	13	+	3M
 8 | J	11	+	13	-	5	SC:i:1
 9 | J	13	-	12	+	3	SC:i:1
10 | P	14	11+;13-;12+	5,3
11 | P	15	11+,12-,13+	4M,5M
12 | 


--------------------------------------------------------------------------------
/testFiles/random2.gfa.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vgl-hub/gfastats/cedd755227fe0d7b6daf3fee932e325af54f3b21/testFiles/random2.gfa.gz


--------------------------------------------------------------------------------
/testFiles/random2.gfa2:
--------------------------------------------------------------------------------
 1 | H	VN:Z:2.0
 2 | S	id2	6	TCAAGG
 3 | G	id5	id3+	id4-	5
 4 | S	id3	7	CTTGATT
 5 | G	id6	id1+	id2+	3
 6 | S	id1	5	ACCTT
 7 | S	id4	8	CATGACTC
 8 | E	id10	id1+	id2-	2	4	2	4	3M
 9 | S	id7	9	TGAATGAAA
10 | G	id8	id2+	id3-	2
11 | G	id9	id7+	id7+	5
12 | E	id11	id2-	id1+	2	4	2	4	3M
13 | O	id12	id1+ id6 id2(1:3)+ id8 id3-
14 | O	path1	id1+
15 | O	path2	id2+
16 | O	path3	id3+
17 | 


--------------------------------------------------------------------------------
/testFiles/random2.gfa2.agp:
--------------------------------------------------------------------------------
1 | id13	1	5	1	W	path1	1	5	+
2 | id13	6	8	2	N	3	gap1	yes
3 | id13	9	14	3	W	path2	1	6	+
4 | id13	15	16	4	N	2	gap2	yes
5 | id13	17	23	5	W	path3	1	7	-
6 | 


--------------------------------------------------------------------------------
/testFiles/random2.gfa2.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vgl-hub/gfastats/cedd755227fe0d7b6daf3fee932e325af54f3b21/testFiles/random2.gfa2.gz


--------------------------------------------------------------------------------
/testFiles/random2.noseq.gfa:
--------------------------------------------------------------------------------
 1 | H	VN:Z:1.2
 2 | S	11	*	LN:i:5	QL:Z:?@97?
 3 | S	12	*	LN:i:6	QL:Z:@6?84@
 4 | S	13	*	LN:i:7	QL:Z:>=?@877
 5 | L	11	+	12	-	4M
 6 | L	12	-	13	+	5M
 7 | L	11	+	13	+	3M
 8 | J	11	+	13	-	5	SC:i:1
 9 | J	13	-	12	+	3	SC:i:1
10 | P	14	11+;13-;12+	5,3
11 | P	15	11+,12-,13+	4M,5M
12 | 


--------------------------------------------------------------------------------
/testFiles/random3.sorting.fasta:
--------------------------------------------------------------------------------
1 | >c
2 | ACGT
3 | >d
4 | CGTA
5 | >b
6 | GTAC
7 | >a
8 | TACG
9 | 


--------------------------------------------------------------------------------
/testFiles/random4.fasta:
--------------------------------------------------------------------------------
 1 | >Header1 5bp sequence with no gaps and 2 lowercase bases
 2 | CGacT
 3 | >Header2 5bp sequence with internal 1bp non-canonical gap
 4 | CGAXT
 5 | >Header3 10bp sequence with internal 4bp and 1bp terminal canonical gap
 6 | TGANATNCTN
 7 | >Header4 15bp sequence with start 3bp canonical gap and 3 lowercase bases
 8 | NNNTTCCTcgCACtC
 9 | >Header5 15bp sequence with terminal 3bp canonical gap
10 | AACTCGATCACGNNN
11 | 


--------------------------------------------------------------------------------
/testFiles/random5.findovl.gfa:
--------------------------------------------------------------------------------
1 | H	VN:Z:1.2
2 | S	11	CCGTTCCATGAAGGCCAGAGTTACTTACCGGCCCTTTCCATGCGCGCGCCATAAA	LN:i:55
3 | S	12	GATTTAAGAATATGTTAACGGAGGATTGCACGATCTTCTCTCCTCGTGAGAGAATTTATG	LN:i:60
4 | S	13	AAATCGCATAGCTATGTATTTTGCAGAGGTAGCGACATCTTGACGGGCACTTCACAGATAGTGGG	LN:i:65
5 | J	11	+	13	-	5	SC:i:1
6 | J	13	-	12	+	3	SC:i:1
7 | P	14	11+;13-;12+	5,3
8 | P	15	11+,12-,13+	6M,5M
9 | 


--------------------------------------------------------------------------------
/testFiles/random6.circular.gfa:
--------------------------------------------------------------------------------
 1 | H	VN:Z:1.0
 2 | S	edge_1	*	dp:i:32
 3 | S	edge_2	*	dp:i:21
 4 | S	edge_3	*	dp:i:0
 5 | S	edge_4	*	dp:i:0
 6 | S	edge_5	*	dp:i:2
 7 | S	edge_6	*	dp:i:0
 8 | S	edge_7	*	dp:i:6
 9 | S	edge_8	*	dp:i:20
10 | S	edge_9	*	dp:i:0
11 | L	edge_1	+	edge_1	+	0M	RC:i:0
12 | L	edge_1	-	edge_1	-	0M	RC:i:18
13 | L	edge_2	+	edge_2	+	0M	RC:i:0
14 | L	edge_2	-	edge_2	-	0M	RC:i:0
15 | L	edge_7	+	edge_7	+	0M	RC:i:32
16 | L	edge_7	-	edge_7	-	0M	RC:i:40
17 | P	contig_1	edge_1+	*
18 | P	contig_2	edge_2+	*
19 | P	contig_3	edge_3+	*
20 | P	contig_4	edge_4+	*
21 | P	contig_5	edge_5+	*
22 | P	contig_6	edge_6+	*
23 | P	contig_7	edge_7+	*
24 | P	contig_8	edge_8+	*
25 | P	contig_9	edge_9+	*
26 | 


--------------------------------------------------------------------------------
/validateFiles/README.md:
--------------------------------------------------------------------------------
 1 | # gfastats validation
 2 | 
 3 | Validation files with a .tst extension are used to ensure that gfastats is in a working state after changes to the code.
 4 | A .tst file consists of a single line of command line arguments to run gfastats with, followed by either "embedded" to signify the expected output is in the same file, or a path to a file with the expected output.
 5 | gfastats is run with the given command line arguments and the expected output is compared to the actual output. If they differ, gfastats-validate will return `EXIT_FAILURE`; the differences can be printed with the `-v` option. The exact commands being run to test output can be seen with the `-c` option.
 6 | 
 7 | Run gfastats-validate with individual .tst files and/or folders as command line arguments; folders are searched recursively for all .tst files.
 8 | The val.sh and val.bat scripts will run gfastats-validate with all files in "validateFiles/".
 9 | 
10 | Example Usage:
11 | ```
12 | gfastats-validate validateFiles/random1.fasta.3.tst // tests only random1.fasta.3.tst
13 | gfastats-validate validateFiles                     // tests all files in the validateFiles folder
14 | ```
15 | 
16 | Test files can be automatically generated by running `gfastats-generate-tests`, but this should only be done while gfastats is in a working state, with no known bugs or unexpected behaviour.
17 | 
18 | `gfastats-validate` is run automatically with all files in "validateFiles/" on Windows, Ubuntu, and macOS virtual machines through GitHub Actions when any changes are pushed or merged to the main branch.
19 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.10.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fasta -b g
2 | embedded
3 | Header2	3	4
4 | Header3	3	4
5 | Header3	6	7
6 | Header3	9	10
7 | Header4	0	3
8 | Header5	12	15
9 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.11.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta --homopolymer-compress 1 -ofa
 2 | embedded
 3 | >Header1 5bp sequence with no gaps and 2 lowercase bases
 4 | CGacT
 5 | >Header2 5bp sequence with internal 1bp non-canonical gap
 6 | CGANT
 7 | >Header3 10bp sequence with internal 4bp and 1bp terminal canonical gap
 8 | TGANATNCTN
 9 | >Header4 15bp sequence with start 3bp canonical gap and 3 lowercase bases
10 | NNNTCTcgCACtC
11 | >Header5 15bp sequence with terminal 3bp canonical gap
12 | ACTCGATCACGNNN
13 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.3.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta 
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 5
 5 | Total scaffold length: 50
 6 | Average scaffold length: 10.00
 7 | Scaffold N50: 15
 8 | Scaffold auN: 12.00
 9 | Scaffold L50: 2
10 | Largest scaffold: 15
11 | Smallest scaffold: 5
12 | # contigs: 8
13 | Total contig length: 40
14 | Average contig length: 5.00
15 | Contig N50: 12
16 | Contig auN: 8.50
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 6
21 | Total gap length in scaffolds: 10
22 | Average gap length in scaffolds: 1.67
23 | Gap N50 in scaffolds: 3
24 | Gap auN in scaffolds: 2.20
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 3
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 9:14:6:11
29 | GC content %: 50.00
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 6
35 | # paths: 5
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.4.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fasta -s s
2 | embedded
3 | Header1	5
4 | Header2	5
5 | Header3	10
6 | Header4	15
7 | Header5	15
8 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.5.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta -s c
 2 | embedded
 3 | Header1.1	5
 4 | Header2.1	3
 5 | Header2.3	1
 6 | Header3.1	3
 7 | Header3.3	2
 8 | Header3.5	2
 9 | Header4.2	12
10 | Header5.1	12
11 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.6.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fasta -s g
2 | embedded
3 | Header2.2	1
4 | Header3.2	1
5 | Header3.4	1
6 | Header3.6	1
7 | Header4.1	3
8 | Header5.2	3
9 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.7.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta -b a
 2 | embedded
 3 | Header1	1	5	1	W	Header1.1	1	5	+
 4 | Header2	1	3	1	W	Header2.1	1	3	+
 5 | Header2	4	4	2	N	1	Header2.2	yes
 6 | Header2	5	5	3	W	Header2.3	1	1	+
 7 | Header3	1	3	1	W	Header3.1	1	3	+
 8 | Header3	4	4	2	N	1	Header3.2	yes
 9 | Header3	5	6	3	W	Header3.3	1	2	+
10 | Header3	7	7	4	N	1	Header3.4	yes
11 | Header3	8	9	5	W	Header3.5	1	2	+
12 | Header3	10	10	6	N	1	Header3.6	yes
13 | Header4	1	3	1	N	3	Header4.1	yes
14 | Header4	4	15	2	W	Header4.2	1	12	+
15 | Header5	1	12	1	W	Header5.1	1	12	+
16 | Header5	13	15	2	N	3	Header5.2	yes
17 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.78.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta -k testFiles/random1.instructions.sak
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 2
 5 | Total scaffold length: 60
 6 | Average scaffold length: 30.00
 7 | Scaffold N50: 40
 8 | Scaffold auN: 33.33
 9 | Scaffold L50: 1
10 | Largest scaffold: 40
11 | Smallest scaffold: 20
12 | # contigs: 5
13 | Total contig length: 30
14 | Average contig length: 6.00
15 | Contig N50: 12
16 | Contig auN: 10.07
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 6
21 | Total gap length in scaffolds: 30
22 | Average gap length in scaffolds: 5.00
23 | Gap N50 in scaffolds: 10
24 | Gap auN in scaffolds: 7.60
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 10
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 6:11:4:9
29 | GC content %: 50.00
30 | # soft-masked bases: 3
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 7
35 | # paths: 2
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.79.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fasta -ofa -k testFiles/random1.instructions.sak
2 | embedded
3 | >Scaffold2
4 | NNNTTCCTcgCACtCNNNNNNNNNNGCACTAGCTCAANNN
5 | >Scaffold4
6 | TNNNNNNNNNNTGANNNCTN
7 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.8.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fasta -b s
2 | embedded
3 | Header1	0	5
4 | Header2	0	5
5 | Header3	0	10
6 | Header4	0	15
7 | Header5	0	15
8 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.80.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta -ofa -k testFiles/random1.hc.sak
 2 | embedded
 3 | >Header1 5bp sequence with no gaps and 2 lowercase bases
 4 | CGacT
 5 | >Header2 5bp sequence with internal 1bp non-canonical gap
 6 | CGANT
 7 | >Header3 10bp sequence with internal 4bp and 1bp terminal canonical gap
 8 | TGANATNCTN
 9 | >Header4 15bp sequence with start 3bp canonical gap and 3 lowercase bases
10 | NNNTCTcgCACtC
11 | >Header5 15bp sequence with terminal 3bp canonical gap
12 | ACTCGATCACGNNN
13 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.81.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta -ofa -k testFiles/random1.hdc.sak
 2 | embedded
 3 | >Header1 5bp sequence with no gaps and 2 lowercase bases
 4 | CGacT
 5 | >Header2 5bp sequence with internal 1bp non-canonical gap
 6 | CGANT
 7 | >Header3 10bp sequence with internal 4bp and 1bp terminal canonical gap
 8 | TGANATNCTN
 9 | >Header4 15bp sequence with start 3bp canonical gap and 3 lowercase bases
10 | NNNTTCCTcgCACtC
11 | >Header5 15bp sequence with terminal 3bp canonical gap
12 | AACTCGATCACGNNN
13 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.82.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta Header2
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 1
 5 | Total scaffold length: 5
 6 | Average scaffold length: 5.00
 7 | Scaffold N50: 5
 8 | Scaffold auN: 5.00
 9 | Scaffold L50: 1
10 | Largest scaffold: 5
11 | Smallest scaffold: 5
12 | # contigs: 2
13 | Total contig length: 4
14 | Average contig length: 2.00
15 | Contig N50: 3
16 | Contig auN: 2.50
17 | Contig L50: 1
18 | Largest contig: 3
19 | Smallest contig: 1
20 | # gaps in scaffolds: 1
21 | Total gap length in scaffolds: 1
22 | Average gap length in scaffolds: 1.00
23 | Gap N50 in scaffolds: 1
24 | Gap auN in scaffolds: 1.00
25 | Gap L50 in scaffolds: 1
26 | Largest gap in scaffolds: 1
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 1:1:1:1
29 | GC content %: 50.00
30 | # soft-masked bases: 0
31 | # segments: 2
32 | Total segment length: 4
33 | Average segment length: 2.00
34 | # gaps: 1
35 | # paths: 1
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.83.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta -ofa -e testFiles/random1.fasta.1.bed
 2 | embedded
 3 | >Header2 5bp sequence with internal 1bp non-canonical gap
 4 | N
 5 | >Header3 10bp sequence with internal 4bp and 1bp terminal canonical gap
 6 | NNN
 7 | >Header4 15bp sequence with start 3bp canonical gap and 3 lowercase bases
 8 | NNtC
 9 | >Header5 15bp sequence with terminal 3bp canonical gap
10 | AACN
11 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.84.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fasta -ofa -e testFiles/random1.fasta.2.bed
2 | embedded
3 | >Header1 5bp sequence with no gaps and 2 lowercase bases
4 | CGacT
5 | >Header2 5bp sequence with internal 1bp non-canonical gap
6 | CGANT
7 | >Header5 15bp sequence with terminal 3bp canonical gap
8 | AACTCGATCACGNNN
9 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.85.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta -ofa -e testFiles/random1.fasta.3.bed
 2 | embedded
 3 | >Header1 5bp sequence with no gaps and 2 lowercase bases
 4 | CGac
 5 | >Header2 5bp sequence with internal 1bp non-canonical gap
 6 | CGANT
 7 | >Header3 10bp sequence with internal 4bp and 1bp terminal canonical gap
 8 | TGANATNCTN
 9 | >Header5 15bp sequence with terminal 3bp canonical gap
10 | AACTCGATCACGNNN
11 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.86.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta -ofa -e testFiles/random1.fasta.4.bed
 2 | embedded
 3 | >Header1 5bp sequence with no gaps and 2 lowercase bases
 4 | CGacT
 5 | >Header2 5bp sequence with internal 1bp non-canonical gap
 6 | CGANT
 7 | >Header4 15bp sequence with start 3bp canonical gap and 3 lowercase bases
 8 | NNNTCTcgCACtC
 9 | >Header5 15bp sequence with terminal 3bp canonical gap
10 | AACTCGATCACGNNN
11 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.87.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta -ofa -e testFiles/random1.fasta.5.bed
 2 | embedded
 3 | >Header1 5bp sequence with no gaps and 2 lowercase bases
 4 | CGacT
 5 | >Header2 5bp sequence with internal 1bp non-canonical gap
 6 | CGANT
 7 | >Header3 10bp sequence with internal 4bp and 1bp terminal canonical gap
 8 | TGANNCTN
 9 | >Header5 15bp sequence with terminal 3bp canonical gap
10 | AACTCGATCACGNNN
11 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.88.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta -ofa -i testFiles/random1.fasta.1.bed
 2 | embedded
 3 | >Header1 5bp sequence with no gaps and 2 lowercase bases
 4 | CGacT
 5 | >Header2 5bp sequence with internal 1bp non-canonical gap
 6 | CGAT
 7 | >Header3 10bp sequence with internal 4bp and 1bp terminal canonical gap
 8 | TGAATCT
 9 | >Header4 15bp sequence with start 3bp canonical gap and 3 lowercase bases
10 | NTTCCTcgCAC
11 | >Header5 15bp sequence with terminal 3bp canonical gap
12 | TCGATCACGNN
13 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.89.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fasta -ofa -i testFiles/random1.fasta.2.bed
2 | embedded
3 | >Header3 10bp sequence with internal 4bp and 1bp terminal canonical gap
4 | TGANATNCTN
5 | >Header4 15bp sequence with start 3bp canonical gap and 3 lowercase bases
6 | NNNTTCCTcgCACtC
7 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.9.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta -b c
 2 | embedded
 3 | Header1	0	5
 4 | Header2	0	3
 5 | Header2	4	5
 6 | Header3	0	3
 7 | Header3	4	6
 8 | Header3	7	9
 9 | Header4	3	15
10 | Header5	0	12
11 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.90.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fasta -ofa -i testFiles/random1.fasta.3.bed
2 | embedded
3 | >Header1 5bp sequence with no gaps and 2 lowercase bases
4 | T
5 | >Header4 15bp sequence with start 3bp canonical gap and 3 lowercase bases
6 | NNNTTCCTcgCACtC
7 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.91.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fasta -ofa -i testFiles/random1.fasta.4.bed
2 | embedded
3 | >Header3 10bp sequence with internal 4bp and 1bp terminal canonical gap
4 | TGANATNCTN
5 | >Header4 15bp sequence with start 3bp canonical gap and 3 lowercase bases
6 | TC
7 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.92.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fasta -ofa -i testFiles/random1.fasta.5.bed
2 | embedded
3 | >Header3 10bp sequence with internal 4bp and 1bp terminal canonical gap
4 | AT
5 | >Header4 15bp sequence with start 3bp canonical gap and 3 lowercase bases
6 | NNNTTCCTcgCACtC
7 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.93.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta -a testFiles/random1.agp --stats
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 2
 5 | Total scaffold length: 47
 6 | Average scaffold length: 23.50
 7 | Scaffold N50: 25
 8 | Scaffold auN: 23.60
 9 | Scaffold L50: 1
10 | Largest scaffold: 25
11 | Smallest scaffold: 22
12 | # contigs: 6
13 | Total contig length: 27
14 | Average contig length: 4.50
15 | Contig N50: 5
16 | Contig auN: 7.37
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 4
21 | Total gap length in scaffolds: 20
22 | Average gap length in scaffolds: 5.00
23 | Gap N50 in scaffolds: 6
24 | Gap auN in scaffolds: 6.30
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 8
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 4:10:5:8
29 | GC content %: 55.56
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 9
35 | # paths: 2
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.94.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fasta -a testFiles/random1.agp -ofa
2 | embedded
3 | >newpath1
4 | GacTNNNNNTCGNNNNNNATNC
5 | >newpath2
6 | TCGAGNNNNNNNNTTCCTcgCACtC
7 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.95.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta -k testFiles/random1.rvcp.sak
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 5
 5 | Total scaffold length: 50
 6 | Average scaffold length: 10.00
 7 | Scaffold N50: 15
 8 | Scaffold auN: 12.00
 9 | Scaffold L50: 2
10 | Largest scaffold: 15
11 | Smallest scaffold: 5
12 | # contigs: 8
13 | Total contig length: 40
14 | Average contig length: 5.00
15 | Contig N50: 12
16 | Contig auN: 8.50
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 6
21 | Total gap length in scaffolds: 10
22 | Average gap length in scaffolds: 1.67
23 | Gap N50 in scaffolds: 3
24 | Gap auN in scaffolds: 2.20
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 3
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 13:9:11:7
29 | GC content %: 50.00
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 6
35 | # paths: 5
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.96.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta -k testFiles/random1.mask.sak
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 5
 5 | Total scaffold length: 53
 6 | Average scaffold length: 10.60
 7 | Scaffold N50: 15
 8 | Scaffold auN: 13.19
 9 | Scaffold L50: 2
10 | Largest scaffold: 18
11 | Smallest scaffold: 5
12 | # contigs: 9
13 | Total contig length: 38
14 | Average contig length: 4.22
15 | Contig N50: 9
16 | Contig auN: 7.32
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 7
21 | Total gap length in scaffolds: 15
22 | Average gap length in scaffolds: 2.14
23 | Gap N50 in scaffolds: 3
24 | Gap auN in scaffolds: 3.13
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 5
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 8:13:6:11
29 | GC content %: 50.00
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 7
35 | # paths: 5
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.gz.100.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta.gz -k testFiles/random1.mask.sak
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 5
 5 | Total scaffold length: 53
 6 | Average scaffold length: 10.60
 7 | Scaffold N50: 15
 8 | Scaffold auN: 13.19
 9 | Scaffold L50: 2
10 | Largest scaffold: 18
11 | Smallest scaffold: 5
12 | # contigs: 9
13 | Total contig length: 38
14 | Average contig length: 4.22
15 | Contig N50: 9
16 | Contig auN: 7.32
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 7
21 | Total gap length in scaffolds: 15
22 | Average gap length in scaffolds: 2.14
23 | Gap N50 in scaffolds: 3
24 | Gap auN in scaffolds: 3.13
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 5
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 8:13:6:11
29 | GC content %: 50.00
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 7
35 | # paths: 5
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.gz.69.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta.gz 
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 5
 5 | Total scaffold length: 50
 6 | Average scaffold length: 10.00
 7 | Scaffold N50: 15
 8 | Scaffold auN: 12.00
 9 | Scaffold L50: 2
10 | Largest scaffold: 15
11 | Smallest scaffold: 5
12 | # contigs: 8
13 | Total contig length: 40
14 | Average contig length: 5.00
15 | Contig N50: 12
16 | Contig auN: 8.50
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 6
21 | Total gap length in scaffolds: 10
22 | Average gap length in scaffolds: 1.67
23 | Gap N50 in scaffolds: 3
24 | Gap auN in scaffolds: 2.20
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 3
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 9:14:6:11
29 | GC content %: 50.00
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 6
35 | # paths: 5
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.gz.70.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fasta.gz -s s
2 | embedded
3 | Header1	5
4 | Header2	5
5 | Header3	10
6 | Header4	15
7 | Header5	15
8 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.gz.71.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta.gz -s c
 2 | embedded
 3 | Header1.1	5
 4 | Header2.1	3
 5 | Header2.3	1
 6 | Header3.1	3
 7 | Header3.3	2
 8 | Header3.5	2
 9 | Header4.2	12
10 | Header5.1	12
11 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.gz.72.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fasta.gz -s g
2 | embedded
3 | Header2.2	1
4 | Header3.2	1
5 | Header3.4	1
6 | Header3.6	1
7 | Header4.1	3
8 | Header5.2	3
9 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.gz.73.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta.gz -b a
 2 | embedded
 3 | Header1	1	5	1	W	Header1.1	1	5	+
 4 | Header2	1	3	1	W	Header2.1	1	3	+
 5 | Header2	4	4	2	N	1	Header2.2	yes
 6 | Header2	5	5	3	W	Header2.3	1	1	+
 7 | Header3	1	3	1	W	Header3.1	1	3	+
 8 | Header3	4	4	2	N	1	Header3.2	yes
 9 | Header3	5	6	3	W	Header3.3	1	2	+
10 | Header3	7	7	4	N	1	Header3.4	yes
11 | Header3	8	9	5	W	Header3.5	1	2	+
12 | Header3	10	10	6	N	1	Header3.6	yes
13 | Header4	1	3	1	N	3	Header4.1	yes
14 | Header4	4	15	2	W	Header4.2	1	12	+
15 | Header5	1	12	1	W	Header5.1	1	12	+
16 | Header5	13	15	2	N	3	Header5.2	yes
17 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.gz.74.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fasta.gz -b s
2 | embedded
3 | Header1	0	5
4 | Header2	0	5
5 | Header3	0	10
6 | Header4	0	15
7 | Header5	0	15
8 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.gz.75.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta.gz -b c
 2 | embedded
 3 | Header1	0	5
 4 | Header2	0	3
 5 | Header2	4	5
 6 | Header3	0	3
 7 | Header3	4	6
 8 | Header3	7	9
 9 | Header4	3	15
10 | Header5	0	12
11 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.gz.76.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fasta.gz -b g
2 | embedded
3 | Header2	3	4
4 | Header3	3	4
5 | Header3	6	7
6 | Header3	9	10
7 | Header4	0	3
8 | Header5	12	15
9 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.gz.77.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta.gz --homopolymer-compress 1 -ofa
 2 | embedded
 3 | >Header1 5bp sequence with no gaps and 2 lowercase bases
 4 | CGacT
 5 | >Header2 5bp sequence with internal 1bp non-canonical gap
 6 | CGANT
 7 | >Header3 10bp sequence with internal 4bp and 1bp terminal canonical gap
 8 | TGANATNCTN
 9 | >Header4 15bp sequence with start 3bp canonical gap and 3 lowercase bases
10 | NNNTCTcgCACtC
11 | >Header5 15bp sequence with terminal 3bp canonical gap
12 | ACTCGATCACGNNN
13 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.gz.97.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta.gz -a testFiles/random1.agp --stats
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 2
 5 | Total scaffold length: 47
 6 | Average scaffold length: 23.50
 7 | Scaffold N50: 25
 8 | Scaffold auN: 23.60
 9 | Scaffold L50: 1
10 | Largest scaffold: 25
11 | Smallest scaffold: 22
12 | # contigs: 6
13 | Total contig length: 27
14 | Average contig length: 4.50
15 | Contig N50: 5
16 | Contig auN: 7.37
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 4
21 | Total gap length in scaffolds: 20
22 | Average gap length in scaffolds: 5.00
23 | Gap N50 in scaffolds: 6
24 | Gap auN in scaffolds: 6.30
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 8
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 4:10:5:8
29 | GC content %: 55.56
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 9
35 | # paths: 2
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.gz.98.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fasta.gz -a testFiles/random1.agp -ofa
2 | embedded
3 | >newpath1
4 | GacTNNNNNTCGNNNNNNATNC
5 | >newpath2
6 | TCGAGNNNNNNNNTTCCTcgCACtC
7 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fasta.gz.99.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fasta.gz -k testFiles/random1.rvcp.sak
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 5
 5 | Total scaffold length: 50
 6 | Average scaffold length: 10.00
 7 | Scaffold N50: 15
 8 | Scaffold auN: 12.00
 9 | Scaffold L50: 2
10 | Largest scaffold: 15
11 | Smallest scaffold: 5
12 | # contigs: 8
13 | Total contig length: 40
14 | Average contig length: 5.00
15 | Contig N50: 12
16 | Contig auN: 8.50
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 6
21 | Total gap length in scaffolds: 10
22 | Average gap length in scaffolds: 1.67
23 | Gap N50 in scaffolds: 3
24 | Gap auN in scaffolds: 2.20
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 3
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 13:9:11:7
29 | GC content %: 50.00
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 6
35 | # paths: 5
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.101.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fastq -a testFiles/random1.agp --stats
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 2
 5 | Total scaffold length: 47
 6 | Average scaffold length: 23.50
 7 | Scaffold N50: 25
 8 | Scaffold auN: 23.60
 9 | Scaffold L50: 1
10 | Largest scaffold: 25
11 | Smallest scaffold: 22
12 | # contigs: 6
13 | Total contig length: 27
14 | Average contig length: 4.50
15 | Contig N50: 5
16 | Contig auN: 7.37
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 4
21 | Total gap length in scaffolds: 20
22 | Average gap length in scaffolds: 5.00
23 | Gap N50 in scaffolds: 6
24 | Gap auN in scaffolds: 6.30
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 8
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 4:10:5:8
29 | GC content %: 55.56
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 9
35 | # paths: 2
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.102.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fastq -a testFiles/random1.agp -ofa
2 | embedded
3 | >newpath1
4 | GacTNNNNNTCGNNNNNNATNC
5 | >newpath2
6 | TCGAGNNNNNNNNTTCCTcgCACtC
7 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.103.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fastq -k testFiles/random1.rvcp.sak
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 5
 5 | Total scaffold length: 50
 6 | Average scaffold length: 10.00
 7 | Scaffold N50: 15
 8 | Scaffold auN: 12.00
 9 | Scaffold L50: 2
10 | Largest scaffold: 15
11 | Smallest scaffold: 5
12 | # contigs: 8
13 | Total contig length: 40
14 | Average contig length: 5.00
15 | Contig N50: 12
16 | Contig auN: 8.50
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 6
21 | Total gap length in scaffolds: 10
22 | Average gap length in scaffolds: 1.67
23 | Gap N50 in scaffolds: 3
24 | Gap auN in scaffolds: 2.20
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 3
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 13:9:11:7
29 | GC content %: 50.00
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 6
35 | # paths: 5
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.104.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fastq -k testFiles/random1.mask.sak
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 5
 5 | Total scaffold length: 53
 6 | Average scaffold length: 10.60
 7 | Scaffold N50: 15
 8 | Scaffold auN: 13.19
 9 | Scaffold L50: 2
10 | Largest scaffold: 18
11 | Smallest scaffold: 5
12 | # contigs: 9
13 | Total contig length: 38
14 | Average contig length: 4.22
15 | Contig N50: 9
16 | Contig auN: 7.32
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 7
21 | Total gap length in scaffolds: 15
22 | Average gap length in scaffolds: 2.14
23 | Gap N50 in scaffolds: 3
24 | Gap auN in scaffolds: 3.13
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 5
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 8:13:6:11
29 | GC content %: 50.00
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 7
35 | # paths: 5
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.54.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fastq 
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 5
 5 | Total scaffold length: 50
 6 | Average scaffold length: 10.00
 7 | Scaffold N50: 15
 8 | Scaffold auN: 12.00
 9 | Scaffold L50: 2
10 | Largest scaffold: 15
11 | Smallest scaffold: 5
12 | # contigs: 8
13 | Total contig length: 40
14 | Average contig length: 5.00
15 | Contig N50: 12
16 | Contig auN: 8.50
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 6
21 | Total gap length in scaffolds: 10
22 | Average gap length in scaffolds: 1.67
23 | Gap N50 in scaffolds: 3
24 | Gap auN in scaffolds: 2.20
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 3
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 9:14:6:11
29 | GC content %: 50.00
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 6
35 | # paths: 5
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.55.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fastq -s s
2 | embedded
3 | Header1	5
4 | Header2	5
5 | Header3	10
6 | Header4	15
7 | Header5	15
8 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.56.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fastq -s c
 2 | embedded
 3 | Header1.1	5
 4 | Header2.1	3
 5 | Header2.3	1
 6 | Header3.1	3
 7 | Header3.3	2
 8 | Header3.5	2
 9 | Header4.2	12
10 | Header5.1	12
11 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.57.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fastq -s g
2 | embedded
3 | Header2.2	1
4 | Header3.2	1
5 | Header3.4	1
6 | Header3.6	1
7 | Header4.1	3
8 | Header5.2	3
9 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.58.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fastq -b a
 2 | embedded
 3 | Header1	1	5	1	W	Header1.1	1	5	+
 4 | Header2	1	3	1	W	Header2.1	1	3	+
 5 | Header2	4	4	2	N	1	Header2.2	yes
 6 | Header2	5	5	3	W	Header2.3	1	1	+
 7 | Header3	1	3	1	W	Header3.1	1	3	+
 8 | Header3	4	4	2	N	1	Header3.2	yes
 9 | Header3	5	6	3	W	Header3.3	1	2	+
10 | Header3	7	7	4	N	1	Header3.4	yes
11 | Header3	8	9	5	W	Header3.5	1	2	+
12 | Header3	10	10	6	N	1	Header3.6	yes
13 | Header4	1	3	1	N	3	Header4.1	yes
14 | Header4	4	15	2	W	Header4.2	1	12	+
15 | Header5	1	12	1	W	Header5.1	1	12	+
16 | Header5	13	15	2	N	3	Header5.2	yes
17 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.59.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fastq -b s
2 | embedded
3 | Header1	0	5
4 | Header2	0	5
5 | Header3	0	10
6 | Header4	0	15
7 | Header5	0	15
8 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.60.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fastq -b c
 2 | embedded
 3 | Header1	0	5
 4 | Header2	0	3
 5 | Header2	4	5
 6 | Header3	0	3
 7 | Header3	4	6
 8 | Header3	7	9
 9 | Header4	3	15
10 | Header5	0	12
11 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.61.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fastq -b g
2 | embedded
3 | Header2	3	4
4 | Header3	3	4
5 | Header3	6	7
6 | Header3	9	10
7 | Header4	0	3
8 | Header5	12	15
9 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.62.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fastq --homopolymer-compress 1 -ofa
 2 | embedded
 3 | >Header1 5bp sequence with no gaps
 4 | CGacT
 5 | >Header2 5bp sequence with internal 1bp non-canonical gap
 6 | CGANT
 7 | >Header3 10bp sequence with internal 4bp and 1bp terminal canonical gap
 8 | TGANATNCTN
 9 | >Header4 15bp sequence with start 3bp canonical gap
10 | NNNTCTcgCACtC
11 | >Header5 15bp sequence with terminal 3bp canonical gap
12 | ACTCGATCACGNNN
13 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.gz.105.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fastq.gz -a testFiles/random1.agp --stats
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 2
 5 | Total scaffold length: 47
 6 | Average scaffold length: 23.50
 7 | Scaffold N50: 25
 8 | Scaffold auN: 23.60
 9 | Scaffold L50: 1
10 | Largest scaffold: 25
11 | Smallest scaffold: 22
12 | # contigs: 6
13 | Total contig length: 27
14 | Average contig length: 4.50
15 | Contig N50: 5
16 | Contig auN: 7.37
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 4
21 | Total gap length in scaffolds: 20
22 | Average gap length in scaffolds: 5.00
23 | Gap N50 in scaffolds: 6
24 | Gap auN in scaffolds: 6.30
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 8
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 4:10:5:8
29 | GC content %: 55.56
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 9
35 | # paths: 2
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.gz.106.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fastq.gz -a testFiles/random1.agp -ofa
2 | embedded
3 | >newpath1
4 | GacTNNNNNTCGNNNNNNATNC
5 | >newpath2
6 | TCGAGNNNNNNNNTTCCTcgCACtC
7 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.gz.107.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fastq.gz -k testFiles/random1.rvcp.sak
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 5
 5 | Total scaffold length: 50
 6 | Average scaffold length: 10.00
 7 | Scaffold N50: 15
 8 | Scaffold auN: 12.00
 9 | Scaffold L50: 2
10 | Largest scaffold: 15
11 | Smallest scaffold: 5
12 | # contigs: 8
13 | Total contig length: 40
14 | Average contig length: 5.00
15 | Contig N50: 12
16 | Contig auN: 8.50
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 6
21 | Total gap length in scaffolds: 10
22 | Average gap length in scaffolds: 1.67
23 | Gap N50 in scaffolds: 3
24 | Gap auN in scaffolds: 2.20
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 3
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 13:9:11:7
29 | GC content %: 50.00
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 6
35 | # paths: 5
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.gz.108.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fastq.gz -k testFiles/random1.mask.sak
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 5
 5 | Total scaffold length: 53
 6 | Average scaffold length: 10.60
 7 | Scaffold N50: 15
 8 | Scaffold auN: 13.19
 9 | Scaffold L50: 2
10 | Largest scaffold: 18
11 | Smallest scaffold: 5
12 | # contigs: 9
13 | Total contig length: 38
14 | Average contig length: 4.22
15 | Contig N50: 9
16 | Contig auN: 7.32
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 7
21 | Total gap length in scaffolds: 15
22 | Average gap length in scaffolds: 2.14
23 | Gap N50 in scaffolds: 3
24 | Gap auN in scaffolds: 3.13
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 5
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 8:13:6:11
29 | GC content %: 50.00
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 7
35 | # paths: 5
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.gz.33.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fastq.gz 
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 5
 5 | Total scaffold length: 50
 6 | Average scaffold length: 10.00
 7 | Scaffold N50: 15
 8 | Scaffold auN: 12.00
 9 | Scaffold L50: 2
10 | Largest scaffold: 15
11 | Smallest scaffold: 5
12 | # contigs: 8
13 | Total contig length: 40
14 | Average contig length: 5.00
15 | Contig N50: 12
16 | Contig auN: 8.50
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 6
21 | Total gap length in scaffolds: 10
22 | Average gap length in scaffolds: 1.67
23 | Gap N50 in scaffolds: 3
24 | Gap auN in scaffolds: 2.20
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 3
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 9:14:6:11
29 | GC content %: 50.00
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 6
35 | # paths: 5
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.gz.34.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fastq.gz -s s
2 | embedded
3 | Header1	5
4 | Header2	5
5 | Header3	10
6 | Header4	15
7 | Header5	15
8 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.gz.35.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fastq.gz -s c
 2 | embedded
 3 | Header1.1	5
 4 | Header2.1	3
 5 | Header2.3	1
 6 | Header3.1	3
 7 | Header3.3	2
 8 | Header3.5	2
 9 | Header4.2	12
10 | Header5.1	12
11 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.gz.36.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fastq.gz -s g
2 | embedded
3 | Header2.2	1
4 | Header3.2	1
5 | Header3.4	1
6 | Header3.6	1
7 | Header4.1	3
8 | Header5.2	3
9 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.gz.37.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fastq.gz -b a
 2 | embedded
 3 | Header1	1	5	1	W	Header1.1	1	5	+
 4 | Header2	1	3	1	W	Header2.1	1	3	+
 5 | Header2	4	4	2	N	1	Header2.2	yes
 6 | Header2	5	5	3	W	Header2.3	1	1	+
 7 | Header3	1	3	1	W	Header3.1	1	3	+
 8 | Header3	4	4	2	N	1	Header3.2	yes
 9 | Header3	5	6	3	W	Header3.3	1	2	+
10 | Header3	7	7	4	N	1	Header3.4	yes
11 | Header3	8	9	5	W	Header3.5	1	2	+
12 | Header3	10	10	6	N	1	Header3.6	yes
13 | Header4	1	3	1	N	3	Header4.1	yes
14 | Header4	4	15	2	W	Header4.2	1	12	+
15 | Header5	1	12	1	W	Header5.1	1	12	+
16 | Header5	13	15	2	N	3	Header5.2	yes
17 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.gz.38.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fastq.gz -b s
2 | embedded
3 | Header1	0	5
4 | Header2	0	5
5 | Header3	0	10
6 | Header4	0	15
7 | Header5	0	15
8 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.gz.39.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fastq.gz -b c
 2 | embedded
 3 | Header1	0	5
 4 | Header2	0	3
 5 | Header2	4	5
 6 | Header3	0	3
 7 | Header3	4	6
 8 | Header3	7	9
 9 | Header4	3	15
10 | Header5	0	12
11 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.gz.40.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.fastq.gz -b g
2 | embedded
3 | Header2	3	4
4 | Header3	3	4
5 | Header3	6	7
6 | Header3	9	10
7 | Header4	0	3
8 | Header5	12	15
9 | 


--------------------------------------------------------------------------------
/validateFiles/random1.fastq.gz.41.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.fastq.gz --homopolymer-compress 1 -ofa
 2 | embedded
 3 | >Header1 5bp sequence with no gaps
 4 | CGacT
 5 | >Header2 5bp sequence with internal 1bp non-canonical gap
 6 | CGANT
 7 | >Header3 10bp sequence with internal 4bp and 1bp terminal canonical gap
 8 | TGANATNCTN
 9 | >Header4 15bp sequence with start 3bp canonical gap
10 | NNNTCTcgCACtC
11 | >Header5 15bp sequence with terminal 3bp canonical gap
12 | ACTCGATCACGNNN
13 | 


--------------------------------------------------------------------------------
/validateFiles/random1.gfa2.109.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.gfa2 -a testFiles/random1.agp --stats
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 2
 5 | Total scaffold length: 47
 6 | Average scaffold length: 23.50
 7 | Scaffold N50: 25
 8 | Scaffold auN: 23.60
 9 | Scaffold L50: 1
10 | Largest scaffold: 25
11 | Smallest scaffold: 22
12 | # contigs: 6
13 | Total contig length: 27
14 | Average contig length: 4.50
15 | Contig N50: 5
16 | Contig auN: 7.37
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 4
21 | Total gap length in scaffolds: 20
22 | Average gap length in scaffolds: 5.00
23 | Gap N50 in scaffolds: 6
24 | Gap auN in scaffolds: 6.30
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 8
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 4:10:5:8
29 | GC content %: 55.56
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 9
35 | # paths: 2
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.gfa2.110.tst:
--------------------------------------------------------------------------------
1 | testFiles/random1.gfa2 -a testFiles/random1.agp -ofa
2 | embedded
3 | >newpath1
4 | GacTNNNNNTCGNNNNNNATNC
5 | >newpath2
6 | TCGAGNNNNNNNNTTCCTcgCACtC
7 | 


--------------------------------------------------------------------------------
/validateFiles/random1.gfa2.111.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.gfa2 -k testFiles/random1.rvcp.sak
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 5
 5 | Total scaffold length: 50
 6 | Average scaffold length: 10.00
 7 | Scaffold N50: 15
 8 | Scaffold auN: 12.00
 9 | Scaffold L50: 2
10 | Largest scaffold: 15
11 | Smallest scaffold: 5
12 | # contigs: 8
13 | Total contig length: 40
14 | Average contig length: 5.00
15 | Contig N50: 12
16 | Contig auN: 8.50
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 6
21 | Total gap length in scaffolds: 10
22 | Average gap length in scaffolds: 1.67
23 | Gap N50 in scaffolds: 3
24 | Gap auN in scaffolds: 2.20
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 3
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 13:9:11:7
29 | GC content %: 50.00
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 6
35 | # paths: 5
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.gfa2.112.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.gfa2 -k testFiles/random1.mask.sak
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 5
 5 | Total scaffold length: 53
 6 | Average scaffold length: 10.60
 7 | Scaffold N50: 15
 8 | Scaffold auN: 13.19
 9 | Scaffold L50: 2
10 | Largest scaffold: 18
11 | Smallest scaffold: 5
12 | # contigs: 9
13 | Total contig length: 38
14 | Average contig length: 4.22
15 | Contig N50: 9
16 | Contig auN: 7.32
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 7
21 | Total gap length in scaffolds: 15
22 | Average gap length in scaffolds: 2.14
23 | Gap N50 in scaffolds: 3
24 | Gap auN in scaffolds: 3.13
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 5
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 8:13:6:11
29 | GC content %: 50.00
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 7
35 | # paths: 5
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.gfa2.113.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.gfa2 -k testFiles/random1.gfa2.instructions.sak
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 4
 5 | Total scaffold length: 40
 6 | Average scaffold length: 10.00
 7 | Scaffold N50: 15
 8 | Scaffold auN: 12.50
 9 | Scaffold L50: 2
10 | Largest scaffold: 15
11 | Smallest scaffold: 5
12 | # contigs: 5
13 | Total contig length: 33
14 | Average contig length: 6.60
15 | Contig N50: 12
16 | Contig auN: 9.79
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 3
21 | Total gap length in scaffolds: 7
22 | Average gap length in scaffolds: 2.33
23 | Gap N50 in scaffolds: 3
24 | Gap auN in scaffolds: 2.71
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 3
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 7:13:5:8
29 | GC content %: 54.55
30 | # soft-masked bases: 5
31 | # segments: 5
32 | Total segment length: 33
33 | Average segment length: 6.60
34 | # gaps: 3
35 | # paths: 4
36 | 


--------------------------------------------------------------------------------
/validateFiles/random1.gfa2.12.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.gfa2 -o gfa2
 2 | embedded
 3 | H	VN:Z:2.0
 4 | S	Header1.1	5	CGacT
 5 | S	Header2.1	3	CGA
 6 | S	Header2.3	1	T
 7 | S	Header3.1	3	TGA
 8 | S	Header3.3	2	AT
 9 | S	Header3.5	2	CT
10 | S	Header4.2	12	TTCCTcgCACtC
11 | S	Header5.1	12	AACTCGATCACG
12 | G	Header2.2	Header2.1+	Header2.3+	1
13 | G	Header3.2	Header3.1+	Header3.3+	1
14 | G	Header3.4	Header3.3+	Header3.5+	1
15 | G	Header3.6	Header3.5+	Header3.5-	1
16 | G	Header4.1	Header4.2+	Header4.2+	3
17 | G	Header5.2	Header5.1+	Header5.1-	3
18 | O	Header1	Header1.1+
19 | O	Header2	Header2.1+ Header2.2 Header2.3+
20 | O	Header3	Header3.1+ Header3.2 Header3.3+ Header3.4 Header3.5+ Header3.6
21 | O	Header4	Header4.1 Header4.2+
22 | O	Header5	Header5.1+ Header5.2
23 | 


--------------------------------------------------------------------------------
/validateFiles/random1.gfa2.13.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.gfa2 -o gfa
 2 | embedded
 3 | H	VN:Z:1.2
 4 | S	Header1.1	CGacT
 5 | S	Header2.1	CGA
 6 | S	Header2.3	T
 7 | S	Header3.1	TGA
 8 | S	Header3.3	AT
 9 | S	Header3.5	CT
10 | S	Header4.2	TTCCTcgCACtC
11 | S	Header5.1	AACTCGATCACG
12 | J	Header2.1	+	Header2.3	+	1
13 | J	Header3.1	+	Header3.3	+	1
14 | J	Header3.3	+	Header3.5	+	1
15 | J	Header3.5	+	Header3.5	-	1
16 | J	Header4.2	+	Header4.2	+	3
17 | J	Header5.1	+	Header5.1	-	3
18 | P	Header1	Header1.1+	*
19 | P	Header2	Header2.1+;Header2.3+	*
20 | P	Header3	Header3.1+;Header3.3+;Header3.5+;	*
21 | P	Header4	;Header4.2+	*
22 | P	Header5	Header5.1+;	*
23 | 


--------------------------------------------------------------------------------
/validateFiles/random1.gfa2.14.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random1.gfa2 -o fasta
 2 | embedded
 3 | >Header1
 4 | CGacT
 5 | >Header2
 6 | CGANT
 7 | >Header3
 8 | TGANATNCTN
 9 | >Header4
10 | NNNTTCCTcgCACtC
11 | >Header5
12 | AACTCGATCACGNNN
13 | 


--------------------------------------------------------------------------------
/validateFiles/random2.gfa.42.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random2.gfa -o gfa2
 2 | embedded
 3 | H	VN:Z:2.0
 4 | S	11	5	ACCTT	LN:i:5	QL:Z:?@97?
 5 | S	12	6	TCAAGG	LN:i:6	QL:Z:@6?84@
 6 | S	13	7	CTTgaTT	LN:i:7	QL:Z:>=?@877
 7 | E	edge0	11	+	12	-	4M
 8 | E	edge1	12	-	13	+	5M
 9 | E	edge2	11	+	13	+	3M
10 | G	gap0	11+	13-	5	SC:i:1
11 | G	gap1	13-	12+	3	SC:i:1
12 | O	14	11+ gap0 13- gap1 12+
13 | O	15	11+ edge0 12- edge1 13+
14 | 


--------------------------------------------------------------------------------
/validateFiles/random2.gfa.43.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random2.gfa -o gfa
 2 | embedded
 3 | H	VN:Z:1.2
 4 | S	11	ACCTT	LN:i:5	QL:Z:?@97?
 5 | S	12	TCAAGG	LN:i:6	QL:Z:@6?84@
 6 | S	13	CTTgaTT	LN:i:7	QL:Z:>=?@877
 7 | L	11	+	12	-	4M
 8 | L	12	-	13	+	5M
 9 | L	11	+	13	+	3M
10 | J	11	+	13	-	5	SC:i:1
11 | J	13	-	12	+	3	SC:i:1
12 | P	14	11+;13-;12+	*
13 | P	15	11+,12-,13+	*
14 | 


--------------------------------------------------------------------------------
/validateFiles/random2.gfa.44.tst:
--------------------------------------------------------------------------------
1 | testFiles/random2.gfa -o fasta
2 | embedded
3 | >14
4 | ACCTTNNNNNAAtcAAGNNNTCAAGG
5 | >15
6 | ACCTTGATT
7 | 


--------------------------------------------------------------------------------
/validateFiles/random2.gfa.gz.45.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random2.gfa.gz -o gfa2
 2 | embedded
 3 | H	VN:Z:2.0
 4 | S	11	5	ACCTT	LN:i:5	QL:Z:?@97?
 5 | S	12	6	TCAAGG	LN:i:6	QL:Z:@6?84@
 6 | S	13	7	CTTgaTT	LN:i:7	QL:Z:>=?@877
 7 | E	edge0	11	+	12	-	4M
 8 | E	edge1	12	-	13	+	5M
 9 | E	edge2	11	+	13	+	3M
10 | G	gap0	11+	13-	5	SC:i:1
11 | G	gap1	13-	12+	3	SC:i:1
12 | O	14	11+ gap0 13- gap1 12+
13 | O	15	11+ edge0 12- edge1 13+
14 | 


--------------------------------------------------------------------------------
/validateFiles/random2.gfa.gz.46.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random2.gfa.gz -o gfa
 2 | embedded
 3 | H	VN:Z:1.2
 4 | S	11	ACCTT	LN:i:5	QL:Z:?@97?
 5 | S	12	TCAAGG	LN:i:6	QL:Z:@6?84@
 6 | S	13	CTTgaTT	LN:i:7	QL:Z:>=?@877
 7 | L	11	+	12	-	4M
 8 | L	12	-	13	+	5M
 9 | L	11	+	13	+	3M
10 | J	11	+	13	-	5	SC:i:1
11 | J	13	-	12	+	3	SC:i:1
12 | P	14	11+;13-;12+	*
13 | P	15	11+,12-,13+	*
14 | 


--------------------------------------------------------------------------------
/validateFiles/random2.gfa.gz.47.tst:
--------------------------------------------------------------------------------
1 | testFiles/random2.gfa.gz -o fasta
2 | embedded
3 | >14
4 | ACCTTNNNNNAAtcAAGNNNTCAAGG
5 | >15
6 | ACCTTGATT
7 | 


--------------------------------------------------------------------------------
/validateFiles/random2.gfa2.63.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random2.gfa2 -o gfa2
 2 | embedded
 3 | H	VN:Z:2.0
 4 | S	id2	6	TCAAGG
 5 | S	id3	7	CTTGATT
 6 | S	id1	5	ACCTT
 7 | S	id4	8	CATGACTC
 8 | S	id7	9	TGAATGAAA
 9 | E	id10	id1	+	id2	-	3M
10 | E	id11	id2	-	id1	+	3M
11 | G	id5	id3+	id4-	5
12 | G	id6	id1+	id2+	3
13 | G	id8	id2+	id3-	2
14 | G	id9	id7+	id7+	5
15 | O	id12	id1+ id6 id2(1:3)+ id8 id3-
16 | O	path1	id1+
17 | O	path2	id2+
18 | O	path3	id3+
19 | 


--------------------------------------------------------------------------------
/validateFiles/random2.gfa2.64.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random2.gfa2 -o gfa
 2 | embedded
 3 | H	VN:Z:1.2
 4 | S	id2	TCAAGG
 5 | S	id3	CTTGATT
 6 | S	id1	ACCTT
 7 | S	id4	CATGACTC
 8 | S	id7	TGAATGAAA
 9 | L	id1	+	id2	-	3M
10 | L	id2	-	id1	+	3M
11 | J	id3	+	id4	-	5
12 | J	id1	+	id2	+	3
13 | J	id2	+	id3	-	2
14 | J	id7	+	id7	+	5
15 | P	id12	id1+;id2(1:3)+;id3-	*
16 | P	path1	id1+	*
17 | P	path2	id2+	*
18 | P	path3	id3+	*
19 | 


--------------------------------------------------------------------------------
/validateFiles/random2.gfa2.65.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random2.gfa2 -o fasta
 2 | embedded
 3 | >id12
 4 | ACCTTNNNTCANNAATCAAG
 5 | >path1
 6 | ACCTT
 7 | >path2
 8 | TCAAGG
 9 | >path3
10 | CTTGATT
11 | 


--------------------------------------------------------------------------------
/validateFiles/random2.gfa2.gz.51.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random2.gfa2.gz -o gfa2
 2 | embedded
 3 | H	VN:Z:2.0
 4 | S	id2	6	TCAAGG
 5 | S	id3	7	CTTGATT
 6 | S	id1	5	ACCTT
 7 | S	id4	8	CATGACTC
 8 | S	id7	9	TGAATGAAA
 9 | E	id10	id1	+	id2	-	3M
10 | E	id11	id2	-	id1	+	3M
11 | G	id5	id3+	id4-	5
12 | G	id6	id1+	id2+	3
13 | G	id8	id2+	id3-	2
14 | G	id9	id7+	id7+	5
15 | O	id12	id1+ id6 id2(1:3)+ id8 id3-
16 | O	path1	id1+
17 | O	path2	id2+
18 | O	path3	id3+
19 | 


--------------------------------------------------------------------------------
/validateFiles/random2.gfa2.gz.52.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random2.gfa2.gz -o gfa
 2 | embedded
 3 | H	VN:Z:1.2
 4 | S	id2	TCAAGG
 5 | S	id3	CTTGATT
 6 | S	id1	ACCTT
 7 | S	id4	CATGACTC
 8 | S	id7	TGAATGAAA
 9 | L	id1	+	id2	-	3M
10 | L	id2	-	id1	+	3M
11 | J	id3	+	id4	-	5
12 | J	id1	+	id2	+	3
13 | J	id2	+	id3	-	2
14 | J	id7	+	id7	+	5
15 | P	id12	id1+;id2(1:3)+;id3-	*
16 | P	path1	id1+	*
17 | P	path2	id2+	*
18 | P	path3	id3+	*
19 | 


--------------------------------------------------------------------------------
/validateFiles/random2.gfa2.gz.53.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random2.gfa2.gz -o fasta
 2 | embedded
 3 | >id12
 4 | ACCTTNNNTCANNAATCAAG
 5 | >path1
 6 | ACCTT
 7 | >path2
 8 | TCAAGG
 9 | >path3
10 | CTTGATT
11 | 


--------------------------------------------------------------------------------
/validateFiles/random2.noseq.gfa.114.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random2.noseq.gfa 
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 2
 5 | Total scaffold length: 44
 6 | Average scaffold length: 22.00
 7 | Scaffold N50: 26
 8 | Scaffold auN: 22.73
 9 | Scaffold L50: 1
10 | Largest scaffold: 26
11 | Smallest scaffold: 18
12 | # contigs: 6
13 | Total contig length: 36
14 | Average contig length: 6.00
15 | Contig N50: 6
16 | Contig auN: 6.11
17 | Contig L50: 3
18 | Largest contig: 7
19 | Smallest contig: 5
20 | # gaps in scaffolds: 2
21 | Total gap length in scaffolds: 8
22 | Average gap length in scaffolds: 4.00
23 | Gap N50 in scaffolds: 5
24 | Gap auN in scaffolds: 4.25
25 | Gap L50 in scaffolds: 1
26 | Largest gap in scaffolds: 5
27 | Smallest gap in scaffolds: 3
28 | Base composition (A:C:G:T): 0:0:0:0
29 | GC content %: nan
30 | # soft-masked bases: 36
31 | # segments: 3
32 | Total segment length: 0
33 | Average segment length: 0.00
34 | # gaps: 2
35 | # paths: 2
36 | # edges: 3
37 | Average degree: 1.00
38 | # connected components: 1
39 | Largest connected component length: 18
40 | # dead ends: 2
41 | # disconnected components: 0
42 | Total length disconnected components: 0
43 | # separated components: 1
44 | # bubbles: 0
45 | # circular segments: 0
46 | # circular paths: 0
47 | 


--------------------------------------------------------------------------------
/validateFiles/random2.noseq.gfa.48.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random2.noseq.gfa -o gfa2
 2 | embedded
 3 | H	VN:Z:2.0
 4 | S	11	5	*	LN:i:5	QL:Z:?@97?
 5 | S	12	6	*	LN:i:6	QL:Z:@6?84@
 6 | S	13	7	*	LN:i:7	QL:Z:>=?@877
 7 | E	edge0	11	+	12	-	4M
 8 | E	edge1	12	-	13	+	5M
 9 | E	edge2	11	+	13	+	3M
10 | G	gap0	11+	13-	5	SC:i:1
11 | G	gap1	13-	12+	3	SC:i:1
12 | O	14	11+ gap0 13- gap1 12+
13 | O	15	11+ edge0 12- edge1 13+
14 | 


--------------------------------------------------------------------------------
/validateFiles/random2.noseq.gfa.49.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random2.noseq.gfa -o gfa
 2 | embedded
 3 | H	VN:Z:1.2
 4 | S	11	*	LN:i:5	QL:Z:?@97?
 5 | S	12	*	LN:i:6	QL:Z:@6?84@
 6 | S	13	*	LN:i:7	QL:Z:>=?@877
 7 | L	11	+	12	-	4M
 8 | L	12	-	13	+	5M
 9 | L	11	+	13	+	3M
10 | J	11	+	13	-	5	SC:i:1
11 | J	13	-	12	+	3	SC:i:1
12 | P	14	11+;13-;12+	*
13 | P	15	11+,12-,13+	*
14 | 


--------------------------------------------------------------------------------
/validateFiles/random2.noseq.gfa.50.tst:
--------------------------------------------------------------------------------
1 | testFiles/random2.noseq.gfa -o fasta
2 | embedded
3 | >14
4 | Error: Fasta output not possible without segment sequence. Terminating.
5 | 


--------------------------------------------------------------------------------
/validateFiles/random3.sorting.fasta.24.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random3.sorting.fasta 
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 4
 5 | Total scaffold length: 16
 6 | Average scaffold length: 4.00
 7 | Scaffold N50: 4
 8 | Scaffold auN: 4.00
 9 | Scaffold L50: 2
10 | Largest scaffold: 4
11 | Smallest scaffold: 4
12 | # contigs: 4
13 | Total contig length: 16
14 | Average contig length: 4.00
15 | Contig N50: 4
16 | Contig auN: 4.00
17 | Contig L50: 2
18 | Largest contig: 4
19 | Smallest contig: 4
20 | # gaps in scaffolds: 0
21 | Total gap length in scaffolds: 0
22 | Average gap length in scaffolds: 0.00
23 | Gap N50 in scaffolds: 0
24 | Gap auN in scaffolds: 0.00
25 | Gap L50 in scaffolds: 0
26 | Largest gap in scaffolds: 0
27 | Smallest gap in scaffolds: 0
28 | Base composition (A:C:G:T): 4:4:4:4
29 | GC content %: 50.00
30 | # soft-masked bases: 0
31 | # segments: 4
32 | Total segment length: 16
33 | Average segment length: 4.00
34 | # gaps: 0
35 | # paths: 4
36 | 


--------------------------------------------------------------------------------
/validateFiles/random3.sorting.fasta.25.tst:
--------------------------------------------------------------------------------
1 | testFiles/random3.sorting.fasta -s s
2 | embedded
3 | c	4
4 | d	4
5 | b	4
6 | a	4
7 | 


--------------------------------------------------------------------------------
/validateFiles/random3.sorting.fasta.26.tst:
--------------------------------------------------------------------------------
1 | testFiles/random3.sorting.fasta -s c
2 | embedded
3 | c.1	4
4 | d.1	4
5 | b.1	4
6 | a.1	4
7 | 


--------------------------------------------------------------------------------
/validateFiles/random3.sorting.fasta.27.tst:
--------------------------------------------------------------------------------
1 | testFiles/random3.sorting.fasta -s g
2 | embedded
3 | 


--------------------------------------------------------------------------------
/validateFiles/random3.sorting.fasta.28.tst:
--------------------------------------------------------------------------------
1 | testFiles/random3.sorting.fasta -b a
2 | embedded
3 | c	1	4	1	W	c.1	1	4	+
4 | d	1	4	1	W	d.1	1	4	+
5 | b	1	4	1	W	b.1	1	4	+
6 | a	1	4	1	W	a.1	1	4	+
7 | 


--------------------------------------------------------------------------------
/validateFiles/random3.sorting.fasta.29.tst:
--------------------------------------------------------------------------------
1 | testFiles/random3.sorting.fasta -b s
2 | embedded
3 | c	0	4
4 | d	0	4
5 | b	0	4
6 | a	0	4
7 | 


--------------------------------------------------------------------------------
/validateFiles/random3.sorting.fasta.30.tst:
--------------------------------------------------------------------------------
1 | testFiles/random3.sorting.fasta -b c
2 | embedded
3 | c	0	4
4 | d	0	4
5 | b	0	4
6 | a	0	4
7 | 


--------------------------------------------------------------------------------
/validateFiles/random3.sorting.fasta.31.tst:
--------------------------------------------------------------------------------
1 | testFiles/random3.sorting.fasta -b g
2 | embedded
3 | 


--------------------------------------------------------------------------------
/validateFiles/random3.sorting.fasta.32.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random3.sorting.fasta --homopolymer-compress 1 -ofa
 2 | embedded
 3 | >c
 4 | ACGT
 5 | >d
 6 | CGTA
 7 | >b
 8 | GTAC
 9 | >a
10 | TACG
11 | 


--------------------------------------------------------------------------------
/validateFiles/random4.fasta.115.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random4.fasta 
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 5
 5 | Total scaffold length: 50
 6 | Average scaffold length: 10.00
 7 | Scaffold N50: 15
 8 | Scaffold auN: 12.00
 9 | Scaffold L50: 2
10 | Largest scaffold: 15
11 | Smallest scaffold: 5
12 | # contigs: 8
13 | Total contig length: 40
14 | Average contig length: 5.00
15 | Contig N50: 12
16 | Contig auN: 8.50
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 6
21 | Total gap length in scaffolds: 10
22 | Average gap length in scaffolds: 1.67
23 | Gap N50 in scaffolds: 3
24 | Gap auN in scaffolds: 2.20
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 3
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 9:14:6:11
29 | GC content %: 50.00
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 6
35 | # paths: 5
36 | 


--------------------------------------------------------------------------------
/validateFiles/random4.fasta.15.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random4.fasta 
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 5
 5 | Total scaffold length: 50
 6 | Average scaffold length: 10.00
 7 | Scaffold N50: 15
 8 | Scaffold auN: 12.00
 9 | Scaffold L50: 2
10 | Largest scaffold: 15
11 | Smallest scaffold: 5
12 | # contigs: 8
13 | Total contig length: 40
14 | Average contig length: 5.00
15 | Contig N50: 12
16 | Contig auN: 8.50
17 | Contig L50: 2
18 | Largest contig: 12
19 | Smallest contig: 1
20 | # gaps in scaffolds: 6
21 | Total gap length in scaffolds: 10
22 | Average gap length in scaffolds: 1.67
23 | Gap N50 in scaffolds: 3
24 | Gap auN in scaffolds: 2.20
25 | Gap L50 in scaffolds: 2
26 | Largest gap in scaffolds: 3
27 | Smallest gap in scaffolds: 1
28 | Base composition (A:C:G:T): 9:14:6:11
29 | GC content %: 50.00
30 | # soft-masked bases: 5
31 | # segments: 8
32 | Total segment length: 40
33 | Average segment length: 5.00
34 | # gaps: 6
35 | # paths: 5
36 | 


--------------------------------------------------------------------------------
/validateFiles/random4.fasta.16.tst:
--------------------------------------------------------------------------------
1 | testFiles/random4.fasta -s s
2 | embedded
3 | Header1	5
4 | Header2	5
5 | Header3	10
6 | Header4	15
7 | Header5	15
8 | 


--------------------------------------------------------------------------------
/validateFiles/random4.fasta.17.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random4.fasta -s c
 2 | embedded
 3 | Header1.1	5
 4 | Header2.1	3
 5 | Header2.3	1
 6 | Header3.1	3
 7 | Header3.3	2
 8 | Header3.5	2
 9 | Header4.2	12
10 | Header5.1	12
11 | 


--------------------------------------------------------------------------------
/validateFiles/random4.fasta.18.tst:
--------------------------------------------------------------------------------
1 | testFiles/random4.fasta -s g
2 | embedded
3 | Header2.2	1
4 | Header3.2	1
5 | Header3.4	1
6 | Header3.6	1
7 | Header4.1	3
8 | Header5.2	3
9 | 


--------------------------------------------------------------------------------
/validateFiles/random4.fasta.19.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random4.fasta -b a
 2 | embedded
 3 | Header1	1	5	1	W	Header1.1	1	5	+
 4 | Header2	1	3	1	W	Header2.1	1	3	+
 5 | Header2	4	4	2	N	1	Header2.2	yes
 6 | Header2	5	5	3	W	Header2.3	1	1	+
 7 | Header3	1	3	1	W	Header3.1	1	3	+
 8 | Header3	4	4	2	N	1	Header3.2	yes
 9 | Header3	5	6	3	W	Header3.3	1	2	+
10 | Header3	7	7	4	N	1	Header3.4	yes
11 | Header3	8	9	5	W	Header3.5	1	2	+
12 | Header3	10	10	6	N	1	Header3.6	yes
13 | Header4	1	3	1	N	3	Header4.1	yes
14 | Header4	4	15	2	W	Header4.2	1	12	+
15 | Header5	1	12	1	W	Header5.1	1	12	+
16 | Header5	13	15	2	N	3	Header5.2	yes
17 | 


--------------------------------------------------------------------------------
/validateFiles/random4.fasta.20.tst:
--------------------------------------------------------------------------------
1 | testFiles/random4.fasta -b s
2 | embedded
3 | Header1	0	5
4 | Header2	0	5
5 | Header3	0	10
6 | Header4	0	15
7 | Header5	0	15
8 | 


--------------------------------------------------------------------------------
/validateFiles/random4.fasta.21.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random4.fasta -b c
 2 | embedded
 3 | Header1	0	5
 4 | Header2	0	3
 5 | Header2	4	5
 6 | Header3	0	3
 7 | Header3	4	6
 8 | Header3	7	9
 9 | Header4	3	15
10 | Header5	0	12
11 | 


--------------------------------------------------------------------------------
/validateFiles/random4.fasta.22.tst:
--------------------------------------------------------------------------------
1 | testFiles/random4.fasta -b g
2 | embedded
3 | Header2	3	4
4 | Header3	3	4
5 | Header3	6	7
6 | Header3	9	10
7 | Header4	0	3
8 | Header5	12	15
9 | 


--------------------------------------------------------------------------------
/validateFiles/random4.fasta.23.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random4.fasta --homopolymer-compress 1 -ofa
 2 | embedded
 3 | >Header1 5bp sequence with no gaps and 2 lowercase bases
 4 | CGacT
 5 | >Header2 5bp sequence with internal 1bp non-canonical gap
 6 | CGANT
 7 | >Header3 10bp sequence with internal 4bp and 1bp terminal canonical gap
 8 | TGANATNCTN
 9 | >Header4 15bp sequence with start 3bp canonical gap and 3 lowercase bases
10 | NNNTCTcgCACtC
11 | >Header5 15bp sequence with terminal 3bp canonical gap
12 | ACTCGATCACGNNN
13 | 


--------------------------------------------------------------------------------
/validateFiles/random5.findovl.gfa.116.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random5.findovl.gfa --discover-terminal-overlaps 3 -ogfa
 2 | embedded
 3 | H	VN:Z:1.2
 4 | S	11	CCGTTCCATGAAGGCCAGAGTTACTTACCGGCCCTTTCCATGCGCGCGCCATAAA	LN:i:55
 5 | S	12	GATTTAAGAATATGTTAACGGAGGATTGCACGATCTTCTCTCCTCGTGAGAGAATTTATG	LN:i:60
 6 | S	13	AAATCGCATAGCTATGTATTTTGCAGAGGTAGCGACATCTTGACGGGCACTTCACAGATAGTGGG	LN:i:65
 7 | L	11	+	12	-	6M
 8 | L	11	+	13	+	3M
 9 | L	12	-	13	+	5M
10 | J	11	+	13	-	5	SC:i:1
11 | J	13	-	12	+	3	SC:i:1
12 | P	14	11+;13-;12+	*
13 | P	15	11+12-13+	*
14 | 


--------------------------------------------------------------------------------
/validateFiles/random5.findovl.gfa.66.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random5.findovl.gfa -o gfa2
 2 | embedded
 3 | H	VN:Z:2.0
 4 | S	11	55	CCGTTCCATGAAGGCCAGAGTTACTTACCGGCCCTTTCCATGCGCGCGCCATAAA	LN:i:55
 5 | S	12	60	GATTTAAGAATATGTTAACGGAGGATTGCACGATCTTCTCTCCTCGTGAGAGAATTTATG	LN:i:60
 6 | S	13	65	AAATCGCATAGCTATGTATTTTGCAGAGGTAGCGACATCTTGACGGGCACTTCACAGATAGTGGG	LN:i:65
 7 | G	gap0	11+	13-	5	SC:i:1
 8 | G	gap1	13-	12+	3	SC:i:1
 9 | O	14	11+ gap0 13- gap1 12+
10 | O	15	11+ 12- 13+
11 | 


--------------------------------------------------------------------------------
/validateFiles/random5.findovl.gfa.67.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random5.findovl.gfa -o gfa
 2 | embedded
 3 | H	VN:Z:1.2
 4 | S	11	CCGTTCCATGAAGGCCAGAGTTACTTACCGGCCCTTTCCATGCGCGCGCCATAAA	LN:i:55
 5 | S	12	GATTTAAGAATATGTTAACGGAGGATTGCACGATCTTCTCTCCTCGTGAGAGAATTTATG	LN:i:60
 6 | S	13	AAATCGCATAGCTATGTATTTTGCAGAGGTAGCGACATCTTGACGGGCACTTCACAGATAGTGGG	LN:i:65
 7 | J	11	+	13	-	5	SC:i:1
 8 | J	13	-	12	+	3	SC:i:1
 9 | P	14	11+;13-;12+	*
10 | P	15	11+12-13+	*
11 | 


--------------------------------------------------------------------------------
/validateFiles/random5.findovl.gfa.68.tst:
--------------------------------------------------------------------------------
1 | testFiles/random5.findovl.gfa -o fasta
2 | embedded
3 | >14
4 | CCGTTCCATGAAGGCCAGAGTTACTTACCGGCCCTTTCCATGCGCGCGCCATAAANNNNNCCCACTATCTGTGAAGTGCCCGTCAAGATGTCGCTACCTCTGCAAAATACATAGCTATGCGATTTNNNGATTTAAGAATATGTTAACGGAGGATTGCACGATCTTCTCTCCTCGTGAGAGAATTTATG
5 | >15
6 | CCGTTCCATGAAGGCCAGAGTTACTTACCGGCCCTTTCCATGCGCGCGCCATAAACATAAATTCTCTCACGAGGAGAGAAGATCGTGCAATCCTCCGTTAACATATTCTTAAATCAAATCGCATAGCTATGTATTTTGCAGAGGTAGCGACATCTTGACGGGCACTTCACAGATAGTGGG
7 | 


--------------------------------------------------------------------------------
/validateFiles/random6.circular.gfa.0.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random6.circular.gfa -o gfa2
 2 | embedded
 3 | H	VN:Z:2.0
 4 | S	edge_1	0	*	dp:i:32
 5 | S	edge_2	0	*	dp:i:21
 6 | S	edge_3	0	*	dp:i:0
 7 | S	edge_4	0	*	dp:i:0
 8 | S	edge_5	0	*	dp:i:2
 9 | S	edge_6	0	*	dp:i:0
10 | S	edge_7	0	*	dp:i:6
11 | S	edge_8	0	*	dp:i:20
12 | S	edge_9	0	*	dp:i:0
13 | E	edge0	edge_1	+	edge_1	+	0M	RC:i:0
14 | E	edge1	edge_1	-	edge_1	-	0M	RC:i:18
15 | E	edge2	edge_2	+	edge_2	+	0M	RC:i:0
16 | E	edge3	edge_2	-	edge_2	-	0M	RC:i:0
17 | E	edge4	edge_7	+	edge_7	+	0M	RC:i:32
18 | E	edge5	edge_7	-	edge_7	-	0M	RC:i:40
19 | O	contig_1	edge_1+
20 | O	contig_2	edge_2+
21 | O	contig_3	edge_3+
22 | O	contig_4	edge_4+
23 | O	contig_5	edge_5+
24 | O	contig_6	edge_6+
25 | O	contig_7	edge_7+
26 | O	contig_8	edge_8+
27 | O	contig_9	edge_9+
28 | 


--------------------------------------------------------------------------------
/validateFiles/random6.circular.gfa.1.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random6.circular.gfa -o gfa
 2 | embedded
 3 | H	VN:Z:1.2
 4 | S	edge_1	*	dp:i:32
 5 | S	edge_2	*	dp:i:21
 6 | S	edge_3	*	dp:i:0
 7 | S	edge_4	*	dp:i:0
 8 | S	edge_5	*	dp:i:2
 9 | S	edge_6	*	dp:i:0
10 | S	edge_7	*	dp:i:6
11 | S	edge_8	*	dp:i:20
12 | S	edge_9	*	dp:i:0
13 | L	edge_1	+	edge_1	+	0M	RC:i:0
14 | L	edge_1	-	edge_1	-	0M	RC:i:18
15 | L	edge_2	+	edge_2	+	0M	RC:i:0
16 | L	edge_2	-	edge_2	-	0M	RC:i:0
17 | L	edge_7	+	edge_7	+	0M	RC:i:32
18 | L	edge_7	-	edge_7	-	0M	RC:i:40
19 | P	contig_1	edge_1+	*
20 | P	contig_2	edge_2+	*
21 | P	contig_3	edge_3+	*
22 | P	contig_4	edge_4+	*
23 | P	contig_5	edge_5+	*
24 | P	contig_6	edge_6+	*
25 | P	contig_7	edge_7+	*
26 | P	contig_8	edge_8+	*
27 | P	contig_9	edge_9+	*
28 | 


--------------------------------------------------------------------------------
/validateFiles/random6.circular.gfa.117.tst:
--------------------------------------------------------------------------------
 1 | testFiles/random6.circular.gfa 
 2 | embedded
 3 | +++Assembly summary+++: 
 4 | # scaffolds: 9
 5 | Total scaffold length: 0
 6 | Average scaffold length: 0.00
 7 | Scaffold N50: 0
 8 | Scaffold auN: nan
 9 | Scaffold L50: 1
10 | Largest scaffold: 0
11 | Smallest scaffold: 0
12 | # contigs: 9
13 | Total contig length: 0
14 | Average contig length: 0.00
15 | Contig N50: 0
16 | Contig auN: nan
17 | Contig L50: 1
18 | Largest contig: 0
19 | Smallest contig: 0
20 | # gaps in scaffolds: 0
21 | Total gap length in scaffolds: 0
22 | Average gap length in scaffolds: 0.00
23 | Gap N50 in scaffolds: 0
24 | Gap auN in scaffolds: 0.00
25 | Gap L50 in scaffolds: 0
26 | Largest gap in scaffolds: 0
27 | Smallest gap in scaffolds: 0
28 | Base composition (A:C:G:T): 0:0:0:0
29 | GC content %: nan
30 | # soft-masked bases: 0
31 | # segments: 9
32 | Total segment length: 0
33 | Average segment length: 0.00
34 | # gaps: 0
35 | # paths: 9
36 | # edges: 6
37 | Average degree: 0.67
38 | # connected components: 3
39 | Largest connected component length: 0
40 | # dead ends: 12
41 | # disconnected components: 6
42 | Total length disconnected components: 0
43 | # separated components: 9
44 | # bubbles: 0
45 | # circular segments: 3
46 | # circular paths: 3
47 | 


--------------------------------------------------------------------------------
/validateFiles/random6.circular.gfa.2.tst:
--------------------------------------------------------------------------------
1 | testFiles/random6.circular.gfa -o fasta
2 | embedded
3 | >contig_1
4 | Error: Fasta output not possible without segment sequence. Terminating.
5 | 


--------------------------------------------------------------------------------