├── .gitignore ├── seqio ├── organism.go ├── utils.go ├── testdata │ └── NC_001422_part.fasta ├── dictionary_test.go ├── strings.go ├── contig.go ├── filetype_test.go ├── strings_test.go ├── filetype.go ├── dictionary.go ├── reference.go ├── writer.go ├── writer_test.go ├── fasta_test.go ├── scanner.go ├── date_test.go ├── origin_test.go ├── fasta.go ├── format_conversion_test.go ├── origin.go ├── genbank_subparsers_test.go ├── scanner_test.go └── date.go ├── gts_test.go ├── version.go ├── cmd ├── terminal.go ├── gts │ ├── hash.go │ ├── main.go │ ├── length.go │ ├── reverse.go │ ├── complement.go │ ├── repair.go │ ├── clear.go │ ├── join.go │ ├── rotate.go │ ├── sort.go │ ├── delete.go │ ├── define.go │ ├── annotate.go │ ├── cache.go │ └── select.go └── cache │ ├── header_test.go │ ├── header.go │ └── file.go ├── man ├── gts-seqin.7.ronn ├── gts-seqout.7.ronn ├── gts-cache-purge.1.ronn ├── gts-seqin.7 ├── gts-seqout.7 ├── gts-cache-path.1.ronn ├── gts-cache-list.1.ronn ├── gts-cache-purge.1 ├── gts-cache-path.1 ├── gts-cache-list.1 ├── index.txt ├── gts-cache.1.ronn ├── gts-length.1.ronn ├── gts-cache.1 ├── gts-length.1 ├── gts-summary.1.ronn ├── gts-modifier.7.ronn ├── gts-selector.7.ronn ├── gts-summary.1 ├── gts-reverse.1.ronn ├── gts-clear.1.ronn ├── gts-sort.1.ronn ├── gts-complement.1.ronn ├── gts-locator.7.ronn ├── gts-clear.1 ├── gts-reverse.1 ├── gts-repair.1.ronn ├── gts-sort.1 ├── gts-join.1.ronn ├── gts-complement.1 ├── gts-define.1.ronn ├── gts-repair.1 ├── gts-annotate.1.ronn ├── gts-selector.7 ├── gts-modifier.7 ├── gts-join.1 ├── gts-define.1 ├── gts-annotate.1 ├── gts-locator.7 ├── gts-split.1.ronn ├── gts-rotate.1.ronn ├── gts-pick.1.ronn ├── gts-split.1 ├── gts.1.ronn ├── gts-delete.1.ronn ├── gts-rotate.1 ├── gts-pick.1 ├── gts-extract.1.ronn ├── gts-cache.7.ronn ├── gts-select.1.ronn ├── gts-search.1.ronn ├── gts-infix.1.ronn ├── gts-cache.7 ├── gts.1 ├── gts-delete.1 ├── gts-query.1.ronn ├── gts-extract.1 ├── gts-infix.1 └── gts-search.1 ├── 
docs └── index.html ├── go.mod ├── conda ├── build.sh └── meta.yaml ├── props_test.go ├── utils.go ├── molecule.go ├── molecule_test.go ├── LICENSE ├── modifier_test.go ├── topology.go ├── topology_test.go ├── utils_test.go ├── go.sum ├── props.go ├── nucleotide_test.go ├── locator_test.go ├── .goreleaser.yml ├── locator.go ├── internal └── testutils │ └── testutils.go └── nucleotide.go /.gitignore: -------------------------------------------------------------------------------- 1 | /dist/ 2 | /man/*.html 3 | .DS_Store 4 | -------------------------------------------------------------------------------- /seqio/organism.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | // Organism represents an organism of a record. 4 | type Organism struct { 5 | Species string 6 | Name string 7 | Taxon []string 8 | } 9 | -------------------------------------------------------------------------------- /seqio/utils.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | func dig(err error) error { 4 | if v, ok := err.(interface{ Unwrap() error }); ok { 5 | return dig(v.Unwrap()) 6 | } 7 | return err 8 | } 9 | -------------------------------------------------------------------------------- /gts_test.go: -------------------------------------------------------------------------------- 1 | package gts 2 | 3 | import "encoding/json" 4 | 5 | func jsonify(v interface{}) string { 6 | p, err := json.Marshal(v) 7 | if err != nil { 8 | panic(err) 9 | } 10 | return string(p) 11 | } 12 | -------------------------------------------------------------------------------- /version.go: -------------------------------------------------------------------------------- 1 | package gts 2 | 3 | import "github.com/go-gts/flags" 4 | 5 | // Version represents the GTS software version. 
6 | var Version = flags.Version{ 7 | Major: 0, 8 | Minor: 28, 9 | Patch: 1, 10 | } 11 | -------------------------------------------------------------------------------- /seqio/testdata/NC_001422_part.fasta: -------------------------------------------------------------------------------- 1 | >NC_001422.1:2380-2512 Coliphage phi-X174, complete genome 2 | CTTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGG 3 | TTCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGT 4 | -------------------------------------------------------------------------------- /cmd/terminal.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import "github.com/mattn/go-isatty" 4 | 5 | // IsTerminal tests if the file descriptor is a terminal. 6 | func IsTerminal(fd uintptr) bool { 7 | return isatty.IsTerminal(fd) || isatty.IsCygwinTerminal(fd) 8 | } 9 | -------------------------------------------------------------------------------- /cmd/gts/hash.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/sha1" 5 | "encoding/hex" 6 | "hash" 7 | ) 8 | 9 | func newHash() hash.Hash { 10 | return sha1.New() 11 | } 12 | 13 | func encodeToString(p []byte) string { 14 | return hex.EncodeToString(p) 15 | } 16 | -------------------------------------------------------------------------------- /cmd/gts/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/go-gts/flags" 7 | "github.com/go-gts/gts" 8 | ) 9 | 10 | func main() { 11 | name, desc := "gts", "the genome transformation subprograms command line tool" 12 | os.Exit(flags.Run(name, desc, gts.Version, flags.Compile())) 13 | } 14 | -------------------------------------------------------------------------------- /man/gts-seqin.7.ronn: 
-------------------------------------------------------------------------------- 1 | # gts-seqin(7) -- input sequence formats supported in GTS. 2 | 3 | ## SYNOPSIS 4 | 5 | * `GenBank` 6 | * `FASTA` 7 | 8 | ## DESCRIPTION 9 | 10 | GTS implements parsers for a number of sequence formats, and has plans for 11 | implementing more commonly used sequence formats. 12 | 13 | ## SEE ALSO 14 | 15 | gts(1), gts-seqout(7) -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |

The page has been moved to gts.1.html

10 | 11 | -------------------------------------------------------------------------------- /man/gts-seqout.7.ronn: -------------------------------------------------------------------------------- 1 | # gts-seqout(7) -- output sequence formats supported in GTS. 2 | 3 | ## SYNOPSIS 4 | 5 | * `GenBank` 6 | * `FASTA` 7 | 8 | ## DESCRIPTION 9 | 10 | GTS implements parsers for a number of sequence formats, and has plans for 11 | implementing more commonly used sequence formats. 12 | 13 | ## SEE ALSO 14 | 15 | gts(1), gts-seqin(7) -------------------------------------------------------------------------------- /cmd/cache/header_test.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "bytes" 5 | "crypto/sha1" 6 | "testing" 7 | ) 8 | 9 | func TestHeaderFail(t *testing.T) { 10 | h := sha1.New() 11 | b := &bytes.Buffer{} 12 | b.Write(h.Sum(nil)) 13 | if _, err := ReadHeader(b, h.Size()); err == nil { 14 | t.Fatal("expected error in ReadHeader for insufficient read") 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /seqio/dictionary_test.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/go-gts/gts/internal/testutils" 7 | ) 8 | 9 | func TestPairList(t *testing.T) { 10 | d := Dictionary{} 11 | d.Set("foo", "foo") 12 | testutils.Equals(t, d.Get("foo"), []string{"foo"}) 13 | d.Set("foo", "bar") 14 | testutils.Equals(t, d.Get("foo"), []string{"bar"}) 15 | d.Del("foo") 16 | testutils.Equals(t, d.Get("foo"), []string{}) 17 | } 18 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/go-gts/gts 2 | 3 | go 1.15 4 | 5 | require ( 6 | github.com/dustin/go-humanize v1.0.0 7 | github.com/go-ascii/ascii v1.0.3 8 |
github.com/go-flip/flip v1.1.0 9 | github.com/go-gts/flags v0.0.12 10 | github.com/go-pars/pars v1.1.6 11 | github.com/go-test/deep v1.0.7 12 | github.com/go-wrap/wrap v1.0.3 13 | github.com/mattn/go-isatty v0.0.12 14 | golang.org/x/sys v0.0.0-20210514084401-e8d321eab015 // indirect 15 | ) 16 | -------------------------------------------------------------------------------- /seqio/strings.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import "strings" 4 | 5 | // FlatFileSplit splits the string with the flatfile convention. 6 | func FlatFileSplit(s string) []string { 7 | s = strings.TrimSuffix(s, ".") 8 | if len(s) == 0 { 9 | return nil 10 | } 11 | return strings.Split(s, "; ") 12 | } 13 | 14 | // AddPrefix adds the given prefix after each newline. 15 | func AddPrefix(s, prefix string) string { 16 | return strings.ReplaceAll(s, "\n", "\n"+prefix) 17 | } 18 | -------------------------------------------------------------------------------- /seqio/contig.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/go-gts/gts" 7 | ) 8 | 9 | // Contig represents a contig field. 10 | type Contig struct { 11 | Accession string 12 | Region gts.Segment 13 | } 14 | 15 | // String satisfies the fmt.Stringer interface. 
16 | func (contig Contig) String() string { 17 | if contig.Accession == "" { 18 | return "" 19 | } 20 | head, tail := gts.Unpack(contig.Region) 21 | return fmt.Sprintf("join(%s:%d..%d)", contig.Accession, head+1, tail) 22 | } 23 | -------------------------------------------------------------------------------- /man/gts-cache-purge.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-cache-purge(1) -- delete all cache files 2 | 3 | ## SYNOPSIS 4 | 5 | gts-cache-purge [--version] [-h | --help] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-cache-purge** will remove all of the files contained in the cache 10 | directory. 11 | 12 | ## OPTIONS 13 | 14 | None. 15 | 16 | ## BUGS 17 | 18 | **gts-cache-purge** currently has no known bugs. 19 | 20 | ## AUTHORS 21 | 22 | **gts-cache-purge** is written and maintained by Kotone Itaya. 23 | 24 | ## SEE ALSO 25 | 26 | gts(1), gts-cache(1), gts-cache-list(1), gts-cache-path(1), gts-cache(7) -------------------------------------------------------------------------------- /man/gts-seqin.7: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-SEQIN" "7" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-seqin\fR \- input sequence formats supported in GTS\. 8 | . 9 | .SH "SYNOPSIS" 10 | . 11 | .IP "\(bu" 4 12 | \fBGenBank\fR 13 | . 14 | .IP "\(bu" 4 15 | \fBFASTA\fR 16 | . 17 | .IP "" 0 18 | . 19 | .SH "DESCRIPTION" 20 | GTS implements parsers for a number of sequence formats, and have plans for implementing more commonly used sequence formats\. 21 | . 
22 | .SH "SEE ALSO" 23 | gts(1), gts\-seqout(7) 24 | -------------------------------------------------------------------------------- /man/gts-seqout.7: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-SEQOUT" "7" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-seqout\fR \- output sequence formats supported in GTS\. 8 | . 9 | .SH "SYNOPSIS" 10 | . 11 | .IP "\(bu" 4 12 | \fBGenBank\fR 13 | . 14 | .IP "\(bu" 4 15 | \fBFASTA\fR 16 | . 17 | .IP "" 0 18 | . 19 | .SH "DESCRIPTION" 20 | GTS implements parsers for a number of sequence formats, and have plans for implementing more commonly used sequence formats\. 21 | . 22 | .SH "SEE ALSO" 23 | gts(1), gts\-seqin(7) 24 | -------------------------------------------------------------------------------- /man/gts-cache-path.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-cache-path(1) -- print the cache directory path 2 | 3 | ## SYNOPSIS 4 | 5 | gts-cache-path [--version] [-h | --help] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-cache-path** will print the path of the directory containing the 10 | gts-cache(7) files. 11 | 12 | ## OPTIONS 13 | 14 | None. 15 | 16 | ## BUGS 17 | 18 | **gts-cache-path** currently has no known bugs. 19 | 20 | ## AUTHORS 21 | 22 | **gts-cache-path** is written and maintained by Kotone Itaya. 
23 | 24 | ## SEE ALSO 25 | 26 | gts(1), gts-cache(1), gts-cache-list(1), gts-cache-purge(1), gts-cache(7) -------------------------------------------------------------------------------- /seqio/filetype_test.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import "testing" 4 | 5 | var detectTests = []struct { 6 | in string 7 | out FileType 8 | }{ 9 | {"foo", DefaultFile}, 10 | {"foo.fasta", FastaFile}, 11 | {"foo.fastq", FastqFile}, 12 | {"foo.gb", GenBankFile}, 13 | {"foo.genbank", GenBankFile}, 14 | {"foo.emb", EMBLFile}, 15 | {"foo.embl", EMBLFile}, 16 | } 17 | 18 | func TestDetect(t *testing.T) { 19 | for _, tt := range detectTests { 20 | out := Detect(tt.in) 21 | if out != tt.out { 22 | t.Errorf("Detect(%q) = %v, want %v", tt.in, out, tt.out) 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /man/gts-cache-list.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-cache-list(1) -- list the cache files 2 | 3 | ## SYNOPSIS 4 | 5 | gts-cache-list [--version] [-h | --help] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-cache-list** will print a list of existing gts-cache(7) files, along with 10 | its file size and the total size occupied by the files. 11 | 12 | ## OPTIONS 13 | 14 | None. 15 | 16 | ## BUGS 17 | 18 | **gts-cache-list** currently has no known bugs. 19 | 20 | ## AUTHORS 21 | 22 | **gts-cache-list** is written and maintained by Kotone Itaya. 23 | 24 | ## SEE ALSO 25 | 26 | gts(1), gts-cache(1), gts-cache-path(1), gts-cache-purge(1), gts-cache(7) -------------------------------------------------------------------------------- /man/gts-cache-purge.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-CACHE\-PURGE" "1" "October 2020" "" "" 5 | . 
6 | .SH "NAME" 7 | \fBgts\-cache\-purge\fR \- delete all cache files 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-cache\-purge [\-\-version] [\-h | \-\-help] 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-cache\-purge\fR will remove all of the files contained in the cache directory\. 14 | . 15 | .SH "OPTIONS" 16 | None\. 17 | . 18 | .SH "BUGS" 19 | \fBgts\-cache\-purge\fR currently has no known bugs\. 20 | . 21 | .SH "AUTHORS" 22 | \fBgts\-cache\-purge\fR is written and maintained by Kotone Itaya\. 23 | . 24 | .SH "SEE ALSO" 25 | gts(1), gts\-cache(1), gts\-cache\-list(1), gts\-cache\-path(1), gts\-cache(7) 26 | -------------------------------------------------------------------------------- /man/gts-cache-path.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-CACHE\-PATH" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-cache\-path\fR \- print the cache directory path 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-cache\-path [\-\-version] [\-h | \-\-help] 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-cache\-path\fR will print the path of the directory containing the gts\-cache(7) files\. 14 | . 15 | .SH "OPTIONS" 16 | None\. 17 | . 18 | .SH "BUGS" 19 | \fBgts\-cache\-path\fR currently has no known bugs\. 20 | . 21 | .SH "AUTHORS" 22 | \fBgts\-cache\-path\fR is written and maintained by Kotone Itaya\. 23 | . 
24 | .SH "SEE ALSO" 25 | gts(1), gts\-cache(1), gts\-cache\-list(1), gts\-cache\-purge(1), gts\-cache(7) 26 | -------------------------------------------------------------------------------- /conda/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -ex 4 | 5 | mkdir -p "$PREFIX/bin" 6 | mkdir -p "$PREFIX/share/man/man1" 7 | mkdir -p "$PREFIX/share/man/man7" 8 | mkdir -p "$PREFIX/share/bash-completion/completions" 9 | mkdir -p "$PREFIX/share/zsh/site-functions" 10 | 11 | cp "$SRC_DIR/gts" "$PREFIX/bin" 12 | cp "$SRC_DIR/togo" "$PREFIX/bin" 13 | 14 | chmod +x "$PREFIX/bin/gts" 15 | chmod +x "$PREFIX/bin/togo" 16 | 17 | for FILE in "$SRC_DIR"/man/*.1; do 18 | cp "$FILE" "$PREFIX/share/man/man1" 19 | done 20 | 21 | for FILE in "$SRC_DIR"/man/*.7; do 22 | cp "$FILE" "$PREFIX/share/man/man7" 23 | done 24 | 25 | cp "$SRC_DIR/completion/gts-completion.bash" "$PREFIX/share/bash-completion/completions" 26 | cp "$SRC_DIR/completion/gts-completion.bash" "$PREFIX/share/zsh/site-functions" -------------------------------------------------------------------------------- /props_test.go: -------------------------------------------------------------------------------- 1 | package gts 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/go-gts/gts/internal/testutils" 7 | ) 8 | 9 | func TestProps(t *testing.T) { 10 | p := Props{} 11 | testutils.Equals(t, p.Get("foo") == nil, true) 12 | p.Add("foo", "baz") 13 | testutils.Equals(t, p.Get("foo"), []string{"baz"}) 14 | testutils.Equals(t, p.Has("foo"), true) 15 | p.Set("foo", "bar") 16 | testutils.Equals(t, p.Get("foo"), []string{"bar"}) 17 | p.Add("foo", "baz") 18 | testutils.Equals(t, p.Get("foo"), []string{"bar", "baz"}) 19 | testutils.Equals(t, p.Keys(), []string{"foo"}) 20 | testutils.Equals(t, p.Items(), []Item{{"foo", "bar"}, {"foo", "baz"}}) 21 | p.Del("foo") 22 | testutils.Equals(t, p.Get("foo") == nil, true) 23 | testutils.Equals(t, p.Has("foo"), false) 24 | } 25 | 
-------------------------------------------------------------------------------- /seqio/strings_test.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/go-gts/gts/internal/testutils" 7 | ) 8 | 9 | var flatfileSplitTests = []struct { 10 | in string 11 | out []string 12 | }{ 13 | {".", nil}, 14 | {"foo; bar.", []string{"foo", "bar"}}, 15 | } 16 | 17 | func TestFlatfileSplit(t *testing.T) { 18 | for _, tt := range flatfileSplitTests { 19 | out := FlatFileSplit(tt.in) 20 | testutils.Equals(t, out, tt.out) 21 | } 22 | } 23 | 24 | var addPrefixTests = []struct { 25 | in, out string 26 | }{ 27 | {"foo", "foo"}, 28 | {"foo\nbar", "foo\n bar"}, 29 | } 30 | 31 | func TestAddPrefix(t *testing.T) { 32 | for _, tt := range addPrefixTests { 33 | out := AddPrefix(tt.in, " ") 34 | testutils.DiffLine(t, out, tt.out) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /man/gts-cache-list.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-CACHE\-LIST" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-cache\-list\fR \- list the cache files 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-cache\-list [\-\-version] [\-h | \-\-help] 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-cache\-list\fR will print a list of existing gts\-cache(7) files, along with its file size and the total size occupied by the files\. 14 | . 15 | .SH "OPTIONS" 16 | None\. 17 | . 18 | .SH "BUGS" 19 | \fBgts\-cache\-list\fR currently has no known bugs\. 20 | . 21 | .SH "AUTHORS" 22 | \fBgts\-cache\-list\fR is written and maintained by Kotone Itaya\. 23 | . 
24 | .SH "SEE ALSO" 25 | gts(1), gts\-cache(1), gts\-cache\-path(1), gts\-cache\-purge(1), gts\-cache(7) 26 | -------------------------------------------------------------------------------- /man/index.txt: -------------------------------------------------------------------------------- 1 | gts(1) gts.1.ronn 2 | gts-annotate(1) gts-annotate.1.ronn 3 | gts-clear(1) gts-clear.1.ronn 4 | gts-complement(1) gts-complement.1.ronn 5 | gts-delete(1) gts-delete.1.ronn 6 | gts-extract(1) gts-extract.1.ronn 7 | gts-insert(1) gts-insert.1.ronn 8 | gts-length(1) gts-length.1.ronn 9 | gts-query(1) gts-query.1.ronn 10 | gts-reverse(1) gts-reverse.1.ronn 11 | gts-rotate(1) gts-rotate.1.ronn 12 | gts-search(1) gts-search.1.ronn 13 | gts-select(1) gts-select.1.ronn 14 | gts-summary(1) gts-summary.1.ronn 15 | gts-locator(7) gts-locator.7.ronn 16 | gts-modifier(7) gts-modifier.7.ronn 17 | gts-selector(7) gts-selector.7.ronn 18 | gts-seqin(7) gts-seqin.7.ronn 19 | gts-seqout(7) gts-seqout.7.ronn 20 | 21 | # external 22 | cut(1) http://man.cx/cut(1) -------------------------------------------------------------------------------- /man/gts-cache.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-cache -- manage gts cache files 2 | 3 | ## SYNOPSIS 4 | 5 | usage: gts cache [--version] [-h | --help] [] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-cache** is a command set for interacting with gts-cache(7) files. For a 10 | detailed description of how gts(1) handles caches, refer to gts-cache(7). 11 | 12 | ## COMMANDS 13 | 14 | * `gts-cache-list(1)`: 15 | List the cache files. 16 | 17 | * `gts-cache-path(1)`: 18 | Print the cache directory path. 19 | 20 | * `gts-cache-purge(1)`: 21 | Delete all cache files. 22 | 23 | ## BUGS 24 | 25 | **gts-cache** currently has no known bugs. 26 | 27 | ## AUTHORS 28 | 29 | **gts-cache** is written and maintained by Kotone Itaya.
30 | 31 | ## SEE ALSO 32 | 33 | gts(1), gts-cache-list(1), gts-cache-path(1), gts-cache-purge(1), gts-cache(7) -------------------------------------------------------------------------------- /utils.go: -------------------------------------------------------------------------------- 1 | package gts 2 | 3 | // Unpack the integer pair to its elements. 4 | func Unpack(p [2]int) (int, int) { 5 | return p[0], p[1] 6 | } 7 | 8 | const intSize = 32 << (^uint(0) >> 63) 9 | 10 | // Abs returns the absolute value of the given integer. 11 | func Abs(x int) int { 12 | y := x >> (intSize - 1) 13 | return (x ^ y) - y 14 | } 15 | 16 | // Compare the two integers and return the result. 17 | func Compare(i, j int) int { 18 | switch { 19 | case i < j: 20 | return -1 21 | case j < i: 22 | return 1 23 | default: 24 | return 0 25 | } 26 | } 27 | 28 | // Min returns the smaller integer. 29 | func Min(i, j int) int { 30 | if i < j { 31 | return i 32 | } 33 | return j 34 | } 35 | 36 | // Max returns the bigger integer. 37 | func Max(i, j int) int { 38 | if j < i { 39 | return i 40 | } 41 | return j 42 | } 43 | -------------------------------------------------------------------------------- /seqio/filetype.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import "path/filepath" 4 | 5 | // FileType represents a file type. 6 | type FileType int 7 | 8 | // Available file types in GTS. 9 | const ( 10 | DefaultFile FileType = iota 11 | FastaFile 12 | FastqFile 13 | GenBankFile 14 | EMBLFile 15 | ) 16 | 17 | // Detect returns the FileType associated to extension of the given filename. 
18 | func Detect(filename string) FileType { 19 | ext := filepath.Ext(filename) 20 | if ext != "" { 21 | ext = ext[1:] 22 | } 23 | return ToFileType(ext) 24 | } 25 | 26 | // ToFileType converts the file type name string to a FileType 27 | func ToFileType(name string) FileType { 28 | switch name { 29 | case "fasta": 30 | return FastaFile 31 | case "fastq": 32 | return FastqFile 33 | case "gb", "genbank": 34 | return GenBankFile 35 | case "emb", "embl": 36 | return EMBLFile 37 | default: 38 | return DefaultFile 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /man/gts-length.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-length -- report the length of the sequence(s) 2 | 3 | ## SYNOPSIS 4 | 5 | gts-length [--version] [-h | --help] [] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-length** takes a single sequence input and prints the length of each 10 | sequence in the given sequence file. If the sequence input is omitted, standard 11 | input will be read instead. 12 | 13 | ## OPTIONS 14 | 15 | * ``: 16 | Input sequence file (may be omitted if standard input is provided). See 17 | gts-seqin(7) for a list of currently supported sequence formats. 18 | 19 | * `--no-cache`: 20 | Do not use or create cache. See gts-cache(7) for details. 21 | 22 | * `-o `, `--output=`: 23 | Output file (specifying `-` will force standard output). 24 | 25 | ## BUGS 26 | 27 | **gts-length** currently has no known bugs. 28 | 29 | ## AUTHORS 30 | 31 | **gts-length** is written and maintained by Kotone Itaya. 32 | 33 | ## SEE ALSO 34 | 35 | gts(1), gts-seqin(7) -------------------------------------------------------------------------------- /molecule.go: -------------------------------------------------------------------------------- 1 | package gts 2 | 3 | import "fmt" 4 | 5 | // Molecule represents the sequence molecule type.
6 | type Molecule string 7 | 8 | // Counter returns the count word for the Molecule. 9 | func (mol Molecule) Counter() string { 10 | switch mol { 11 | case AA: 12 | return "residues" 13 | default: 14 | return "bases" 15 | } 16 | } 17 | 18 | // Molecule constants for DNA, RNA, and amino acid (AA). 19 | const ( 20 | DNA Molecule = "DNA" 21 | RNA Molecule = "RNA" 22 | AA Molecule = "AA" 23 | 24 | SingleStrandDNA Molecule = "ss-DNA" 25 | DoubleStrandDNA Molecule = "ds-DNA" 26 | ) 27 | 28 | // AsMolecule attempts to convert a string into a Molecule object. 29 | func AsMolecule(s string) (Molecule, error) { 30 | switch s { 31 | case "DNA": 32 | return DNA, nil 33 | case "RNA": 34 | return RNA, nil 35 | case "AA": 36 | return AA, nil 37 | case "ss-DNA": 38 | return SingleStrandDNA, nil 39 | case "ds-DNA": 40 | return DoubleStrandDNA, nil 41 | } 42 | return "", fmt.Errorf("molecule type for %q not known", s) 43 | } 44 | -------------------------------------------------------------------------------- /man/gts-cache.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-CACHE" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-cache\fR \- manage gts cache files 8 | . 9 | .SH "SYNOPSIS" 10 | usage: gts cache [\-\-version] [\-h | \-\-help] \fIcommand\fR [\fIargs\fR] 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-cache\fR is a command set for interacting with gts\-cache(7) files\. For all detailed description of how gts(1) handles caches, refer to gts\-cache(7)\. 14 | . 15 | .SH "COMMANDS" 16 | . 17 | .TP 18 | \fBgts\-cache\-list(1)\fR 19 | List the cache files\. 20 | . 21 | .TP 22 | \fBgts\-cache\-path(1)\fR 23 | Print the cache directory path\. 24 | . 25 | .TP 26 | \fBgts\-cache\-purge(1)\fR 27 | Delete all cache files\. 28 | . 29 | .SH "BUGS" 30 | \fBgts\-cache\fR currently has no known bugs\. 31 | . 
32 | .SH "AUTHORS" 33 | \fBgts\-cache\fR is written and maintained by Kotone Itaya\. 34 | . 35 | .SH "SEE ALSO" 36 | gts(1), gts\-cache\-list(1), gts\-cache\-path(1), gts\-cache\-purge(1), gts\-cache(7) 37 | -------------------------------------------------------------------------------- /seqio/dictionary.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | // Pair represents a key-value pair of strings. 4 | type Pair struct { 5 | Key string 6 | Value string 7 | } 8 | 9 | // Dictionary represents an ordered key-value pair. 10 | type Dictionary []Pair 11 | 12 | // Get the value associated to the given key. 13 | func (d *Dictionary) Get(key string) []string { 14 | ret := []string{} 15 | for _, p := range *d { 16 | if p.Key == key { 17 | ret = append(ret, p.Value) 18 | } 19 | } 20 | return ret 21 | } 22 | 23 | // Set the value associated to the given key. 24 | func (d *Dictionary) Set(key, value string) { 25 | for i, p := range *d { 26 | if p.Key == key { 27 | (*d)[i].Value = value 28 | return 29 | } 30 | } 31 | *d = append(*d, Pair{key, value}) 32 | } 33 | 34 | // Del removes the value associated to the given key. 
35 | func (d *Dictionary) Del(key string) { 36 | for i, p := range *d { 37 | if p.Key == key { 38 | copy((*d)[i:], (*d)[i+1:]) 39 | (*d)[len(*d)-1] = Pair{} 40 | (*d) = (*d)[:len(*d)-1] 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /molecule_test.go: -------------------------------------------------------------------------------- 1 | package gts 2 | 3 | import "testing" 4 | 5 | var moleculeCounterTests = []struct { 6 | in Molecule 7 | out string 8 | }{ 9 | {DNA, "bases"}, 10 | {RNA, "bases"}, 11 | {AA, "residues"}, 12 | {SingleStrandDNA, "bases"}, 13 | {DoubleStrandDNA, "bases"}, 14 | } 15 | 16 | func TestMoleculeCounter(t *testing.T) { 17 | for _, tt := range moleculeCounterTests { 18 | out := tt.in.Counter() 19 | if out != tt.out { 20 | t.Errorf("%q.Counter = %q, want %q", tt.in, out, tt.out) 21 | } 22 | } 23 | } 24 | 25 | var asMoleculeTests = []Molecule{ 26 | DNA, 27 | RNA, 28 | AA, 29 | SingleStrandDNA, 30 | DoubleStrandDNA, 31 | } 32 | 33 | func TestAsMolecule(t *testing.T) { 34 | for _, in := range asMoleculeTests { 35 | out, err := AsMolecule(string(in)) 36 | if err != nil { 37 | t.Errorf("AsMolecule(%q): %v", string(in), err) 38 | } 39 | if out != in { 40 | t.Errorf("AsMolecule(%q) = %q, expected %q", string(in), out, string(in)) 41 | } 42 | } 43 | 44 | _, err := AsMolecule("") 45 | if err == nil { 46 | t.Errorf("expected error in AsMolecule(%q)", "") 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 ktnyt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, 
and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /conda/meta.yaml: -------------------------------------------------------------------------------- 1 | {% set version = "0.26.13" %} 2 | 3 | package: 4 | name: gts-bio 5 | version: {{ version }} 6 | 7 | source: 8 | url: https://github.com/go-gts/gts/releases/download/v{{ version }}/gts_{{ version }}_Darwin_x86_64.tar.gz # [osx] 9 | sha256: 37dfc09c775f6fa0202b6bde9b0b09034cf67ee480c976e7a7e931448417a5a3 # [osx] 10 | 11 | url: https://github.com/go-gts/gts/releases/download/v{{ version }}/gts_{{ version }}_Linux_x86_64.tar.gz # [linux64] 12 | sha256: 0bd0d204b1d7b47c24ba6fa896eb26fc567287b15b88b4c2bfc9dcabe29ca038 # [linux64] 13 | 14 | url: https://github.com/go-gts/gts/releases/download/v{{ version }}/gts_{{ version }}_Linux_i386.tar.gz # [linux32] 15 | sha256: 2c35f4d14a0651d9b13b2e669860b56832cc37fe6594446c9430015bbcd4ba57 # [linux32] 16 | 17 | build: 18 | number: 0 19 | skip: False 20 | 21 | test: 22 | commands: 23 | - gts --version 24 | - togo --version 25 | 26 | about: 27 | home: https://github.com/go-gts/gts 28 | summary: Genome Transformation Subprograms 29 | license: MIT 30 | 31 | extra: 32 | 
skip-lints: 33 | - should_be_noarch_generic 34 | - should_not_be_noarch_source 35 | -------------------------------------------------------------------------------- /man/gts-length.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-LENGTH" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-length\fR \- report the length of the sequence(s) 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-length [\-\-version] [\-h | \-\-help] [\fIargs\fR] \fIseqin\fR 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-length\fR takes a single sequence input and prints the length of each sequence in the given sequence file\. If the sequence input is ommited, standard input will be read instead\. 14 | . 15 | .SH "OPTIONS" 16 | . 17 | .TP 18 | \fB\fR 19 | Input sequence file (may be omitted if standard input is provided)\. See gts\-seqin(7) for a list of currently supported list of sequence formats\. 20 | . 21 | .TP 22 | \fB\-\-no\-cache\fR 23 | Do not use or create cache\. See gts\-cache(7) for details\. 24 | . 25 | .TP 26 | \fB\-o \fR, \fB\-\-output=\fR 27 | Output file (specifying \fB\-\fR will force standard output)\. 28 | . 29 | .SH "BUGS" 30 | \fBgts\-length\fR currently has no known bugs\. 31 | . 32 | .SH "AUTHORS" 33 | \fBgts\-length\fR is written and maintained by Kotone Itaya\. 34 | . 
35 | .SH "SEE ALSO" 36 | gts(1), gts\-seqin(7) 37 | -------------------------------------------------------------------------------- /seqio/reference.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/go-gts/gts" 7 | "github.com/go-pars/pars" 8 | ) 9 | 10 | func parseReferenceInfo(s string) pars.Parser { 11 | prefix := fmt.Sprintf("(%s ", s) 12 | parser := pars.Seq(pars.Int, " to ", pars.Int).Map(func(result *pars.Result) error { 13 | start := result.Children[0].Value.(int) - 1 14 | end := result.Children[2].Value.(int) 15 | result.SetValue(gts.Range(start, end)) 16 | return nil 17 | }) 18 | return pars.Seq( 19 | prefix, 20 | parser, 21 | pars.Many(pars.Seq("; ", parser).Child(1)), 22 | ')', 23 | ).Map(func(result *pars.Result) error { 24 | head := result.Children[1].Value.(gts.Ranged) 25 | tail := result.Children[2].Children 26 | locs := make([]gts.Ranged, len(tail)+1) 27 | locs[0] = head 28 | for i, r := range tail { 29 | locs[i+1] = r.Value.(gts.Ranged) 30 | } 31 | result.SetValue(locs) 32 | return nil 33 | }) 34 | } 35 | 36 | // Reference represents a reference of a record. 37 | type Reference struct { 38 | Number int 39 | Info string 40 | Authors string 41 | Group string 42 | Title string 43 | Journal string 44 | Xref map[string]string 45 | Comment string 46 | } 47 | -------------------------------------------------------------------------------- /man/gts-summary.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-summary(1) -- report a brief summary of the sequence(s) 2 | 3 | ## SYNOPSIS 4 | 5 | gts-summary [--version] [-h | --help] [] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-summary** takes a single sequence input and returns a brief summary of 10 | its contents. If the sequence input is omitted, standard input will be read 11 | instead.
By default, it will report the description, length, sequence 12 | composition, feature counts, and qualifier counts. Use gts-query(1) to retrieve 13 | more elaborate information of features. 14 | 15 | ## OPTIONS 16 | 17 | * ``: 18 | Input sequence file (may be omitted if standard input is provided). See 19 | gts-seqin(7) for a list of currently supported sequence formats. 20 | 21 | * `-F`, `--no-feature`: 22 | Suppress feature summary. 23 | 24 | * `--no-cache`: 25 | Do not use or create cache. 26 | 27 | * `-o `, `--output=`: 28 | Output file (specifying `-` will force standard output). 29 | 30 | * `-Q`, `--no-qualifier`: 31 | Suppress qualifier summary. 32 | 33 | 34 | ## BUGS 35 | 36 | **gts-summary** currently has no known bugs. 37 | 38 | ## AUTHORS 39 | 40 | **gts-summary** is written and maintained by Kotone Itaya. 41 | 42 | ## SEE ALSO 43 | 44 | gts(1), gts-query(1), gts-seqin(7) -------------------------------------------------------------------------------- /seqio/writer.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | 7 | "github.com/go-gts/gts" 8 | ) 9 | 10 | type SeqWriter interface { 11 | WriteSeq(seq gts.Sequence) (int, error) 12 | } 13 | 14 | type AutoWriter struct { 15 | w io.Writer 16 | sw SeqWriter 17 | } 18 | 19 | func NewWriter(w io.Writer, filetype FileType) SeqWriter { 20 | switch filetype { 21 | case FastaFile: 22 | return FastaWriter{w} 23 | case GenBankFile: 24 | return GenBankWriter{w} 25 | default: 26 | return AutoWriter{w, nil} 27 | } 28 | } 29 | 30 | func detectWriter(seq gts.Sequence, w io.Writer) (SeqWriter, error) { 31 | switch seq.(type) { 32 | case GenBank, *GenBank: 33 | return GenBankWriter{w}, nil 34 | case Fasta, *Fasta: 35 | return FastaWriter{w}, nil 36 | default: 37 | switch info := seq.Info().(type) { 38 | case GenBankFields: 39 | return GenBankWriter{w}, nil 40 | case string, fmt.Stringer: 41 | return FastaWriter{w}, nil 42 |
default: 43 | return nil, fmt.Errorf("gts does not know how to format a sequence with metadata type `%T`", info) 44 | } 45 | } 46 | } 47 | 48 | func (w AutoWriter) WriteSeq(seq gts.Sequence) (int, error) { 49 | if w.sw == nil { 50 | sw, err := detectWriter(seq, w.w) 51 | if err != nil { 52 | return 0, err 53 | } 54 | w.sw = sw 55 | } 56 | return w.sw.WriteSeq(seq) 57 | } 58 | -------------------------------------------------------------------------------- /modifier_test.go: -------------------------------------------------------------------------------- 1 | package gts 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | var asModifierTests = []struct { 8 | in string 9 | out Modifier 10 | }{ 11 | {"^", Head(0)}, 12 | {"^+42", Head(42)}, 13 | {"^-42", Head(-42)}, 14 | 15 | {"$", Tail(0)}, 16 | {"$+42", Tail(42)}, 17 | {"$-42", Tail(-42)}, 18 | 19 | {"^..$", HeadTail{0, 0}}, 20 | {"^+1..$+1", HeadTail{+1, +1}}, 21 | {"^-1..$+1", HeadTail{-1, +1}}, 22 | {"^-1..$-1", HeadTail{-1, -1}}, 23 | 24 | {"^..^", HeadHead{0, 0}}, 25 | {"^+1..^+1", HeadHead{+1, +1}}, 26 | {"^-1..^+1", HeadHead{-1, +1}}, 27 | {"^-1..^-1", HeadHead{-1, -1}}, 28 | 29 | {"$..$", TailTail{0, 0}}, 30 | {"$+1..$+1", TailTail{+1, +1}}, 31 | {"$-1..$+1", TailTail{-1, +1}}, 32 | {"$-1..$-1", TailTail{-1, -1}}, 33 | } 34 | 35 | var asModifierFailTests = []string{ 36 | "", 37 | "^-2..0", 38 | "$..^", 39 | } 40 | 41 | func TestAsModifier(t *testing.T) { 42 | for _, tt := range asModifierTests { 43 | out, err := AsModifier(tt.in) 44 | if err != nil { 45 | t.Errorf("AsModifier(%q): %v", tt.in, err) 46 | continue 47 | } 48 | if out.String() != tt.out.String() { 49 | t.Errorf("AsModifier(%q) = %q, want %q", tt.in, out, tt.out) 50 | } 51 | } 52 | 53 | for _, in := range asModifierFailTests { 54 | if _, err := AsModifier(in); err == nil { 55 | t.Errorf("expected error in AsModifier(%q)", in) 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /cmd/cache/header.go: 
-------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "fmt" 7 | "io" 8 | ) 9 | 10 | // Header represents a cache header. 11 | type Header struct { 12 | RootSum []byte 13 | DataSum []byte 14 | BodySum []byte 15 | } 16 | 17 | // ReadHeader reads size*3 bytes from r and splits them, in order, into the 18 | // RootSum, DataSum and BodySum of a Header (size bytes each). It returns an 19 | // error if r fails or ends before all of the bytes have been read. 20 | func ReadHeader(r io.Reader, size int) (Header, error) { 21 | p := make([]byte, size*3) 22 | 23 | // A single r.Read may return fewer bytes than requested without an error, 24 | // so use io.ReadFull, which keeps reading until p is full. 25 | _, err := io.ReadFull(r, p) 26 | if errors.Is(err, io.ErrUnexpectedEOF) { 27 | return Header{}, errors.New("could not read sufficient bytes in header") 28 | } 29 | if err != nil { 30 | return Header{}, fmt.Errorf("while reading header: %v", err) 31 | } 32 | 33 | i, j := size, size*2 34 | return Header{p[:i], p[i:j], p[j:]}, nil 35 | } 36 | 37 | // Validate checks for the hash value integrity. 38 | func (h Header) Validate(rsum, dsum, bsum []byte) error { 39 | if !bytes.Equal(rsum, h.RootSum) { 40 | return errors.New("root hash sum mismatch") 41 | } 42 | if !bytes.Equal(dsum, h.DataSum) { 43 | return errors.New("data hash sum mismatch") 44 | } 45 | if !bytes.Equal(bsum, h.BodySum) { 46 | return errors.New("body hash sum mismatch") 47 | } 48 | return nil 49 | } 50 | 51 | // WriteTo writes the hash values to the given io.Writer. 52 | func (h Header) WriteTo(w io.Writer) (int64, error) { 53 | // Build the output in a fresh buffer: appending to h.RootSum directly could 54 | // write into its spare capacity and clobber bytes sharing that array. 55 | p := make([]byte, 0, len(h.RootSum)+len(h.DataSum)+len(h.BodySum)) 56 | p = append(p, h.RootSum...) 57 | p = append(p, h.DataSum...) 58 | p = append(p, h.BodySum...) 59 | n, err := w.Write(p) 60 | return int64(n), err 61 | } 62 | -------------------------------------------------------------------------------- /topology.go: -------------------------------------------------------------------------------- 1 | package gts 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | ) 7 | 8 | // Topology represents the sequence topology. 9 | type Topology int 10 | 11 | const ( 12 | // Linear represents a linear sequence. 13 | Linear Topology = iota 14 | 15 | // Circular represents a circular sequence.
16 | Circular 17 | ) 18 | 19 | // AsTopology converts a string to a Topology object. 20 | func AsTopology(s string) (Topology, error) { 21 | switch strings.ToLower(s) { 22 | case "linear": 23 | return Linear, nil 24 | case "circular": 25 | return Circular, nil 26 | default: 27 | return Topology(-1), fmt.Errorf("unknown topology: %q", s) 28 | } 29 | } 30 | 31 | // String satisfies the fmt.Stringer interface. 32 | func (t Topology) String() string { 33 | switch t { 34 | case Linear: 35 | return "linear" 36 | case Circular: 37 | return "circular" 38 | default: 39 | return "" 40 | } 41 | } 42 | 43 | type withTopology interface { 44 | WithTopology(t Topology) Sequence 45 | } 46 | 47 | // WithTopology returns the result of calling the sequence's own 48 | // `WithTopology(t Topology) Sequence` method if it implements one. Otherwise 49 | // the given sequence is returned as is, without any change of topology. 50 | func WithTopology(seq Sequence, t Topology) Sequence { 51 | switch v := seq.(type) { 52 | case withTopology: 53 | return v.WithTopology(t) 54 | default: 55 | return seq 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /man/gts-modifier.7.ronn: -------------------------------------------------------------------------------- 1 | ## gts-modifier(7) -- patterns for modifying sequence locations 2 | 3 | 4 | ## SYNOPSIS 5 | 6 | ^[(+|-)n] 7 | $[(+|-)m] 8 | ^[(+|-)n]..$[(+|-)m] 9 | ^[(+|-)n]..^[(+|-)m] 10 | $[(+|-)n]..$[(+|-)m] 11 | 12 | ## DESCRIPTION 13 | 14 | **gts-modifier**s are patterns for modifying locations within a sequence. A 15 | _modifier_ can take one of five forms: `^[(+|-)n]`, `$[[(+|-)m]]`, 16 | `^[(+|-)n]..$[(+|-)m]`, `^[(+|-)n]..^[(+|-)m]`, or `$[(+|-)n]..$[(+|-)m]`. 17 | A caret `^` character denotes the beginning of the location and a dollar `$` 18 | character denotes the end of the location.
The numbers following these 19 | characters denote the offset of the position, where a negative number 20 | represents the 5' region and a positive number represents the 3' region. The 21 | first two forms of the _modifier_ will return a singular point location and the 22 | latter three forms will return a modified range location. The positions and 23 | offset values will be flipped for complement locations. 24 | 25 | ## EXAMPLES 26 | 27 | Collapse the location to the start of the region: 28 | 29 | ^ 30 | 31 | Collapse the location to the end of the region: 32 | 33 | $ 34 | 35 | Leave the entire region as is: 36 | 37 | ^..$ 38 | 39 | Extend the region 20 bases upstream: 40 | 41 | ^-20..$ 42 | 43 | Focus the 20 bases around the end of the region: 44 | 45 | $-20..$+20 46 | 47 | ## SEE ALSO 48 | 49 | gts(1), gts-extract(1), gts-locator(7) -------------------------------------------------------------------------------- /man/gts-selector.7.ronn: -------------------------------------------------------------------------------- 1 | ## gts-selector(7) -- patterns to select sequence features 2 | 3 | ## SYNOPSIS 4 | 5 | [feature_key][/[qualifier1][=regexp1]][/[qualifier2][=regexp2]]... 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-selector**s are patterns for selecting sequence features that match the 10 | given _selector_. A _selector_ consists of a single feature key and/or multiple 11 | qualifier matchers. A feature key must currently be a perfect match 12 | (case sensitive) and if omitted all feature keys will match. A qualifier 13 | matcher has two parts: a qualifier name and a regular expression delimited by 14 | the `=` sign. The qualifier name must currently be a perfect match (case 15 | sensitive) and if omitted all qualifier names will match. The regular 16 | expression will be tested against the contents of the qualifier value. If 17 | omitted, any features that has the qualifier with the given qualifier name will 18 | match. 
19 | 20 | ## EXAMPLES 21 | 22 | Select all `gene` features: 23 | 24 | gene 25 | 26 | Select all `CDS` features that produce a DNA-binding `product`: 27 | 28 | CDS/product=DNA-binding 29 | 30 | Select all features with `locus_tag` of `b0001`: 31 | 32 | /locus_tag=b0001 33 | 34 | Select all features with the qualifier `translation`: 35 | 36 | /translation 37 | 38 | Select all features with a qualifier value matching `recombinase` 39 | 40 | /=recombinase 41 | 42 | ## SEE ALSO 43 | 44 | gts(1), gts-select(1), gts-locator(7) -------------------------------------------------------------------------------- /topology_test.go: -------------------------------------------------------------------------------- 1 | package gts 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/go-gts/gts/internal/testutils" 7 | ) 8 | 9 | var topologyTests = []Topology{ 10 | Linear, 11 | Circular, 12 | } 13 | 14 | func TestTopology(t *testing.T) { 15 | for _, in := range topologyTests { 16 | s := in.String() 17 | out, err := AsTopology(s) 18 | if err != nil { 19 | t.Errorf("AsTopology(%q): %v", s, err) 20 | } 21 | if in != out { 22 | t.Errorf("AsTopology(%q) = %q, expected %q", in.String(), out.String(), in.String()) 23 | } 24 | } 25 | } 26 | 27 | func TestTopologyFail(t *testing.T) { 28 | in := "" 29 | out, err := AsTopology(in) 30 | if err == nil { 31 | t.Errorf("expected error in AsTopology(%q)", in) 32 | } 33 | if out.String() != "" { 34 | t.Errorf("Topology(%d).String() = %q, expected %q", out, out.String(), in) 35 | } 36 | } 37 | 38 | func (wt seqWithTest) WithTopology(t Topology) Sequence { 39 | if _, ok := wt.info.(Topology); ok { 40 | return wt.WithInfo(t) 41 | } 42 | return wt 43 | } 44 | 45 | var withTopologyTests = []struct { 46 | in Sequence 47 | out Sequence 48 | }{ 49 | {New(nil, nil, nil), New(nil, nil, nil)}, 50 | {newSeqWithTest(nil, nil, nil), newSeqWithTest(nil, nil, nil)}, 51 | {newSeqWithTest(Linear, nil, nil), newSeqWithTest(Circular, nil, nil)}, 52 | } 53 | 54 | func 
TestWithTopology(t *testing.T) { 55 | for _, tt := range withTopologyTests { 56 | out := WithTopology(tt.in, Circular) 57 | testutils.Equals(t, out, tt.out) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /man/gts-summary.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-SUMMARY" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-summary\fR \- report a brief summary of the sequence(s) 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-summary [\-\-version] [\-h | \-\-help] [\fIargs\fR] \fIseqin\fR 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-summary\fR takes a single sequence input and returns a brief summary of its contents\. If the sequence input is ommited, standard input will be read instead\. By defalt, it will report the description, length, sequence composition, feature counts, and qualifier counts\. Use gts\-query(1) to retrieve more elaborate information of features\. 14 | . 15 | .SH "OPTIONS" 16 | . 17 | .TP 18 | \fB\fR 19 | Input sequence file (may be omitted if standard input is provided)\. See gts\-seqin(7) for a list of currently supported list of sequence formats\. 20 | . 21 | .TP 22 | \fB\-F\fR, \fB\-\-no\-feature\fR 23 | Suppress feature summary\. 24 | . 25 | .TP 26 | \fB\-\-no\-cache\fR 27 | Do not use or create cache\. 28 | . 29 | .TP 30 | \fB\-o \fR, \fB\-\-output=\fR 31 | Output file (specifying \fB\-\fR will force standard output)\. 32 | . 33 | .TP 34 | \fB\-Q\fR, \fB\-\-no\-qualifier\fR 35 | Suppress qualifier summary\. 36 | . 37 | .SH "BUGS" 38 | \fBgts\-summary\fR currently has no known bugs\. 39 | . 40 | .SH "AUTHORS" 41 | \fBgts\-summary\fR is written and maintained by Kotone Itaya\. 42 | . 
43 | .SH "SEE ALSO" 44 | gts(1), gts\-query(1), gts\-seqin(7) 45 | -------------------------------------------------------------------------------- /man/gts-reverse.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-reverse(1) -- reverse order of the given sequence(s) 2 | 3 | ## SYNOPSIS 4 | 5 | gts-reverse [--version] [-h | --help] [] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-reverse** takes a single sequence input and reverses the sequence. Any 10 | features present in the sequence will be relocated to match the reversed 11 | location. This command _will not_ complement the sequence. To obtain the 12 | complemented sequence, use **gts-complement(1)**. 13 | 14 | ## OPTIONS 15 | 16 | * ``: 17 | Input sequence file (may be omitted if standard input is provided). See 18 | gts-seqin(7) for a list of currently supported sequence formats. 19 | 20 | * `-F `, `--format=`: 21 | Output file format (defaults to same as input). See gts-seqout(7) for a 22 | list of currently supported sequence formats. The format specified 23 | with this option will override the file type detection from the output 24 | filename. 25 | 26 | * `--no-cache`: 27 | Do not use or create cache. See gts-cache(7) for details. 28 | 29 | * `-o `, `--output=`: 30 | Output sequence file (specifying `-` will force standard output). The 31 | output file format will be automatically detected from the filename if none 32 | is specified with the `-F` or `--format` option. 33 | 34 | ## BUGS 35 | 36 | **gts-reverse** currently has no known bugs. 37 | 38 | ## AUTHORS 39 | 40 | **gts-reverse** is written and maintained by Kotone Itaya.
41 | 42 | ## SEE ALSO 43 | 44 | gts(1), gts-complement(1), gts-seqin(7), gts-seqout(7) -------------------------------------------------------------------------------- /seqio/writer_test.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/go-gts/gts" 8 | "github.com/go-gts/gts/internal/testutils" 9 | ) 10 | 11 | var writerTests = []struct { 12 | filename string 13 | filetype FileType 14 | }{ 15 | {"NC_001422.fasta", FastaFile}, 16 | {"NC_001422.fasta", DefaultFile}, 17 | {"NC_001422.gb", GenBankFile}, 18 | {"NC_001422.gb", DefaultFile}, 19 | } 20 | 21 | func TestWriter(t *testing.T) { 22 | for _, tt := range writerTests { 23 | in := testutils.ReadTestfile(t, tt.filename) 24 | scanner := NewAutoScanner(strings.NewReader(in)) 25 | if !scanner.Scan() { 26 | t.Errorf("failed to scan test file %s", tt.filename) 27 | } 28 | 29 | seq := scanner.Value() 30 | 31 | w := &strings.Builder{} 32 | n, err := NewWriter(w, tt.filetype).WriteSeq(seq) 33 | if n != len(in) || err != nil { 34 | t.Errorf("writer.WriteSeq(seq) = (%d, %v), want (%d, nil)", n, err, len(in)) 35 | } 36 | testutils.DiffLine(t, w.String(), in) 37 | 38 | seq = gts.New(seq.Info(), seq.Features(), seq.Bytes()) 39 | 40 | w.Reset() 41 | n, err = NewWriter(w, tt.filetype).WriteSeq(seq) 42 | if n != len(in) || err != nil { 43 | t.Errorf("writer.WriteSeq(seq) = (%d, %v), want (%d, nil)", n, err, len(in)) 44 | } 45 | testutils.DiffLine(t, w.String(), in) 46 | } 47 | } 48 | 49 | func TestWriterFail(t *testing.T) { 50 | w := &strings.Builder{} 51 | n, err := NewWriter(w, DefaultFile).WriteSeq(gts.New(nil, nil, nil)) 52 | if n != 0 || err == nil { 53 | t.Errorf("writer.WriteSeq(seq) = (%d, nil), want (0, error)", n) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /man/gts-clear.1.ronn: 
-------------------------------------------------------------------------------- 1 | # gts-clear(1) -- remove all features from the sequence (excluding source features) 2 | 3 | ## SYNOPSIS 4 | 5 | gts-clear [--version] [-h | --help] [] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-clear** takes a single sequence file input and strips off all features 10 | except for the `source` features which are mandatory in GenBank. If the 11 | sequence input is omitted, standard input will be read instead. This command 12 | is equivalent to running `gts select source `. 13 | 14 | ## OPTIONS 15 | 16 | * ``: 17 | Input sequence file (may be omitted if standard input is provided). See 18 | gts-seqin(7) for a list of currently supported sequence formats. 19 | 20 | * `-F `, `--format=`: 21 | Output file format (defaults to same as input). See gts-seqout(7) for a 22 | list of currently supported sequence formats. The format specified 23 | with this option will override the file type detection from the output 24 | filename. 25 | 26 | * `--no-cache`: 27 | Do not use or create cache. See gts-cache(7) for details. 28 | 29 | * `-o `, `--output=`: 30 | Output sequence file (specifying `-` will force standard output). The 31 | output file format will be automatically detected from the filename if none 32 | is specified with the `-F` or `--format` option. 33 | 34 | ## BUGS 35 | 36 | **gts-clear** currently has no known bugs. 37 | 38 | ## AUTHORS 39 | 40 | **gts-clear** is written and maintained by Kotone Itaya.
41 | 42 | ## SEE ALSO 43 | 44 | gts(1), gts-seqin(7), gts-seqout(7) -------------------------------------------------------------------------------- /utils_test.go: -------------------------------------------------------------------------------- 1 | package gts 2 | 3 | import "testing" 4 | 5 | func TestUnpack(t *testing.T) { 6 | a, b := 39, 42 7 | p := [2]int{a, b} 8 | x, y := Unpack(p) 9 | if a != x || b != y { 10 | t.Errorf("Unpack(%v) = (%d, %d), want (%d, %d)", p, x, y, a, b) 11 | } 12 | } 13 | 14 | var absTests = [][2]int{ 15 | {-42, 42}, 16 | {42, 42}, 17 | } 18 | 19 | func TestAbs(t *testing.T) { 20 | for _, tt := range absTests { 21 | in, exp := Unpack(tt) 22 | out := Abs(in) 23 | if out != exp { 24 | t.Errorf("Abs(%d) = %d, want %d", in, out, exp) 25 | } 26 | } 27 | } 28 | 29 | var compareTests = []struct { 30 | i, j int 31 | out int 32 | }{ 33 | {39, 42, -1}, 34 | {42, 39, 1}, 35 | {42, 42, 0}, 36 | } 37 | 38 | func TestCompare(t *testing.T) { 39 | for _, tt := range compareTests { 40 | out := Compare(tt.i, tt.j) 41 | if out != tt.out { 42 | t.Errorf("Compare(%d, %d) = %d, want %d", tt.i, tt.j, out, tt.out) 43 | } 44 | } 45 | } 46 | 47 | var minTests = []struct { 48 | i, j int 49 | out int 50 | }{ 51 | {39, 42, 39}, 52 | {42, 39, 39}, 53 | } 54 | 55 | func TestMin(t *testing.T) { 56 | for _, tt := range minTests { 57 | out := Min(tt.i, tt.j) 58 | if out != tt.out { 59 | t.Errorf("Min(%d, %d) = %d, want %d", tt.i, tt.j, out, tt.out) 60 | } 61 | } 62 | } 63 | 64 | var maxTests = []struct { 65 | i, j int 66 | out int 67 | }{ 68 | {39, 42, 42}, 69 | {42, 39, 42}, 70 | } 71 | 72 | func TestMax(t *testing.T) { 73 | for _, tt := range maxTests { 74 | out := Max(tt.i, tt.j) 75 | if out != tt.out { 76 | t.Errorf("Max(%d, %d) = %d, want %d", tt.i, tt.j, out, tt.out) 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /man/gts-sort.1.ronn: 
-------------------------------------------------------------------------------- 1 | # gts-sort(1) -- sort the list of sequences 2 | 3 | ## SYNOPSIS 4 | 5 | gts-sort [--version] [-h | --help] [] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-sort** takes a single sequence input and sorts the sequences. If the 10 | sequence input is omitted, standard input will be read instead. By default, the 11 | sequences will be sorted from longest to shortest. It is not advisable to use 12 | this command on files with large numbers of sequences. 13 | 14 | ## OPTIONS 15 | 16 | * ``: 17 | Input sequence file (may be omitted if standard input is provided). See 18 | gts-seqin(7) for a list of currently supported sequence formats. 19 | 20 | * `-F `, `--format=`: 21 | Output file format (defaults to same as input). See gts-seqout(7) for a 22 | list of currently supported sequence formats. The format specified 23 | with this option will override the file type detection from the output 24 | filename. 25 | 26 | * `--no-cache`: 27 | Do not use or create cache. See gts-cache(7) for details. 28 | 29 | * `-o `, `--output=`: 30 | Output sequence file (specifying `-` will force standard output). The 31 | output file format will be automatically detected from the filename if none 32 | is specified with the `-F` or `--format` option. 33 | 34 | * `-r`, `--reverse`: 35 | Reverse the sort order. 36 | 37 | ## BUGS 38 | 39 | **gts-sort** currently has no known bugs. 40 | 41 | ## AUTHORS 42 | 43 | **gts-sort** is written and maintained by Kotone Itaya.
44 | 45 | ## SEE ALSO 46 | 47 | gts(1), gts-pick(1), gts-seqin(7), gts-seqout(7) -------------------------------------------------------------------------------- /man/gts-complement.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-complement(1) -- compute the complement of the given sequence(s) 2 | 3 | ## SYNOPSIS 4 | 5 | gts-complement [--version] [-h | --help] [] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-complement** takes a single sequence input and returns the complemented 10 | sequence as output. If the sequence input is omitted, standard input will be 11 | read instead. Any features present in the sequence will be relocated to the 12 | complement strand. This command _will not_ reverse the sequence. To obtain 13 | the reversed sequence, use **gts-reverse(1)**. 14 | 15 | ## OPTIONS 16 | 17 | * ``: 18 | Input sequence file (may be omitted if standard input is provided). See 19 | gts-seqin(7) for a list of currently supported sequence formats. 20 | 21 | * `-F `, `--format=`: 22 | Output file format (defaults to same as input). See gts-seqout(7) for a 23 | list of currently supported sequence formats. The format specified 24 | with this option will override the file type detection from the output 25 | filename. 26 | 27 | * `--no-cache`: 28 | Do not use or create cache. See gts-cache(7) for details. 29 | 30 | * `-o `, `--output=`: 31 | Output sequence file (specifying `-` will force standard output). The 32 | output file format will be automatically detected from the filename if none 33 | is specified with the `-F` or `--format` option. 34 | 35 | ## BUGS 36 | 37 | **gts-complement** currently has no known bugs. 38 | 39 | ## AUTHORS 40 | 41 | **gts-complement** is written and maintained by Kotone Itaya.
42 | 43 | ## SEE ALSO 44 | 45 | gts(1), gts-reverse(1), gts-seqin(7), gts-seqout(7) -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= 2 | github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= 3 | github.com/go-ascii/ascii v1.0.3 h1:PzqTidYM5B8bS8BHByjFUHq95+O/R1JD1Y07KXuxUFM= 4 | github.com/go-ascii/ascii v1.0.3/go.mod h1:dunT2OpwwyuEoHXprC876gNQaxKFFYqoaRZfjVLIGb4= 5 | github.com/go-flip/flip v1.1.0 h1:vMRDlGxaPeyf66fI/YZ928qqQCWLoWdJjSCqG7iuy4s= 6 | github.com/go-flip/flip v1.1.0/go.mod h1:TMjs2G9lS33sa+kDEPMmqBY35V9j94Hks0fj7uJUoEU= 7 | github.com/go-gts/flags v0.0.12 h1:OBtZggDZdeOhx2BRWHj/Ep9jgpDNXllxB0+YHIOx35U= 8 | github.com/go-gts/flags v0.0.12/go.mod h1:zIYMn9mLU1N+tXa3rrKYfrx33cT+aL7QKcx83G23EZk= 9 | github.com/go-pars/pars v1.1.6 h1:Ahi6G+N4Dka8zN2bnlFNwx+rqTYBm5sxB555uAXLQAo= 10 | github.com/go-pars/pars v1.1.6/go.mod h1:CoFQeW1ZswG9tHpBxfN1cLdEp6AI1q/iF2izSJmPMG0= 11 | github.com/go-test/deep v1.0.7 h1:/VSMRlnY/JSyqxQUzQLKVMAskpY/NZKFA5j2P+0pP2M= 12 | github.com/go-test/deep v1.0.7/go.mod h1:QV8Hv/iy04NyLBxAdO9njL0iVPN1S4d/A3NVv1V36o8= 13 | github.com/go-wrap/wrap v1.0.3 h1:RU0jS4l4s+fvcwS78EyK2GifQ30DsE7qQPj2GKwvuIc= 14 | github.com/go-wrap/wrap v1.0.3/go.mod h1:kL8K6KIL5pMt85dLdbRb9hDXO0cOk+YoArrlM8LNh8E= 15 | github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY= 16 | github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= 17 | golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 18 | golang.org/x/sys v0.0.0-20210514084401-e8d321eab015 h1:hZR0X1kPW+nwyJ9xRxqZk1vx5RUObAPBdKVvXPDUH/E= 19 | golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 20 | -------------------------------------------------------------------------------- /man/gts-locator.7.ronn: -------------------------------------------------------------------------------- 1 | ## gts-locator(7) -- patterns to refer to locations in a sequence 2 | 3 | ## SYNOPSIS 4 | 5 | [selector|point|range][@modifier] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-locator**s are patterns for specifying locations within a sequence. 10 | A _locator_ consists of a _location specifier_ and a _modifier_. A 11 | _location specifier_ is one of a `modifier`, a `selector`, a `point location`, 12 | or `range location`. A _selector_ takes the form 13 | `[feature_key][/[qualifier1][=regexp1]][/[qualifier2][=regexp2]]...`. See 14 | gts-selector(7) for more details. A _point location_ is simply a single integer 15 | that directly specifies a single point in the sequence (starting at 1). A 16 | _range location_ is a pair of integers connected with `..` (starting at 1), 17 | which is identical to the notation of a feature range location. However, the 18 | _range location_ of a _locator_ may specify a _modifier_, in which case the `^` 19 | represents the beginning of the sequence and the `$` represents the end of the 20 | sequence. The locations specified by the _location specifier_ can be modified 21 | using a _modifier_. A _modifier_ can take one of five forms: `^[(+|-)n]`, 22 | `$[[(+|-)m]]`, `^[(+|-)n]..$[(+|-)m]`, `^[(+|-)n]..^[(+|-)m]`, or 23 | `$[(+|-)n]..$[(+|-)m]`. See gts-modifier(7) for more details. 
24 | 25 | ## EXAMPLES 26 | 27 | Locate the sequence 100 bases upstream of a `CDS`: 28 | 29 | CDS@^-100..^ 30 | 31 | Extend a location 20 bases upstream and downstream of a gene: 32 | 33 | gene@^-20..$+20 34 | 35 | Locate a range between 100 and 200 bases: 36 | 37 | 100..200 38 | 39 | ## SEE ALSO 40 | 41 | gts(1), gts-delete(1), gts-infix(1) gts-insert(1), gts-rotate(1), gts-split(1), 42 | gts-modifier(7), gts-selector(7) -------------------------------------------------------------------------------- /man/gts-clear.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-CLEAR" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-clear\fR \- remove all features from the sequence (excluding source features) 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-clear [\-\-version] [\-h | \-\-help] [\fIargs\fR] \fIseqin\fR 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-clear\fR takes a single sequence file input and strips off all features except for the \fBsource\fR features which are mandatory in GenBank\. If the sequence input is ommited, standard input will be read instead\. This command is equivalent to running \fBgts select source \fR\. 14 | . 15 | .SH "OPTIONS" 16 | . 17 | .TP 18 | \fB\fR 19 | Input sequence file (may be omitted if standard input is provided)\. See gts\-seqin(7) for a list of currently supported list of sequence formats\. 20 | . 21 | .TP 22 | \fB\-F \fR, \fB\-\-format=\fR 23 | Output file format (defaults to same as input)\. See gts\-seqout(7) for a list of currently supported list of sequence formats\. The format specified with this option will override the file type detection from the output filename\. 24 | . 25 | .TP 26 | \fB\-\-no\-cache\fR 27 | Do not use or create cache\. See gts\-cache(7) for details\. 28 | . 
29 | .TP 30 | \fB\-o <output>\fR, \fB\-\-output=<output>\fR 31 | Output sequence file (specifying \fB\-\fR will force standard output)\. The output file format will be automatically detected from the filename if none is specified with the \fB\-F\fR or \fB\-\-format\fR option\. 32 | . 33 | .SH "BUGS" 34 | \fBgts\-clear\fR currently has no known bugs\. 35 | . 36 | .SH "AUTHORS" 37 | \fBgts\-clear\fR is written and maintained by Kotone Itaya\. 38 | . 39 | .SH "SEE ALSO" 40 | gts(1), gts\-seqin(7), gts\-seqout(7) 41 | -------------------------------------------------------------------------------- /man/gts-reverse.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-REVERSE" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-reverse\fR \- reverse order of the given sequence(s) 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-reverse [\-\-version] [\-h | \-\-help] [\fIargs\fR] \fIseqin\fR 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-reverse\fR takes a single sequence input and reverses the sequence\. Any features present in the sequence will be relocated to match the reversed location\. This command \fIwill not\fR complement the sequence\. To obtain the complemented sequence, use \fBgts\-complement(1)\fR\. 14 | . 15 | .SH "OPTIONS" 16 | . 17 | .TP 18 | \fB<seqin>\fR 19 | Input sequence file (may be omitted if standard input is provided)\. See gts\-seqin(7) for a list of currently supported sequence formats\. 20 | . 21 | .TP 22 | \fB\-F <format>\fR, \fB\-\-format=<format>\fR 23 | Output file format (defaults to same as input)\. See gts\-seqout(7) for a list of currently supported sequence formats\. The format specified with this option will override the file type detection from the output filename\. 24 | . 25 | .TP 26 | \fB\-\-no\-cache\fR 27 | Do not use or create cache\. See gts\-cache(7) for details\. 28 | .
29 | .TP 30 | \fB\-o <output>\fR, \fB\-\-output=<output>\fR 31 | Output sequence file (specifying \fB\-\fR will force standard output)\. The output file format will be automatically detected from the filename if none is specified with the \fB\-F\fR or \fB\-\-format\fR option\. 32 | . 33 | .SH "BUGS" 34 | \fBgts\-reverse\fR currently has no known bugs\. 35 | . 36 | .SH "AUTHORS" 37 | \fBgts\-reverse\fR is written and maintained by Kotone Itaya\. 38 | . 39 | .SH "SEE ALSO" 40 | gts(1), gts\-complement(1), gts\-seqin(7), gts\-seqout(7) 41 | -------------------------------------------------------------------------------- /man/gts-repair.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-repair(1) -- repair fragmented features 2 | 3 | ## SYNOPSIS 4 | 5 | gts-repair [--version] [-h | --help] [<args>] <seqin> 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-repair** takes a single input sequence and attempts to reconstruct 10 | features that have been fragmented as a result of other manipulations. 11 | Specifically, **gts-repair** will scan each sequence to find features that have 12 | identical feature keys and qualifier key-value pairs, check if their locations 13 | are pointing to a consecutive region of the sequence, and if they are, merge 14 | the locations to create a single feature. Any features that have been lost as a 15 | result of other manipulations will not be reconstructed. 16 | 17 | ## OPTIONS 18 | 19 | * `<seqin>`: 20 | Input sequence file (may be omitted if standard input is provided). See 21 | gts-seqin(7) for a list of currently supported sequence formats. 22 | 23 | * `-F <format>`, `--format=<format>`: 24 | Output file format (defaults to same as input). See gts-seqout(7) for a 25 | list of currently supported sequence formats. The format specified 26 | with this option will override the file type detection from the output 27 | filename. 28 | 29 | * `--no-cache`: 30 | Do not use or create cache. See gts-cache(7) for details.
31 | 32 | * `-o <output>`, `--output=<output>`: 33 | Output sequence file (specifying `-` will force standard output). The 34 | output file format will be automatically detected from the filename if none 35 | is specified with the `-F` or `--format` option. 36 | 37 | ## BUGS 38 | 39 | **gts-repair** currently has no known bugs. 40 | 41 | ## AUTHORS 42 | 43 | **gts-repair** is written and maintained by Kotone Itaya. 44 | 45 | ## SEE ALSO 46 | 47 | gts(1), gts-join(1), gts-seqin(7), gts-seqout(7) -------------------------------------------------------------------------------- /man/gts-sort.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-SORT" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-sort\fR \- sort the list of sequences 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-sort [\-\-version] [\-h | \-\-help] [\fIargs\fR] \fIseqin\fR 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-sort\fR takes a single sequence input and sorts the sequences\. If the sequence input is omitted, standard input will be read instead\. By default, the sequences will be sorted from longest to shortest\. Using this command on files with large numbers of sequences is not advised\. 14 | . 15 | .SH "OPTIONS" 16 | . 17 | .TP 18 | \fB<seqin>\fR 19 | Input sequence file (may be omitted if standard input is provided)\. See gts\-seqin(7) for a list of currently supported sequence formats\. 20 | . 21 | .TP 22 | \fB\-F <format>\fR, \fB\-\-format=<format>\fR 23 | Output file format (defaults to same as input)\. See gts\-seqout(7) for a list of currently supported sequence formats\. The format specified with this option will override the file type detection from the output filename\. 24 | . 25 | .TP 26 | \fB\-\-no\-cache\fR 27 | Do not use or create cache\. See gts\-cache(7) for details\. 28 | .
29 | .TP 30 | \fB\-o \fR, \fB\-\-output=\fR 31 | Output sequence file (specifying \fB\-\fR will force standard output)\. The output file format will be automatically detected from the filename if none is specified with the \fB\-F\fR or \fB\-\-format\fR option\. 32 | . 33 | .TP 34 | \fB\-r\fR, \fB\-\-reverse\fR 35 | Reverse the sort order\. 36 | . 37 | .SH "BUGS" 38 | \fBgts\-sort\fR currently has no known bugs\. 39 | . 40 | .SH "AUTHORS" 41 | \fBgts\-sort\fR is written and maintained by Kotone Itaya\. 42 | . 43 | .SH "SEE ALSO" 44 | gts(1), gts\-pick(1), gts\-seqin(7), gts\-seqout(7) 45 | -------------------------------------------------------------------------------- /cmd/gts/length.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "io" 7 | "os" 8 | 9 | "github.com/go-gts/flags" 10 | "github.com/go-gts/gts" 11 | "github.com/go-gts/gts/cmd" 12 | "github.com/go-gts/gts/seqio" 13 | ) 14 | 15 | func init() { 16 | flags.Register("length", "report the length of the sequence(s)", lengthFunc) 17 | } 18 | 19 | func lengthFunc(ctx *flags.Context) error { 20 | pos, opt := flags.Flags() 21 | 22 | var seqinPath *string 23 | if cmd.IsTerminal(os.Stdin.Fd()) { 24 | seqinPath = pos.String("seqin", "input sequence file (may be omitted if standard input is provided)") 25 | } 26 | 27 | outPath := opt.String('o', "output", "-", "output file (specifying `-` will force standard output)") 28 | 29 | if err := ctx.Parse(pos, opt); err != nil { 30 | return err 31 | } 32 | 33 | seqinFile := os.Stdin 34 | if seqinPath != nil && *seqinPath != "-" { 35 | f, err := os.Open(*seqinPath) 36 | if err != nil { 37 | return ctx.Raise(fmt.Errorf("failed to open file %q: %v", *seqinPath, err)) 38 | } 39 | seqinFile = f 40 | defer seqinFile.Close() 41 | } 42 | 43 | outFile := os.Stdout 44 | if *outPath != "-" { 45 | f, err := os.Create(*outPath) 46 | if err != nil { 47 | return ctx.Raise(fmt.Errorf("failed to 
create file %q: %v", *outPath, err)) 48 | } 49 | outFile = f 50 | defer outFile.Close() 51 | } 52 | 53 | w := bufio.NewWriter(outFile) 54 | 55 | scanner := seqio.NewAutoScanner(seqinFile) 56 | for scanner.Scan() { 57 | seq := scanner.Value() 58 | _, err := io.WriteString(w, fmt.Sprintf("%d\n", gts.Len(seq))) 59 | if err != nil { 60 | return ctx.Raise(err) 61 | } 62 | 63 | if err := w.Flush(); err != nil { 64 | return ctx.Raise(err) 65 | } 66 | } 67 | 68 | if err := scanner.Err(); err != nil { 69 | return ctx.Raise(fmt.Errorf("encountered error in scanner: %v", err)) 70 | } 71 | 72 | return nil 73 | } 74 | -------------------------------------------------------------------------------- /man/gts-join.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-join(1) -- join the sequences contained in the files 2 | 3 | ## SYNOPSIS 4 | 5 | gts-join [--version] [-h | --help] [] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-join** takes a single input sequence and joins its contents into a single 10 | sequence. If the sequence input is ommited, standard input will be read instead. 11 | This command will make no attempt to restore features that originated from a 12 | single sequence which is fragmented across different entries. To repair such 13 | features, first run **gts-join** and pass the output to gts-repair(1). 14 | 15 | ## OPTIONS 16 | 17 | * ``: 18 | Input sequence file (may be omitted if standard input is provided). See 19 | gts-seqin(7) for a list of currently supported list of sequence formats. 20 | 21 | * `-c`, `--circular`: 22 | Output the sequence as circular if possible. 23 | 24 | * `-F `, `--format=`: 25 | Output file format (defaults to same as input). See gts-seqout(7) for a 26 | list of currently supported list of sequence formats. The format specified 27 | with this option will override the file type detection from the output 28 | filename. 29 | 30 | * `--no-cache`: 31 | Do not use or create cache. 
See gts-cache(7) for details. 32 | 33 | * `-o <output>`, `--output=<output>`: 34 | Output sequence file (specifying `-` will force standard output). The 35 | output file format will be automatically detected from the filename if none 36 | is specified with the `-F` or `--format` option. 37 | 38 | ## BUGS 39 | 40 | **gts-join** currently has no known bugs. 41 | 42 | ## AUTHORS 43 | 44 | **gts-join** is written and maintained by Kotone Itaya. 45 | 46 | ## SEE ALSO 47 | 48 | gts(1), gts-split(1), gts-repair(1), gts-seqin(7), gts-seqout(7) -------------------------------------------------------------------------------- /man/gts-complement.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-COMPLEMENT" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-complement\fR \- compute the complement of the given sequence(s) 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-complement [\-\-version] [\-h | \-\-help] [\fIargs\fR] \fIseqin\fR 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-complement\fR takes a single sequence input and returns the complemented sequence as output\. If the sequence input is omitted, standard input will be read instead\. Any features present in the sequence will be relocated to the complement strand\. This command \fIwill not\fR reverse the sequence\. To obtain the reversed sequence, use \fBgts\-reverse(1)\fR\. 14 | . 15 | .SH "OPTIONS" 16 | . 17 | .TP 18 | \fB<seqin>\fR 19 | Input sequence file (may be omitted if standard input is provided)\. See gts\-seqin(7) for a list of currently supported sequence formats\. 20 | . 21 | .TP 22 | \fB\-F <format>\fR, \fB\-\-format=<format>\fR 23 | Output file format (defaults to same as input)\. See gts\-seqout(7) for a list of currently supported sequence formats\.
The format specified with this option will override the file type detection from the output filename\. 24 | . 25 | .TP 26 | \fB\-\-no\-cache\fR 27 | Do not use or create cache\. See gts\-cache(7) for details\. 28 | . 29 | .TP 30 | \fB\-o \fR, \fB\-\-output=\fR 31 | Output sequence file (specifying \fB\-\fR will force standard output)\. The output file format will be automatically detected from the filename if none is specified with the \fB\-F\fR or \fB\-\-format\fR option\. 32 | . 33 | .SH "BUGS" 34 | \fBgts\-complement\fR currently has no known bugs\. 35 | . 36 | .SH "AUTHORS" 37 | \fBgts\-complement\fR is written and maintained by Kotone Itaya\. 38 | . 39 | .SH "SEE ALSO" 40 | gts(1), gts\-reverse(1), gts\-seqin(7), gts\-seqout(7) 41 | -------------------------------------------------------------------------------- /props.go: -------------------------------------------------------------------------------- 1 | package gts 2 | 3 | type Props [][]string 4 | 5 | func (props Props) Index(key string) int { 6 | for i := range props { 7 | if props[i][0] == key { 8 | return i 9 | } 10 | } 11 | return -1 12 | } 13 | 14 | func (props Props) Has(name string) bool { 15 | return props.Index(name) >= 0 16 | } 17 | 18 | func (props Props) Keys() []string { 19 | keys := make([]string, len(props)) 20 | for i := range props { 21 | keys[i] = props[i][0] 22 | } 23 | return keys 24 | } 25 | 26 | type Item struct { 27 | Key string 28 | Value string 29 | } 30 | 31 | func (props Props) Items() []Item { 32 | items := make([]Item, 0) 33 | for _, key := range props.Keys() { 34 | for _, value := range props.Get(key) { 35 | items = append(items, Item{key, value}) 36 | } 37 | } 38 | return items 39 | } 40 | 41 | func (props Props) Get(key string) []string { 42 | switch i := props.Index(key); i { 43 | case -1: 44 | return nil 45 | default: 46 | return props[i][1:] 47 | } 48 | } 49 | 50 | func (props *Props) Set(key string, values ...string) { 51 | prop := make([]string, len(values)+1) 52 | 
prop[0] = key 53 | copy(prop[1:], values) 54 | switch i := props.Index(key); i { 55 | case -1: 56 | *props = append(*props, prop) 57 | default: 58 | (*props)[i] = prop 59 | } 60 | } 61 | 62 | func (props *Props) Add(key string, values ...string) { 63 | switch i := props.Index(key); i { 64 | case -1: 65 | props.Set(key, values...) 66 | default: 67 | (*props)[i] = append((*props)[i], values...) 68 | } 69 | } 70 | 71 | func (props *Props) Del(key string) { 72 | if i := props.Index(key); i >= 0 { 73 | *props = append((*props)[:i], (*props)[i+1:]...) 74 | } 75 | } 76 | 77 | func (props Props) Clone() Props { 78 | ret := make([][]string, len(props)) 79 | for i, prop := range props { 80 | ret[i] = make([]string, len(prop)) 81 | copy(ret[i], prop) 82 | } 83 | return ret 84 | } 85 | -------------------------------------------------------------------------------- /seqio/fasta_test.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import ( 4 | "bytes" 5 | "strings" 6 | "testing" 7 | 8 | "github.com/go-gts/gts" 9 | "github.com/go-gts/gts/internal/testutils" 10 | "github.com/go-pars/pars" 11 | ) 12 | 13 | func TestFastaIO(t *testing.T) { 14 | in := testutils.ReadTestfile(t, "NC_001422.fasta") 15 | state := pars.FromString(in) 16 | parser := pars.AsParser(FastaParser) 17 | 18 | result, err := parser.Parse(state) 19 | if err != nil { 20 | t.Errorf("parser returned %v\nBuffer:\n%q", err, string(result.Token)) 21 | } 22 | 23 | switch seq := result.Value.(type) { 24 | case Fasta: 25 | if gts.Len(seq) != 5386 { 26 | t.Errorf("gts.Len(seq) = %d, want 5386", gts.Len(seq)) 27 | } 28 | if seq.Info() == nil { 29 | t.Error("seq.Info() is nil") 30 | } 31 | if seq.Features() != nil { 32 | t.Error("seq.Features() is not nil") 33 | } 34 | t.Run("format from *Fasta", func(t *testing.T) { 35 | b := strings.Builder{} 36 | n, err := FastaWriter{&b}.WriteSeq(&seq) 37 | if int(n) != len([]byte(in)) || err != nil { 38 | 
t.Errorf("f.WriteSeq(seq) = (%d, %v), want %d, nil", n, err, len(in)) 39 | return 40 | } 41 | out := b.String() 42 | testutils.DiffLine(t, in, out) 43 | }) 44 | t.Run("format from BasicSequence", func(t *testing.T) { 45 | b := strings.Builder{} 46 | n, err := FastaWriter{&b}.WriteSeq(gts.Copy(seq)) 47 | if int(n) != len([]byte(in)) || err != nil { 48 | t.Errorf("f.WriteSeq(seq) = (%d, %v), want %d, nil", n, err, len(in)) 49 | return 50 | } 51 | out := b.String() 52 | testutils.DiffLine(t, in, out) 53 | }) 54 | default: 55 | t.Errorf("result.Value.(type) = %T, want %T", seq, Fasta{}) 56 | } 57 | } 58 | 59 | func TestFastaIOFail(t *testing.T) { 60 | b := bytes.Buffer{} 61 | n, err := FastaWriter{&b}.WriteSeq(gts.New(nil, nil, nil)) 62 | if n != 0 || err == nil { 63 | t.Errorf("formatting an empty Sequence should return an error") 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /man/gts-define.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-define(1) -- define a new feature 2 | 3 | ## SYNOPSIS 4 | 5 | gts-define [--version] [-h | --help] [] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-define** takes a single sequence input and defines a new feature with the 10 | given `key` and `location`. If the sequence input is ommited, standard input 11 | will be read instead. Feature qualifiers may be added by using the `-q` or 12 | `--qualifier` option, which may be given multiple times to add more than one 13 | qualifier to the feature. 14 | 15 | ## OPTIONS 16 | 17 | * ``: 18 | Feature key. 19 | 20 | * ``: 21 | Feature location. 22 | 23 | * ``: 24 | Input sequence file (may be omitted if standard input is provided). See 25 | gts-seqin(7) for a list of currently supported list of sequence formats. 26 | 27 | * `-F `, `--format=`: 28 | Output file format (defaults to same as input). See gts-seqout(7) for a 29 | list of currently supported list of sequence formats. 
The format specified 30 | with this option will override the file type detection from the output 31 | filename. 32 | 33 | * `--no-cache`: 34 | Do not use or create cache. See gts-cache(7) for details. 35 | 36 | * `-o `, `--output=`: 37 | Output sequence file (specifying `-` will force standard output). The 38 | output file format will be automatically detected from the filename if none 39 | is specified with the `-F` or `--format` option. 40 | 41 | * `-q `, `--qualifier=`: 42 | Qualifier key-value pairs (syntax: key=value)). Multiple values may be set 43 | by repeatedly passing this option to the command. 44 | 45 | ## BUGS 46 | 47 | **gts-define** currently has no known bugs. 48 | 49 | ## AUTHORS 50 | 51 | **gts-define** is written and maintained by Kotone Itaya. 52 | 53 | ## SEE ALSO 54 | 55 | gts(1), gts-annotate(1), gts-seqin(7), gts-seqout(7) -------------------------------------------------------------------------------- /nucleotide_test.go: -------------------------------------------------------------------------------- 1 | package gts 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/go-gts/gts/internal/testutils" 7 | ) 8 | 9 | func TestComplement(t *testing.T) { 10 | p, q := []byte("ACGTURYKMWSBDHVacgturykmwsbdhv."), []byte("TGCAAYRMKSWVHDBtgcaayrmkswvhdb.") 11 | props := Props{} 12 | props.Add("organism", "Genus species") 13 | props.Add("mol_type", "Genomic DNA") 14 | ff := []Feature{ 15 | NewFeature("source", Range(0, len(p)), props), 16 | NewFeature("gene", Range(2, 4), props), 17 | NewFeature("misc_feature", Ambiguous{5, 7}, props), 18 | } 19 | gg := []Feature{ 20 | NewFeature("source", Range(0, len(p)).Complement(), props), 21 | NewFeature("gene", Range(2, 4).Complement(), props), 22 | NewFeature("misc_feature", Ambiguous{5, 7}, props), 23 | } 24 | in := New(nil, ff, p) 25 | exp := New(nil, gg, q) 26 | out := Complement(in) 27 | testutils.Equals(t, out, exp) 28 | } 29 | 30 | func TestTranscribe(t *testing.T) { 31 | in := New(nil, nil, 
[]byte("ACGTURYKMWSBDHVacgturykmwsbdhv.")) 32 | exp := New(nil, nil, []byte("UGCAAYRMKSWVHDBtgcaayrmkswvhdb.")) 33 | out := Transcribe(in) 34 | testutils.Equals(t, out, exp) 35 | } 36 | 37 | var matchTests = []struct { 38 | base byte 39 | match string 40 | }{ 41 | {'a', ""}, 42 | {'a', "a"}, 43 | {'c', "c"}, 44 | {'g', "g"}, 45 | {'t', "tu"}, 46 | {'u', "tu"}, 47 | {'r', "agr"}, 48 | {'y', "ctuy"}, 49 | {'k', "gtuy"}, 50 | {'m', "acm"}, 51 | {'s', "cgs"}, 52 | {'w', "atuw"}, 53 | {'b', "cgtuyksb"}, 54 | {'d', "agturkwd"}, 55 | {'h', "actuymwh"}, 56 | {'v', "acgrmsv"}, 57 | {'n', "acgturykmswbdhvn"}, 58 | } 59 | 60 | func TestMatch(t *testing.T) { 61 | for _, tt := range matchTests { 62 | query := New(nil, nil, []byte{tt.base}) 63 | seq := New(nil, nil, []byte(tt.match)) 64 | exp := make([]Segment, len(tt.match)) 65 | for i := range exp { 66 | exp[i] = Segment{i, i + 1} 67 | } 68 | out := Match(seq, query) 69 | if len(out) != len(exp) { 70 | testutils.Equals(t, out, exp) 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /man/gts-repair.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-REPAIR" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-repair\fR \- repair fragmented features 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-repair [\-\-version] [\-h | \-\-help] [\fIargs\fR] \fIseqin\fR 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-repair\fR takes a single input sequence and attempts to reconstruct features that have been fragmented as a result of other manipulations\. Specifically, \fBgts\-repair\fR will scan each sequence to find features that have identical feature keys and qualifier key\-value pairs, check if their locations are pointing to a consecutive region of the sequence, and if they are, merge the locations to create a single feature\. 
Any features that has been lost as a result of other manipulations will not be reconstructed\. 14 | . 15 | .SH "OPTIONS" 16 | . 17 | .TP 18 | \fB\fR 19 | Input sequence file (may be omitted if standard input is provided)\. See gts\-seqin(7) for a list of currently supported list of sequence formats\. 20 | . 21 | .TP 22 | \fB\-F \fR, \fB\-\-format=\fR 23 | Output file format (defaults to same as input)\. See gts\-seqout(7) for a list of currently supported list of sequence formats\. The format specified with this option will override the file type detection from the output filename\. 24 | . 25 | .TP 26 | \fB\-\-no\-cache\fR 27 | Do not use or create cache\. See gts\-cache(7) for details\. 28 | . 29 | .TP 30 | \fB\-o \fR, \fB\-\-output=\fR 31 | Output sequence file (specifying \fB\-\fR will force standard output)\. The output file format will be automatically detected from the filename if none is specified with the \fB\-F\fR or \fB\-\-format\fR option\. 32 | . 33 | .SH "BUGS" 34 | \fBgts\-repair\fR currently has no known bugs\. 35 | . 36 | .SH "AUTHORS" 37 | \fBgts\-repair\fR is written and maintained by Kotone Itaya\. 38 | . 39 | .SH "SEE ALSO" 40 | gts(1), gts\-join(1), gts\-seqin(7), gts\-seqout(7) 41 | -------------------------------------------------------------------------------- /man/gts-annotate.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-annotate(1) -- merge features from a feature list file into a sequence 2 | 3 | ## SYNOPSIS 4 | 5 | gts-annotate [--version] [-h | --help] [] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-annotate** takes two inputs: one file containing a list of features and 10 | another containing a sequence, and annotates the sequence with the contents of 11 | the feature file. If the sequence input is ommited, standard input will be read 12 | instead. No attempts to check if the features being annotated make logical 13 | sense in the given sequence will be made. 
14 | 15 | ## OPTIONS 16 | 17 | * ``: 18 | Feature table file containing features to merge. This file should be 19 | formatted in the INSDC feature table format. For more information, visit 20 | the INSDC feature table documentation located at the following URL. 21 | http://www.insdc.org/documents/feature-table 22 | 23 | * ``: 24 | Input sequence file (may be omitted if standard input is provided). See 25 | gts-seqin(7) for a list of currently supported list of sequence formats. 26 | 27 | * `-F `, `--format=`: 28 | Output file format (defaults to same as input). See gts-seqout(7) for a 29 | list of currently supported list of sequence formats. The format specified 30 | with this option will override the file type detection from the output 31 | filename. 32 | 33 | * `--no-cache`: 34 | Do not use or create cache. See gts-cache(7) for details. 35 | 36 | * `-o `, `--output=`: 37 | Output sequence file (specifying `-` will force standard output). The 38 | output file format will be automatically detected from the filename if none 39 | is specified with the `-F` or `--format` option. 40 | 41 | ## BUGS 42 | 43 | **gts-annotate** currently has no known bugs. 44 | 45 | ## AUTHORS 46 | 47 | **gts-annotate** is written and maintained by Kotone Itaya. 48 | 49 | ## SEE ALSO 50 | 51 | gts(1), gts-define(1), gts-seqin(7), gts-seqout(7) -------------------------------------------------------------------------------- /man/gts-selector.7: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-SELECTOR" "7" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-selector\fR 8 | . 9 | .SH "gts\-selector(7) \-\- patterns to select sequence features" 10 | . 11 | .SH "SYNOPSIS" 12 | [feature_key][/[qualifier1][=regexp1]][/[qualifier2][=regexp2]]\.\.\. 13 | . 
14 | .SH "DESCRIPTION" 15 | \fBgts\-selector\fRs are patterns for selecting sequence features that match the given \fIselector\fR\. A \fIselector\fR consists of a single feature key and/or multiple qualifier matchers\. A feature key must currently be a perfect match (case sensitive) and if omitted all feature keys will match\. A qualifier matcher has two parts: a qualifier name and a regular expression delimited by the \fB=\fR sign\. The qualifier name must currently be a perfect match (case sensitive) and if omitted all qualifier names will match\. The regular expression will be tested against the contents of the qualifier value\. If omitted, any features that has the qualifier with the given qualifier name will match\. 16 | . 17 | .SH "EXAMPLES" 18 | Select all \fBgene\fR features: 19 | . 20 | .IP "" 4 21 | . 22 | .nf 23 | 24 | gene 25 | . 26 | .fi 27 | . 28 | .IP "" 0 29 | . 30 | .P 31 | Select all \fBCDS\fR features that produce a DNA\-binding \fBproduct\fR: 32 | . 33 | .IP "" 4 34 | . 35 | .nf 36 | 37 | CDS/product=DNA\-binding 38 | . 39 | .fi 40 | . 41 | .IP "" 0 42 | . 43 | .P 44 | Select all features with \fBlocus_tag\fR of \fBb0001\fR: 45 | . 46 | .IP "" 4 47 | . 48 | .nf 49 | 50 | /locus_tag=b0001 51 | . 52 | .fi 53 | . 54 | .IP "" 0 55 | . 56 | .P 57 | Select all features with the qualifier \fBtranslation\fR: 58 | . 59 | .IP "" 4 60 | . 61 | .nf 62 | 63 | /translation 64 | . 65 | .fi 66 | . 67 | .IP "" 0 68 | . 69 | .P 70 | Select all features with a qualifier value matching \fBrecombinase\fR 71 | . 72 | .IP "" 4 73 | . 74 | .nf 75 | 76 | /=recombinase 77 | . 78 | .fi 79 | . 80 | .IP "" 0 81 | . 82 | .SH "SEE ALSO" 83 | gts(1), gts\-select(1), gts\-locator(7) 84 | -------------------------------------------------------------------------------- /man/gts-modifier.7: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 
4 | .TH "GTS\-MODIFIER" "7" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-modifier\fR 8 | . 9 | .SH "gts\-modifier(7) \-\- patterns for modifying sequence locations" 10 | . 11 | .SH "SYNOPSIS" 12 | ^[(+|\-)n] $[(+|\-)m] ^[(+|\-)n]\.\.$[(+|\-)m] ^[(+|\-)n]\.\.^[(+|\-)m] $[(+|\-)n]\.\.$[(+|\-)m] 13 | . 14 | .SH "DESCRIPTION" 15 | \fBgts\-modifier\fRs are patterns for modifying locations within a sequence\. A \fImodifier\fR can take one of five forms: \fB^[(+|\-)n]\fR, \fB$[[(+|\-)m]]\fR, \fB^[(+|\-)n]\.\.$[(+|\-)m]\fR, \fB^[(+|\-)n]\.\.^[(+|\-)m]\fR, or \fB$[(+|\-)n]\.\.$[(+|\-)m]\fR\. A caret \fB^\fR character denotes the beginning of the location and a dollar \fB$\fR character denotes the end of the location\. The numbers following these characters denote the offset of the position, where a negative number represents the 5\' region and a positive number represents the 3\' region\. The first two forms of the \fImodifier\fR will return a singular point location and the latter three forms will return a modified range location\. The positions and offset values will be flipped for complement locations\. 16 | . 17 | .SH "EXAMPLES" 18 | Collapse the location to the start of the region: 19 | . 20 | .IP "" 4 21 | . 22 | .nf 23 | 24 | ^ 25 | . 26 | .fi 27 | . 28 | .IP "" 0 29 | . 30 | .P 31 | Collapse the location to the end of the region: 32 | . 33 | .IP "" 4 34 | . 35 | .nf 36 | 37 | $ 38 | . 39 | .fi 40 | . 41 | .IP "" 0 42 | . 43 | .P 44 | Leave the entire region as is: 45 | . 46 | .IP "" 4 47 | . 48 | .nf 49 | 50 | ^\.\.$ 51 | . 52 | .fi 53 | . 54 | .IP "" 0 55 | . 56 | .P 57 | Extend the region 20 bases upstream: 58 | . 59 | .IP "" 4 60 | . 61 | .nf 62 | 63 | ^\-20\.\.$ 64 | . 65 | .fi 66 | . 67 | .IP "" 0 68 | . 69 | .P 70 | Focus the 20 bases around the end of the region: 71 | . 72 | .IP "" 4 73 | . 74 | .nf 75 | 76 | $\-20\.\.$+20 77 | . 78 | .fi 79 | . 80 | .IP "" 0 81 | . 
82 | .SH "SEE ALSO" 83 | gts(1), gts\-extract(1), gts\-locator(7) 84 | -------------------------------------------------------------------------------- /locator_test.go: -------------------------------------------------------------------------------- 1 | package gts 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/go-test/deep" 7 | ) 8 | 9 | var asLocatorTests = []struct { 10 | in string 11 | loc Locator 12 | }{ 13 | {"^..$", relativeLocator(HeadTail{0, 0})}, 14 | {"1", locationLocator(Point(0))}, 15 | {"3..6", locationLocator(Range(2, 6))}, 16 | {"complement(3..6)", locationLocator(Range(2, 6).Complement())}, 17 | 18 | {"exon", filterLocator(selectorFilter("exon"))}, 19 | {"exon/gene=INS", filterLocator(selectorFilter("exon"))}, 20 | {"/gene=INS", filterLocator(selectorFilter("/gene=INS"))}, 21 | 22 | {"@^-20..^", resizeLocator(allLocator, HeadHead{-20, 0})}, 23 | {"@^..$", resizeLocator(allLocator, HeadTail{0, 0})}, 24 | {"exon@^..$", resizeLocator(filterLocator(selectorFilter("exon")), HeadTail{0, 0})}, 25 | } 26 | 27 | var asLocatorFailTests = []string{ 28 | "exon/gene=[", 29 | "@", 30 | "exon/gene=[@", 31 | "exon/gene=INS@", 32 | } 33 | 34 | func TestAsLocator(t *testing.T) { 35 | ff := testFeatureTable 36 | seq := New(nil, ff, []byte(""+ 37 | "AGCCCTCCAGGACAGGCTGCATCAGAAGAGGCCATCAAGCAGATCACTGTCCTTCTGCCATGGCCCTGTG"+ 38 | "GATGCGCCTCCTGCCCCTGCTGGCGCTGCTGGCCCTCTGGGGACCTGACCCAGCCGCAGCCTTTGTGAAC"+ 39 | "CAACACCTGTGCGGCTCACACCTGGTGGAAGCTCTCTACCTAGTGTGCGGGGAACGAGGCTTCTTCTACA"+ 40 | "CACCCAAGACCCGCCGGGAGGCAGAGGACCTGCAGGTGGGGCAGGTGGAGCTGGGCGGGGGCCCTGGTGC"+ 41 | "AGGCAGCCTGCAGCCCTTGGCCCTGGAGGGGTCCCTGCAGAAGCGTGGCATTGTGGAACAATGCTGTACC"+ 42 | "AGCATCTGCTCCCTCTACCAGCTGGAGAACTACTGCAACTAGACGCAGCCCGCAGGCAGCCCCACACCCG"+ 43 | "CCGCCTCCTGCACCGAGAGAGATGGAATAAAGCCCTTGAACCAGC")) 44 | for _, tt := range asLocatorTests { 45 | loc, err := AsLocator(tt.in) 46 | if err != nil { 47 | t.Errorf("AsLocator(%q): %v", tt.in, err) 48 | return 49 | } 50 | out := loc(seq) 51 | exp := 
tt.loc(seq) 52 | if diff := deep.Equal(out, exp); diff != nil { 53 | t.Errorf("AsLocator(%q): %v", tt.in, diff) 54 | } 55 | } 56 | 57 | for _, in := range asLocatorFailTests { 58 | _, err := AsLocator(in) 59 | if err == nil { 60 | t.Errorf("AsLocator(%q) expected an error", in) 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /man/gts-join.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-JOIN" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-join\fR \- join the sequences contained in the files 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-join [\-\-version] [\-h | \-\-help] [\fIargs\fR] \fIseqin\fR 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-join\fR takes a single input sequence and joins its contents into a single sequence\. If the sequence input is ommited, standard input will be read instead\. This command will make no attempt to restore features that originated from a single sequence which is fragmented across different entries\. To repair such features, first run \fBgts\-join\fR and pass the output to gts\-repair(1)\. 14 | . 15 | .SH "OPTIONS" 16 | . 17 | .TP 18 | \fB\fR 19 | Input sequence file (may be omitted if standard input is provided)\. See gts\-seqin(7) for a list of currently supported list of sequence formats\. 20 | . 21 | .TP 22 | \fB\-c\fR, \fB\-\-circular\fR 23 | Output the sequence as circular if possible\. 24 | . 25 | .TP 26 | \fB\-F \fR, \fB\-\-format=\fR 27 | Output file format (defaults to same as input)\. See gts\-seqout(7) for a list of currently supported list of sequence formats\. The format specified with this option will override the file type detection from the output filename\. 28 | . 29 | .TP 30 | \fB\-\-no\-cache\fR 31 | Do not use or create cache\. See gts\-cache(7) for details\. 32 | . 
33 | .TP 34 | \fB\-\-no\-cache\fR 35 | Do not use or create cache\. See gts\-cache(7) for details\. 36 | . 37 | .TP 38 | \fB\-o \fR, \fB\-\-output=\fR 39 | Output sequence file (specifying \fB\-\fR will force standard output)\. The output file format will be automatically detected from the filename if none is specified with the \fB\-F\fR or \fB\-\-format\fR option\. 40 | . 41 | .SH "BUGS" 42 | \fBgts\-join\fR currently has no known bugs\. 43 | . 44 | .SH "AUTHORS" 45 | \fBgts\-join\fR is written and maintained by Kotone Itaya\. 46 | . 47 | .SH "SEE ALSO" 48 | gts(1), gts\-split(1), gts\-repair(1), gts\-seqin(7), gts\-seqout(7) 49 | -------------------------------------------------------------------------------- /man/gts-define.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-DEFINE" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-define\fR \- define a new feature 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-define [\-\-version] [\-h | \-\-help] [\fIargs\fR] \fIkey\fR \fIlocation\fR \fIseqin\fR 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-define\fR takes a single sequence input and defines a new feature with the given \fBkey\fR and \fBlocation\fR\. If the sequence input is omitted, standard input will be read instead\. Feature qualifiers may be added by using the \fB\-q\fR or \fB\-\-qualifier\fR option, which may be given multiple times to add more than one qualifier to the feature\. 14 | . 15 | .SH "OPTIONS" 16 | . 17 | .TP 18 | \fB\fR 19 | Feature key\. 20 | . 21 | .TP 22 | \fB\fR 23 | Feature location\. 24 | . 25 | .TP 26 | \fB\fR 27 | Input sequence file (may be omitted if standard input is provided)\. See gts\-seqin(7) for a list of currently supported sequence formats\. 28 | . 29 | .TP 30 | \fB\-F \fR, \fB\-\-format=\fR 31 | Output file format (defaults to same as input)\.
See gts\-seqout(7) for a list of currently supported list of sequence formats\. The format specified with this option will override the file type detection from the output filename\. 32 | . 33 | .TP 34 | \fB\-\-no\-cache\fR 35 | Do not use or create cache\. See gts\-cache(7) for details\. 36 | . 37 | .TP 38 | \fB\-o \fR, \fB\-\-output=\fR 39 | Output sequence file (specifying \fB\-\fR will force standard output)\. The output file format will be automatically detected from the filename if none is specified with the \fB\-F\fR or \fB\-\-format\fR option\. 40 | . 41 | .TP 42 | \fB\-q \fR, \fB\-\-qualifier=\fR 43 | Qualifier key\-value pairs (syntax: key=value))\. Multiple values may be set by repeatedly passing this option to the command\. 44 | . 45 | .SH "BUGS" 46 | \fBgts\-define\fR currently has no known bugs\. 47 | . 48 | .SH "AUTHORS" 49 | \fBgts\-define\fR is written and maintained by Kotone Itaya\. 50 | . 51 | .SH "SEE ALSO" 52 | gts(1), gts\-annotate(1), gts\-seqin(7), gts\-seqout(7) 53 | -------------------------------------------------------------------------------- /cmd/gts/reverse.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | "strings" 8 | 9 | "github.com/go-gts/flags" 10 | "github.com/go-gts/gts" 11 | "github.com/go-gts/gts/cmd" 12 | "github.com/go-gts/gts/seqio" 13 | ) 14 | 15 | func init() { 16 | flags.Register("reverse", "reverse order of the given sequence(s)", reverseFunc) 17 | } 18 | 19 | func reverseFunc(ctx *flags.Context) error { 20 | h := newHash() 21 | pos, opt := flags.Flags() 22 | 23 | seqinPath := new(string) 24 | *seqinPath = "-" 25 | if cmd.IsTerminal(os.Stdin.Fd()) { 26 | seqinPath = pos.String("seqin", "input sequence file (may be omitted if standard input is provided)") 27 | } 28 | 29 | nocache := opt.Switch(0, "no-cache", "do not use or create cache") 30 | seqoutPath := opt.String('o', "output", "-", "output sequence file 
(specifying `-` will force standard output)") 31 | format := opt.String('F', "format", "", "output file format (defaults to same as input)") 32 | 33 | if err := ctx.Parse(pos, opt); err != nil { 34 | return err 35 | } 36 | 37 | d, err := newIODelegate(*seqinPath, *seqoutPath) 38 | if err != nil { 39 | return ctx.Raise(err) 40 | } 41 | defer d.Close() 42 | 43 | filetype := seqio.Detect(*seqoutPath) 44 | if *format != "" { 45 | filetype = seqio.ToFileType(*format) 46 | } 47 | 48 | if !*nocache { 49 | data := encodePayload([]tuple{ 50 | {"command", strings.Join(ctx.Name, "-")}, 51 | {"version", gts.Version.String()}, 52 | {"filetype", filetype}, 53 | }) 54 | 55 | ok, err := d.TryCache(h, data) 56 | if ok || err != nil { 57 | return ctx.Raise(err) 58 | } 59 | } 60 | 61 | scanner := seqio.NewAutoScanner(d) 62 | buffer := bufio.NewWriter(d) 63 | writer := seqio.NewWriter(buffer, filetype) 64 | 65 | for scanner.Scan() { 66 | seq := scanner.Value() 67 | seq = gts.Reverse(seq) 68 | if _, err := writer.WriteSeq(seq); err != nil { 69 | return ctx.Raise(err) 70 | } 71 | 72 | if err := buffer.Flush(); err != nil { 73 | return ctx.Raise(err) 74 | } 75 | } 76 | 77 | if err := scanner.Err(); err != nil { 78 | return ctx.Raise(fmt.Errorf("encountered error in scanner: %v", err)) 79 | } 80 | 81 | return nil 82 | } 83 | -------------------------------------------------------------------------------- /cmd/gts/complement.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | "strings" 8 | 9 | "github.com/go-gts/flags" 10 | "github.com/go-gts/gts" 11 | "github.com/go-gts/gts/cmd" 12 | "github.com/go-gts/gts/seqio" 13 | ) 14 | 15 | func init() { 16 | flags.Register("complement", "compute the complement of the given sequence", complementFunc) 17 | } 18 | 19 | func complementFunc(ctx *flags.Context) error { 20 | h := newHash() 21 | pos, opt := flags.Flags() 22 | 23 | seqinPath := new(string) 
24 | *seqinPath = "-" 25 | if cmd.IsTerminal(os.Stdin.Fd()) { 26 | seqinPath = pos.String("seqin", "input sequence file (may be omitted if standard input is provided)") 27 | } 28 | 29 | nocache := opt.Switch(0, "no-cache", "do not use or create cache") 30 | seqoutPath := opt.String('o', "output", "-", "output sequence file (specifying `-` will force standard output)") 31 | format := opt.String('F', "format", "", "output file format (defaults to same as input)") 32 | 33 | if err := ctx.Parse(pos, opt); err != nil { 34 | return err 35 | } 36 | 37 | d, err := newIODelegate(*seqinPath, *seqoutPath) 38 | if err != nil { 39 | return ctx.Raise(err) 40 | } 41 | defer d.Close() 42 | 43 | filetype := seqio.Detect(*seqoutPath) 44 | if *format != "" { 45 | filetype = seqio.ToFileType(*format) 46 | } 47 | 48 | if !*nocache { 49 | data := encodePayload([]tuple{ 50 | {"command", strings.Join(ctx.Name, "-")}, 51 | {"version", gts.Version.String()}, 52 | {"filetype", filetype}, 53 | }) 54 | 55 | ok, err := d.TryCache(h, data) 56 | if ok || err != nil { 57 | return ctx.Raise(err) 58 | } 59 | } 60 | 61 | scanner := seqio.NewAutoScanner(d) 62 | buffer := bufio.NewWriter(d) 63 | writer := seqio.NewWriter(buffer, filetype) 64 | 65 | for scanner.Scan() { 66 | seq := scanner.Value() 67 | seq = gts.Complement(seq) 68 | if _, err := writer.WriteSeq(seq); err != nil { 69 | return ctx.Raise(err) 70 | } 71 | } 72 | 73 | if err := buffer.Flush(); err != nil { 74 | return ctx.Raise(err) 75 | } 76 | 77 | if err := scanner.Err(); err != nil { 78 | return ctx.Raise(fmt.Errorf("encountered error in scanner: %v", err)) 79 | } 80 | 81 | return nil 82 | } 83 | -------------------------------------------------------------------------------- /man/gts-annotate.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-ANNOTATE" "1" "October 2020" "" "" 5 | . 
6 | .SH "NAME" 7 | \fBgts\-annotate\fR \- merge features from a feature list file into a sequence 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-annotate [\-\-version] [\-h | \-\-help] [\fIargs\fR] \fIfeature_table\fR \fIseqin\fR 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-annotate\fR takes two inputs: one file containing a list of features and another containing a sequence, and annotates the sequence with the contents of the feature file\. If the sequence input is omitted, standard input will be read instead\. No attempt is made to check whether the features being annotated make logical sense in the given sequence\. 14 | . 15 | .SH "OPTIONS" 16 | . 17 | .TP 18 | \fB\fR 19 | Feature table file containing features to merge\. This file should be formatted in the INSDC feature table format\. For more information, visit the INSDC feature table documentation located at the following URL\. http://www\.insdc\.org/documents/feature\-table 20 | . 21 | .TP 22 | \fB\fR 23 | Input sequence file (may be omitted if standard input is provided)\. See gts\-seqin(7) for a list of currently supported sequence formats\. 24 | . 25 | .TP 26 | \fB\-F \fR, \fB\-\-format=\fR 27 | Output file format (defaults to same as input)\. See gts\-seqout(7) for a list of currently supported sequence formats\. The format specified with this option will override the file type detection from the output filename\. 28 | . 29 | .TP 30 | \fB\-\-no\-cache\fR 31 | Do not use or create cache\. See gts\-cache(7) for details\. 32 | . 33 | .TP 34 | \fB\-o \fR, \fB\-\-output=\fR 35 | Output sequence file (specifying \fB\-\fR will force standard output)\. The output file format will be automatically detected from the filename if none is specified with the \fB\-F\fR or \fB\-\-format\fR option\. 36 | . 37 | .SH "BUGS" 38 | \fBgts\-annotate\fR currently has no known bugs\. 39 | . 40 | .SH "AUTHORS" 41 | \fBgts\-annotate\fR is written and maintained by Kotone Itaya\. 42 | .
43 | .SH "SEE ALSO" 44 | gts(1), gts\-define(1), gts\-seqin(7), gts\-seqout(7) 45 | -------------------------------------------------------------------------------- /seqio/scanner.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import ( 4 | "io" 5 | 6 | "github.com/go-gts/gts" 7 | "github.com/go-pars/pars" 8 | ) 9 | 10 | var sequenceParsers = []pars.Parser{ 11 | GenBankParser, 12 | FastaParser, 13 | } 14 | 15 | // Scanner represents a sequence file scanner. 16 | type Scanner struct { 17 | p pars.Parser 18 | s *pars.State 19 | res pars.Result 20 | err error 21 | } 22 | 23 | // NewScanner creates a new sequence scanner. 24 | func NewScanner(p pars.Parser, r io.Reader) *Scanner { 25 | return &Scanner{p, pars.NewState(r), pars.Result{}, nil} 26 | } 27 | 28 | // NewAutoScanner creates a new sequence scanner which will automatically 29 | // detect the sequence format from a list of known parsers on the first scan. 30 | func NewAutoScanner(r io.Reader) *Scanner { 31 | return NewScanner(nil, r) 32 | } 33 | 34 | // Scan advances the scanner using the given parser. If the parser is not yet 35 | // specified, the first scan will match one of the known parsers. 
36 | func (s *Scanner) Scan() bool { 37 | if s.err != nil { 38 | return false 39 | } 40 | 41 | if s.p == nil { 42 | errs := make([]struct { 43 | err error 44 | pos pars.Position 45 | }, len(sequenceParsers)) 46 | for i, p := range sequenceParsers { 47 | s.s.Push() 48 | s.res, errs[i].err = p.Parse(s.s) 49 | if errs[i].err == nil { 50 | s.s.Drop() 51 | s.p = p 52 | return true 53 | } 54 | errs[i].pos = s.s.Position() 55 | s.s.Pop() 56 | } 57 | argmax := 0 58 | maxpos := pars.Position{Line: 0, Byte: 0} 59 | for i, v := range errs { 60 | if maxpos.Less(v.pos) { 61 | argmax = i 62 | maxpos = v.pos 63 | } 64 | } 65 | s.err = errs[argmax].err 66 | return false 67 | } 68 | 69 | s.res, s.err = s.p.Parse(s.s) 70 | return s.err == nil 71 | } 72 | 73 | // Value returns the most recently scanned sequence value. 74 | func (s Scanner) Value() gts.Sequence { 75 | if seq, ok := s.res.Value.(gts.Sequence); ok { 76 | return seq 77 | } 78 | return nil 79 | } 80 | 81 | // Err returns the first non-EOF error that was encountered by the scanner. 82 | func (s Scanner) Err() error { 83 | if s.err == nil || dig(s.err) == io.EOF { 84 | return nil 85 | } 86 | return s.err 87 | } 88 | -------------------------------------------------------------------------------- /man/gts-locator.7: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-LOCATOR" "7" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-locator\fR 8 | . 9 | .SH "gts\-locator(7) \-\- patterns to refer to locations in a sequence" 10 | . 11 | .SH "SYNOPSIS" 12 | [selector|point|range][@modifier] 13 | . 14 | .SH "DESCRIPTION" 15 | \fBgts\-locator\fRs are patterns for specifying locations within a sequence\. A \fIlocator\fR consists of a \fIlocation specifier\fR and a \fImodifier\fR\. 
A \fIlocation specifier\fR is one of a \fBmodifier\fR, a \fBselector\fR, a \fBpoint location\fR, or \fBrange location\fR\. A \fIselector\fR takes the form \fB[feature_key][/[qualifier1][=regexp1]][/[qualifier2][=regexp2]]\.\.\.\fR\. See gts\-selector(7) for more details\. A \fIpoint location\fR is simply a single integer that directly specifies a single point in the sequence (starting at 1)\. A \fIrange location\fR is a pair of integers connected with \fB\.\.\fR (starting at 1), which is identical to the notation of a feature range location\. However, the \fIrange location\fR of a \fIlocator\fR may specify a \fImodifier\fR, in which case the \fB^\fR represents the beginning of the sequence and the \fB$\fR represents the end of the sequence\. The locations specified by the \fIlocation specifier\fR can be modified using a \fImodifier\fR\. A \fImodifier\fR can take one of five forms: \fB^[(+|\-)n]\fR, \fB$[[(+|\-)m]]\fR, \fB^[(+|\-)n]\.\.$[(+|\-)m]\fR, \fB^[(+|\-)n]\.\.^[(+|\-)m]\fR, or \fB$[(+|\-)n]\.\.$[(+|\-)m]\fR\. See gts\-modifier(7) for more details\. 16 | . 17 | .SH "EXAMPLES" 18 | Locate the sequence 100 bases upstream of a \fBCDS\fR: 19 | . 20 | .IP "" 4 21 | . 22 | .nf 23 | 24 | CDS@^\-100\.\.^ 25 | . 26 | .fi 27 | . 28 | .IP "" 0 29 | . 30 | .P 31 | Extend a location 20 bases upstream and downstream of a gene: 32 | . 33 | .IP "" 4 34 | . 35 | .nf 36 | 37 | gene@^\-20\.\.$+20 38 | . 39 | .fi 40 | . 41 | .IP "" 0 42 | . 43 | .P 44 | Locate a range between 100 and 200 bases: 45 | . 46 | .IP "" 4 47 | . 48 | .nf 49 | 50 | 100\.\.200 51 | . 52 | .fi 53 | . 54 | .IP "" 0 55 | . 
56 | .SH "SEE ALSO" 57 | gts(1), gts\-delete(1), gts\-infix(1) gts\-insert(1), gts\-rotate(1), gts\-split(1), gts\-modifier(7), gts\-selector(7) 58 | -------------------------------------------------------------------------------- /cmd/gts/repair.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | "strings" 8 | 9 | "github.com/go-gts/flags" 10 | "github.com/go-gts/gts" 11 | "github.com/go-gts/gts/cmd" 12 | "github.com/go-gts/gts/seqio" 13 | ) 14 | 15 | func init() { 16 | flags.Register("repair", "repair fragmented features", repairFunc) 17 | } 18 | 19 | func repairFunc(ctx *flags.Context) error { 20 | h := newHash() 21 | pos, opt := flags.Flags() 22 | 23 | seqinPath := new(string) 24 | *seqinPath = "-" 25 | if cmd.IsTerminal(os.Stdin.Fd()) { 26 | seqinPath = pos.String("seqin", "input sequence file (may be omitted if standard input is provided)") 27 | } 28 | 29 | nocache := opt.Switch(0, "no-cache", "do not use or create cache") 30 | seqoutPath := opt.String('o', "output", "-", "output sequence file (specifying `-` will force standard output)") 31 | format := opt.String('F', "format", "", "output file format (defaults to same as input)") 32 | 33 | if err := ctx.Parse(pos, opt); err != nil { 34 | return err 35 | } 36 | 37 | d, err := newIODelegate(*seqinPath, *seqoutPath) 38 | if err != nil { 39 | return ctx.Raise(err) 40 | } 41 | defer d.Close() 42 | 43 | filetype := seqio.Detect(*seqoutPath) 44 | if *format != "" { 45 | filetype = seqio.ToFileType(*format) 46 | } 47 | 48 | if !*nocache { 49 | data := encodePayload([]tuple{ 50 | {"command", strings.Join(ctx.Name, "-")}, 51 | {"version", gts.Version.String()}, 52 | {"filetype", filetype}, 53 | }) 54 | 55 | ok, err := d.TryCache(h, data) 56 | if ok || err != nil { 57 | return ctx.Raise(err) 58 | } 59 | } 60 | 61 | scanner := seqio.NewAutoScanner(d) 62 | buffer := bufio.NewWriter(d) 63 | writer := 
seqio.NewWriter(buffer, filetype) 64 | 65 | for scanner.Scan() { 66 | seq := scanner.Value() 67 | 68 | ff := seq.Features() 69 | ff = gts.Repair(ff) 70 | seq = gts.WithFeatures(seq, ff) 71 | 72 | if _, err := writer.WriteSeq(seq); err != nil { 73 | return ctx.Raise(err) 74 | } 75 | 76 | if err := buffer.Flush(); err != nil { 77 | return ctx.Raise(err) 78 | } 79 | } 80 | 81 | if err := scanner.Err(); err != nil { 82 | return ctx.Raise(fmt.Errorf("encountered error in scanner: %v", err)) 83 | } 84 | 85 | return nil 86 | } 87 | -------------------------------------------------------------------------------- /seqio/date_test.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | "time" 7 | 8 | "github.com/go-gts/gts/internal/testutils" 9 | ) 10 | 11 | func TestDate(t *testing.T) { 12 | now := time.Now() 13 | in := FromTime(now) 14 | out := FromTime(in.ToTime()) 15 | testutils.Equals(t, in, out) 16 | } 17 | 18 | var isLeapYearTests = []struct { 19 | in int 20 | out bool 21 | }{ 22 | {2000, true}, 23 | {2100, false}, 24 | {2020, true}, 25 | {2021, false}, 26 | } 27 | 28 | func TestIsLeapYear(t *testing.T) { 29 | for _, tt := range isLeapYearTests { 30 | out := isLeapYear(tt.in) 31 | if out != tt.out { 32 | t.Errorf("isLeapYear(%q) = %v, want %v", tt.in, out, tt.out) 33 | } 34 | } 35 | } 36 | 37 | var checkDateTests = []struct { 38 | year int 39 | month time.Month 40 | day int 41 | pass bool 42 | }{ 43 | {2020, 13, 29, false}, 44 | {2020, time.February, 0, false}, 45 | {2029, time.February, 29, false}, 46 | {2020, time.February, 29, true}, 47 | } 48 | 49 | func TestCheckDate(t *testing.T) { 50 | for _, tt := range checkDateTests { 51 | err := checkDate(tt.year, tt.month, tt.day) 52 | if tt.pass && err != nil { 53 | t.Errorf("checkDate(%d, %s, %d): %v", tt.year, tt.month, tt.day, err) 54 | } 55 | if !tt.pass && err == nil { 56 | t.Errorf("checkDate(%d, %s, %d): expected an 
error", tt.year, tt.month, tt.day) 57 | } 58 | } 59 | } 60 | 61 | var asDatePassTests = []struct { 62 | in string 63 | out Date 64 | }{ 65 | {"02-JAN-2006", Date{2006, time.January, 2}}, 66 | {"02-Jan-2006", Date{2006, time.January, 2}}, 67 | {"02-01-2006", Date{2006, time.January, 2}}, 68 | } 69 | 70 | var asDateFailTests = []string{ 71 | "02", 72 | "foo-JAN-2006", 73 | "02-foo-2006", 74 | "02-JAN-foo", 75 | } 76 | 77 | func TestAsDate(t *testing.T) { 78 | for _, tt := range asDatePassTests { 79 | out, err := AsDate(tt.in) 80 | if err != nil { 81 | t.Errorf("AsDate(%q): %v", tt.in, err) 82 | continue 83 | } 84 | if !reflect.DeepEqual(out, tt.out) { 85 | t.Errorf("AsDate(%q) = %v, want %v", tt.in, out, tt.out) 86 | } 87 | } 88 | 89 | for _, in := range asDateFailTests { 90 | _, err := AsDate(in) 91 | if err == nil { 92 | t.Errorf("AsDate(%q): expected an error", in) 93 | } 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /seqio/origin_test.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/go-gts/gts/internal/testutils" 8 | ) 9 | 10 | func multiLineString(ss ...string) string { 11 | return strings.Join(ss, "\n") + "\n" 12 | } 13 | 14 | var originTests = []struct { 15 | in, out string 16 | }{ 17 | { 18 | "gagttttatcgcttccatgacgcagaagttaacactttcggatatttctgatgagtcgaa", 19 | " 1 gagttttatc gcttccatga cgcagaagtt aacactttcg gatatttctg atgagtcgaa\n", 20 | }, 21 | { 22 | "gagttttatcgcttccatgacgcagaagttaacactttcggatatttctgatgagtcgaaaaattatcttgataaagcaggaattactactgcttgtttacgaattaaat", 23 | multiLineString( 24 | " 1 gagttttatc gcttccatga cgcagaagtt aacactttcg gatatttctg atgagtcgaa", 25 | " 61 aaattatctt gataaagcag gaattactac tgcttgttta cgaattaaat", 26 | ), 27 | }, 28 | { 29 | "gagttttatcgcttccatgacgcagaagttaacactttcggatatttctgatgagtcgaaaaattatcttgataaagcaggaattactactgcttgtttacgaattaaatcgaagtgga", 30 | 
multiLineString( 31 | " 1 gagttttatc gcttccatga cgcagaagtt aacactttcg gatatttctg atgagtcgaa", 32 | " 61 aaattatctt gataaagcag gaattactac tgcttgttta cgaattaaat cgaagtgga", 33 | ), 34 | }, 35 | { 36 | "gagttttatcgcttccatgacgcagaagttaacactttcggatatttctgatgagtcgaaaaattatcttgataaagcaggaattactactgcttgtttacgaattaaatcgaagtggac", 37 | multiLineString( 38 | " 1 gagttttatc gcttccatga cgcagaagtt aacactttcg gatatttctg atgagtcgaa", 39 | " 61 aaattatctt gataaagcag gaattactac tgcttgttta cgaattaaat cgaagtggac", 40 | ), 41 | }, 42 | } 43 | 44 | func TestOrigin(t *testing.T) { 45 | for _, tt := range originTests { 46 | o := NewOrigin([]byte(tt.in)) 47 | out := o.String() 48 | if out != tt.out { 49 | testutils.DiffLine(t, out, tt.out) 50 | } 51 | if o.Len() != len(tt.in) { 52 | t.Errorf("o.Len() = %d, want %d", o.Len(), len(tt.in)) 53 | } 54 | 55 | out = string(o.Bytes()) 56 | if out != tt.in { 57 | testutils.Diff(t, out, tt.in) 58 | } 59 | if o.Len() != len(tt.in) { 60 | t.Errorf("o.Len() = %d, want %d", o.Len(), len(tt.in)) 61 | } 62 | 63 | out = o.String() 64 | if out != tt.out { 65 | testutils.DiffLine(t, out, tt.out) 66 | } 67 | out = string(o.Buffer) 68 | if out != tt.in { 69 | testutils.Diff(t, out, tt.in) 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /cmd/gts/clear.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | "strings" 8 | 9 | "github.com/go-gts/flags" 10 | "github.com/go-gts/gts" 11 | "github.com/go-gts/gts/cmd" 12 | "github.com/go-gts/gts/seqio" 13 | ) 14 | 15 | func init() { 16 | flags.Register("clear", "remove all features from the sequence (excluding source features)", clearFunc) 17 | } 18 | 19 | func clearFunc(ctx *flags.Context) error { 20 | h := newHash() 21 | pos, opt := flags.Flags() 22 | 23 | seqinPath := new(string) 24 | *seqinPath = "-" 25 | if cmd.IsTerminal(os.Stdin.Fd()) { 26 | seqinPath = 
pos.String("seqin", "input sequence file (may be omitted if standard input is provided)") 27 | } 28 | 29 | nocache := opt.Switch(0, "no-cache", "do not use or create cache") 30 | seqoutPath := opt.String('o', "output", "-", "output sequence file (specifying `-` will force standard output)") 31 | format := opt.String('F', "format", "", "output file format (defaults to same as input)") 32 | 33 | if err := ctx.Parse(pos, opt); err != nil { 34 | return err 35 | } 36 | 37 | d, err := newIODelegate(*seqinPath, *seqoutPath) 38 | if err != nil { 39 | return ctx.Raise(err) 40 | } 41 | defer d.Close() 42 | 43 | filetype := seqio.Detect(*seqoutPath) 44 | if *format != "" { 45 | filetype = seqio.ToFileType(*format) 46 | } 47 | 48 | if !*nocache { 49 | data := encodePayload([]tuple{ 50 | {"command", strings.Join(ctx.Name, "-")}, 51 | {"version", gts.Version.String()}, 52 | {"filetype", filetype}, 53 | }) 54 | 55 | ok, err := d.TryCache(h, data) 56 | if ok || err != nil { 57 | return ctx.Raise(err) 58 | } 59 | } 60 | 61 | scanner := seqio.NewAutoScanner(d) 62 | buffer := bufio.NewWriter(d) 63 | writer := seqio.NewWriter(buffer, filetype) 64 | 65 | for scanner.Scan() { 66 | seq := scanner.Value() 67 | ff := seq.Features().Filter(gts.Key("source")) 68 | seq = gts.WithFeatures(seq, ff) 69 | if _, err := writer.WriteSeq(seq); err != nil { 70 | return ctx.Raise(err) 71 | } 72 | 73 | if err := buffer.Flush(); err != nil { 74 | return ctx.Raise(err) 75 | } 76 | } 77 | 78 | if err := scanner.Err(); err != nil { 79 | return ctx.Raise(fmt.Errorf("encountered error in scanner: %v", err)) 80 | } 81 | 82 | return nil 83 | } 84 | -------------------------------------------------------------------------------- /seqio/fasta.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "strings" 8 | 9 | "github.com/go-gts/gts" 10 | "github.com/go-pars/pars" 11 | "github.com/go-wrap/wrap" 12 | ) 13 | 14 | 
// Fasta represents a FASTA format sequence object. 15 | type Fasta struct { 16 | Desc string 17 | Data []byte 18 | } 19 | 20 | // Info returns the metadata of the sequence. 21 | func (f Fasta) Info() interface{} { 22 | return f.Desc 23 | } 24 | 25 | // Features returns the feature table of the sequence. 26 | func (f Fasta) Features() gts.FeatureSlice { 27 | return nil 28 | } 29 | 30 | // Bytes returns the byte representation of the sequence. 31 | func (f Fasta) Bytes() []byte { 32 | return f.Data 33 | } 34 | 35 | // WriteTo satisfies the io.WriterTo interface. 36 | func (f Fasta) WriteTo(w io.Writer) (int64, error) { 37 | desc := strings.ReplaceAll(f.Desc, "\n", " ") 38 | data := wrap.Force(string(f.Data), 70) 39 | s := fmt.Sprintf(">%s\n%s\n", desc, data) 40 | n, err := io.WriteString(w, s) 41 | return int64(n), err 42 | } 43 | 44 | // FastaWriter writes a gts.Sequence to an io.Writer in FASTA format. 45 | type FastaWriter struct { 46 | w io.Writer 47 | } 48 | 49 | // WriteSeq satisfies the seqio.SeqWriter interface. 50 | func (w FastaWriter) WriteSeq(seq gts.Sequence) (int, error) { 51 | switch v := seq.(type) { 52 | case Fasta: 53 | n, err := v.WriteTo(w.w) 54 | return int(n), err 55 | case *Fasta: 56 | return w.WriteSeq(*v) 57 | default: 58 | switch info := v.Info().(type) { 59 | case string: 60 | f := Fasta{info, v.Bytes()} 61 | return w.WriteSeq(f) 62 | case fmt.Stringer: 63 | f := Fasta{info.String(), v.Bytes()} 64 | return w.WriteSeq(f) 65 | default: 66 | return 0, fmt.Errorf("gts does not know how to format a sequence with metadata type `%T` as FASTA", info) 67 | } 68 | } 69 | } 70 | 71 | // FastaParser attempts to parse a single FASTA file entry. 
72 | var FastaParser = pars.Seq( 73 | '>', pars.Line, pars.Until(pars.Any('>', pars.End)), 74 | ).Map(func(result *pars.Result) error { 75 | desc := string(result.Children[1].Token) 76 | body := result.Children[2].Token 77 | lines := bytes.Split(body, []byte{'\n'}) 78 | data := bytes.Join(lines, nil) 79 | result.SetValue(Fasta{desc, data}) 80 | return nil 81 | }) 82 | -------------------------------------------------------------------------------- /man/gts-split.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-split(1) -- split the sequence at the provided locations 2 | 3 | ## SYNOPSIS 4 | 5 | gts-split [--version] [-h | --help] [] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-split** takes a single sequence input and splits the sequences into 10 | fragments at the specified locations. If the sequence input is ommited, 11 | standard input will be read instead. The location to be split is specified 12 | using a `locator`. 13 | 14 | A locator consists of a location specifier and a modifier. A location specifier 15 | may be a `modifier`, a `point location`, a `range location`, or a `selector`. 16 | The syntax for a locator is `[specifier][@modifier]`. See gts-locator(7) for a 17 | more in-depth explanation of a locator. Refer to the EXAMPLES for some examples 18 | to get started. 19 | 20 | ## OPTIONS 21 | 22 | * ``: 23 | A locator string (`[specifier][@modifier]`). See gts-locator(7) for more 24 | details. 25 | 26 | * ``: 27 | Input sequence file (may be omitted if standard input is provided). See 28 | gts-seqin(7) for a list of currently supported list of sequence formats. 29 | 30 | * `-F `, `--format=`: 31 | Output file format (defaults to same as input). See gts-seqout(7) for a 32 | list of currently supported list of sequence formats. The format specified 33 | with this option will override the file type detection from the output 34 | filename. 35 | 36 | * `--no-cache`: 37 | Do not use or create cache. 
See gts-cache(7) for details. 38 | 39 | * `-o `, `--output=`: 40 | Output sequence file (specifying `-` will force standard output). The 41 | output file format will be automatically detected from the filename if none 42 | is specified with the `-F` or `--format` option. 43 | 44 | ## EXAMPLES 45 | 46 | Split the sequence at 100th base: 47 | 48 | $ gts split 100 49 | 50 | Split the sequence before each CDS feature: 51 | 52 | $ gts split CDS@^ 53 | 54 | ## BUGS 55 | 56 | **gts-split** currently has no known bugs. 57 | 58 | ## AUTHORS 59 | 60 | **gts-split** is written and maintained by Kotone Itaya. 61 | 62 | ## SEE ALSO 63 | 64 | gts(1), gts-join(1), gts-locator(7), gts-modifier(7), gts-selector(7), 65 | gts-seqin(7), gts-seqout(7) -------------------------------------------------------------------------------- /seqio/format_conversion_test.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "strings" 7 | "testing" 8 | 9 | "github.com/go-gts/gts" 10 | "github.com/go-gts/gts/internal/testutils" 11 | "github.com/go-pars/pars" 12 | ) 13 | 14 | func parseString(parser pars.Parser, s string) (gts.Sequence, error) { 15 | state := pars.FromString(s) 16 | result, err := parser.Parse(state) 17 | if err != nil { 18 | return GenBank{}, fmt.Errorf("parser returned %v\nBuffer:\n%q", err, string(result.Token)) 19 | } 20 | return result.Value.(gts.Sequence), nil 21 | } 22 | 23 | func TestFormatConversion(t *testing.T) { 24 | s1 := testutils.ReadTestfile(t, "NC_001422.gb") 25 | s2 := testutils.ReadTestfile(t, "NC_001422.fasta") 26 | 27 | seq1, err := parseString(GenBankParser, s1) 28 | if err != nil { 29 | t.Error(err) 30 | return 31 | } 32 | 33 | seq2, err := parseString(FastaParser, s2) 34 | if err != nil { 35 | t.Error(err) 36 | return 37 | } 38 | 39 | testutils.Equals(t, bytes.ToUpper(seq1.Bytes()), bytes.ToUpper(seq2.Bytes())) 40 | b := &strings.Builder{} 41 | n, err := NewWriter(b, 
FastaFile).WriteSeq(seq1) 42 | if int(n) != len(s2) || err != nil { 43 | t.Errorf("formatter.WriteTo(builder) = (%d, %v), want (%d, nil)", n, err, len(s2)) 44 | } 45 | out := b.String() 46 | testutils.DiffLine(t, strings.ToUpper(s2), strings.ToUpper(out)) 47 | } 48 | 49 | func TestSliceToFasta(t *testing.T) { 50 | in := testutils.ReadTestfile(t, "NC_001422.gb") 51 | state := pars.FromString(in) 52 | parser := pars.AsParser(GenBankParser) 53 | 54 | exp := testutils.ReadTestfile(t, "NC_001422_part.fasta") 55 | 56 | result, err := parser.Parse(state) 57 | if err != nil { 58 | t.Errorf("parser returned %v\nBuffer:\n%q", err, string(result.Token)) 59 | } 60 | 61 | switch seq := result.Value.(type) { 62 | case GenBank: 63 | seq = gts.Slice(seq, 2379, 2512).(GenBank) 64 | b := &strings.Builder{} 65 | n, err := NewWriter(b, FastaFile).WriteSeq(seq) 66 | if int(n) != len(exp) || err != nil { 67 | t.Errorf("writer.WriteSeq(seq) = (%d, %v), want (%d, nil)", n, err, len(exp)) 68 | } 69 | out := b.String() 70 | testutils.DiffLine(t, strings.ToUpper(exp), strings.ToUpper(out)) 71 | 72 | default: 73 | t.Errorf("result.Value.(type) = %T, want %T", seq, GenBank{}) 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /cmd/gts/join.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strings" 7 | 8 | "github.com/go-gts/flags" 9 | "github.com/go-gts/gts" 10 | "github.com/go-gts/gts/cmd" 11 | "github.com/go-gts/gts/seqio" 12 | ) 13 | 14 | func init() { 15 | flags.Register("join", "join the sequences contained in the files", joinFunc) 16 | } 17 | 18 | func joinFunc(ctx *flags.Context) error { 19 | h := newHash() 20 | pos, opt := flags.Flags() 21 | 22 | seqinPath := new(string) 23 | *seqinPath = "-" 24 | if cmd.IsTerminal(os.Stdin.Fd()) { 25 | seqinPath = pos.String("seqin", "input sequence file (may be omitted if standard input is provided)") 26 | } 27 
| 28 | nocache := opt.Switch(0, "no-cache", "do not use or create cache") 29 | seqoutPath := opt.String('o', "output", "-", "output sequence file (specifying `-` will force standard output)") 30 | format := opt.String('F', "format", "", "output file format (defaults to same as input)") 31 | circular := opt.Switch('c', "circular", "output the sequence as circular if possible") 32 | 33 | if err := ctx.Parse(pos, opt); err != nil { 34 | return err 35 | } 36 | 37 | d, err := newIODelegate(*seqinPath, *seqoutPath) 38 | if err != nil { 39 | return ctx.Raise(err) 40 | } 41 | defer d.Close() 42 | 43 | filetype := seqio.Detect(*seqoutPath) 44 | if *format != "" { 45 | filetype = seqio.ToFileType(*format) 46 | } 47 | 48 | if !*nocache { 49 | data := encodePayload([]tuple{ 50 | {"command", strings.Join(ctx.Name, "-")}, 51 | {"version", gts.Version.String()}, 52 | {"circular", *circular}, 53 | {"filetype", filetype}, 54 | }) 55 | 56 | ok, err := d.TryCache(h, data) 57 | if ok || err != nil { 58 | return ctx.Raise(err) 59 | } 60 | } 61 | 62 | seqs := []gts.Sequence{} 63 | scanner := seqio.NewAutoScanner(d) 64 | for scanner.Scan() { 65 | seq := scanner.Value() 66 | seqs = append(seqs, seq) 67 | } 68 | // Report scanner failures before anything is written, so a failed scan never yields a joined sequence built from partial input. 69 | if err := scanner.Err(); err != nil { 70 | return ctx.Raise(fmt.Errorf("encountered error in scanner: %v", err)) 71 | } 72 | 73 | seq := gts.Concat(seqs...) 74 | 75 | if *circular { 76 | seq = gts.WithTopology(seq, gts.Circular) 77 | } 78 | 79 | writer := seqio.NewWriter(d, filetype) 80 | 81 | if _, err := writer.WriteSeq(seq); err != nil { 82 | return ctx.Raise(err) 83 | } 84 | 85 | return nil 86 | } 87 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | before: 2 | hooks: 3 | - go mod download 4 | - go generate ./... 
5 | release: 6 | github: 7 | owner: go-gts 8 | name: gts 9 | extra_files: 10 | - glob: ./completion/* 11 | builds: 12 | - main: ./cmd/gts 13 | id: gts-bio 14 | env: 15 | - CGO_ENABLED=0 16 | binary: gts 17 | flags: "-v" 18 | goos: 19 | - linux 20 | - darwin 21 | - main: ./cmd/togo 22 | id: gts-togo 23 | env: 24 | - CGO_ENABLED=0 25 | binary: togo 26 | flags: "-v" 27 | goos: 28 | - linux 29 | - darwin 30 | archives: 31 | - replacements: 32 | "386": i386 33 | darwin: Darwin 34 | linux: Linux 35 | windows: Windows 36 | amd64: x86_64 37 | files: 38 | - README.md 39 | - LICENSE 40 | - man/*.1 41 | - man/*.7 42 | - completion/* 43 | checksum: 44 | name_template: checksums.txt 45 | snapshot: 46 | name_template: "{{ .Tag }}-next" 47 | changelog: 48 | sort: asc 49 | filters: 50 | exclude: 51 | - "^docs:" 52 | - "^test:" 53 | nfpms: 54 | - id: gts-bio-nfpms 55 | builds: 56 | - gts-bio 57 | - gts-togo 58 | homepage: "https://github.com/go-gts/gts" 59 | maintainer: "Kotone Itaya " 60 | description: "GTS: Genome Transformation Subprograms" 61 | license: MIT 62 | formats: 63 | - deb 64 | - rpm 65 | contents: 66 | - src: ./man/gts*.1 67 | dst: /usr/share/man/man1 68 | - src: ./man/gts*.7 69 | dst: /usr/share/man/man7 70 | - src: ./completion/gts-completion.bash 71 | dst: /etc/bash_completion.d/gts-completion.bash 72 | - src: ./completion/gts-completion.zsh 73 | dst: /usr/local/share/zsh/site-functions/_gts 74 | epoch: 1 75 | brews: 76 | - name: gts-bio 77 | tap: 78 | owner: go-gts 79 | name: homebrew-gts 80 | folder: Formula 81 | homepage: "https://github.com/go-gts/gts" 82 | description: "GTS: Genome Transformation Subprograms" 83 | install: | 84 | bin.install "gts" 85 | bin.install "togo" 86 | man1.install Dir["man/gts*.1"] 87 | man7.install Dir["man/gts*.7"] 88 | bash_completion.install "completion/gts-completion.bash" 89 | zsh_completion.install "completion/gts-completion.zsh" => "_gts" 90 | test: 'system "#{bin}/gts --version"' 91 | 
-------------------------------------------------------------------------------- /man/gts-rotate.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-rotate -- shift the coordinates of a circular sequence 2 | 3 | ## SYNOPSIS 4 | 5 | gts-rotate [--version] [-h | --help] [<args>] <locator> <seqin> 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-rotate** takes a single sequence input and shifts the sequence so that 10 | the position matching the location specified by the given `locator` comes to 11 | the start of the sequence file. If the sequence input is omitted, standard 12 | input will be read instead. 13 | 14 | A locator consists of a location specifier and a modifier. A location specifier 15 | may be a `modifier`, a `point location`, a `range location`, or a `selector`. 16 | The syntax for a locator is `[specifier][@modifier]`. See gts-locator(7) for a 17 | more in-depth explanation of a locator. Refer to the EXAMPLES for some examples 18 | to get started. 19 | 20 | The topology of a sequence that is rotated will be changed to circular. 21 | 22 | ## OPTIONS 23 | 24 | * `<locator>`: 25 | A locator string (`[specifier][@modifier]`). See gts-locator(7) for more 26 | details. 27 | 28 | * `<seqin>`: 29 | Input sequence file (may be omitted if standard input is provided). See 30 | gts-seqin(7) for a list of currently supported sequence formats. 31 | 32 | * `-F <format>`, `--format=<format>`: 33 | Output file format (defaults to same as input). See gts-seqout(7) for a 34 | list of currently supported sequence formats. The format specified 35 | with this option will override the file type detection from the output 36 | filename. 37 | 38 | * `--no-cache`: 39 | Do not use or create cache. See gts-cache(7) for details. 40 | 41 | * `-o <output>`, `--output=<output>`: 42 | Output sequence file (specifying `-` will force standard output). The 43 | output file format will be automatically detected from the filename if none 44 | is specified with the `-F` or `--format` option. 
45 | 46 | ## EXAMPLES 47 | 48 | Rotate a sequence 100 bases: 49 | 50 | $ gts rotate 100 51 | 52 | Rotate a sequence to the first CDS in the sequence: 53 | 54 | $ gts rotate CDS 55 | 56 | ## BUGS 57 | 58 | **gts-rotate** currently has no known bugs. 59 | 60 | ## AUTHORS 61 | 62 | **gts-rotate** is written and maintained by Kotone Itaya. 63 | 64 | ## SEE ALSO 65 | 66 | gts(1), gts-locator(7), gts-modifier(7), gts-selector(7), gts-seqin(7), 67 | gts-seqout(7) -------------------------------------------------------------------------------- /man/gts-pick.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-pick(1) -- pick sequence(s) from multiple sequences 2 | 3 | ## SYNOPSIS 4 | 5 | gts-pick [--version] [-h | --help] [<args>] <list> <seqin> 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-pick** takes a single sequence input and returns the sequences specified 10 | by the _list_ option. If the sequence input is omitted, standard input will be 11 | read instead. The _list_ option is equivalent to that of cut(1). Sequence 12 | numbering starts at 1. Specifying the `-f` or `--feature` option will output 13 | all sequences but pick the features matching the _list_ option. 14 | 15 | ## OPTIONS 16 | 17 | * `<list>`: 18 | List of sequences to pick (identical to the list option in cut). A list is 19 | a comma separated set of numbers and/or number ranges. Number ranges 20 | consist of a number, a dash character `-`, and a second number. A number 21 | range will select the sequences from the first number to the second, 22 | inclusive. Numbers may be preceded by a dash, which selects all sequences 23 | from 1 up to the number. Numbers may be followed by a dash, which selects 24 | all sequences from the number to the last. 25 | 26 | * `<seqin>`: 27 | Input sequence file (may be omitted if standard input is provided). See 28 | gts-seqin(7) for a list of currently supported sequence formats. 29 | 30 | * `-f`, `--feature`: 31 | Pick features instead of sequences. 
32 | 33 | * `-F <format>`, `--format=<format>`: 34 | Output file format (defaults to same as input). See gts-seqout(7) for a 35 | list of currently supported sequence formats. The format specified 36 | with this option will override the file type detection from the output 37 | filename. 38 | 39 | * `--no-cache`: 40 | Do not use or create cache. See gts-cache(7) for details. 41 | 42 | * `-o <output>`, `--output=<output>`: 43 | Output sequence file (specifying `-` will force standard output). The 44 | output file format will be automatically detected from the filename if none 45 | is specified with the `-F` or `--format` option. 46 | 47 | 48 | ## EXAMPLES 49 | 50 | Pick the first sequence in the file: 51 | 52 | $ gts pick 1 53 | 54 | Pick the first ten features from each sequence in the file: 55 | 56 | $ gts pick -f -10 57 | 58 | ## BUGS 59 | 60 | **gts-pick** currently has no known bugs. 61 | 62 | ## AUTHORS 63 | 64 | **gts-pick** is written and maintained by Kotone Itaya. 65 | 66 | ## SEE ALSO 67 | 68 | gts(1), gts-seqin(7), gts-seqout(7), cut(1) -------------------------------------------------------------------------------- /cmd/gts/rotate.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | "strings" 8 | 9 | "github.com/go-gts/flags" 10 | "github.com/go-gts/gts" 11 | "github.com/go-gts/gts/cmd" 12 | "github.com/go-gts/gts/seqio" 13 | ) 14 | 15 | func init() { 16 | flags.Register("rotate", "shift the coordinates of a circular sequence", rotateFunc) 17 | } 18 | 19 | func rotateFunc(ctx *flags.Context) error { 20 | h := newHash() 21 | pos, opt := flags.Flags() 22 | 23 | locstr := pos.String("locator", "a locator string ([modifier|selector|point|range][@modifier])") 24 | 25 | seqinPath := new(string) 26 | *seqinPath = "-" 27 | if cmd.IsTerminal(os.Stdin.Fd()) { 28 | seqinPath = pos.String("seqin", "input sequence file (may be omitted if standard input is provided)") 29 | } 30 | 31 | nocache 
:= opt.Switch(0, "no-cache", "do not use or create cache") 32 | format := opt.String('F', "format", "", "output file format (defaults to same as input)") 33 | seqoutPath := opt.String('o', "output", "-", "output sequence file (specifying `-` will force standard output)") 34 | 35 | if err := ctx.Parse(pos, opt); err != nil { 36 | return err 37 | } 38 | 39 | locate, err := gts.AsLocator(*locstr) 40 | if err != nil { 41 | return ctx.Raise(err) 42 | } 43 | 44 | d, err := newIODelegate(*seqinPath, *seqoutPath) 45 | if err != nil { 46 | return ctx.Raise(err) 47 | } 48 | defer d.Close() 49 | 50 | filetype := seqio.Detect(*seqoutPath) 51 | if *format != "" { 52 | filetype = seqio.ToFileType(*format) 53 | } 54 | 55 | if !*nocache { 56 | data := encodePayload([]tuple{ 57 | {"command", strings.Join(ctx.Name, "-")}, 58 | {"version", gts.Version.String()}, 59 | {"locator", *locstr}, 60 | {"filetype", filetype}, 61 | }) 62 | 63 | ok, err := d.TryCache(h, data) 64 | if ok || err != nil { 65 | return ctx.Raise(err) 66 | } 67 | } 68 | 69 | scanner := seqio.NewAutoScanner(d) 70 | buffer := bufio.NewWriter(d) 71 | writer := seqio.NewWriter(buffer, filetype) 72 | 73 | for scanner.Scan() { 74 | seq := scanner.Value() 75 | rr := locate(seq) 76 | if len(rr) > 0 { 77 | seq = gts.Rotate(seq, -rr[0].Head()) 78 | } 79 | seq = gts.WithTopology(seq, gts.Circular) 80 | if _, err := writer.WriteSeq(seq); err != nil { 81 | return ctx.Raise(err) 82 | } 83 | 84 | if err := buffer.Flush(); err != nil { 85 | return ctx.Raise(err) 86 | } 87 | } 88 | 89 | if err := scanner.Err(); err != nil { 90 | return ctx.Raise(fmt.Errorf("encountered error in scanner: %v", err)) 91 | } 92 | 93 | return nil 94 | } 95 | -------------------------------------------------------------------------------- /locator.go: -------------------------------------------------------------------------------- 1 | package gts 2 | 3 | import ( 4 | "errors" 5 | "strings" 6 | 7 | "github.com/go-pars/pars" 8 | ) 9 | 10 | // Locator is a 
function that maps features to its regions. 11 | type Locator func(seq Sequence) Regions 12 | 13 | func allLocator(seq Sequence) Regions { 14 | ff := seq.Features() 15 | rr := make(Regions, len(ff)) 16 | for i, f := range ff { 17 | rr[i] = f.Loc.Region() 18 | } 19 | return rr 20 | } 21 | 22 | func resizeLocator(locate Locator, mod Modifier) Locator { 23 | return func(seq Sequence) Regions { 24 | rr := locate(seq) 25 | for i, r := range rr { 26 | rr[i] = r.Resize(mod) 27 | } 28 | return rr 29 | } 30 | } 31 | 32 | func relativeLocator(mod Modifier) Locator { 33 | return func(seq Sequence) Regions { 34 | seg := Segment{0, Len(seq)} 35 | return Regions{seg.Resize(mod)} 36 | } 37 | } 38 | 39 | func locationLocator(loc Location) Locator { 40 | return func(seq Sequence) Regions { 41 | return Regions{loc.Region()} 42 | } 43 | } 44 | 45 | func filterLocator(f Filter) Locator { 46 | return func(seq Sequence) Regions { 47 | ff := seq.Features() 48 | ff = ff.Filter(f) 49 | rr := make(Regions, len(ff)) 50 | for i, f := range ff { 51 | rr[i] = f.Loc.Region() 52 | } 53 | return rr 54 | } 55 | } 56 | 57 | func tryLocation(s string) (Location, bool) { 58 | var parser pars.Parser 59 | parser = pars.Any(parseComplement(&parser), parseRange, parsePoint) 60 | result, err := parser.Parse(pars.FromString(s)) 61 | if err != nil { 62 | return nil, false 63 | } 64 | return result.Value.(Location), true 65 | } 66 | 67 | // AsLocator interprets the given string as a Locator. 
68 | func AsLocator(s string) (Locator, error) { 69 | switch i := strings.IndexByte(s, '@'); i { 70 | case -1: 71 | mod, err := AsModifier(s) 72 | if err == nil { 73 | return relativeLocator(mod), nil 74 | } 75 | 76 | loc, ok := tryLocation(s) 77 | if ok { 78 | return locationLocator(loc), nil 79 | } 80 | 81 | sel, err := Selector(s) 82 | if err == nil { 83 | return filterLocator(sel), nil 84 | } 85 | 86 | return nil, errors.New("expected a selector or locator") 87 | case 0: 88 | mod, err := AsModifier(s[1:]) 89 | if err != nil { 90 | return nil, err 91 | } 92 | return resizeLocator(allLocator, mod), nil 93 | 94 | default: 95 | locate, err := AsLocator(s[:i]) 96 | if err != nil { 97 | return nil, err 98 | } 99 | mod, err := AsModifier(s[i+1:]) 100 | if err != nil { 101 | return nil, err 102 | } 103 | return resizeLocator(locate, mod), nil 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /man/gts-split.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-SPLIT" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-split\fR \- split the sequence at the provided locations 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-split [\-\-version] [\-h | \-\-help] [\fIargs\fR] \fIlocator\fR \fIseqin\fR 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-split\fR takes a single sequence input and splits the sequences into fragments at the specified locations\. If the sequence input is ommited, standard input will be read instead\. The location to be split is specified using a \fBlocator\fR\. 14 | . 15 | .P 16 | A locator consists of a location specifier and a modifier\. A location specifier may be a \fBmodifier\fR, a \fBpoint location\fR, a \fBrange location\fR, or a \fBselector\fR\. The syntax for a locator is \fB[specifier][@modifier]\fR\. See gts\-locator(7) for a more in\-depth explanation of a locator\. 
Refer to the EXAMPLES for some examples to get started\. 17 | . 18 | .SH "OPTIONS" 19 | . 20 | .TP 21 | \fB\fR 22 | A locator string (\fB[specifier][@modifier]\fR)\. See gts\-locator(7) for more details\. 23 | . 24 | .TP 25 | \fB\fR 26 | Input sequence file (may be omitted if standard input is provided)\. See gts\-seqin(7) for a list of currently supported list of sequence formats\. 27 | . 28 | .TP 29 | \fB\-F \fR, \fB\-\-format=\fR 30 | Output file format (defaults to same as input)\. See gts\-seqout(7) for a list of currently supported list of sequence formats\. The format specified with this option will override the file type detection from the output filename\. 31 | . 32 | .TP 33 | \fB\-\-no\-cache\fR 34 | Do not use or create cache\. See gts\-cache(7) for details\. 35 | . 36 | .TP 37 | \fB\-o \fR, \fB\-\-output=\fR 38 | Output sequence file (specifying \fB\-\fR will force standard output)\. The output file format will be automatically detected from the filename if none is specified with the \fB\-F\fR or \fB\-\-format\fR option\. 39 | . 40 | .SH "EXAMPLES" 41 | Split the sequence at 100th base: 42 | . 43 | .IP "" 4 44 | . 45 | .nf 46 | 47 | $ gts split 100 48 | . 49 | .fi 50 | . 51 | .IP "" 0 52 | . 53 | .P 54 | Split the sequence before each CDS feature: 55 | . 56 | .IP "" 4 57 | . 58 | .nf 59 | 60 | $ gts split CDS@^ 61 | . 62 | .fi 63 | . 64 | .IP "" 0 65 | . 66 | .SH "BUGS" 67 | \fBgts\-split\fR currently has no known bugs\. 68 | . 69 | .SH "AUTHORS" 70 | \fBgts\-split\fR is written and maintained by Kotone Itaya\. 71 | . 
72 | .SH "SEE ALSO" 73 | gts(1), gts\-join(1), gts\-locator(7), gts\-modifier(7), gts\-selector(7), gts\-seqin(7), gts\-seqout(7) 74 | -------------------------------------------------------------------------------- /man/gts.1.ronn: -------------------------------------------------------------------------------- 1 | # gts -- the genome transformation subprograms command line tool 2 | 3 | ## SYNOPSIS 4 | 5 | usage: gts [--version] [-h | --help] [] 6 | 7 | ## DESCRIPTION 8 | 9 | **GTS** provides basic manipulation utilities for genome flatfiles. The command 10 | consists of a number of subcommands listed in the **COMMANDS** section. 11 | 12 | ## COMMANDS 13 | 14 | * `gts-annotate(1)`: 15 | Merge features from a feature list file into a sequence. 16 | 17 | * `gts-cache(1)`: 18 | Manage gts cache files. 19 | 20 | * `gts-clear(1)`: 21 | Remove all features from the sequence (excluding source features). 22 | 23 | * `gts-complement(1)`: 24 | Compute the complement of the given sequence. 25 | 26 | * `gts-define(1)`: 27 | Define a new feature. 28 | 29 | * `gts-delete(1)`: 30 | Delete a region of the given sequence(s). 31 | 32 | * `gts-extract(1)`: 33 | Extract the sequences referenced by the features. 34 | 35 | * `gts-infix(1)`: 36 | Infix input sequence(s) into the host sequence(s). 37 | 38 | * `gts-insert(1)`: 39 | Insert a sequence into another sequence(s). 40 | 41 | * `gts-join(1)`: 42 | Join the sequences contained in the files. 43 | 44 | * `gts-length(1)`: 45 | Report the length of the sequence(s). 46 | 47 | * `gts-pick(1)`: 48 | Pick sequence(s) from multiple sequences. 49 | 50 | * `gts-query(1)`: 51 | Query information from the given sequence. 52 | 53 | * `gts-repair(1)`: 54 | Repair fragmented features. 55 | 56 | * `gts-reverse(1)`: 57 | Reverse order of the given sequence(s). 58 | 59 | * `gts-rotate(1)`: 60 | Shift the coordinates of a circular sequence. 61 | 62 | * `gts-search(1)`: 63 | Search for a subsequence and annotate its results. 
64 | 65 | * `gts-select(1)`: 66 | Select features using the given feature selector(s). 67 | 68 | * `gts-sort(1)`: 69 | Sort the list of sequences. 70 | 71 | * `gts-split(1)`: 72 | Split the sequence at the provided locations. 73 | 74 | * `gts-summary(1)`: 75 | Report a brief summary of the sequence(s). 76 | 77 | ## BUGS 78 | 79 | **gts** currently has no known bugs. 80 | 81 | ## AUTHORS 82 | 83 | **gts** is written and maintained by Kotone Itaya. 84 | 85 | ## SEE ALSO 86 | 87 | gts-annotate(1), gts-cache(1), gts-clear(1), gts-complement(1), gts-define(1), 88 | gts-delete(1), gts-extract(1), gts-infix(1), gts-insert(1), gts-join(1), 89 | gts-length(1), gts-pick(1), gts-query(1), gts-repair(1), gts-reverse(1), 90 | gts-rotate(1), gts-search(1), gts-select(1), gts-sort(1), gts-split(1), 91 | gts-summary(1), gts-locator(7), gts-modifier(7), gts-selector(7), gts-seqin(7), 92 | gts-seqout(7) -------------------------------------------------------------------------------- /internal/testutils/testutils.go: -------------------------------------------------------------------------------- 1 | package testutils 2 | 3 | import ( 4 | "io/ioutil" 5 | "path/filepath" 6 | "strings" 7 | "testing" 8 | 9 | "github.com/go-gts/gts/internal/diff" 10 | "github.com/go-test/deep" 11 | ) 12 | 13 | // ReadGolden will attempt to read the golden file associated to the test. 14 | func ReadGolden(t *testing.T) string { 15 | t.Helper() 16 | p, err := ioutil.ReadFile(filepath.Join("testdata", t.Name()+".golden")) 17 | if err != nil { 18 | t.Fatalf("failed to read .golden file: %s", err) 19 | } 20 | return string(p) 21 | } 22 | 23 | // ReadTestfile will open a file in the testdata directory. 
24 | func ReadTestfile(t *testing.T, path string) string { 25 | t.Helper() 26 | p, err := ioutil.ReadFile(filepath.Join("testdata", path)) 27 | if err != nil { 28 | t.Fatalf("failed to read file: %s", err) 29 | } 30 | return string(p) 31 | } 32 | 33 | // ReadTestfilePkg reads a file in the testdata directory of the given pkg and returns its contents, failing the test if it cannot be read. 34 | func ReadTestfilePkg(t *testing.T, path, pkg string) string { 35 | t.Helper() 36 | p, err := ioutil.ReadFile(filepath.Join(pkg, "testdata", path)) 37 | if err != nil { 38 | t.Fatalf("failed to read file: %s", err) 39 | } 40 | return string(p) 41 | } 42 | 43 | // Equals reports a test error with the go-test/deep diff if a and b are not deeply equal. 44 | func Equals(t *testing.T, a, b interface{}) { 45 | t.Helper() 46 | if diff := deep.Equal(a, b); diff != nil { 47 | t.Error(diff) 48 | } 49 | } 50 | 51 | // Differs reports a test error if a and b are deeply equal according to go-test/deep. 52 | func Differs(t *testing.T, a, b interface{}) { 53 | t.Helper() 54 | if diff := deep.Equal(a, b); diff == nil { 55 | t.Errorf("expected %v != %v", a, b) 56 | } 57 | } 58 | 59 | // Diff checks the equality of two strings and reports their diff if they differ. 60 | func Diff(t *testing.T, a, b string) { 61 | t.Helper() 62 | if a != b { 63 | ops := diff.Diff(a, b) 64 | ss := make([]string, len(ops)) 65 | for i, op := range ops { 66 | ss[i] = op.String() 67 | } 68 | s := strings.Join(ss, "") 69 | t.Errorf("\n%s", s) 70 | } 71 | } 72 | 73 | // DiffLine checks the equality of two strings and reports their diff by lines 74 | // if they differ. 75 | func DiffLine(t *testing.T, a, b string) { 76 | t.Helper() 77 | if a != b { 78 | ops := diff.LineDiff(a, b) 79 | lines := make([]string, len(ops)) 80 | for i, op := range ops { 81 | lines[i] = op.String() 82 | } 83 | s := strings.Join(lines, "\n") 84 | t.Errorf("\n%s", s) 85 | } 86 | } 87 | 88 | // Panics will test if the given function panics. 
89 | func Panics(t *testing.T, f func()) { 90 | t.Helper() 91 | defer func() { 92 | t.Helper() 93 | if recover() == nil { 94 | t.Errorf("given function did not panic") 95 | } 96 | }() 97 | f() 98 | } 99 | -------------------------------------------------------------------------------- /man/gts-delete.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-delete(1) -- delete a region of the given sequence(s) 2 | 3 | ## SYNOPSIS 4 | 5 | gts-delete [--version] [-h | --help] [<args>] <locator> <seqin> 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-delete** takes a single sequence input and deletes the specified region. 10 | If the sequence input is omitted, standard input will be read instead. The 11 | region to be deleted is specified using a `locator`. 12 | 13 | A locator consists of a location specifier and a modifier. A location specifier 14 | may be a `modifier`, a `point location`, a `range location`, or a `selector`. 15 | The syntax for a locator is `[specifier][@modifier]`. See gts-locator(7) for a 16 | more in-depth explanation of a locator. Refer to the EXAMPLES for some examples 17 | to get started. 18 | 19 | Features that were present in the region being deleted will be shifted as being 20 | in between the bases at the deletion point. Such features can be completely 21 | erased from the sequence if the `-e` or `--erase` option is provided. 22 | 23 | ## OPTIONS 24 | 25 | * `<locator>`: 26 | A locator string (`[specifier][@modifier]`). See gts-locator(7) for more 27 | details. 28 | 29 | * `<seqin>`: 30 | Input sequence file (may be omitted if standard input is provided). See 31 | gts-seqin(7) for a list of currently supported sequence formats. 32 | 33 | * `-e`, `--erase`: 34 | Remove features contained in the deleted regions. 35 | 36 | * `-F <format>`, `--format=<format>`: 37 | Output file format (defaults to same as input). See gts-seqout(7) for a 38 | list of currently supported sequence formats. 
The format specified 39 | with this option will override the file type detection from the output 40 | filename. 41 | 42 | * `--no-cache`: 43 | Do not use or create cache. See gts-cache(7) for details. 44 | 45 | * `-o <output>`, `--output=<output>`: 46 | Output sequence file (specifying `-` will force standard output). The 47 | output file format will be automatically detected from the filename if none 48 | is specified with the `-F` or `--format` option. 49 | 50 | ## EXAMPLES 51 | 52 | Delete bases 100 to 200: 53 | 54 | $ gts delete 100..200 55 | 56 | Delete all regions of `misc_feature` and its features: 57 | 58 | $ gts delete --erase misc_feature 59 | 60 | Delete 20 bases upstream of every `CDS`: 61 | 62 | $ gts delete CDS@^-20..^ 63 | 64 | ## BUGS 65 | 66 | **gts-delete** currently has no known bugs. 67 | 68 | ## AUTHORS 69 | 70 | **gts-delete** is written and maintained by Kotone Itaya. 71 | 72 | ## SEE ALSO 73 | 74 | gts(1), gts-insert(1), gts-locator(7), gts-modifier(7), gts-selector(7), 75 | gts-seqin(7), gts-seqout(7) -------------------------------------------------------------------------------- /man/gts-rotate.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-ROTATE" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-rotate\fR \- shift the coordinates of a circular sequence 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-rotate [\-\-version] [\-h | \-\-help] [\fIargs\fR] \fIamount\fR \fIseqin\fR 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-rotate\fR takes a single sequence input and shifts the sequence so that the position matching the location specified by the given \fBlocator\fR comes to the start of the sequence file\. If the sequence input is ommited, standard input will be read instead\. 14 | . 15 | .P 16 | A locator consists of a location specifier and a modifier\. 
A location specifier may be a \fBmodifier\fR, a \fBpoint location\fR, a \fBrange location\fR, or a \fBselector\fR\. The syntax for a locator is \fB[specifier][@modifier]\fR\. See gts\-locator(7) for a more in\-depth explanation of a locator\. Refer to the EXAMPLES for some examples to get started\. 17 | . 18 | .P 19 | The topology of a sequence that is rotated will be changed to circular\. 20 | . 21 | .SH "OPTIONS" 22 | . 23 | .TP 24 | \fB\fR 25 | A locator string (\fB[specifier][@modifier]\fR)\. See gts\-locator(7) for more details\. 26 | . 27 | .TP 28 | \fB\fR 29 | Input sequence file (may be omitted if standard input is provided)\. See gts\-seqin(7) for a list of currently supported list of sequence formats\. 30 | . 31 | .TP 32 | \fB\-F \fR, \fB\-\-format=\fR 33 | Output file format (defaults to same as input)\. See gts\-seqout(7) for a list of currently supported list of sequence formats\. The format specified with this option will override the file type detection from the output filename\. 34 | . 35 | .TP 36 | \fB\-\-no\-cache\fR 37 | Do not use or create cache\. See gts\-cache(7) for details\. 38 | . 39 | .TP 40 | \fB\-o \fR, \fB\-\-output=\fR 41 | Output sequence file (specifying \fB\-\fR will force standard output)\. The output file format will be automatically detected from the filename if none is specified with the \fB\-F\fR or \fB\-\-format\fR option\. 42 | . 43 | .SH "EXAMPLES" 44 | Rotate a sequence 100 bases: 45 | . 46 | .IP "" 4 47 | . 48 | .nf 49 | 50 | $ gts rotate 100 51 | . 52 | .fi 53 | . 54 | .IP "" 0 55 | . 56 | .P 57 | Rotate a sequence to the first CDS in the sequence: 58 | . 59 | .IP "" 4 60 | . 61 | .nf 62 | 63 | $ gts rotate CDS 64 | . 65 | .fi 66 | . 67 | .IP "" 0 68 | . 69 | .SH "BUGS" 70 | \fBgts\-rotate\fR currently has no known bugs\. 71 | . 72 | .SH "AUTHORS" 73 | \fBgts\-rotate\fR is written and maintained by Kotone Itaya\. 74 | . 
75 | .SH "SEE ALSO" 76 | gts(1), gts\-locator(7), gts\-modifier(7), gts\-selector(7), gts\-seqin(7), gts\-seqout(7) 77 | -------------------------------------------------------------------------------- /seqio/origin.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/go-gts/gts" 7 | ) 8 | // toOriginLength returns the number of bytes needed to render length bases: every full 60-base line takes 76 bytes (9-byte index, six space-prefixed 10-base blocks, newline). 9 | func toOriginLength(length int) int { 10 | lines := length / 60 11 | ret := lines * 76 12 | 13 | lastLine := length % 60 14 | 15 | if lastLine == 0 { 16 | return ret 17 | } 18 | 19 | blocks := lastLine / 10 20 | ret += 10 + blocks*11 21 | 22 | lastBlock := lastLine % 10 23 | if lastBlock == 0 { 24 | return ret 25 | } 26 | 27 | return ret + lastBlock + 1 28 | } 29 | // fromOriginLength returns the number of bases held in an origin buffer of the given byte length (the inverse of toOriginLength). 30 | func fromOriginLength(length int) int { 31 | lines := length / 76 32 | ret := lines * 60 33 | // A trailing fragment under 12 bytes (index, space, one base, newline) cannot hold a base, so it contributes nothing instead of a negative count. 34 | lastLine := length % 76 35 | if lastLine < 12 { 36 | return ret 37 | } 38 | // Dropping the 9-byte index, the newline and one separator leaves 11 bytes per complete block and a remainder equal to the bases in the final block. 39 | lastLine -= 11 40 | blocks := lastLine / 11 41 | return ret + (blocks * 10) + (lastLine % 11) 42 | } 43 | 44 | // Origin represents a GenBank sequence origin value. 45 | type Origin struct { 46 | Buffer []byte 47 | Parsed bool 48 | } 49 | 50 | // NewOrigin formats a byte slice into GenBank sequence origin format. 51 | func NewOrigin(p []byte) *Origin { 52 | length := len(p) 53 | q := make([]byte, toOriginLength(length)) 54 | offset := 0 55 | for i := 0; i < length; i += 60 { 56 | prefix := fmt.Sprintf("%9d", i+1) 57 | offset += copy(q[offset:], prefix) 58 | for j := 0; j < 60 && i+j < length; j += 10 { 59 | start := i + j 60 | end := gts.Min(i+j+10, length) 61 | q[offset] = spaceByte 62 | offset++ 63 | offset += copy(q[offset:], p[start:end]) 64 | } 65 | q[offset] = '\n' 66 | offset++ 67 | } 68 | return &Origin{q, false} 69 | } 70 | 71 | // Bytes converts the GenBank sequence origin into a byte slice. 
72 | func (o *Origin) Bytes() []byte { 73 | if !o.Parsed { 74 | p := o.Buffer 75 | if len(p) < 12 { 76 | return nil 77 | } 78 | 79 | length := fromOriginLength(len(p)) 80 | q := make([]byte, length) 81 | offset, start := 0, 0 82 | for i := 0; i < length; i += 60 { 83 | start += 9 84 | for j := 0; j < 60 && i+j < length; j += 10 { 85 | start++ 86 | end := gts.Min(start+10, len(p)-1) 87 | offset += copy(q[offset:], p[start:end]) 88 | start = end 89 | } 90 | start++ 91 | } 92 | 93 | o.Buffer = q 94 | o.Parsed = true 95 | } 96 | 97 | return o.Buffer 98 | } 99 | 100 | // String satisfies the fmt.Stringer interface. 101 | func (o Origin) String() string { 102 | if !o.Parsed { 103 | return string(o.Buffer) 104 | } 105 | return string(NewOrigin(o.Buffer).Buffer) 106 | } 107 | 108 | // Len returns the actual sequence length. 109 | func (o Origin) Len() int { 110 | if len(o.Buffer) == 0 { 111 | return 0 112 | } 113 | if o.Parsed { 114 | return len(o.Buffer) 115 | } 116 | return fromOriginLength(len(o.Buffer)) 117 | } 118 | -------------------------------------------------------------------------------- /seqio/genbank_subparsers_test.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/go-pars/pars" 8 | ) 9 | 10 | var genbankSubparsersTests = []struct { 11 | name string 12 | parser pars.Parser 13 | pass []string 14 | fail []string 15 | }{ 16 | { 17 | "DBLink Parser", 18 | genbankDBLinkParser(&GenBank{}, 12), 19 | []string{ 20 | multiLineString( 21 | "DBLINK BioProject: PRJNA14015", 22 | " KEGG BRITE: NC_001422", 23 | ), 24 | }, 25 | []string{ 26 | multiLineString( 27 | "DBLINK BioProject: PRJNA14015", 28 | " KEGG BRITE", 29 | ), 30 | }, 31 | }, 32 | { 33 | "Contig Parser", 34 | genbankContigParser(&GenBank{}, 12), 35 | []string{ 36 | "CONTIG join(U00096.3:1..4641652)", 37 | }, 38 | []string{ 39 | "CONTIG join(U00096.3)", 40 | "CONTIG join(U00096.3:foo)", 41 | "CONTIG 
join(U00096.3:1foo)", 42 | "CONTIG join(U00096.3:1..foo)", 43 | "CONTIG join(U00096.3:1..4641652", 44 | }, 45 | }, 46 | { 47 | "Origin Parser", 48 | makeGenbankOriginParser(120)(&GenBank{}, 12), 49 | []string{ 50 | multiLineString( 51 | "ORIGIN ", 52 | " 1 gagttttatc gcttccatga cgcagaagtt aacactttcg gatatttctg atgagtcgaa", 53 | " 61 aaattatctt gataaagcag gaattactac tgcttgttta cgaattaaat cgaagtggac", 54 | ), 55 | }, 56 | []string{ 57 | multiLineString( 58 | "ORIGIN ", 59 | " gagttttatc gcttccatga cgcagaagtt aacactttcg gatatttctg atgagtcgaa", 60 | " 61 aaattatctt gataaagcag gaattactac tgcttgttta cgaattaaat cgaagtggac", 61 | ), 62 | multiLineString( 63 | "ORIGIN ", 64 | " 1g agttttatc gcttccatga cgcagaagtt aacactttcg gatatttctg atgagtcgaa", 65 | " 61 aaattatctt gataaagcag gaattactac tgcttgttta cgaattaaat cgaagtggac", 66 | ), 67 | multiLineString( 68 | "ORIGIN ", 69 | " 1 agttttatc gcttccatga cgcagaagtt aacactttcg gatatttctg atgagtcgaa", 70 | " 61 aaattatctt gataaagcag gaattactac tgcttgttta cgaattaaat cgaagtggac", 71 | ), 72 | }, 73 | }, 74 | } 75 | 76 | func TestGenBankSubparsers(t *testing.T) { 77 | for _, tt := range genbankSubparsersTests { 78 | t.Run(fmt.Sprintf("%s pass tests", tt.name), func(t *testing.T) { 79 | for _, s := range tt.pass { 80 | state, result := pars.FromString(s), &pars.Result{} 81 | if err := tt.parser(state, result); err != nil { 82 | t.Errorf("%v while parsing:\n%s", err, s) 83 | } 84 | } 85 | }) 86 | t.Run(fmt.Sprintf("%s fail tests", tt.name), func(t *testing.T) { 87 | for _, s := range tt.fail { 88 | state, result := pars.FromString(s), &pars.Result{} 89 | if tt.parser(state, result) == nil { 90 | t.Errorf("expected error while parsing:\n%s", s) 91 | } 92 | } 93 | }) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /man/gts-pick.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" 
http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-PICK" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-pick\fR \- pick sequence(s) from multiple sequences 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-pick [\-\-version] [\-h | \-\-help] [\fIargs\fR] \fIlist\fR \fIseqin\fR 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-pick\fR takes a single sequence input and returns the sequences specified by the \fIlist\fR option\. If the sequence input is omitted, standard input will be read instead\. The \fIlist\fR option is equivalent to that of cut(1)\. Sequence numbering starts at 1\. Specifying the \fB\-f\fR or \fB\-\-feature\fR option will output all sequences but pick the features matching the \fIlist\fR option\. 14 | . 15 | .SH "OPTIONS" 16 | . 17 | .TP 18 | \fB\fR 19 | List of sequences to pick (identical to the list option in cut)\. A list is a comma separated set of numbers and/or number ranges\. Number ranges consist of a number, a dash character \fB\-\fR, and a second number\. A number range will select the sequences from the first number to the second, inclusive\. Numbers may be preceded by a dash, which selects all sequences from 1 up to the number\. Numbers may be followed by a dash, which selects all sequences from the number to the last\. 20 | . 21 | .TP 22 | \fB\fR 23 | Input sequence file (may be omitted if standard input is provided)\. See gts\-seqin(7) for a list of currently supported sequence formats\. 24 | . 25 | .TP 26 | \fB\-f\fR, \fB\-\-feature\fR 27 | Pick features instead of sequences\. 28 | . 29 | .TP 30 | \fB\-F \fR, \fB\-\-format=\fR 31 | Output file format (defaults to same as input)\. See gts\-seqout(7) for a list of currently supported sequence formats\. The format specified with this option will override the file type detection from the output filename\. 32 | . 33 | .TP 34 | \fB\-\-no\-cache\fR 35 | Do not use or create cache\. See gts\-cache(7) for details\. 36 | .
37 | .TP 38 | \fB\-o \fR, \fB\-\-output=\fR 39 | Output sequence file (specifying \fB\-\fR will force standard output)\. The output file format will be automatically detected from the filename if none is specified with the \fB\-F\fR or \fB\-\-format\fR option\. 40 | . 41 | .SH "EXAMPLES" 42 | Pick the first sequence in the file: 43 | . 44 | .IP "" 4 45 | . 46 | .nf 47 | 48 | $ gts pick 1 49 | . 50 | .fi 51 | . 52 | .IP "" 0 53 | . 54 | .P 55 | Pick the first ten features from each sequence in the file: 56 | . 57 | .IP "" 4 58 | . 59 | .nf 60 | 61 | $ gts pick \-f \-10 62 | . 63 | .fi 64 | . 65 | .IP "" 0 66 | . 67 | .SH "BUGS" 68 | \fBgts\-pick\fR currently has no known bugs\. 69 | . 70 | .SH "AUTHORS" 71 | \fBgts\-pick\fR is written and maintained by Kotone Itaya\. 72 | . 73 | .SH "SEE ALSO" 74 | gts(1), gts\-seqin(7), gts\-seqout(7) cut(1) 75 | -------------------------------------------------------------------------------- /seqio/scanner_test.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/go-gts/gts/internal/testutils" 8 | ) 9 | 10 | func TestScannerGenBank(t *testing.T) { 11 | in := testutils.ReadTestfile(t, "NC_001422.gb") 12 | s := NewScanner(GenBankParser, strings.NewReader(in)) 13 | 14 | if s.Value() != nil { 15 | t.Error("First scan should be empty") 16 | } 17 | 18 | if !s.Scan() { 19 | if s.Err() == nil { 20 | t.Error("Scan failed but returned nil error") 21 | return 22 | } 23 | } 24 | 25 | if s.Err() != nil { 26 | t.Errorf("Scan failed: %v", s.Err()) 27 | return 28 | } 29 | 30 | if seq, ok := s.Value().(GenBank); !ok { 31 | t.Errorf("result.Value.(type) = %T, want %T", seq, GenBank{}) 32 | } 33 | } 34 | 35 | func TestScannerGenBankCRLF(t *testing.T) { 36 | in := testutils.ReadTestfile(t, "NC_001422.gb") 37 | in = strings.ReplaceAll(in, "\n", "\r\n") 38 | s := NewScanner(GenBankParser, strings.NewReader(in)) 39 | 40 | if s.Value() != 
nil { 41 | t.Error("First scan should be empty") 42 | } 43 | 44 | if !s.Scan() { 45 | if s.Err() == nil { 46 | t.Error("Scan failed but returned nil error") 47 | return 48 | } 49 | } 50 | 51 | if s.Err() != nil { 52 | t.Errorf("Scan failed: %v", s.Err()) 53 | return 54 | } 55 | 56 | if seq, ok := s.Value().(GenBank); !ok { 57 | t.Errorf("result.Value.(type) = %T, want %T", seq, GenBank{}) 58 | } 59 | } 60 | 61 | func TestScannerGenBankFail(t *testing.T) { 62 | in := testutils.ReadTestfile(t, "NC_001422.fasta") 63 | s := NewScanner(GenBankParser, strings.NewReader(in)) 64 | if s.Scan() { 65 | t.Error("GenBank Scanner should fail for FASTA file") 66 | return 67 | } 68 | if s.Err() == nil { 69 | t.Error("expected error in GenBank Scanner") 70 | return 71 | } 72 | if s.Scan() { 73 | t.Error("Scanner should halt after first error") 74 | return 75 | } 76 | } 77 | 78 | func TestAutoScanner(t *testing.T) { 79 | in := testutils.ReadTestfile(t, "NC_001422.fasta") 80 | s := NewAutoScanner(strings.NewReader(in)) 81 | if !s.Scan() { 82 | if s.Err() == nil { 83 | t.Error("Scan failed but returned nil error") 84 | return 85 | } 86 | } 87 | 88 | if s.Err() != nil { 89 | t.Errorf("Scan failed: %v", s.Err()) 90 | return 91 | } 92 | 93 | if seq, ok := s.Value().(Fasta); !ok { 94 | t.Errorf("result.Value.(type) = %T, want %T", seq, GenBank{}) 95 | } 96 | } 97 | 98 | func TestAutoScannerFail(t *testing.T) { 99 | in := "LOCUS NC_001422 5386 bp ss-DNA circular PHG 06-JUL-2018" 100 | s := NewAutoScanner(strings.NewReader(in)) 101 | if s.Scan() { 102 | t.Error("Auto Scanner should fail") 103 | return 104 | } 105 | if s.Err() == nil { 106 | t.Error("expected error in Auto Scanner") 107 | return 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /cmd/gts/sort.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | "sort" 8 | "strings" 9 | 10 | 
"github.com/go-gts/flags" 11 | "github.com/go-gts/gts" 12 | "github.com/go-gts/gts/cmd" 13 | "github.com/go-gts/gts/seqio" 14 | ) 15 | 16 | func init() { 17 | flags.Register("sort", "sort the list of sequences", sortFunc) 18 | } 19 | 20 | type byLength []gts.Sequence 21 | 22 | func (ss byLength) Len() int { 23 | return len(ss) 24 | } 25 | 26 | func (ss byLength) Less(i, j int) bool { 27 | return gts.Len(ss[j]) < gts.Len(ss[i]) 28 | } 29 | 30 | func (ss byLength) Swap(i, j int) { 31 | ss[i], ss[j] = ss[j], ss[i] 32 | } 33 | 34 | func sortFunc(ctx *flags.Context) error { 35 | h := newHash() 36 | pos, opt := flags.Flags() 37 | 38 | seqinPath := new(string) 39 | *seqinPath = "-" 40 | if cmd.IsTerminal(os.Stdin.Fd()) { 41 | seqinPath = pos.String("seqin", "input sequence file (may be omitted if standard input is provided)") 42 | } 43 | 44 | nocache := opt.Switch(0, "no-cache", "do not use or create cache") 45 | seqoutPath := opt.String('o', "output", "-", "output sequence file (specifying `-` will force standard output)") 46 | format := opt.String('F', "format", "", "output file format (defaults to same as input)") 47 | reverse := opt.Switch('r', "reverse", "reverse the sort order") 48 | 49 | if err := ctx.Parse(pos, opt); err != nil { 50 | return err 51 | } 52 | 53 | d, err := newIODelegate(*seqinPath, *seqoutPath) 54 | if err != nil { 55 | return ctx.Raise(err) 56 | } 57 | defer d.Close() 58 | 59 | filetype := seqio.Detect(*seqoutPath) 60 | if *format != "" { 61 | filetype = seqio.ToFileType(*format) 62 | } 63 | 64 | if !*nocache { 65 | data := encodePayload([]tuple{ 66 | {"command", strings.Join(ctx.Name, "-")}, 67 | {"version", gts.Version.String()}, 68 | {"reverse", *reverse}, 69 | {"filetype", filetype}, 70 | }) 71 | 72 | ok, err := d.TryCache(h, data) 73 | if ok || err != nil { 74 | return ctx.Raise(err) 75 | } 76 | } 77 | seqs := []gts.Sequence{} 78 | scanner := seqio.NewAutoScanner(d) 79 | for scanner.Scan() { 80 | seq := scanner.Value() 81 | seqs = 
append(seqs, seq) 82 | } 83 | 84 | var iface sort.Interface 85 | iface = byLength(seqs) 86 | if *reverse { 87 | iface = sort.Reverse(iface) 88 | } 89 | sort.Sort(iface) 90 | 91 | buffer := bufio.NewWriter(d) 92 | writer := seqio.NewWriter(buffer, filetype) 93 | 94 | for _, seq := range seqs { 95 | if _, err := writer.WriteSeq(seq); err != nil { 96 | return ctx.Raise(err) 97 | } 98 | 99 | if err := buffer.Flush(); err != nil { 100 | return ctx.Raise(err) 101 | } 102 | } 103 | 104 | if err := scanner.Err(); err != nil { 105 | return ctx.Raise(fmt.Errorf("encountered error in scanner: %v", err)) 106 | } 107 | 108 | return nil 109 | } 110 | -------------------------------------------------------------------------------- /man/gts-extract.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-extract -- extract the sequences referenced by the features 2 | 3 | ## SYNOPSIS 4 | 5 | gts-extract [--version] [-h | --help] [] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-extract** takes a single sequence input and return the sequences 10 | associated with each feature. If the sequence input is ommited, standard input 11 | will be read instead. Additionally, if the `locator` argument is given, the 12 | regions associated with the `locator`s will be extracted. 13 | 14 | A locator consists of a location specifier and a modifier. A location specifier 15 | may be a `modifier`, a `point location`, a `range location`, or a `selector`. 16 | The syntax for a locator is `[specifier][@modifier]`. See gts-locator(7) for a 17 | more in-depth explanation of a locator. Refer to the EXAMPLES for some examples 18 | to get started. 19 | 20 | This command is best utilized in combination with the gts-select(1) command. 21 | Use gts-select(1) to narrow down the sequence regions to be extracted, and then 22 | apply **gts-extract** to retrieve the sequences. See the EXAMPLES section for 23 | more insight. 
24 | 25 | ## OPTIONS 26 | 27 | * `...`: 28 | A locator string ([specifier][@modifier]). See gts-locator(7) for more 29 | details. 30 | 31 | * ``: 32 | Input sequence file (may be omitted if standard input is provided). See 33 | gts-seqin(7) for a list of currently supported sequence formats. 34 | 35 | * `-F `, `--format=`: 36 | Output file format (defaults to same as input). See gts-seqout(7) for a 37 | list of currently supported sequence formats. The format specified 38 | with this option will override the file type detection from the output 39 | filename. 40 | 41 | * `--no-cache`: 42 | Do not use or create cache. See gts-cache(7) for details. 43 | 44 | * `-o `, `--output=`: 45 | Output sequence file (specifying `-` will force standard output). The 46 | output file format will be automatically detected from the filename if none 47 | is specified with the `-F` or `--format` option. 48 | 49 | ## EXAMPLES 50 | 51 | Retrieve the sequences of all CDS features: 52 | 53 | $ gts select CDS | gts extract 54 | 55 | Retrieve the sequence 100 bases upstream of all CDS features: 56 | 57 | $ gts select CDS | gts extract -m ^-100..^ 58 | $ gts select CDS | gts extract --range ^-100..^ 59 | 60 | Retrieve the sequence 100 bases downstream of all CDS features: 61 | 62 | $ gts select CDS | gts extract -m $..$+100 63 | $ gts select CDS | gts extract --range $..$+100 64 | 65 | ## BUGS 66 | 67 | **gts-extract** currently has no known bugs. 68 | 69 | ## AUTHORS 70 | 71 | **gts-extract** is written and maintained by Kotone Itaya.
72 | 73 | ## SEE ALSO 74 | 75 | gts(1), gts-select(1), gts-modifier(7), gts-seqin(7), gts-seqout(7) -------------------------------------------------------------------------------- /nucleotide.go: -------------------------------------------------------------------------------- 1 | package gts 2 | 3 | import ( 4 | "bytes" 5 | "regexp" 6 | "sort" 7 | "strings" 8 | ) 9 | 10 | func replaceBytes(p, old, new []byte) []byte { 11 | q := make([]byte, len(p)) 12 | for i, c := range p { 13 | switch j := bytes.IndexByte(old, c); j { 14 | case -1: 15 | q[i] = c 16 | default: 17 | q[i] = new[j] 18 | } 19 | } 20 | return q 21 | } 22 | 23 | // Complement returns the complement DNA sequence based on the FASTA sequence 24 | // representation. All 'A's will be complemented to a 'T'. If the resulting 25 | // sequence is intended to be RNA, use Transcribe instead. 26 | func Complement(seq Sequence) Sequence { 27 | p := replaceBytes( 28 | seq.Bytes(), 29 | []byte("ACGTURYKMBDHVacgturykmbdhv"), 30 | []byte("TGCAAYRMKVHDBtgcaayrmkvhdb"), 31 | ) 32 | ff := make([]Feature, len(seq.Features())) 33 | for i, f := range seq.Features() { 34 | ff[i] = Feature{f.Key, f.Loc.Complement(), f.Props.Clone()} 35 | } 36 | return WithBytes(WithFeatures(seq, ff), p) 37 | } 38 | 39 | // Transcribe returns the complement RNA sequence based on the FASTA sequence 40 | // representation. All 'A's will be transcribed to a 'U'. If the resulting 41 | // sequence is intended to be DNA, use Complement instead. 42 | func Transcribe(seq Sequence) Sequence { 43 | p := replaceBytes( 44 | seq.Bytes(), 45 | []byte("ACGTURYKMBDHVacgturykmbdhv"), 46 | []byte("UGCAAYRMKVHDBugcaayrmkvhdb"), 47 | ) 48 | return WithBytes(seq, p) 49 | } 50 | 51 | // Match for an oligomer within a sequence. The ambiguous nucleotides in the 52 | // query sequence will match any of the respective nucleotides. 
53 | func Match(seq Sequence, query Sequence) []Segment { 54 | if Len(seq) == 0 || Len(query) == 0 { 55 | return nil 56 | } 57 | 58 | b := strings.Builder{} 59 | for _, c := range bytes.ToLower(query.Bytes()) { 60 | switch c { 61 | case 't', 'u': 62 | b.WriteString("[tu]") 63 | case 'r': 64 | b.WriteString("[agr]") 65 | case 'y': 66 | b.WriteString("[ctuy]") 67 | case 'k': 68 | b.WriteString("[gtuy]") 69 | case 'm': 70 | b.WriteString("[acm]") 71 | case 's': 72 | b.WriteString("[cgs]") 73 | case 'w': 74 | b.WriteString("[atuw]") 75 | case 'b': 76 | b.WriteString("[cgtuyksb]") 77 | case 'd': 78 | b.WriteString("[agturkwd]") 79 | case 'h': 80 | b.WriteString("[actuymwh]") 81 | case 'v': 82 | b.WriteString("[acgrmsv]") 83 | case 'n': 84 | b.WriteString(".") 85 | default: 86 | b.WriteByte(c) 87 | } 88 | } 89 | 90 | s := b.String() 91 | p := bytes.ToLower(seq.Bytes()) 92 | 93 | re := regexp.MustCompile(s) 94 | pairs := re.FindAllIndex(p, -1) 95 | segments := make([]Segment, len(pairs)) 96 | for i, pair := range pairs { 97 | segments[i] = Segment{pair[0], pair[1]} 98 | } 99 | sort.Sort(BySegment(segments)) 100 | return segments 101 | } 102 | -------------------------------------------------------------------------------- /cmd/gts/delete.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | "strings" 8 | 9 | "github.com/go-flip/flip" 10 | "github.com/go-gts/flags" 11 | "github.com/go-gts/gts" 12 | "github.com/go-gts/gts/cmd" 13 | "github.com/go-gts/gts/seqio" 14 | ) 15 | 16 | func init() { 17 | flags.Register("delete", "delete a region of the given sequence(s)", deleteFunc) 18 | } 19 | 20 | func deleteFunc(ctx *flags.Context) error { 21 | h := newHash() 22 | pos, opt := flags.Flags() 23 | 24 | locstr := pos.String("locator", "a locator string ([modifier|selector|point|range][@modifier])") 25 | 26 | seqinPath := new(string) 27 | *seqinPath = "-" 28 | if 
cmd.IsTerminal(os.Stdin.Fd()) { 29 | seqinPath = pos.String("seqin", "input sequence file (may be omitted if standard input is provided)") 30 | } 31 | 32 | nocache := opt.Switch(0, "no-cache", "do not use or create cache") 33 | format := opt.String('F', "format", "", "output file format (defaults to same as input)") 34 | seqoutPath := opt.String('o', "output", "-", "output sequence file (specifying `-` will force standard output)") 35 | erase := opt.Switch('e', "erase", "remove features contained in the deleted regions") 36 | 37 | if err := ctx.Parse(pos, opt); err != nil { 38 | return err 39 | } 40 | 41 | locate, err := gts.AsLocator(*locstr) 42 | if err != nil { 43 | return ctx.Raise(err) 44 | } 45 | 46 | d, err := newIODelegate(*seqinPath, *seqoutPath) 47 | if err != nil { 48 | return ctx.Raise(err) 49 | } 50 | defer d.Close() 51 | 52 | filetype := seqio.Detect(*seqoutPath) 53 | if *format != "" { 54 | filetype = seqio.ToFileType(*format) 55 | } 56 | 57 | delete := gts.Delete 58 | if *erase { 59 | delete = gts.Erase 60 | } 61 | 62 | if !*nocache { 63 | data := encodePayload([]tuple{ 64 | {"command", strings.Join(ctx.Name, "-")}, 65 | {"version", gts.Version.String()}, 66 | {"locator", *locstr}, 67 | {"erase", *erase}, 68 | {"filetype", filetype}, 69 | }) 70 | 71 | ok, err := d.TryCache(h, data) 72 | if ok || err != nil { 73 | return ctx.Raise(err) 74 | } 75 | } 76 | 77 | scanner := seqio.NewAutoScanner(d) 78 | buffer := bufio.NewWriter(d) 79 | writer := seqio.NewWriter(buffer, filetype) 80 | 81 | for scanner.Scan() { 82 | seq := scanner.Value() 83 | 84 | ss := gts.Minimize(locate(seq)) 85 | flip.Flip(gts.BySegment(ss)) 86 | for _, s := range ss { 87 | i, n := s.Head(), s.Len() 88 | seq = delete(seq, i, n) 89 | } 90 | 91 | if _, err := writer.WriteSeq(seq); err != nil { 92 | return ctx.Raise(err) 93 | } 94 | 95 | if err := buffer.Flush(); err != nil { 96 | return ctx.Raise(err) 97 | } 98 | } 99 | 100 | if err := scanner.Err(); err != nil { 101 | return 
ctx.Raise(fmt.Errorf("encountered error in scanner: %v", err)) 102 | } 103 | 104 | return nil 105 | } 106 | -------------------------------------------------------------------------------- /man/gts-cache.7.ronn: -------------------------------------------------------------------------------- 1 | ## gts-cache(7) -- intelligent caching of intermediate files 2 | 3 | ## DESCRIPTION 4 | 5 | **gts-cache**s are files that are kept in user cache directories to avoid 6 | re-executing previously executed commands. By using a **gts-cache**, gts(1) 7 | commands will not only skip the command specific computations but also parsing, 8 | which can provide significant performance boosts in certain scenarios. A cache 9 | file consists of a header which contains information for validating the content 10 | of the cache file, and a body which can be any form of data of any length. 11 | 12 | A gts(1) command will first check for the availability of a cache. This is done 13 | by computing a SHA-1 hash value using the inputs given to the command. First, 14 | the primary input file is digested to produce an _input sum_. Other inputs to 15 | the command is combined into a list of key-value pairs along with the name of 16 | the command and the command version. This list is then serialized and digested 17 | to produce the _data sum_. The _input sum_ and _data sum_ are concatenated and 18 | digested to produce the _output sum_. This _output sum_ will then be encoded as 19 | a hexadecimal string, and this value will be used as the cache filename. 20 | 21 | If a cache file with the computed filename does not exist, a command will 22 | attempt to create a cache file unless caching is disabled or an output file is 23 | specified explicitly. This means that even if caching is enabled, the cache 24 | file will only be created if the command is writing to standard output. 25 | Furthermore, a cache file will be deleted if an output file is specified by the 26 | user. 
This is done to keep duplicate data from existing within the system. 27 | While caches do provide temporal benefits, they do occupy disk space which is 28 | generally undesirable. Therefore, gts(1) commands will try to minimize the 29 | amount of cache existing within the system at a given moment. Once the cache 30 | file is created, the _input sum_, _data sum_ and the hash value of the digested 31 | body designated the _body sum_ will be written into the file comprising the 32 | header, followed by the body content. 33 | 34 | If a cache file with the computed filename does exist, a command will attempt 35 | to open the file unless caching is disabled. Once open, the header is read to 36 | verify that the _input sum_ and _data sum_ produce an _output sum_ whose 37 | hexadecimal encoding is identical to the filename. The body is then digested to 38 | also verify that the body content is intact. If either of these verifications 39 | fails, the cache is immediately discarded and a new cache file is created. If 40 | both verifications pass, the body content is then written to the specified 41 | output stream. If the output is a file explicitly specified by the user, the 42 | cache file is removed. 43 | 44 | ## SEE ALSO 45 | 46 | gts(1), gts-cache(1), gts-cache-list(1), gts-cache-path(1), gts-cache-purge(1) -------------------------------------------------------------------------------- /man/gts-select.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-select -- select features using the given feature selector(s) 2 | 3 | ## SYNOPSIS 4 | 5 | gts-select [--version] [-h | --help] [] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-select** takes a _selector_ and a single sequence input, and selects the 10 | features which satisfy the _selector_ criteria. If the sequence input is 11 | omitted, standard input will be read instead. A _selector_ takes the form 12 | `[feature_key][/[qualifier1][=regexp1]][/[qualifier2][=regexp2]]...`.
See 13 | gts-selector(7) for more details. 14 | 15 | **gts-select** serves as a central command, allowing the user to filter out 16 | features for use in other commands like gts-extract(1) and gts-query(1). See 17 | the EXAMPLES section for more insight. 18 | 19 | ## OPTIONS 20 | 21 | * ``: 22 | Feature selector 23 | (syntax: [feature_key][/[qualifier1][=regexp1]][/[qualifier2][=regexp2]]...). 24 | See gts-selector(7) for more details. 25 | 26 | * ``: 27 | Input sequence file (may be omitted if standard input is provided). See 28 | gts-seqin(7) for a list of currently supported sequence formats. 29 | 30 | * `-F `, `--format=`: 31 | Output file format (defaults to same as input). See gts-seqout(7) for a 32 | list of currently supported sequence formats. The format specified 33 | with this option will override the file type detection from the output 34 | filename. 35 | 36 | * `--no-cache`: 37 | Do not use or create cache. See gts-cache(7) for details. 38 | 39 | * `-o `, `--output=`: 40 | Output sequence file (specifying `-` will force standard output). The 41 | output file format will be automatically detected from the filename if none 42 | is specified with the `-F` or `--format` option. 43 | 44 | * `-s `, `--strand=`: 45 | Strand to select features from (`both`, `forward`, or `reverse`). If 46 | `forward` is specified, only features that reside strictly on the forward 47 | strand (features with non-complement locations or join/order locations whose 48 | locations are all non-complement) will be selected. The inverse is true for `reverse`. 49 | 50 | * `-v`, `--invert-match`: 51 | Select features that do not match the given criteria.
52 | 53 | ## EXAMPLES 54 | 55 | Select all of the CDS features: 56 | 57 | $ gts select CDS 58 | 59 | Select all features with `locus_tag` of `b0001`: 60 | 61 | $ gts select /locus_tag=b0001 62 | 63 | Select all features with the qualifier `translation`: 64 | 65 | $ gts select /translation 66 | 67 | Select all features with a qualifier value matching `recombinase` 68 | 69 | $ gts select /=recombinase 70 | 71 | ## BUGS 72 | 73 | **gts-select** currently has no known bugs. 74 | 75 | ## AUTHORS 76 | 77 | **gts-select** is written and maintained by Kotone Itaya. 78 | 79 | ## SEE ALSO 80 | 81 | gts(1), gts-query(1), gts-selector(7), gts-seqin(7), 82 | gts-seqout(7) -------------------------------------------------------------------------------- /cmd/gts/define.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | "strings" 8 | 9 | "github.com/go-gts/flags" 10 | "github.com/go-gts/gts" 11 | "github.com/go-gts/gts/cmd" 12 | "github.com/go-gts/gts/seqio" 13 | ) 14 | 15 | func init() { 16 | flags.Register("define", "define a new feature", defineFunc) 17 | } 18 | 19 | func defineFunc(ctx *flags.Context) error { 20 | h := newHash() 21 | pos, opt := flags.Flags() 22 | 23 | key := pos.String("key", "feature key") 24 | locstr := pos.String("location", "feature location") 25 | 26 | seqinPath := new(string) 27 | *seqinPath = "-" 28 | if cmd.IsTerminal(os.Stdin.Fd()) { 29 | seqinPath = pos.String("seqin", "input sequence file (may be omitted if standard input is provided)") 30 | } 31 | 32 | nocache := opt.Switch(0, "no-cache", "do not use or create cache") 33 | format := opt.String('F', "format", "", "output file format (defaults to same as input)") 34 | seqoutPath := opt.String('o', "output", "-", "output sequence file (specifying `-` will force standard output)") 35 | propstrs := opt.StringSlice('q', "qualifier", nil, "qualifier key-value pairs (syntax: key=value))") 36 | 37 | if 
err := ctx.Parse(pos, opt); err != nil { 38 | return err 39 | } 40 | 41 | loc, err := gts.AsLocation(*locstr) 42 | if err != nil { 43 | return ctx.Raise(err) 44 | } 45 | 46 | d, err := newIODelegate(*seqinPath, *seqoutPath) 47 | if err != nil { 48 | return ctx.Raise(err) 49 | } 50 | defer d.Close() 51 | 52 | filetype := seqio.Detect(*seqoutPath) 53 | if *format != "" { 54 | filetype = seqio.ToFileType(*format) 55 | } 56 | 57 | props := gts.Props{} 58 | for _, s := range *propstrs { 59 | name, value := s, "" 60 | if i := strings.IndexByte(s, '='); i >= 0 { 61 | name, value = s[:i], s[i+1:] 62 | } 63 | props.Add(name, value) 64 | } 65 | 66 | f := gts.NewFeature(*key, loc, props) 67 | 68 | if !*nocache { 69 | data := encodePayload([]tuple{ 70 | {"command", strings.Join(ctx.Name, "-")}, 71 | {"version", gts.Version.String()}, 72 | {"key", *key}, 73 | {"location", loc.String()}, 74 | {"qualifiers", *propstrs}, 75 | {"filetype", filetype}, 76 | }) 77 | 78 | ok, err := d.TryCache(h, data) 79 | if ok || err != nil { 80 | return ctx.Raise(err) 81 | } 82 | } 83 | 84 | scanner := seqio.NewAutoScanner(d) 85 | buffer := bufio.NewWriter(d) 86 | writer := seqio.NewWriter(buffer, filetype) 87 | 88 | for scanner.Scan() { 89 | seq := scanner.Value() 90 | 91 | ff := seq.Features() 92 | ff = ff.Insert(f) 93 | seq = gts.WithFeatures(seq, ff) 94 | 95 | if _, err := writer.WriteSeq(seq); err != nil { 96 | return ctx.Raise(err) 97 | } 98 | 99 | if err := buffer.Flush(); err != nil { 100 | return ctx.Raise(err) 101 | } 102 | } 103 | 104 | if err := scanner.Err(); err != nil { 105 | return ctx.Raise(fmt.Errorf("encountered error in scanner: %v", err)) 106 | } 107 | 108 | return nil 109 | } 110 | -------------------------------------------------------------------------------- /cmd/gts/annotate.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | "strings" 8 | 9 | "github.com/go-gts/flags" 10 
| "github.com/go-gts/gts" 11 | "github.com/go-gts/gts/cmd" 12 | "github.com/go-gts/gts/seqio" 13 | "github.com/go-pars/pars" 14 | ) 15 | 16 | func init() { 17 | flags.Register("annotate", "merge features from a feature list file into a sequence", annotateFunc) 18 | } 19 | 20 | func annotateFunc(ctx *flags.Context) error { 21 | h := newHash() 22 | pos, opt := flags.Flags() 23 | 24 | featinPath := pos.String("feature_table", "feature table file containing features to merge") 25 | 26 | seqinPath := new(string) 27 | *seqinPath = "-" 28 | if cmd.IsTerminal(os.Stdin.Fd()) { 29 | seqinPath = pos.String("seqin", "input sequence file (may be omitted if standard input is provided)") 30 | } 31 | 32 | nocache := opt.Switch(0, "no-cache", "do not use or create cache") 33 | seqoutPath := opt.String('o', "output", "-", "output sequence file (specifying `-` will force standard output)") 34 | format := opt.String('F', "format", "", "output file format (defaults to same as input)") 35 | 36 | if err := ctx.Parse(pos, opt); err != nil { 37 | return err 38 | } 39 | 40 | featinFile, err := os.Open(*featinPath) 41 | if err != nil { 42 | return ctx.Raise(fmt.Errorf("failed to open file %q: %v", *featinPath, err)) 43 | } 44 | 45 | h.Reset() 46 | r := attach(h, featinFile) 47 | state := pars.NewState(r) 48 | result, err := seqio.INSDCTableParser("").Parse(state) 49 | if err != nil { 50 | return ctx.Raise(err) 51 | } 52 | 53 | featin := result.Value.([]gts.Feature) 54 | featsum := h.Sum(nil) 55 | 56 | d, err := newIODelegate(*seqinPath, *seqoutPath) 57 | if err != nil { 58 | return ctx.Raise(err) 59 | } 60 | defer d.Close() 61 | 62 | filetype := seqio.Detect(*seqoutPath) 63 | if *format != "" { 64 | filetype = seqio.ToFileType(*format) 65 | } 66 | 67 | if !*nocache { 68 | data := encodePayload([]tuple{ 69 | {"command", strings.Join(ctx.Name, "-")}, 70 | {"version", gts.Version.String()}, 71 | {"featin", encodeToString(featsum)}, 72 | {"filetype", filetype}, 73 | }) 74 | 75 | ok, err := 
d.TryCache(h, data) 76 | if ok || err != nil { 77 | return ctx.Raise(err) 78 | } 79 | } 80 | 81 | scanner := seqio.NewAutoScanner(d) 82 | buffer := bufio.NewWriter(d) 83 | writer := seqio.NewWriter(buffer, filetype) 84 | 85 | for scanner.Scan() { 86 | seq := scanner.Value() 87 | ff := seq.Features() 88 | for _, f := range featin { 89 | ff = ff.Insert(f) 90 | } 91 | seq = gts.WithFeatures(seq, ff) 92 | if _, err := writer.WriteSeq(seq); err != nil { 93 | return ctx.Raise(err) 94 | } 95 | 96 | if err := buffer.Flush(); err != nil { 97 | return ctx.Raise(err) 98 | } 99 | } 100 | 101 | if err := scanner.Err(); err != nil { 102 | return ctx.Raise(fmt.Errorf("encountered error in scanner: %v", err)) 103 | } 104 | 105 | return nil 106 | } 107 | -------------------------------------------------------------------------------- /man/gts-search.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-search(1) -- search for a subsequence and annotate its results 2 | 3 | ## SYNOPSIS 4 | 5 | gts-search [--version] [-h | --help] [] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-search** takes a _query_ and a single input sequence, and marks the 10 | regions where the _query_ sequences were found. If the sequence input is 11 | ommited, standard input will be read instead. If a file with a filename 12 | equivalent to the _query_ value exists, it will be opened and read by the 13 | command. If it does not, the command will interpret the _query_ string as a 14 | sequence. The _query_ sequence(s) will be treated as an oligomer. In order to 15 | find perfect matches only, use the `-e` or `--exact` option. By default, 16 | regions are marked as `misc_feature`s without any qualifiers. Use the `-k` or 17 | `--key` option and `-q` or `--qualifier` option so you can easily discover 18 | these features later on with gts-select(1). See the EXAMPLES section for more 19 | insight. 
20 | 21 | ## OPTIONS 22 | 23 | * ``: 24 | Query sequence file (will be interpreted literally if preceded by @). 25 | See gts-seqin(7) for a list of currently supported sequence 26 | formats. 27 | 28 | * ``: 29 | Input sequence file (may be omitted if standard input is provided). See 30 | gts-seqin(7) for a list of currently supported sequence formats. 31 | 32 | * `-e`, `--exact`: 33 | Match the exact pattern even for ambiguous letters. 34 | 35 | * `-F `, `--format=`: 36 | Output file format (defaults to same as input). See gts-seqout(7) for a 37 | list of currently supported sequence formats. The format specified 38 | with this option will override the file type detection from the output 39 | filename. 40 | 41 | * `-k `, `--key=`: 42 | Key for the reported oligomer region features. The default feature key is 43 | `misc_feature`. 44 | 45 | * `--no-cache`: 46 | Do not use or create cache. See gts-cache(7) for details. 47 | 48 | * `--no-complement`: 49 | Do not match the complement strand. 50 | 51 | * `-o `, `--output=`: 52 | Output sequence file (specifying `-` will force standard output). The 53 | output file format will be automatically detected from the filename if none 54 | is specified with the `-F` or `--format` option. 55 | 56 | * `-q `, `--qualifier=`: 57 | Qualifier key-value pairs (syntax: key=value). Multiple values may be set 58 | by repeatedly passing this option to the command. 59 | 60 | ## EXAMPLES 61 | 62 | Search for and retrieve the regions 100 bases around the matches. 63 | 64 | $ gts search -q note=search | \ 65 | gts select misc_feature/note=search | \ 66 | gts extract -m '^-100..$+100' 67 | 68 | ## BUGS 69 | 70 | **gts-search** currently has no known bugs. 71 | 72 | ## AUTHORS 73 | 74 | **gts-search** is written and maintained by Kotone Itaya.
75 | 76 | ## SEE ALSO 77 | 78 | gts(1), gts-seqin(7), gts-seqout(7) -------------------------------------------------------------------------------- /man/gts-infix.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-infix(1) -- infix input sequence(s) into the host sequence(s) 2 | 3 | ## SYNOPSIS 4 | 5 | gts-infix [--version] [-h | --help] [<args>] <locator> <host> <guest> 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-infix** takes two sequence inputs: a _host_ sequence input and a _guest_ 10 | sequence input, and inserts the sequences contained in the _guest_ sequence 11 | input into the sequences contained in the _host_ sequence input. If the _guest_ 12 | sequence input is omitted, standard input will be read instead. For each 13 | sequence in the _guest_ sequence input, a copy of each of the _host_ sequence 14 | input will be created. Each _guest_ sequence will then be inserted into the 15 | location(s) specified by the `locator` in the _host_ sequence. 16 | 17 | A locator consists of a location specifier and a modifier. A location specifier 18 | may be a `modifier`, a `point location`, a `range location`, or a `selector`. 19 | The syntax for a locator is `[specifier][@modifier]`. See gts-locator(7) for a 20 | more in-depth explanation of a locator. Refer to the EXAMPLES for some examples 21 | to get started. 22 | 23 | Features that were present at the point of insertion will be split to form 24 | a `join`ed location. Such features can be instead expanded if the `-e` or 25 | `--embed` option is provided. Any features present in the _guest_ sequence 26 | will be transferred to the corresponding locations after being inserted into 27 | the _host_ sequence. 28 | 29 | There is also a similar command in gts(1) designated gts-insert(1). While 30 | **gts-infix** inserts the primary sequence input into the _host_ sequences, 31 | gts-insert(1) inserts _guest_ sequences into the primary sequence input.
Use 32 | **gts-infix** when you want to insert the sequence that you are working on in a 33 | pipeline into another sequence. Note that all of the _host_ sequences will be 34 | read into memory when using **gts-infix** and all of the _guest_ sequences will 35 | be read into memory when using gts-insert(1). If memory availability may be an 36 | issue, make sure to use the command that will read in the smaller file. 37 | 38 | ## OPTIONS 39 | 40 | * ``: 41 | A locator string (`[specifier][@modifier]`). See gts-locator(7) for more 42 | details. 43 | 44 | * ``: 45 | Host sequence file. See gts-seqin(7) for a list of currently supported 46 | sequence formats. 47 | 48 | * ``: 49 | Input sequence file (may be omitted if standard input is provided). 50 | 51 | * `-e`, `--embed`: 52 | Extend existing feature locations when inserting instead of splitting them. 53 | 54 | * `-F `, `--format=`: 55 | Output file format (defaults to same as input). 56 | 57 | * `--no-cache`: 58 | Do not use or create cache. 59 | 60 | * `-o `, `--output=`: 61 | Output sequence file (specifying `-` will force standard output). 62 | 63 | ## BUGS 64 | 65 | **gts-infix** currently has no known bugs. 66 | 67 | ## AUTHORS 68 | 69 | **gts-infix** is written and maintained by Kotone Itaya. 70 | 71 | ## SEE ALSO -------------------------------------------------------------------------------- /man/gts-cache.7: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-CACHE" "7" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-cache\fR 8 | . 9 | .SH "gts\-cache(7) \-\- intelligent caching of intermediate files" 10 | . 11 | .SH "DESCRIPTION" 12 | \fBgts\-cache\fRs are files that are kept in user cache directories to avoid re\-executing previously executed commands\.
By using a \fBgts\-cache\fR, gts(1) commands will not only skip the command specific computations but also parsing, which can provide significant performance boosts in certain scenarios\. A cache file consists of a header which contains information for validating the content of the cache file, and a body which can be any form of data of any length\. 13 | . 14 | .P 15 | A gts(1) command will first check for the availability of a cache\. This is done by computing a SHA\-1 hash value using the inputs given to the command\. First, the primary input file is digested to produce an \fIinput sum\fR\. Other inputs to the command are combined into a list of key\-value pairs along with the name of the command and the command version\. This list is then serialized and digested to produce the \fIdata sum\fR\. The \fIinput sum\fR and \fIdata sum\fR are concatenated and digested to produce the \fIoutput sum\fR\. This \fIoutput sum\fR will then be encoded as a hexadecimal string, and this value will be used as the cache filename\. 16 | . 17 | .P 18 | If a cache file with the computed filename does not exist, a command will attempt to create a cache file unless caching is disabled or an output file is specified explicitly\. This means that even if caching is enabled, the cache file will only be created if the command is writing to standard output\. Furthermore, a cache file will be deleted if an output file is specified by the user\. This is done to minimize duplicate data from existing within the system\. While caches do provide temporal benefits, they do occupy disk space which is generally undesirable\. Therefore, gts(1) commands will try to minimize the amount of cache existing within the system at a given moment\. Once the cache file is created, the \fIinput sum\fR, \fIdata sum\fR and the hash value of the digested body designated the \fIbody sum\fR will be written into the file comprising the header, followed by the body content\. 19 | .
20 | .P 21 | If a cache file with the computed filename does exist, a command will attempt to open the file unless caching is disabled\. Once open, the header is read to verify that the \fIinput sum\fR and \fIdata sum\fR produces an \fIoutput sum\fR whose hexadecimal encoding is identical to the filename\. The body is then digested to also verify that the body content is intact\. If either of this verification fails, the cache is immediately discarded and a new cache file is created\. If both verifications pass, the body content is then written to the specified output stream\. If the output is a file explicitly specified by the user, the cache file is removed\. 22 | . 23 | .SH "SEE ALSO" 24 | gts(1), gts\-cache(1), gts\-cache\-list(1), gts\-cache\-path(1), gts\-cache\-purge(1) 25 | -------------------------------------------------------------------------------- /man/gts.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\fR \- the genome transformation subprograms command line tool 8 | . 9 | .SH "SYNOPSIS" 10 | usage: gts [\-\-version] [\-h | \-\-help] \fIcommand\fR [\fIargs\fR] 11 | . 12 | .SH "DESCRIPTION" 13 | \fBGTS\fR provides basic manipulation utilities for genome flatfiles\. The command consists of a number of subcommands listed in the \fBCOMMANDS\fR section\. 14 | . 15 | .SH "COMMANDS" 16 | . 17 | .TP 18 | \fBgts\-annotate(1)\fR 19 | Merge features from a feature list file into a sequence\. 20 | . 21 | .TP 22 | \fBgts\-cache(1)\fR 23 | Manage gts cache files\. 24 | . 25 | .TP 26 | \fBgts\-clear(1)\fR 27 | Remove all features from the sequence (excluding source features)\. 28 | . 29 | .TP 30 | \fBgts\-complement(1)\fR 31 | Compute the complement of the given sequence\. 32 | . 33 | .TP 34 | \fBgts\-define(1)\fR 35 | Define a new feature\. 36 | . 
37 | .TP 38 | \fBgts\-delete(1)\fR 39 | Delete a region of the given sequence(s)\. 40 | . 41 | .TP 42 | \fBgts\-extract(1)\fR 43 | Extract the sequences referenced by the features\. 44 | . 45 | .TP 46 | \fBgts\-infix(1)\fR 47 | Infix input sequence(s) into the host sequence(s)\. 48 | . 49 | .TP 50 | \fBgts\-insert(1)\fR 51 | Insert a sequence into another sequence(s)\. 52 | . 53 | .TP 54 | \fBgts\-join(1)\fR 55 | Join the sequences contained in the files\. 56 | . 57 | .TP 58 | \fBgts\-length(1)\fR 59 | Report the length of the sequence(s)\. 60 | . 61 | .TP 62 | \fBgts\-pick(1)\fR 63 | Pick sequence(s) from multiple sequences\. 64 | . 65 | .TP 66 | \fBgts\-query(1)\fR 67 | Query information from the given sequence\. 68 | . 69 | .TP 70 | \fBgts\-repair(1)\fR 71 | Repair fragmented features\. 72 | . 73 | .TP 74 | \fBgts\-reverse(1)\fR 75 | Reverse order of the given sequence(s)\. 76 | . 77 | .TP 78 | \fBgts\-rotate(1)\fR 79 | Shift the coordinates of a circular sequence\. 80 | . 81 | .TP 82 | \fBgts\-search(1)\fR 83 | Search for a subsequence and annotate its results\. 84 | . 85 | .TP 86 | \fBgts\-select(1)\fR 87 | Select features using the given feature selector(s)\. 88 | . 89 | .TP 90 | \fBgts\-sort(1)\fR 91 | Sort the list of sequences\. 92 | . 93 | .TP 94 | \fBgts\-split(1)\fR 95 | Split the sequence at the provided locations\. 96 | . 97 | .TP 98 | \fBgts\-summary(1)\fR 99 | Report a brief summary of the sequence(s)\. 100 | . 101 | .SH "BUGS" 102 | \fBgts\fR currently has no known bugs\. 103 | . 104 | .SH "AUTHORS" 105 | \fBgts\fR is written and maintained by Kotone Itaya\. 106 | . 
107 | .SH "SEE ALSO" 108 | gts\-annotate(1), gts\-cache(1), gts\-clear(1), gts\-complement(1), gts\-define(1), gts\-delete(1), gts\-extract(1), gts\-infix(1), gts\-insert(1), gts\-join(1), gts\-length(1), gts\-pick(1), gts\-query(1), gts\-repair(1), gts\-reverse(1), gts\-rotate(1), gts\-search(1), gts\-select(1), gts\-sort(1), gts\-split(1), gts\-summary(1), gts\-locator(7), gts\-modifier(7), gts\-selector(7), gts\-seqin(7), gts\-seqout(7) 109 | -------------------------------------------------------------------------------- /cmd/gts/cache.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/hex" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | 9 | "github.com/dustin/go-humanize" 10 | "github.com/go-gts/flags" 11 | "github.com/go-gts/gts/cmd/cache" 12 | ) 13 | 14 | func init() { 15 | cacheSet := flags.CommandSet{} 16 | 17 | cacheSet.Register("list", "list the cache files", cacheListFunc) 18 | cacheSet.Register("path", "print the cache directory path", cachePathFunc) 19 | cacheSet.Register("purge", "delete all cache files", cachePurgeFunc) 20 | 21 | flags.Register("cache", "manage gts cache files", cacheSet.Compile()) 22 | } 23 | 24 | func cacheListFunc(ctx *flags.Context) error { 25 | pos, opt := flags.Flags() 26 | if err := ctx.Parse(pos, opt); err != nil { 27 | return err 28 | } 29 | 30 | dir, err := gtsCacheDir() 31 | if err != nil { 32 | return nil 33 | } 34 | 35 | total := uint64(0) 36 | 37 | walker := func(path string, info os.FileInfo, err error) error { 38 | if err != nil { 39 | return err 40 | } 41 | 42 | if info.IsDir() { 43 | return nil 44 | } 45 | 46 | f, err := os.Open(path) 47 | if err != nil { 48 | return err 49 | } 50 | defer f.Close() 51 | 52 | h := newHash() 53 | hd, err := cache.ReadHeader(f, h.Size()) 54 | if err != nil { 55 | return err 56 | } 57 | 58 | h.Reset() 59 | h.Write(append(hd.RootSum, hd.DataSum...)) 60 | lsum := h.Sum(nil) 61 | lhex := hex.EncodeToString(lsum) 
62 | 63 | base := filepath.Base(info.Name()) 64 | size := uint64(info.Size()) 65 | 66 | if lhex != base { 67 | return fmt.Errorf("in cache %s: Leaf hash and filename mismatch", path) 68 | } 69 | 70 | total += size 71 | 72 | fmt.Printf("%s\t%s\n", base, humanize.IBytes(size)) 73 | 74 | return nil 75 | } 76 | 77 | if err := filepath.Walk(dir, walker); err != nil { 78 | return ctx.Raise(err) 79 | } 80 | 81 | fmt.Printf("Total\t%s\n", humanize.IBytes(total)) 82 | 83 | return nil 84 | } 85 | 86 | func cachePathFunc(ctx *flags.Context) error { 87 | pos, opt := flags.Flags() 88 | if err := ctx.Parse(pos, opt); err != nil { 89 | return err 90 | } 91 | 92 | dir, err := gtsCacheDir() 93 | if err != nil { 94 | return nil 95 | } 96 | 97 | fmt.Println(dir) 98 | 99 | return nil 100 | } 101 | 102 | func cachePurgeFunc(ctx *flags.Context) error { 103 | pos, opt := flags.Flags() 104 | if err := ctx.Parse(pos, opt); err != nil { 105 | return err 106 | } 107 | 108 | dir, err := gtsCacheDir() 109 | if err != nil { 110 | return nil 111 | } 112 | 113 | walker := func(path string, info os.FileInfo, err error) error { 114 | if err != nil { 115 | return err 116 | } 117 | if info.IsDir() { 118 | if path == dir { 119 | return nil 120 | } 121 | if err := os.RemoveAll(path); err != nil { 122 | return err 123 | } 124 | return filepath.SkipDir 125 | } 126 | 127 | return os.Remove(path) 128 | } 129 | 130 | if err := filepath.Walk(dir, walker); err != nil { 131 | return ctx.Raise(err) 132 | } 133 | 134 | return nil 135 | } 136 | -------------------------------------------------------------------------------- /man/gts-delete.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-DELETE" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-delete\fR \- delete a region of the given sequence(s) 8 | . 
9 | .SH "SYNOPSIS" 10 | gts\-delete [\-\-version] [\-h | \-\-help] [\fIargs\fR] \fIlocator\fR \fIseqin\fR 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-delete\fR takes a single sequence input and deletes the specified region\. If the sequence input is ommited, standard input will be read instead\. The region to be deleted is specified using a \fBlocator\fR\. 14 | . 15 | .P 16 | A locator consists of a location specifier and a modifier\. A location specifier may be a \fBmodifier\fR, a \fBpoint location\fR, a \fBrange location\fR, or a \fBselector\fR\. The syntax for a locator is \fB[specifier][@modifier]\fR\. See gts\-locator(7) for a more in\-depth explanation of a locator\. Refer to the EXAMPLES for some examples to get started\. 17 | . 18 | .P 19 | Features that were present in the region being deleted will be shifted as being in between the bases at the deletion point\. Such features can be completely erased from the sequence if the \fB\-e\fR or \fB\-\-erase\fR option is provided\. 20 | . 21 | .SH "OPTIONS" 22 | . 23 | .TP 24 | \fB\fR 25 | A locator string (\fB[specifier][@modifier]\fR)\. See gts\-locator(7) for more details\. 26 | . 27 | .TP 28 | \fB\fR 29 | Input sequence file (may be omitted if standard input is provided)\. See gts\-seqin(7) for a list of currently supported list of sequence formats\. 30 | . 31 | .TP 32 | \fB\-e\fR, \fB\-\-erase\fR 33 | Remove features contained in the deleted regions\. 34 | . 35 | .TP 36 | \fB\-F \fR, \fB\-\-format=\fR 37 | Output file format (defaults to same as input)\. See gts\-seqout(7) for a list of currently supported list of sequence formats\. The format specified with this option will override the file type detection from the output filename\. 38 | . 39 | .TP 40 | \fB\-\-no\-cache\fR 41 | Do not use or create cache\. See gts\-cache(7) for details\. 42 | . 43 | .TP 44 | \fB\-o \fR, \fB\-\-output=\fR 45 | Output sequence file (specifying \fB\-\fR will force standard output)\. 
The output file format will be automatically detected from the filename if none is specified with the \fB\-F\fR or \fB\-\-format\fR option\. 46 | . 47 | .SH "EXAMPLES" 48 | Delete bases 100 to 200: 49 | . 50 | .IP "" 4 51 | . 52 | .nf 53 | 54 | $ gts delete 100\.\.200 55 | . 56 | .fi 57 | . 58 | .IP "" 0 59 | . 60 | .P 61 | Delete all regions of \fBmisc_feature\fR and its features: 62 | . 63 | .IP "" 4 64 | . 65 | .nf 66 | 67 | $ gts delete \-\-erase misc_feature 68 | . 69 | .fi 70 | . 71 | .IP "" 0 72 | . 73 | .P 74 | Delete 20 bases upstream of every \fBCDS\fR: 75 | . 76 | .IP "" 4 77 | . 78 | .nf 79 | 80 | $ gts delete CDS^\-20\.\.^ 81 | . 82 | .fi 83 | . 84 | .IP "" 0 85 | . 86 | .SH "BUGS" 87 | \fBgts\-delete\fR currently has no known bugs\. 88 | . 89 | .SH "AUTHORS" 90 | \fBgts\-delete\fR is written and maintained by Kotone Itaya\. 91 | . 92 | .SH "SEE ALSO" 93 | gts(1), gts\-insert(1), gts\-locator(7), gts\-modifier(7), gts\-selector(7), gts\-seqin(7), gts\-seqout(7) 94 | -------------------------------------------------------------------------------- /man/gts-query.1.ronn: -------------------------------------------------------------------------------- 1 | # gts-query(1) -- query information from the given sequence 2 | 3 | ## SYNOPSIS 4 | 5 | gts-query [--version] [-h | --help] [] 6 | 7 | ## DESCRIPTION 8 | 9 | **gts-query** takes a single sequence input and reports various information 10 | about its features. If the sequence input is omitted, standard input will be 11 | read instead. By default, it will output the sequence ID (or a unique sequence 12 | number if there are no IDs available), a feature key, its location, and any 13 | qualifiers that are common to all of the features present. A single line 14 | represents a single feature entry. 15 | 16 | This command is best utilized in combination with the gts-select(1) command. 17 | Use gts-select(1) to narrow down the features to be extracted, and then apply 18 | **gts-query** to retrieve information.
See the EXAMPLES section for more 19 | insight. For a brief summary of a sequence, consider using gts-summary(1). 20 | 21 | ## OPTIONS 22 | 23 | * ``: 24 | Input sequence file (may be omitted if standard input is provided). See 25 | gts-seqin(7) for a list of currently supported sequence formats. 26 | 27 | * `-d `, `--delimiter=`: 28 | String to insert between columns. The default delimiter is a tab `\t` 29 | character. 30 | 31 | * `--empty`: 32 | Allow missing qualifiers to be reported. Unlike GFFs, these columns will be 33 | completely empty. 34 | 35 | * `-H`, `--no-header`: 36 | Do not print the header line. 37 | 38 | * `-I`, `--no-seqid`: 39 | Do not report the sequence identifier. 40 | 41 | * `-K`, `--no-key`: 42 | Do not report the feature key. 43 | 44 | * `-L`, `--no-location`: 45 | Do not report the feature location. 46 | 47 | * `-n `, `--name=`: 48 | Qualifier name(s) to select. Multiple values may be set by repeatedly 49 | passing this option to the command. If set, only qualifiers that have the 50 | given name will be reported. 51 | 52 | * `--no-cache`: 53 | Do not use or create cache. See gts-cache(7) for details. 54 | 55 | * `-o `, `--output=`: 56 | Output table file (specifying `-` will force standard output). 57 | 58 | * `--source`: 59 | Include the source feature(s). 60 | 61 | * `-t `, `--separator=`: 62 | String to insert between qualifier values. The default separator is a comma 63 | `,` character. By default, the qualifier values will be reported in a CSV 64 | format. All commas and double quotes will be escaped, and all newline 65 | characters will be replaced with a whitespace.
66 | 67 | ## EXAMPLES 68 | 69 | Report information of all CDS features: 70 | 71 | $ gts select CDS | gts query 72 | 73 | Report information of a CDS feature with `locus_tag` of `b0001`: 74 | 75 | $ gts select CDS/locus_tag=b0001 | gts query 76 | 77 | Report all of the `db_xref` qualifiers for every gene in the sequence: 78 | 79 | $ gts select gene | gts query -n db_xref 80 | $ gts select gene | gts query --name db_xref 81 | 82 | ## BUGS 83 | 84 | **gts-query** currently has no known bugs. 85 | 86 | ## AUTHORS 87 | 88 | **gts-query** is written and maintained by Kotone Itaya. 89 | 90 | ## SEE ALSO 91 | 92 | gts(1), gts-select(1), gts-summary(1), gts-seqin(7), gts-seqout(7) -------------------------------------------------------------------------------- /seqio/date.go: -------------------------------------------------------------------------------- 1 | package seqio 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "strconv" 7 | "strings" 8 | "time" 9 | ) 10 | 11 | // Date represents a date stamp for record entries. 12 | type Date struct { 13 | Year int 14 | Month time.Month 15 | Day int 16 | } 17 | 18 | // FromTime creates a Date object from a time.Time object. 19 | func FromTime(t time.Time) Date { 20 | return Date{t.Year(), t.Month(), t.Day()} 21 | } 22 | 23 | // ToTime converts the Date object into a time.Time object. 
24 | func (d Date) ToTime() time.Time { 25 | return time.Date(d.Year, d.Month, d.Day, 0, 0, 0, 0, time.UTC) 26 | } 27 | 28 | var monthMap = map[string]time.Month{ 29 | "JAN": time.January, "Jan": time.January, "01": time.January, 30 | "FEB": time.February, "Feb": time.February, "02": time.February, 31 | "MAR": time.March, "Mar": time.March, "03": time.March, 32 | "APR": time.April, "Apr": time.April, "04": time.April, 33 | "MAY": time.May, "May": time.May, "05": time.May, 34 | "JUN": time.June, "Jun": time.June, "06": time.June, 35 | "JUL": time.July, "Jul": time.July, "07": time.July, 36 | "AUG": time.August, "Aug": time.August, "08": time.August, 37 | "SEP": time.September, "Sep": time.September, "09": time.September, 38 | "OCT": time.October, "Oct": time.October, "10": time.October, 39 | "NOV": time.November, "Nov": time.November, "11": time.November, 40 | "DEC": time.December, "Dec": time.December, "12": time.December, 41 | } 42 | 43 | var dayMap = map[time.Month]int{ 44 | time.January: 31, 45 | time.February: 28, 46 | time.March: 31, 47 | time.April: 30, 48 | time.May: 31, 49 | time.June: 30, 50 | time.July: 31, 51 | time.August: 31, 52 | time.September: 30, 53 | time.October: 31, 54 | time.November: 30, 55 | time.December: 31, 56 | } 57 | 58 | func isLeapYear(year int) bool { 59 | switch { 60 | case year%400 == 0: 61 | return true 62 | case year%100 == 0: 63 | return false 64 | default: 65 | return year%4 == 0 66 | } 67 | } 68 | 69 | func checkDate(year int, month time.Month, day int) error { 70 | dayMax, ok := dayMap[month] 71 | if !ok { 72 | return fmt.Errorf("bad month value: %q", month) 73 | } 74 | if month == time.February && isLeapYear(year) { 75 | dayMax++ 76 | } 77 | if day < 1 { 78 | return fmt.Errorf("day cannot be less than 1, got %d", day) 79 | } 80 | if day > dayMax { 81 | return fmt.Errorf("%q has %d days: got %d", month, dayMax, day) 82 | } 83 | return nil 84 | } 85 | 86 | // AsDate interprets the given string as a Date. 
87 | func AsDate(s string) (Date, error) { 88 | parts := strings.Split(s, "-") 89 | if len(parts) != 3 { 90 | return Date{}, errors.New("expected 3 fields in date") 91 | } 92 | sday, smonth, syear := parts[0], parts[1], parts[2] 93 | day, err := strconv.Atoi(sday) 94 | if err != nil { 95 | return Date{}, fmt.Errorf("cannot interpret %q as day value", sday) 96 | } 97 | month, ok := monthMap[smonth] 98 | if !ok { 99 | return Date{}, fmt.Errorf("cannot interpret %q as month value", smonth) 100 | } 101 | year, err := strconv.Atoi(syear) 102 | if err != nil { 103 | return Date{}, fmt.Errorf("cannot interpret %q as year value", syear) 104 | } 105 | return Date{year, month, day}, checkDate(year, month, day) 106 | } 107 | -------------------------------------------------------------------------------- /man/gts-extract.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-EXTRACT" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-extract\fR \- extract the sequences referenced by the features 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-extract [\-\-version] [\-h | \-\-help] [\fIargs\fR] \fIseqin\fR 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-extract\fR takes a single sequence input and return the sequences associated with each feature\. If the sequence input is ommited, standard input will be read instead\. Additionally, if the \fBlocator\fR argument is given, the regions associated with the \fBlocator\fRs will be extracted\. 14 | . 15 | .P 16 | A locator consists of a location specifier and a modifier\. A location specifier may be a \fBmodifier\fR, a \fBpoint location\fR, a \fBrange location\fR, or a \fBselector\fR\. The syntax for a locator is \fB[specifier][@modifier]\fR\. See gts\-locator(7) for a more in\-depth explanation of a locator\. Refer to the EXAMPLES for some examples to get started\. 17 | . 
18 | .P 19 | This command is best utilized in combination with the gts\-select(1) command\. Use gts\-select(1) to narrow down the sequence regions to be extracted, and then apply \fBgts\-extract\fR to retrieve the sequences\. See the EXAMPLES section for more insight\. 20 | . 21 | .SH "OPTIONS" 22 | . 23 | .TP 24 | \fB\.\.\.\fR 25 | A locator string ([specifier][@modifier])\. See gts\-locator(7) for more details\. 26 | . 27 | .TP 28 | \fB\fR 29 | Input sequence file (may be omitted if standard input is provided)\. See gts\-seqin(7) for a list of currently supported list of sequence formats\. 30 | . 31 | .TP 32 | \fB\-F \fR, \fB\-\-format=\fR 33 | Output file format (defaults to same as input)\. See gts\-seqout(7) for a list of currently supported list of sequence formats\. The format specified with this option will override the file type detection from the output filename\. 34 | . 35 | .TP 36 | \fB\-\-no\-cache\fR 37 | Do not use or create cache\. See gts\-cache(7) for details\. 38 | . 39 | .TP 40 | \fB\-o \fR, \fB\-\-output=\fR 41 | Output sequence file (specifying \fB\-\fR will force standard output)\. The output file format will be automatically detected from the filename if none is specified with the \fB\-F\fR or \fB\-\-format\fR option\. 42 | . 43 | .SH "EXAMPLES" 44 | Retrieve the sequences of all CDS features: 45 | . 46 | .IP "" 4 47 | . 48 | .nf 49 | 50 | $ gts select CDS | gts extract 51 | . 52 | .fi 53 | . 54 | .IP "" 0 55 | . 56 | .P 57 | Retrieve the sequence 100 bases upstream of all CDS features: 58 | . 59 | .IP "" 4 60 | . 61 | .nf 62 | 63 | $ gts select CDS | gts extract \-m ^\-100\.\.^ 64 | $ gts select CDS | gts extract \-\-range ^\-100\.\.^ 65 | . 66 | .fi 67 | . 68 | .IP "" 0 69 | . 70 | .P 71 | Retrieve the sequence 100 bases downstream of all CDS features: 72 | . 73 | .IP "" 4 74 | . 75 | .nf 76 | 77 | $ gts select CDS | gts extract \-m $\.\.$+100 78 | $ gts select CDS | gts extract \-\-range $\.\.$+100 79 | . 80 | .fi 81 | . 
82 | .IP "" 0 83 | . 84 | .SH "BUGS" 85 | \fBgts\-extract\fR currently has no known bugs\. 86 | . 87 | .SH "AUTHORS" 88 | \fBgts\-extract\fR is written and maintained by Kotone Itaya\. 89 | . 90 | .SH "SEE ALSO" 91 | gts(1), gts\-select(1), gts\-modifier(7), gts\-seqin(7), gts\-seqout(7) 92 | -------------------------------------------------------------------------------- /man/gts-infix.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-INFIX" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-infix\fR \- infix input sequence(s) into the host sequence(s) 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-infix [\-\-version] [\-h | \-\-help] [\fIargs\fR] \fIlocator\fR \fIhost\fR \fIguest\fR 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-infix\fR takes two sequence inputs: a \fIhost\fR sequence input and a \fIguest\fR sequence input, and inserts the sequences contained in the \fIhost\fR sequence input into the sequences contained in the \fIguest\fR sequence input\. If the \fIguest\fR sequence input is omitted, standard input will be read instead\. For each sequence in the \fIguest\fR sequence input, a copy of each of the \fIhost\fR sequence input will be created\. Each \fIguest\fR sequence will then be inserted into the location(s) specified by the \fBlocator\fR in the \fIhost\fR sequence\. 14 | . 15 | .P 16 | A locator consists of a location specifier and a modifier\. A location specifier may be a \fBmodifier\fR, a \fBpoint location\fR, a \fBrange location\fR, or a \fBselector\fR\. The syntax for a locator is \fB[specifier][@modifier]\fR\. See gts\-locator(7) for a more in\-depth explanation of a locator\. Refer to the EXAMPLES for some examples to get started\. 17 | . 18 | .P 19 | Features that were present at the point of insertion will be split to form a \fBjoin\fRed location\. 
Such features can be instead expanded if the \fB\-e\fR or \fB\-\-embed\fR option is provided\. Any features present in the \fIguest\fR sequence will be transferred to the corresponding locations after being inesrted into the \fIhost\fR sequence\. 20 | . 21 | .P 22 | There is also a similar command in gts(1) designated gts\-insert(1), While \fBgts\-infix\fR inserts the primary sequence input into the \fIhost\fR sequences, gts\-insert(1) inserts \fIguest\fR sequences into the primary sequence input\. Use \fBgts\-infix\fR when you want a insert the sequence that you are working on in a pipeline into another sequence\. Note that all of the \fIhost\fR sequences will be read into memory when using \fBgts\-infix\fR and all of the \fIguest\fR sequences will be read into memory when using gts\-insert(1)\. If memory availability may be an issue, make sure to use the command that will read in the smaller file\. 23 | . 24 | .SH "OPTIONS" 25 | . 26 | .TP 27 | \fB\fR 28 | A locator string (\fB[specifier][@modifier]\fR)\. See gts\-locator(7) for more details\. 29 | . 30 | .TP 31 | \fB\fR 32 | Host sequence file\. See gts\-seqin(7) for a list of currently supported list of sequence formats\. 33 | . 34 | .TP 35 | \fB\fR 36 | Input sequence file (may be omitted if standard input is provided)\. 37 | . 38 | .TP 39 | \fB\-e\fR, \fB\-\-embed\fR 40 | Extend existing feature locations when inserting instead of splitting them\. 41 | . 42 | .TP 43 | \fB\-F \fR, \fB\-\-format=\fR 44 | Output file format (defaults to same as input)\. 45 | . 46 | .TP 47 | \fB\-\-no\-cache\fR 48 | Do not use or create cache\. 49 | . 50 | .TP 51 | \fB\-o \fR, \fB\-\-output=\fR 52 | Output sequence file (specifying \fB\-\fR will force standard output)\. 53 | . 54 | .SH "BUGS" 55 | \fBgts\-infix\fR currently has no known bugs\. 56 | . 57 | .SH "AUTHORS" 58 | \fBgts\-infix\fR is written and maintained by @AUTHOR@\. 59 | . 
60 | .SH "SEE ALSO" 61 | 62 | -------------------------------------------------------------------------------- /man/gts-search.1: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GTS\-SEARCH" "1" "October 2020" "" "" 5 | . 6 | .SH "NAME" 7 | \fBgts\-search\fR \- search for a subsequence and annotate its results 8 | . 9 | .SH "SYNOPSIS" 10 | gts\-search [\-\-version] [\-h | \-\-help] [\fIargs\fR] \fIquery\fR \fIseqin\fR 11 | . 12 | .SH "DESCRIPTION" 13 | \fBgts\-search\fR takes a \fIquery\fR and a single input sequence, and marks the regions where the \fIquery\fR sequences were found\. If the sequence input is ommited, standard input will be read instead\. If a file with a filename equivalent to the \fIquery\fR value exists, it will be opened and read by the command\. If it does not, the command will interpret the \fIquery\fR string as a sequence\. The \fIquery\fR sequence(s) will be treated as an oligomer\. In order to find perfect matches only, use the \fB\-e\fR or \fB\-\-exact\fR option\. By default, regions are marked as \fBmisc_feature\fRs without any qualifiers\. Use the \fB\-k\fR or \fB\-\-key\fR option and \fB\-q\fR or \fB\-\-qualifier\fR option so you can easily discover these features later on with gts\-select(1)\. See the EXAMPLES section for more insight\. 14 | . 15 | .SH "OPTIONS" 16 | . 17 | .TP 18 | \fB\fR 19 | Query sequence file (will be interpreted literally if preceded with @)\. See gts\-seqin(7) for a list of currently supported list of sequence formats\. 20 | . 21 | .TP 22 | \fB\fR 23 | Input sequence file (may be omitted if standard input is provided)\. See gts\-seqin(7) for a list of currently supported list of sequence formats\. 24 | . 25 | .TP 26 | \fB\-e\fR, \fB\-\-exact\fR 27 | Match the exact pattern even for ambiguous letters\. 28 | . 
29 | .TP 30 | \fB\-F \fR, \fB\-\-format=\fR 31 | Output file format (defaults to same as input)\. See gts\-seqout(7) for a list of currently supported list of sequence formats\. The format specified with this option will override the file type detection from the output filename\. 32 | . 33 | .TP 34 | \fB\-k \fR, \fB\-\-key=\fR 35 | Key for the reported oligomer region features\. The default feature key is \fBmisc_feature\fR\. 36 | . 37 | .TP 38 | \fB\-\-no\-cache\fR 39 | Do not use or create cache\. See gts\-cache(7) for details\. 40 | . 41 | .TP 42 | \fB\-\-no\-complement\fR 43 | Do not match the complement strand\. 44 | . 45 | .TP 46 | \fB\-o \fR, \fB\-\-output=\fR 47 | Output sequence file (specifying \fB\-\fR will force standard output)\. The output file format will be automatically detected from the filename if none is specified with the \fB\-F\fR or \fB\-\-format\fR option\. 48 | . 49 | .TP 50 | \fB\-q \fR, \fB\-\-qualifier=\fR 51 | Qualifier key\-value pairs (syntax: key=value))\. Multiple values may be set by repeatedly passing this option to the command\. 52 | . 53 | .SH "EXAMPLES" 54 | Search for \fIquery\fR and retrieve the regions 100 bases around the matches\. 55 | . 56 | .IP "" 4 57 | . 58 | .nf 59 | 60 | $ gts search \-q note=search | \e 61 | gts select misc_feature/note=search | \e 62 | gts extract \-m \'^\-100\.\.$+100\' 63 | . 64 | .fi 65 | . 66 | .IP "" 0 67 | . 68 | .SH "BUGS" 69 | \fBgts\-search\fR currently has no known bugs\. 70 | . 71 | .SH "AUTHORS" 72 | \fBgts\-search\fR is written and maintained by Kotone Itaya\. 73 | . 74 | .SH "SEE ALSO" 75 | gts(1), gts\-seqin(7), gts\-seqout(7) 76 | -------------------------------------------------------------------------------- /cmd/cache/file.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "compress/flate" 5 | "encoding/hex" 6 | "hash" 7 | "io" 8 | "os" 9 | "path/filepath" 10 | ) 11 | 12 | // File represents a cache file. 
13 | type File struct { 14 | h hash.Hash 15 | f *os.File 16 | hd Header 17 | rd io.ReadCloser 18 | wr io.WriteCloser 19 | } 20 | 21 | // Open a cache file for reading. 22 | func Open(path string, h hash.Hash, rsum, dsum []byte) (*File, error) { 23 | h.Reset() 24 | h.Write(append(rsum, dsum...)) 25 | lsum := h.Sum(nil) 26 | name := filepath.Join(path, hex.EncodeToString(lsum)) 27 | 28 | f, err := os.Open(name) 29 | if err != nil { 30 | return nil, err 31 | } 32 | 33 | size := h.Size() 34 | hd, err := ReadHeader(f, size) 35 | if err != nil { 36 | return nil, err 37 | } 38 | 39 | h.Reset() 40 | _, ret := io.Copy(h, f) 41 | bsum := h.Sum(nil) 42 | 43 | if err := hd.Validate(rsum, dsum, bsum); ret == nil { 44 | ret = err 45 | } 46 | 47 | if _, err := f.Seek(int64(size)*3, io.SeekStart); ret == nil { 48 | ret = err 49 | } 50 | 51 | rd := flate.NewReader(f) 52 | return &File{h, f, hd, rd, nil}, ret 53 | } 54 | 55 | // CreateLevel creates a new cache file with the given compression level. 56 | func CreateLevel(path string, h hash.Hash, rsum, dsum []byte, level int) (*File, error) { 57 | h.Reset() 58 | h.Write(append(rsum, dsum...)) 59 | lsum := h.Sum(nil) 60 | name := filepath.Join(path, hex.EncodeToString(lsum)) 61 | 62 | f, err := os.Create(name) 63 | if err != nil { 64 | return nil, err 65 | } 66 | 67 | _, ret := f.Write(make([]byte, h.Size()*3)) 68 | 69 | hd := Header{rsum, dsum, nil} 70 | rd := flate.NewReader(f) 71 | wr, err := flate.NewWriter(f, level) 72 | if ret == nil { 73 | ret = err 74 | } 75 | return &File{h, f, hd, rd, wr}, ret 76 | } 77 | 78 | // Create a new cache file with the default compression level. 79 | func Create(path string, h hash.Hash, rsum, dsum []byte) (*File, error) { 80 | return CreateLevel(path, h, rsum, dsum, flate.DefaultCompression) 81 | } 82 | 83 | // Name returns the name of the file. 84 | func (f *File) Name() string { 85 | return f.f.Name() 86 | } 87 | 88 | // Read from the file through the flate.Reader. 
89 | func (f *File) Read(p []byte) (int, error) { 90 | return f.rd.Read(p) 91 | } 92 | 93 | // ReadOnly returns true if the file is read only. 94 | func (f *File) ReadOnly() bool { 95 | return f.wr == nil 96 | } 97 | 98 | // Write to the file through the flate.Writer. 99 | func (f *File) Write(p []byte) (int, error) { 100 | if f.wr == nil { 101 | return 0, io.EOF 102 | } 103 | return f.wr.Write(p) 104 | } 105 | 106 | // Close the files and write the body hash sum to the header. 107 | func (f *File) Close() error { 108 | defer f.f.Close() 109 | defer f.rd.Close() 110 | 111 | if f.wr != nil { 112 | ret := f.wr.Close() 113 | 114 | if _, err := f.f.Seek(int64(f.h.Size())*3, io.SeekStart); ret == nil { 115 | ret = err 116 | } 117 | 118 | f.h.Reset() 119 | if _, err := io.Copy(f.h, f.f); ret == nil { 120 | ret = err 121 | } 122 | 123 | f.hd.BodySum = f.h.Sum(nil) 124 | if _, err := f.f.Seek(0, io.SeekStart); ret == nil { 125 | ret = err 126 | } 127 | 128 | if _, err := f.hd.WriteTo(f.f); ret == nil { 129 | ret = err 130 | } 131 | 132 | return ret 133 | } 134 | 135 | return nil 136 | } 137 | -------------------------------------------------------------------------------- /cmd/gts/select.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | "sort" 8 | "strings" 9 | 10 | "github.com/go-gts/flags" 11 | "github.com/go-gts/gts" 12 | "github.com/go-gts/gts/cmd" 13 | "github.com/go-gts/gts/seqio" 14 | ) 15 | 16 | func init() { 17 | flags.Register("select", "select features using the given feature selector(s)", selectFunc) 18 | } 19 | 20 | func selectFunc(ctx *flags.Context) error { 21 | h := newHash() 22 | pos, opt := flags.Flags() 23 | 24 | selectors := pos.Extra("selector", "feature selector (syntax: [feature_key][/[qualifier1][=regexp1]][/[qualifier2][=regexp2]]...)") 25 | 26 | seqinPath := new(string) 27 | *seqinPath = "-" 28 | if cmd.IsTerminal(os.Stdin.Fd()) { 29 | seqinPath 
= pos.String("seqin", "input sequence file (may be omitted if standard input is provided)") 30 | } 31 | 32 | nocache := opt.Switch(0, "no-cache", "do not use or create cache") 33 | seqoutPath := opt.String('o', "output", "-", "output sequence file (specifying `-` will force standard output)") 34 | format := opt.String('F', "format", "", "output file format (defaults to same as input)") 35 | strand := opt.String('s', "strand", "both", "strand to select features from (`both`, `forward`, or `reverse`)") 36 | invert := opt.Switch('v', "invert-match", "select features that do not match the given criteria") 37 | 38 | if err := ctx.Parse(pos, opt); err != nil { 39 | return err 40 | } 41 | 42 | sort.Strings(*selectors) 43 | 44 | filters := make([]gts.Filter, len(*selectors)) 45 | for i, selector := range *selectors { 46 | f, err := gts.Selector(selector) 47 | if err != nil { 48 | return ctx.Raise(fmt.Errorf("invalid selector syntax: %v", err)) 49 | } 50 | filters[i] = f 51 | } 52 | filter := gts.Or(filters...) 
53 | if *invert { 54 | filter = gts.Not(filter) 55 | } 56 | filter = gts.Or(gts.Key("source"), filter) 57 | 58 | switch *strand { 59 | case "forward": 60 | filter = gts.And(filter, gts.ForwardStrand) 61 | case "reverse": 62 | filter = gts.And(filter, gts.ReverseStrand) 63 | } 64 | 65 | d, err := newIODelegate(*seqinPath, *seqoutPath) 66 | if err != nil { 67 | return ctx.Raise(err) 68 | } 69 | defer d.Close() 70 | 71 | filetype := seqio.Detect(*seqoutPath) 72 | if *format != "" { 73 | filetype = seqio.ToFileType(*format) 74 | } 75 | 76 | if !*nocache { 77 | data := encodePayload([]tuple{ 78 | {"command", strings.Join(ctx.Name, "-")}, 79 | {"version", gts.Version.String()}, 80 | {"selectors", *selectors}, 81 | {"strand", *strand}, 82 | {"invert", *invert}, 83 | {"filetype", filetype}, 84 | }) 85 | 86 | ok, err := d.TryCache(h, data) 87 | if ok || err != nil { 88 | return ctx.Raise(err) 89 | } 90 | } 91 | 92 | scanner := seqio.NewAutoScanner(d) 93 | buffer := bufio.NewWriter(d) 94 | writer := seqio.NewWriter(buffer, filetype) 95 | 96 | for scanner.Scan() { 97 | seq := scanner.Value() 98 | ff := seq.Features().Filter(filter) 99 | seq = gts.WithFeatures(seq, ff) 100 | if _, err := writer.WriteSeq(seq); err != nil { 101 | return ctx.Raise(err) 102 | } 103 | 104 | if err := buffer.Flush(); err != nil { 105 | return ctx.Raise(err) 106 | } 107 | } 108 | 109 | if err := scanner.Err(); err != nil { 110 | return ctx.Raise(fmt.Errorf("encountered error in scanner: %v", err)) 111 | } 112 | 113 | return nil 114 | } 115 | --------------------------------------------------------------------------------