├── .github └── workflows │ └── go.yml ├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── align ├── align.go ├── align_test.go ├── aligner.go ├── const.go ├── mutation.go ├── partition.go ├── phaser.go ├── profile.go ├── seqbag.go ├── seqbag_test.go ├── sequence.go └── sequence_test.go ├── cmd ├── addgaps.go ├── addid.go ├── alleles.go ├── append.go ├── bootstrap.go ├── build.go ├── char.go ├── clean.go ├── cleanseqs.go ├── cleansites.go ├── clustal.go ├── codonalign.go ├── compress.go ├── compute.go ├── computedist.go ├── computeentropy.go ├── concat.go ├── consensus.go ├── dedup.go ├── diff.go ├── distboot.go ├── divide.go ├── draw.go ├── draw_biojs.go ├── draw_png.go ├── extract.go ├── fasta.go ├── identical.go ├── length.go ├── mask.go ├── maxchars.go ├── mutate.go ├── mutategaps.go ├── nalign.go ├── name.go ├── nexus.go ├── nseq.go ├── orf.go ├── paml.go ├── phase.go ├── phasent.go ├── phylip.go ├── pssm.go ├── random.go ├── rarefy.go ├── recomb.go ├── reformat.go ├── rename.go ├── replace.go ├── revcomp.go ├── rogue.go ├── root.go ├── sample.go ├── sampleseq.go ├── samplesites.go ├── seq.go ├── seqs.go ├── shuffle.go ├── sites.go ├── sort.go ├── split.go ├── stats.go ├── stats_alphabet.go ├── stats_gaps.go ├── stats_mutations.go ├── stats_mutations_list.go ├── subseq.go ├── subset.go ├── subsites.go ├── sw.go ├── swap.go ├── taxa.go ├── tnt.go ├── tntweightboot.go ├── tolower.go ├── toupper.go ├── translate.go ├── transpose.go ├── trim.go ├── unalign.go ├── version.go └── weightboot.go ├── distance ├── distance.go ├── dna │ ├── distance.go │ ├── distance_test.go │ ├── f81.go │ ├── f84.go │ ├── jc.go │ ├── k2p.go │ ├── pdist.go │ ├── rawdist.go │ ├── tn82.go │ └── tn93.go └── protein │ ├── lk.go │ ├── model.go │ └── utils.go ├── docs ├── api │ ├── append.md │ ├── codonalign.md │ ├── compress.md │ ├── consensus.md │ ├── dedup.md │ ├── diff.md │ ├── draw.md │ ├── identical.md │ ├── mask.md │ ├── orf.md │ ├── phase.md │ ├── reformat.md │ 
├── replace.md │ ├── revcomp.md │ ├── split.md │ ├── stats.md │ ├── subseq.md │ ├── subset.md │ ├── sw.md │ ├── tolower.md │ ├── toupper.md │ ├── translate.md │ ├── transpose.md │ ├── trim.md │ └── unalign.md ├── commands │ ├── addid.md │ ├── append.md │ ├── build.md │ ├── build_image_1.svg │ ├── build_image_2.svg │ ├── clean.md │ ├── codonalign.md │ ├── completion.md │ ├── compress.md │ ├── compute.md │ ├── concat.md │ ├── consensus.md │ ├── dedup.md │ ├── diff.md │ ├── divide.md │ ├── draw.md │ ├── draw.png │ ├── drawPng.png │ ├── extract.md │ ├── identical.md │ ├── mask.md │ ├── mutate.md │ ├── orf.md │ ├── phase.md │ ├── phasent.md │ ├── random.md │ ├── reformat.md │ ├── rename.md │ ├── replace.md │ ├── revcomp.md │ ├── sample.md │ ├── shuffle.md │ ├── sort.md │ ├── split.md │ ├── stats.md │ ├── subseq.md │ ├── subset.md │ ├── subsites.md │ ├── sw.md │ ├── tolower.md │ ├── toupper.md │ ├── translate.md │ ├── transpose.md │ ├── trim.md │ └── unalign.md └── index.md ├── draw ├── biojs.go ├── biojsdep.go ├── draw.go └── png.go ├── go.mod ├── go.sum ├── gutils └── gutils.go ├── images ├── logo.png └── logo.svg ├── io ├── clustal │ ├── lexer.go │ ├── parser.go │ ├── parser_test.go │ ├── tokens.go │ └── writer.go ├── countprofile │ └── countprofile.go ├── error.go ├── fasta │ ├── lexer.go │ ├── parser.go │ ├── parser_test.go │ ├── tokens.go │ ├── utils.go │ └── writer.go ├── nexus │ ├── nexus_lexer.go │ ├── nexus_parser.go │ ├── nexus_parser_test.go │ ├── nexus_token.go │ └── writer.go ├── paml │ └── writer.go ├── partition │ ├── lexer.go │ ├── parser.go │ └── tokens.go ├── phylip │ ├── lexer.go │ ├── parser.go │ ├── parser_test.go │ ├── tokens.go │ ├── utils.go │ └── writer.go ├── stockholm │ ├── stockholm_lexer.go │ ├── stockholm_parser.go │ ├── stockholm_parser_test.go │ ├── stockholm_token.go │ └── writer.go └── utils │ ├── readaligns.go │ ├── readfiles.go │ └── writefiles.go ├── main.go ├── models ├── dna │ ├── eigen_test.go │ ├── f81.go │ ├── f84.go │ ├── 
# Build, test and release rules for goalign.
#
# VERSION is derived from git and injected into the binary through
# -ldflags "-X ${VERSION_PACKAGE}=${VERSION}".
GO_EXECUTABLE := go
VERSION := $(shell git describe --abbrev=10 --dirty --always --tags)
VERSION_PACKAGE := github.com/evolbioinfo/goalign/version.Version
NAME := goalign
PACKAGE:=github.com/evolbioinfo/goalign
CGO_ENABLED:=0

# None of these targets produces a file named after itself, so all of them
# are declared phony (a stray file called "build" or "test" must not
# short-circuit the rule).
.PHONY: all dep build install test testcommands
.PHONY: deploy deploydir deploywinamd deploylinuxamd deploydarwinamd deploydarwinarm

all: dep build test

dep:
	${GO_EXECUTABLE} get .

build:
	CGO_ENABLED=${CGO_ENABLED} ${GO_EXECUTABLE} build -o ${NAME} -ldflags "-X ${VERSION_PACKAGE}=${VERSION}" ${PACKAGE}

install: testcommands
	rm -f ${GOPATH}/bin/${NAME}
	CGO_ENABLED=${CGO_ENABLED} ${GO_EXECUTABLE} install -ldflags "-X ${VERSION_PACKAGE}=${VERSION}" ${PACKAGE}

test: dep
	CGO_ENABLED=${CGO_ENABLED} ${GO_EXECUTABLE} test ${PACKAGE}/...

testcommands:
	bash ./test.sh

# Only prerequisites that have a rule below are listed. The former
# deploywin386/deploylinux386 prerequisites had no rule in this Makefile,
# which made "make deploy" stop with "No rule to make target".
deploy: deploywinamd deploylinuxamd deploydarwinamd deploydarwinarm
	tar -czvf deploy/${VERSION}.tar.gz --directory="deploy" ${VERSION}

deploydir:
	mkdir -p deploy/${VERSION}

deploywinamd: dep deploydir
	env GOOS=windows GOARCH=amd64 CGO_ENABLED=${CGO_ENABLED} ${GO_EXECUTABLE} build -o deploy/${VERSION}/${NAME}_${VERSION}_amd64.exe -ldflags "-X ${VERSION_PACKAGE}=${VERSION}" ${PACKAGE}

deploylinuxamd: dep deploydir
	env GOOS=linux GOARCH=amd64 CGO_ENABLED=${CGO_ENABLED} ${GO_EXECUTABLE} build -o deploy/${VERSION}/${NAME}_${VERSION}_amd64_linux -ldflags "-X ${VERSION_PACKAGE}=${VERSION}" ${PACKAGE}

deploydarwinamd: dep deploydir
	env GOOS=darwin GOARCH=amd64 CGO_ENABLED=${CGO_ENABLED} ${GO_EXECUTABLE} build -o deploy/${VERSION}/${NAME}_${VERSION}_amd64_darwin -ldflags "-X ${VERSION_PACKAGE}=${VERSION}" ${PACKAGE}

deploydarwinarm: dep deploydir
	env GOOS=darwin GOARCH=arm64 CGO_ENABLED=${CGO_ENABLED} ${GO_EXECUTABLE} build -o deploy/${VERSION}/${NAME}_${VERSION}_arm64_darwin -ldflags "-X ${VERSION_PACKAGE}=${VERSION}" ${PACKAGE}
-------------------------------------------------------------------------------- /align/seqbag_test.go: -------------------------------------------------------------------------------- 1 | package align 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | func Test_seqbag_UniqueCharacters(t *testing.T) { 9 | type fields struct { 10 | seqmap map[string]*seq 11 | seqs []*seq 12 | ignoreidentical int 13 | alphabet int 14 | } 15 | tests := []struct { 16 | name string 17 | fields fields 18 | wantChars []uint8 19 | }{ 20 | {name: "t1", 21 | fields: fields{seqmap: nil, 22 | seqs: []*seq{ 23 | {sequence: []uint8("ACGTACGTACGT")}, 24 | {sequence: []uint8("ACGTAC*TACGT")}}}, 25 | wantChars: []uint8("*ACGT")}, 26 | } 27 | for _, tt := range tests { 28 | t.Run(tt.name, func(t *testing.T) { 29 | sb := &seqbag{ 30 | seqmap: tt.fields.seqmap, 31 | seqs: tt.fields.seqs, 32 | ignoreidentical: tt.fields.ignoreidentical, 33 | alphabet: tt.fields.alphabet, 34 | } 35 | if gotChars := sb.UniqueCharacters(); !reflect.DeepEqual(gotChars, tt.wantChars) { 36 | t.Errorf("seqbag.UniqueCharacters() = %v, want %v", gotChars, tt.wantChars) 37 | } 38 | }) 39 | } 40 | } 41 | 42 | func Test_seqbag_ReverseComplement(t *testing.T) { 43 | type fields struct { 44 | seqmap map[string]*seq 45 | seqs []*seq 46 | ignoreidentical int 47 | alphabet int 48 | } 49 | tests := []struct { 50 | name string 51 | fields fields 52 | wantseq []uint8 53 | }{ 54 | {name: "t1", 55 | fields: fields{seqmap: nil, 56 | seqs: []*seq{{sequence: []uint8("ATUGCYRSWKMBDHVN*.")}}, 57 | ignoreidentical: IGNORE_NONE, 58 | alphabet: NUCLEOTIDS}, 59 | wantseq: []uint8(".*NBDHVKMWSYRGCAAT")}, 60 | {name: "t2", 61 | fields: fields{seqmap: nil, 62 | seqs: []*seq{{sequence: []uint8("TAACGRYSWMKVHDBN*.")}}, 63 | ignoreidentical: IGNORE_NONE, 64 | alphabet: NUCLEOTIDS}, 65 | wantseq: []uint8(".*NVHDBMKWSRYCGTTA")}, 66 | } 67 | for _, tt := range tests { 68 | t.Run(tt.name, func(t *testing.T) { 69 | sb := &seqbag{ 70 | seqmap: 
tt.fields.seqmap, 71 | seqs: tt.fields.seqs, 72 | ignoreidentical: tt.fields.ignoreidentical, 73 | alphabet: tt.fields.alphabet, 74 | } 75 | sb.AutoAlphabet() 76 | sb.ReverseComplement() 77 | s, _ := sb.GetSequenceCharById(0) 78 | if !reflect.DeepEqual(s, tt.wantseq) { 79 | t.Errorf("seqbag.ReverseComplement() = %v, want %v", string(s), string(tt.wantseq)) 80 | } 81 | }) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /cmd/addgaps.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/evolbioinfo/goalign/io/utils" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var gapnbseqs float64 11 | 12 | // rogueCmd represents the rogue command 13 | var addgapsCmd = &cobra.Command{ 14 | Use: "gaps", 15 | Short: "Adds gaps uniformly in an input alignment", 16 | Long: `Adds gaps uniformly in an input alignment. 
17 | 18 | Example: 19 | goalign mutate gaps -i align.fa -n 0.5 -r 0.5 20 | `, 21 | RunE: func(cmd *cobra.Command, args []string) (err error) { 22 | var aligns *align.AlignChannel 23 | var f utils.StringWriterCloser 24 | 25 | if aligns, err = readalign(infile); err != nil { 26 | io.LogError(err) 27 | return 28 | } 29 | if f, err = utils.OpenWriteFile(mutateOutput); err != nil { 30 | io.LogError(err) 31 | return 32 | } 33 | defer utils.CloseWriteFile(f, mutateOutput) 34 | 35 | for al := range aligns.Achan { 36 | al.AddGaps(mutateRate, gapnbseqs) 37 | writeAlign(al, f) 38 | } 39 | 40 | if aligns.Err != nil { 41 | err = aligns.Err 42 | io.LogError(err) 43 | } 44 | return 45 | }, 46 | } 47 | 48 | func init() { 49 | mutateCmd.AddCommand(addgapsCmd) 50 | addgapsCmd.PersistentFlags().Float64VarP(&gapnbseqs, "prop-seq", "n", 0.5, "Proportion of the sequences in which to add gaps") 51 | } 52 | -------------------------------------------------------------------------------- /cmd/addid.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/evolbioinfo/goalign/io/utils" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var addIdOutput string 11 | var addIdName string 12 | var addIdRight bool 13 | 14 | // addidCmd represents the addid command 15 | var addidCmd = &cobra.Command{ 16 | Use: "addid", 17 | Short: "Adds a string to each sequence identifier of the input alignment", 18 | Long: `This command adds an indentifier (string) to all sequences of an input alignment. 19 | 20 | The string may be added to the left or to the right of each sequence name. 
21 | `, 22 | RunE: func(cmd *cobra.Command, args []string) (err error) { 23 | var f utils.StringWriterCloser 24 | 25 | if f, err = utils.OpenWriteFile(addIdOutput); err != nil { 26 | io.LogError(err) 27 | return 28 | } 29 | defer utils.CloseWriteFile(f, addIdOutput) 30 | 31 | if unaligned { 32 | var seqs align.SeqBag 33 | 34 | if seqs, err = readsequences(infile); err != nil { 35 | io.LogError(err) 36 | return 37 | } 38 | seqs.AppendSeqIdentifier(addIdName, addIdRight) 39 | writeSequences(seqs, f) 40 | } else { 41 | 42 | var aligns *align.AlignChannel 43 | if aligns, err = readalign(infile); err != nil { 44 | io.LogError(err) 45 | return 46 | } 47 | for al := range aligns.Achan { 48 | al.AppendSeqIdentifier(addIdName, addIdRight) 49 | writeAlign(al, f) 50 | } 51 | 52 | if aligns.Err != nil { 53 | err = aligns.Err 54 | io.LogError(err) 55 | } 56 | } 57 | return 58 | }, 59 | } 60 | 61 | func init() { 62 | RootCmd.AddCommand(addidCmd) 63 | addidCmd.PersistentFlags().StringVarP(&addIdOutput, "out-align", "o", "stdout", "Renamed alignment output file") 64 | addidCmd.PersistentFlags().StringVarP(&addIdName, "name", "n", "none", "String to add to sequence names") 65 | addidCmd.PersistentFlags().BoolVarP(&addIdRight, "right", "r", false, "Adds the String on the right of sequence names (otherwise, adds to left)") 66 | addidCmd.PersistentFlags().BoolVar(&unaligned, "unaligned", false, "Considers sequences as unaligned and format fasta (phylip, nexus,... 
options are ignored)") 67 | } 68 | -------------------------------------------------------------------------------- /cmd/alleles.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/evolbioinfo/goalign/align" 7 | "github.com/evolbioinfo/goalign/io" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | // allelesCmd represents the alleles command 12 | var allelesCmd = &cobra.Command{ 13 | Use: "alleles", 14 | Short: "Prints the average number of alleles per sites of the alignment", 15 | Long: `Prints the average number of alleles per sites of the alignment.`, 16 | RunE: func(cmd *cobra.Command, args []string) (err error) { 17 | var aligns *align.AlignChannel 18 | 19 | if aligns, err = readalign(infile); err != nil { 20 | io.LogError(err) 21 | return 22 | } 23 | for al := range aligns.Achan { 24 | fmt.Println(al.AvgAllelesPerSite()) 25 | } 26 | 27 | if aligns.Err != nil { 28 | err = aligns.Err 29 | io.LogError(err) 30 | } 31 | return 32 | }, 33 | } 34 | 35 | func init() { 36 | statsCmd.AddCommand(allelesCmd) 37 | } 38 | -------------------------------------------------------------------------------- /cmd/append.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/evolbioinfo/goalign/io/utils" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var appendout string 11 | 12 | // appendCmd represents the append command 13 | var appendCmd = &cobra.Command{ 14 | Use: "append", 15 | Short: "Append alignments to an input alignment", 16 | Long: `Append alignments to an input alignment. 17 | 18 | This commands adds the sequences of a set of alignments to a reference alignement 19 | specified by -i. 20 | 21 | If sequences do not have the same length than the reference alignment, then returns an error. 
22 | 23 | If format is phylip, it may contain several alignments in one file. 24 | Then we can append all of them at once: 25 | goalign append -i refalign.phy aligns.phy 26 | 27 | If format is Fasta, several alignments may be given in the form: 28 | goalign append -i align.fasta others*.fasta 29 | 30 | `, 31 | RunE: func(cmd *cobra.Command, args []string) (err error) { 32 | var compAligns *align.AlignChannel 33 | var refAligns *align.AlignChannel = nil 34 | var refAlign align.Alignment = nil 35 | 36 | var f utils.StringWriterCloser 37 | 38 | if infile != "none" { 39 | if refAligns, err = readalign(infile); err != nil { 40 | io.LogError(err) 41 | return 42 | } 43 | for al := range refAligns.Achan { 44 | if refAlign == nil { 45 | refAlign = al 46 | } else { 47 | if err = refAlign.Append(al); err != nil { 48 | io.LogError(err) 49 | return 50 | } 51 | } 52 | if refAligns.Err != nil { 53 | err = refAligns.Err 54 | io.LogError(err) 55 | return 56 | } 57 | } 58 | } 59 | 60 | for _, otherfile := range args { 61 | if compAligns, err = readalign(otherfile); err != nil { 62 | io.LogError(err) 63 | return 64 | } 65 | for al := range compAligns.Achan { 66 | if err = refAlign.Append(al); err != nil { 67 | io.LogError(err) 68 | return 69 | } 70 | } 71 | if compAligns.Err != nil { 72 | err = compAligns.Err 73 | io.LogError(err) 74 | return 75 | } 76 | } 77 | 78 | if f, err = utils.OpenWriteFile(appendout); err != nil { 79 | io.LogError(err) 80 | return 81 | } 82 | writeAlign(refAlign, f) 83 | utils.CloseWriteFile(f, appendout) 84 | 85 | return 86 | }, 87 | } 88 | 89 | func init() { 90 | RootCmd.AddCommand(appendCmd) 91 | appendCmd.PersistentFlags().StringVarP(&appendout, "output", "o", "stdout", "Alignment output file") 92 | } 93 | -------------------------------------------------------------------------------- /cmd/build.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | 
// buildCmd represents the build command 8 | var buildCmd = &cobra.Command{ 9 | Use: "build", 10 | Short: "Command to build bootstrap replicates", 11 | Long: `This command builds bootstrap replicates from an input alignment (fasta or phylip): 12 | 13 | 1. goalign build seqboot : Builds bootstrap alignments from an input alignment (nt or aa). Sequence order may be shuffled with option -S. Output alignments may be written in compressed files (--gz) and/or added in a tar archive (--tar). 14 | 2. goalign build distboot: Builds bootstrap distance matrices based on different models, from an input alignment (nt only). It builds n bootstrap alignments and computes a distance matrix for each replicate. All distance matrices are written in the output file. If the input alignment file contains several alignments, it will take the first one only. The following models for distance computation are available: 15 | - pdist 16 | - jc : Juke-Cantor 17 | - k2p : Kimura 2 Parameters 18 | - f81 : Felsenstein 81 19 | - f84 : Felsenstein 84 20 | - tn93 : Tamura and Nei 1993 21 | `, 22 | } 23 | 24 | func init() { 25 | RootCmd.AddCommand(buildCmd) 26 | } 27 | -------------------------------------------------------------------------------- /cmd/char.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/spf13/cobra" 7 | ) 8 | 9 | var charstatpersites bool 10 | var charstatpersequences bool 11 | var charstatonly string 12 | 13 | // charCmd represents the char command 14 | var charCmd = &cobra.Command{ 15 | Use: "char", 16 | Short: "Prints frequence of different characters (aa/nt) of the alignment", 17 | Long: `Prints frequence of different characters (aa/nt) of the alignment. 18 | May take a Phylip of Fasta input alignment. 
19 | 20 | Example of usages: 21 | 22 | goalign stats char -i align.phylip -p 23 | goalign stats char -i align.fasta 24 | `, 25 | RunE: func(cmd *cobra.Command, args []string) (err error) { 26 | var aligns *align.AlignChannel 27 | 28 | if aligns, err = readalign(infile); err != nil { 29 | io.LogError(err) 30 | return 31 | } 32 | 33 | for al := range aligns.Achan { 34 | if aligns.Err != nil { 35 | err = aligns.Err 36 | io.LogError(err) 37 | return 38 | } 39 | if charstatpersites { 40 | err = printSiteCharStats(al, charstatonly) 41 | } else if charstatpersequences { 42 | err = printSequenceCharStats(al, charstatonly) 43 | } else { 44 | printCharStats(al, charstatonly) 45 | } 46 | } 47 | return 48 | }, 49 | } 50 | 51 | func init() { 52 | statsCmd.AddCommand(charCmd) 53 | charCmd.PersistentFlags().BoolVar(&charstatpersites, "per-sites", false, "Prints char statistics per alignment site (priority over --per-sequences)") 54 | charCmd.PersistentFlags().BoolVar(&charstatpersequences, "per-sequences", false, "Prints char statistics per alignment sequences") 55 | charCmd.PersistentFlags().StringVar(&charstatonly, "only", "*", "Prints only this character counts. * means all characters") 56 | } 57 | -------------------------------------------------------------------------------- /cmd/clean.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | var cleanOutput string 8 | var cleanCutoff float64 9 | var cleanQuiet bool 10 | var cleanChar string 11 | var cleanIgnoreCase bool 12 | var cleanIgnoreGaps bool 13 | var cleanIgnoreNs bool 14 | 15 | // cleanCmd represents the clean command 16 | var cleanCmd = &cobra.Command{ 17 | Use: "clean", 18 | Short: "Removes gap sites or sequences", 19 | Long: `Removes sites or sequences constituted of gaps 20 | 21 | Removes sites or sequences constitued of >= cutoff gap sites. 
22 | 23 | Exception for a cutoff of 0: removes sites constitued of > 0 gap sites. 24 | 25 | Examples: 26 | - With a cutoff of 0.5: a site with 5 gaps over 10 sequences will be removed; 27 | - With a cutoff of 0.5: a site with 4 gaps over 10 sequences will not be removed; 28 | - With a cutoff of 0.0 a site with 1 gap over 10 sequences will be removed. 29 | 30 | If cutoff is <0 or >1, it will be considered as 0, which means that every site/sequence with at least 1 gap 31 | will be removed. 32 | `, 33 | } 34 | 35 | func init() { 36 | RootCmd.AddCommand(cleanCmd) 37 | cleanCmd.PersistentFlags().StringVarP(&cleanOutput, "output", "o", "stdout", "Cleaned alignment output file") 38 | cleanCmd.PersistentFlags().Float64VarP(&cleanCutoff, "cutoff", "c", 0, "Cutoff for deletion : 0 remove sites/sequences with > 0 given char, 1 remove sites/sequences with 100% given character)") 39 | cleanCmd.PersistentFlags().StringVar(&cleanChar, "char", "GAP", "The character the cutoff is applied to. May be GAP, MAJ, or any other character") 40 | cleanCmd.PersistentFlags().BoolVar(&cleanIgnoreCase, "ignore-case", false, "Ignore case of given character (--char) if non special character (GAP/-)") 41 | cleanCmd.PersistentFlags().BoolVar(&cleanIgnoreGaps, "ignore-gaps", false, "Ignore gaps for the % computation") 42 | cleanCmd.PersistentFlags().BoolVar(&cleanIgnoreNs, "ignore-n", false, "Ignore Ns for the % computation") 43 | cleanCmd.PersistentFlags().BoolVarP(&cleanQuiet, "quiet", "q", false, "Do not print results on stderr") 44 | } 45 | -------------------------------------------------------------------------------- /cmd/cleanseqs.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/evolbioinfo/goalign/align" 7 | "github.com/evolbioinfo/goalign/io" 8 | "github.com/evolbioinfo/goalign/io/utils" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | // cleanseqsCmd represents the cleanseqs command 13 | var 
cleanseqsCmd = &cobra.Command{ 14 | Use: "seqs", 15 | Short: "Removes sequences with gaps", 16 | Long: `Removes sequences constituted of gaps 17 | 18 | Removes sequences constitued of >= cutoff gap sites. 19 | 20 | Exception for a cutoff of 0: removes sequencs constitued of > 0 gap sites. 21 | 22 | Examples: 23 | - With a cutoff of 0.5: a sequence with 5 gaps over 10 sites will be removed; 24 | - With a cutoff of 0.5: a sequence with 4 gaps over 10 sites will not be removed; 25 | - With a cutoff of 0.0 a site sequence 1 gap over 10 sites will be removed. 26 | 27 | If cutoff is <0 or >1, it will be considered as 0, which means that every sequence with at least 1 gap 28 | will be removed.`, 29 | RunE: func(cmd *cobra.Command, args []string) (err error) { 30 | var aligns *align.AlignChannel 31 | var f utils.StringWriterCloser 32 | 33 | if aligns, err = readalign(infile); err != nil { 34 | io.LogError(err) 35 | return 36 | } 37 | if f, err = utils.OpenWriteFile(cleanOutput); err != nil { 38 | io.LogError(err) 39 | return 40 | } 41 | defer utils.CloseWriteFile(f, cleanOutput) 42 | 43 | i := 0 44 | for al := range aligns.Achan { 45 | before := al.NbSequences() 46 | if cleanChar == string(align.GAP) || cleanChar == "GAP" { 47 | al.RemoveGapSeqs(cleanCutoff, cleanIgnoreNs) 48 | } else { 49 | //single character 50 | c := []uint8(cleanChar) 51 | if len(c) != 1 { 52 | err = fmt.Errorf("--char should be a single character") 53 | io.LogError(err) 54 | return 55 | } 56 | al.RemoveCharacterSeqs(c[0], cleanCutoff, cleanIgnoreCase, cleanIgnoreGaps, cleanIgnoreNs) 57 | } 58 | after := al.NbSequences() 59 | writeAlign(al, f) 60 | if !cleanQuiet { 61 | io.PrintMessage(fmt.Sprintf("Alignment (%d) #seqs before cleaning=%d", i, before)) 62 | io.PrintMessage(fmt.Sprintf("Alignment (%d) #seqs after cleaning=%d", i, after)) 63 | io.PrintMessage(fmt.Sprintf("Alignment (%d) removed sequences=%d", i, before-after)) 64 | } 65 | } 66 | 67 | if aligns.Err != nil { 68 | err = aligns.Err 69 | 
io.LogError(err) 70 | } 71 | return 72 | }, 73 | } 74 | 75 | func init() { 76 | cleanCmd.AddCommand(cleanseqsCmd) 77 | } 78 | -------------------------------------------------------------------------------- /cmd/clustal.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/evolbioinfo/goalign/io/utils" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | // clustalCmd : to reformat in clustal format 11 | var clustalCmd = &cobra.Command{ 12 | Use: "clustal", 13 | Short: "Reformats an input alignment into Clustal format", 14 | Long: `Reformats an alignment into Clustal format. 15 | It may take a Phylip, Fasta, Nexus, or Clustal input alignment. 16 | 17 | Example of usage: 18 | 19 | goalign reformat clustal -i align.phylip -p 20 | goalign reformat clustal -i align.fasta 21 | 22 | `, 23 | RunE: func(cmd *cobra.Command, args []string) (err error) { 24 | var aligns *align.AlignChannel 25 | var f utils.StringWriterCloser 26 | 27 | if aligns, err = readalign(infile); err != nil { 28 | io.LogError(err) 29 | return 30 | } 31 | if f, err = utils.OpenWriteFile(reformatOutput); err != nil { 32 | io.LogError(err) 33 | return 34 | } 35 | defer utils.CloseWriteFile(f, reformatOutput) 36 | 37 | a := <-aligns.Achan 38 | if aligns.Err != nil { 39 | err = aligns.Err 40 | io.LogError(err) 41 | return 42 | } 43 | if reformatCleanNames { 44 | a.CleanNames(nil) 45 | } 46 | writeAlignClustal(a, f) 47 | 48 | return 49 | }, 50 | } 51 | 52 | func init() { 53 | reformatCmd.AddCommand(clustalCmd) 54 | } 55 | -------------------------------------------------------------------------------- /cmd/codonalign.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "bufio" 5 | goio "io" 6 | 7 | "github.com/evolbioinfo/goalign/align" 8 | "github.com/evolbioinfo/goalign/io" 9 | 
"github.com/evolbioinfo/goalign/io/fasta" 10 | "github.com/evolbioinfo/goalign/io/utils" 11 | "github.com/spf13/cobra" 12 | ) 13 | 14 | var codonAlignOutput string 15 | var nucleotideFasta string 16 | 17 | // codonAlignCmd 18 | var codonAlignCmd = &cobra.Command{ 19 | Use: "codonalign", 20 | Short: "Aligns a given nt fasta file using a corresponding aa alignment", 21 | Long: `Aligns a given nt fasta file using a corresponding aa alignment. 22 | 23 | If the input alignment is not amino acid, then returns an error. 24 | If the given fasta file is not nucleotides then returns an error. 25 | 26 | Warning: It does not check that the amino acid sequence is a good 27 | translation of the nucleotide sequence, but just add gaps to the 28 | nucleotide sequence where needed. 29 | 30 | Once gaps are added, if the nucleotide alignment length does not match 31 | the protein alignment length * 3, returns an error. 32 | `, 33 | RunE: func(cmd *cobra.Command, args []string) (err error) { 34 | var aligns *align.AlignChannel 35 | var f utils.StringWriterCloser 36 | var ntseqsf *bufio.Reader 37 | var toclose goio.Closer 38 | var ntseqs align.SeqBag 39 | var codonAl align.Alignment 40 | 41 | // Read input aa alignment 42 | if aligns, err = readalign(infile); err != nil { 43 | io.LogError(err) 44 | return 45 | } 46 | 47 | // Read input fasta nt sequences 48 | if toclose, ntseqsf, err = utils.GetReader(nucleotideFasta); err != nil { 49 | io.LogError(err) 50 | return 51 | } 52 | defer toclose.Close() 53 | 54 | if ntseqs, err = fasta.NewParser(ntseqsf).ParseUnalign(); err != nil { 55 | io.LogError(err) 56 | return 57 | } 58 | 59 | // Open output file 60 | if f, err = utils.OpenWriteFile(codonAlignOutput); err != nil { 61 | io.LogError(err) 62 | return 63 | } 64 | defer utils.CloseWriteFile(f, codonAlignOutput) 65 | 66 | for al := range aligns.Achan { 67 | if codonAl, err = al.CodonAlign(ntseqs); err != nil { 68 | io.LogError(err) 69 | return 70 | } 71 | writeAlign(codonAl, f) 72 | } 73 | 
74 | if aligns.Err != nil { 75 | err = aligns.Err 76 | io.LogError(err) 77 | } 78 | return 79 | }, 80 | } 81 | 82 | func init() { 83 | RootCmd.AddCommand(codonAlignCmd) 84 | codonAlignCmd.PersistentFlags().StringVarP(&codonAlignOutput, "output", "o", "stdout", "Output codon aligned file") 85 | codonAlignCmd.PersistentFlags().StringVarP(&nucleotideFasta, "fasta", "f", "stdin", "Input nucleotide Fasta file to be codon aligned") 86 | } 87 | -------------------------------------------------------------------------------- /cmd/compress.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/evolbioinfo/goalign/align" 7 | "github.com/evolbioinfo/goalign/io" 8 | "github.com/evolbioinfo/goalign/io/utils" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | var compressOutput string 13 | var compressWeightOutput string 14 | 15 | var compressCmd = &cobra.Command{ 16 | Use: "compress", 17 | Short: "Removes identical patterns/sites from an input alignment", 18 | Long: `Removes identical patterns/sites from an input alignment 19 | 20 | And prints in the weight file the number of occurence of each pattern 21 | 22 | Example: 23 | 24 | ali.phy 25 | 1 GGGGGGGGGGGGGGGGGGGG 26 | 2 TTTTTTTTTTTTTTTTTTTT 27 | 3 GGGGGGGGGGCCCCCCCCCC 28 | 4 AAAAAAAAAAAAAAAAAAAA 29 | 30 | goalign compress -i ali.phy will produce: 31 | 1 GG 32 | 2 TT 33 | 3 GC 34 | 4 AA 35 | 36 | and weight file: 37 | 10 38 | 10 39 | `, 40 | RunE: func(cmd *cobra.Command, args []string) (err error) { 41 | var aligns *align.AlignChannel 42 | var f, wf utils.StringWriterCloser 43 | 44 | if aligns, err = readalign(infile); err != nil { 45 | io.LogError(err) 46 | return 47 | } 48 | if f, err = utils.OpenWriteFile(compressOutput); err != nil { 49 | io.LogError(err) 50 | return 51 | } 52 | defer utils.CloseWriteFile(f, compressOutput) 53 | 54 | if wf, err = utils.OpenWriteFile(compressWeightOutput); err != nil { 55 | io.LogError(err) 56 | return 
57 | } 58 | defer utils.CloseWriteFile(wf, compressWeightOutput) 59 | 60 | for al := range aligns.Achan { 61 | var w []int 62 | if w = al.Compress(); err != nil { 63 | io.LogError(err) 64 | return 65 | } else { 66 | writeAlign(al, f) 67 | writeWeights(w, wf) 68 | } 69 | } 70 | 71 | if aligns.Err != nil { 72 | err = aligns.Err 73 | io.LogError(err) 74 | } 75 | return 76 | }, 77 | } 78 | 79 | func init() { 80 | compressCmd.PersistentFlags().StringVarP(&compressOutput, "output", "o", "stdout", "Compressed output alignment file") 81 | compressCmd.PersistentFlags().StringVar(&compressWeightOutput, "weight-out", "none", "Pattern weight output file") 82 | RootCmd.AddCommand(compressCmd) 83 | } 84 | 85 | func writeWeights(weights []int, f utils.StringWriterCloser) { 86 | for _, w := range weights { 87 | fmt.Fprintf(f, "%d\n", w) 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /cmd/compute.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | var computeCmd = &cobra.Command{ 8 | Use: "compute", 9 | Short: "Different computations (distances, entropy, etc.)", 10 | Long: `Different computations (distances, entropy, etc.) 
11 | `, 12 | } 13 | 14 | func init() { 15 | RootCmd.AddCommand(computeCmd) 16 | } 17 | -------------------------------------------------------------------------------- /cmd/computeentropy.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | 7 | "github.com/evolbioinfo/goalign/align" 8 | "github.com/evolbioinfo/goalign/io" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | var entropyAverage bool 13 | var entropyRemoveGaps bool 14 | 15 | // entropyCmd represents the entropy command 16 | var entropyCmd = &cobra.Command{ 17 | Use: "entropy", 18 | Short: "Computes entropy of a given alignment", 19 | Long: `Computes entropy of a given alignment. 20 | 21 | Example: 22 | goalign compute entropy -i alignment.fa 23 | goalign compute entropy -i alignment.phy -p 24 | 25 | It is possible to compute the average entropy: 26 | goalign compute entropy -i alignment.phy -p -a 27 | 28 | Which will print one average entropy per alignment in the input file: 29 | Alignment \t AvgEntropy 30 | 31 | Otherwise, it will print one entropy per alignment site, in a tab separated form: 32 | Alignment \t Site \t Entropy 33 | 34 | the computation does not take into account the following characters: 35 | -> '*' 36 | -> '-' (if --remove-gaps is given) 37 | 38 | If a site is made fully of '-' (if --remove-gaps is given) or '*', then its entropy will be "NaN", 39 | and it will not be taken into account in the average. 
40 | `, 41 | RunE: func(cmd *cobra.Command, args []string) (err error) { 42 | var aligns *align.AlignChannel 43 | var e float64 44 | 45 | if aligns, err = readalign(infile); err != nil { 46 | io.LogError(err) 47 | return 48 | } 49 | 50 | nb := 0 51 | if entropyAverage { 52 | fmt.Println("Alignment\tAvgEntropy") 53 | } else { 54 | fmt.Println("Alignment\tSite\tEntropy") 55 | } 56 | for align := range aligns.Achan { 57 | avg := 0.0 58 | total := 0 59 | for i := 0; i < align.Length(); i++ { 60 | if e, err = align.Entropy(i, entropyRemoveGaps); err != nil { 61 | io.LogError(err) 62 | return 63 | } else { 64 | if entropyAverage { 65 | if !math.IsNaN(e) { 66 | avg += e 67 | total++ 68 | } 69 | } else { 70 | fmt.Printf("%d\t%d\t%.3f\n", nb, i, e) 71 | } 72 | } 73 | } 74 | if entropyAverage { 75 | fmt.Printf("%d\t%.3f\n", nb, avg/float64(total)) 76 | } 77 | nb++ 78 | } 79 | 80 | if aligns.Err != nil { 81 | err = aligns.Err 82 | io.LogError(err) 83 | } 84 | return 85 | }, 86 | } 87 | 88 | func init() { 89 | computeCmd.AddCommand(entropyCmd) 90 | entropyCmd.PersistentFlags().BoolVarP(&entropyAverage, "average", "a", false, "Compute only the average entropy of input alignment") 91 | entropyCmd.PersistentFlags().BoolVarP(&entropyRemoveGaps, "remove-gaps", "g", false, "If true, then do not take into account gaps in the computation") 92 | } 93 | -------------------------------------------------------------------------------- /cmd/consensus.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/evolbioinfo/goalign/io/utils" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var consensusOutput string 11 | var consensusExcludeGaps bool 12 | var consensusIgnoreGaps bool 13 | var consensusIgnoreNs bool 14 | 15 | // consensusCmd represents the consensus command 16 | var consensusCmd = &cobra.Command{ 17 | Use: "consensus", 18 | Short:
"Compute the majority consensus of an input alignment", 19 | Long: `Compute the majority consensus of an input alignment. 20 | 21 | For example: 22 | 23 | goalign consensus -i align.phylip -p 24 | 25 | It will generate a single sequence whose sites will correspond to the 26 | majority characters at each positions (including gaps). 27 | 28 | If several alignment are present in the input file (for phylip) 29 | then will output several consensus sequences. 30 | `, 31 | RunE: func(cmd *cobra.Command, args []string) (err error) { 32 | var aligns *align.AlignChannel 33 | var f utils.StringWriterCloser 34 | 35 | consensusIgnoreGaps = consensusIgnoreGaps || consensusExcludeGaps 36 | 37 | if aligns, err = readalign(infile); err != nil { 38 | io.LogError(err) 39 | return 40 | } 41 | 42 | if f, err = utils.OpenWriteFile(consensusOutput); err != nil { 43 | io.LogError(err) 44 | return 45 | } 46 | defer utils.CloseWriteFile(f, consensusOutput) 47 | 48 | for al := range aligns.Achan { 49 | cons := al.Consensus(consensusIgnoreGaps, consensusIgnoreNs) 50 | writeAlign(cons, f) 51 | } 52 | 53 | if aligns.Err != nil { 54 | err = aligns.Err 55 | io.LogError(err) 56 | } 57 | return 58 | }, 59 | } 60 | 61 | func init() { 62 | RootCmd.AddCommand(consensusCmd) 63 | consensusCmd.PersistentFlags().StringVarP(&consensusOutput, "output", "o", "stdout", "Alignment output file") 64 | consensusCmd.PersistentFlags().BoolVar(&consensusIgnoreGaps, "ignore-gaps", false, "Ignore gaps in the majority computation") 65 | consensusCmd.PersistentFlags().BoolVar(&consensusExcludeGaps, "exclude-gaps", false, "Ignore gaps in the majority computation (for backward compatibility, will be removed in future releases)") 66 | consensusCmd.PersistentFlags().BoolVar(&consensusIgnoreNs, "ignore-n", false, "Ignore Ns in the majority computation") 67 | } 68 | -------------------------------------------------------------------------------- /cmd/draw.go: 
-------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | var drawOutput string 8 | 9 | // drawCmd represents the draw command 10 | var drawCmd = &cobra.Command{ 11 | Use: "draw", 12 | Short: "Draw alignments", 13 | Long: `Draw alignments`, 14 | } 15 | 16 | func init() { 17 | RootCmd.AddCommand(drawCmd) 18 | drawCmd.PersistentFlags().StringVarP(&drawOutput, "output", "o", "stdout", "Alignment draw output file") 19 | } 20 | -------------------------------------------------------------------------------- /cmd/draw_biojs.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "path/filepath" 7 | 8 | "github.com/evolbioinfo/goalign/align" 9 | "github.com/evolbioinfo/goalign/draw" 10 | "github.com/evolbioinfo/goalign/io" 11 | "github.com/evolbioinfo/goalign/io/utils" 12 | "github.com/spf13/cobra" 13 | ) 14 | 15 | // biojsCmd represents the biojs command 16 | var biojsCmd = &cobra.Command{ 17 | Use: "biojs", 18 | Short: "Draw alignments in html file using msaviewer from biojs", 19 | Long: `Draw alignments in html file using msaviewer from biojs 20 | 21 | See http://msa.biojs.net/ for more informations 22 | `, 23 | RunE: func(cmd *cobra.Command, args []string) (err error) { 24 | var l draw.AlignLayout 25 | var aligns *align.AlignChannel 26 | var f utils.StringWriterCloser 27 | 28 | if aligns, err = readalign(infile); err != nil { 29 | io.LogError(err) 30 | return 31 | } 32 | 33 | nalign := 0 34 | for al := range aligns.Achan { 35 | fname := drawOutput 36 | // Add an index to file output name 37 | // if there are several alignments to draw 38 | if nalign > 0 { 39 | ext := filepath.Ext(fname) 40 | fname = fmt.Sprintf("%s_%d%s", fname[0:len(fname)-len(ext)], nalign, ext) // filepath.Ext keeps the leading dot, so no extra "." is inserted 41 | } 42 | if f, err = utils.OpenWriteFile(fname); err != nil { 43 | io.LogError(err) 44 | return 45 | } 46 |
al.CleanNames(nil) 47 | w := bufio.NewWriter(f) 48 | l = draw.NewBioJSLayout(w) 49 | l.DrawAlign(al) 50 | w.Flush() 51 | f.Close() 52 | nalign++ 53 | } 54 | 55 | if aligns.Err != nil { 56 | err = aligns.Err 57 | io.LogError(err) 58 | } 59 | return 60 | }, 61 | } 62 | 63 | func init() { 64 | drawCmd.AddCommand(biojsCmd) 65 | } 66 | -------------------------------------------------------------------------------- /cmd/draw_png.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "path/filepath" 7 | 8 | "github.com/evolbioinfo/goalign/align" 9 | "github.com/evolbioinfo/goalign/draw" 10 | "github.com/evolbioinfo/goalign/io" 11 | "github.com/evolbioinfo/goalign/io/utils" 12 | "github.com/spf13/cobra" 13 | ) 14 | 15 | // pngCmd represents the png command 16 | var pngCmd = &cobra.Command{ 17 | Use: "png", 18 | Short: "Draw alignments in a png file", 19 | Long: `Draw alignments in a png file 20 | 21 | One line per sequence, one pixel per character. 22 | Color schemes are specific to the alphabet of sequences: 23 | - The nucleotide colors are from bioSyntax (doi.org/10.1186/s12859-018-2315-y). 
24 | - The amino acid colors are adapted from "Shapely colours" 25 | (http://acces.ens-lyon.fr/biotic/rastop/help/colour.htm) 26 | `, 27 | RunE: func(cmd *cobra.Command, args []string) (err error) { 28 | var l draw.AlignLayout 29 | var aligns *align.AlignChannel 30 | var f utils.StringWriterCloser 31 | 32 | if aligns, err = readalign(infile); err != nil { 33 | io.LogError(err) 34 | return 35 | } 36 | 37 | nalign := 0 38 | for al := range aligns.Achan { 39 | fname := drawOutput 40 | // Add an index to file output name 41 | // if there are several alignments to draw 42 | if nalign > 0 { 43 | ext := filepath.Ext(fname) 44 | fname = fmt.Sprintf("%s_%d%s", fname[0:len(fname)-len(ext)], nalign, ext) // filepath.Ext keeps the leading dot, so no extra "." is inserted 45 | } 46 | if f, err = utils.OpenWriteFile(fname); err != nil { 47 | io.LogError(err) 48 | return 49 | } 50 | al.CleanNames(nil) 51 | w := bufio.NewWriter(f) 52 | l = draw.NewPngLayout(w) 53 | l.DrawAlign(al) 54 | w.Flush() 55 | f.Close() 56 | nalign++ 57 | } 58 | 59 | if aligns.Err != nil { 60 | err = aligns.Err 61 | io.LogError(err) 62 | } 63 | return 64 | }, 65 | } 66 | 67 | func init() { 68 | drawCmd.AddCommand(pngCmd) 69 | } 70 | -------------------------------------------------------------------------------- /cmd/fasta.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/evolbioinfo/goalign/io/utils" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | // fastaCmd represents the fasta command 11 | var fastaCmd = &cobra.Command{ 12 | Use: "fasta", 13 | Short: "Reformats an input alignment into Fasta", 14 | Long: `Reformats an alignment into Fasta. 15 | It may take a Phylip of Fasta input alignment.
16 | 17 | If the input alignment contains several alignments, will take the first one only 18 | 19 | 20 | Example of usage: 21 | 22 | goalign reformat fasta -i align.phylip -p 23 | goalign reformat fasta -i align.fasta 24 | 25 | `, 26 | RunE: func(cmd *cobra.Command, args []string) (err error) { 27 | var f utils.StringWriterCloser 28 | 29 | if f, err = utils.OpenWriteFile(reformatOutput); err != nil { 30 | io.LogError(err) 31 | return 32 | } 33 | defer utils.CloseWriteFile(f, reformatOutput) 34 | 35 | if unaligned { 36 | var seqs align.SeqBag 37 | 38 | if seqs, err = readsequences(infile); err != nil { 39 | io.LogError(err) 40 | return 41 | } 42 | if reformatCleanNames { 43 | seqs.CleanNames(nil) 44 | } 45 | writeSequences(seqs, f) 46 | } else { 47 | var aligns *align.AlignChannel 48 | 49 | if aligns, err = readalign(infile); err != nil { 50 | io.LogError(err) 51 | return 52 | } 53 | 54 | a := <-aligns.Achan 55 | if aligns.Err != nil { 56 | err = aligns.Err 57 | io.LogError(err) 58 | return 59 | } 60 | if reformatCleanNames { 61 | a.CleanNames(nil) 62 | } 63 | writeAlignFasta(a, f) 64 | } 65 | return 66 | }, 67 | } 68 | 69 | func init() { 70 | reformatCmd.PersistentFlags().BoolVar(&unaligned, "unaligned", false, "Considers sequences as unaligned and format fasta (phylip, nexus,... options are ignored)") 71 | reformatCmd.AddCommand(fastaCmd) 72 | } 73 | -------------------------------------------------------------------------------- /cmd/identical.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/evolbioinfo/goalign/align" 7 | "github.com/evolbioinfo/goalign/io" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | var compalign string 12 | 13 | var identicalCmd = &cobra.Command{ 14 | Use: "identical", 15 | Short: "Assess whether the two alignments are identical", 16 | Long: `Assess whether the two alignments are identical. 
17 | 18 | `, 19 | RunE: func(cmd *cobra.Command, args []string) (err error) { 20 | var aligns, compaligns *align.AlignChannel 21 | 22 | if compalign == "none" { 23 | err = fmt.Errorf("no compared alignment has been given") // keep the error in err so RunE reports the failure, as on the other error paths 24 | io.LogError(err) 25 | return 26 | } 27 | if aligns, err = readalign(infile); err != nil { 28 | io.LogError(err) 29 | return 30 | } 31 | 32 | if compaligns, err = readalign(compalign); err != nil { 33 | io.LogError(err) 34 | return 35 | } 36 | 37 | al := <-aligns.Achan 38 | comp := <-compaligns.Achan 39 | 40 | if aligns.Err != nil { 41 | err = aligns.Err 42 | io.LogError(err) 43 | return 44 | } 45 | if compaligns.Err != nil { 46 | err = compaligns.Err 47 | io.LogError(err) 48 | return 49 | } 50 | 51 | fmt.Println(al.Identical(comp)) 52 | 53 | return 54 | }, 55 | } 56 | 57 | func init() { 58 | RootCmd.AddCommand(identicalCmd) 59 | identicalCmd.PersistentFlags().StringVarP(&compalign, "compared", "c", "none", "Compared alignment file") 60 | } 61 | -------------------------------------------------------------------------------- /cmd/length.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/evolbioinfo/goalign/align" 7 | "github.com/evolbioinfo/goalign/io" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | // lengthCmd represents the length command 12 | var lengthCmd = &cobra.Command{ 13 | Use: "length", 14 | Short: "Prints the length of sequences in the alignment", 15 | Long: `Prints the length of sequences in the alignment. 16 | May take a Phylip of Fasta input alignment. 17 | 18 | If --unaligned is given, then length of all individual sequences is printed.
19 | 20 | If the input alignment contains several alignments, will take all of them 21 | 22 | 23 | 24 | Example of usages: 25 | 26 | goalign stats length -i align.phylip -p 27 | goalign stats length -i align.fasta 28 | 29 | `, 30 | RunE: func(cmd *cobra.Command, args []string) (err error) { 31 | if unaligned { 32 | var seqs align.SeqBag 33 | 34 | if seqs, err = readsequences(infile); err != nil { 35 | io.LogError(err) 36 | return 37 | } 38 | 39 | seqs.IterateChar(func(name string, sequence []uint8) bool { 40 | fmt.Println(name, "\t", len(sequence)) 41 | return false 42 | }) 43 | } else { 44 | var aligns *align.AlignChannel 45 | 46 | if aligns, err = readalign(infile); err != nil { 47 | io.LogError(err) 48 | return 49 | } 50 | 51 | for al := range aligns.Achan { 52 | fmt.Println(al.Length()) 53 | } 54 | 55 | if aligns.Err != nil { 56 | err = aligns.Err 57 | io.LogError(err) 58 | } 59 | } 60 | return 61 | }, 62 | } 63 | 64 | func init() { 65 | lengthCmd.PersistentFlags().BoolVar(&unaligned, "unaligned", false, "Considers sequences as unaligned and format fasta (phylip, nexus,... options are ignored)") 66 | statsCmd.AddCommand(lengthCmd) 67 | } 68 | -------------------------------------------------------------------------------- /cmd/maxchars.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/spf13/cobra" 7 | ) 8 | 9 | var maxCharExcludeGaps bool 10 | var maxCharIgnoreGaps bool 11 | var maxCharIgnoreNs bool 12 | 13 | // maxCharCmd represents the maxchar command 14 | var maxCharCmd = &cobra.Command{ 15 | Use: "maxchar", 16 | Short: "Prints the character with the highest occcurence for each site of the alignment", 17 | Long: `Prints the character with the highest occcurence for each site of the alignment. 18 | 19 | Ouput format: Tabulated with columns: 20 | 1) Site index (0...)
21 | 2) Character with maximum occurence 22 | 3) Number of occurence of this character 23 | 24 | Example of usages: 25 | 26 | goalign stats maxchar -i align.phylip -p 27 | goalign stats maxchar -i align.fasta 28 | `, 29 | RunE: func(cmd *cobra.Command, args []string) (err error) { 30 | var aligns *align.AlignChannel 31 | maxCharIgnoreGaps = maxCharIgnoreGaps || maxCharExcludeGaps 32 | 33 | if aligns, err = readalign(infile); err != nil { 34 | io.LogError(err) 35 | return 36 | } 37 | 38 | al := <-aligns.Achan 39 | if aligns.Err != nil { 40 | err = aligns.Err 41 | io.LogError(err) 42 | return 43 | } 44 | printMaxCharStats(al, maxCharIgnoreGaps, maxCharIgnoreNs) 45 | 46 | return 47 | }, 48 | } 49 | 50 | func init() { 51 | statsCmd.AddCommand(maxCharCmd) 52 | 53 | maxCharCmd.PersistentFlags().BoolVar(&maxCharExcludeGaps, "exclude-gaps", false, "Ignore gaps in the majority computation (for backward compatibility, will be removed in future releases)") 54 | maxCharCmd.PersistentFlags().BoolVar(&maxCharIgnoreGaps, "ignore-gaps", false, "Ignore gaps in the majority computation") 55 | maxCharCmd.PersistentFlags().BoolVar(&maxCharIgnoreNs, "ignore-n", false, "Ignore Ns in the majority computation") 56 | } 57 | -------------------------------------------------------------------------------- /cmd/mutate.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | var mutateOutput string 8 | var mutateRate float64 9 | 10 | // mutateCmd represents the mutate command 11 | var mutateCmd = &cobra.Command{ 12 | Use: "mutate", 13 | Short: "Adds substitutions (~sequencing errors), or gaps, uniformly in an input alignment", 14 | Long: `Adds substitutions (~sequencing error), or gaps, uniformly in an input alignment. 
15 | `, 16 | } 17 | 18 | func init() { 19 | RootCmd.AddCommand(mutateCmd) 20 | mutateCmd.PersistentFlags().Float64VarP(&mutateRate, "rate", "r", 0.1, "Mutation rate per nucleotide/amino acid") 21 | mutateCmd.PersistentFlags().StringVarP(&mutateOutput, "output", "o", "stdout", "Mutated alignment output file") 22 | } 23 | -------------------------------------------------------------------------------- /cmd/mutategaps.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/evolbioinfo/goalign/io/utils" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | // gapsCmd is registered under mutateCmd with Use "snvs" and applies al.Mutate(mutateRate); NOTE(review): file and variable are named after gaps while the command adds substitutions - confirm this is intended 11 | var gapsCmd = &cobra.Command{ 12 | Use: "snvs", 13 | Short: "Adds substitutions uniformly in an input alignment", 14 | Long: `Adds substitutions uniformly in an input alignment. 15 | 16 | if rate <= 0 : does nothing 17 | if rate > 1 : then rate = 1 18 | `, 19 | RunE: func(cmd *cobra.Command, args []string) (err error) { 20 | var aligns *align.AlignChannel 21 | var f utils.StringWriterCloser 22 | 23 | if aligns, err = readalign(infile); err != nil { 24 | io.LogError(err) 25 | return 26 | } 27 | if f, err = utils.OpenWriteFile(mutateOutput); err != nil { 28 | io.LogError(err) 29 | return 30 | } 31 | defer utils.CloseWriteFile(f, mutateOutput) 32 | 33 | for al := range aligns.Achan { 34 | al.Mutate(mutateRate) 35 | writeAlign(al, f) 36 | } 37 | 38 | if aligns.Err != nil { 39 | err = aligns.Err 40 | io.LogError(err) 41 | } 42 | return 43 | }, 44 | } 45 | 46 | func init() { 47 | mutateCmd.AddCommand(gapsCmd) 48 | } 49 | -------------------------------------------------------------------------------- /cmd/nalign.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/evolbioinfo/goalign/align" 7 | "github.com/evolbioinfo/goalign/io" 8 |
"github.com/spf13/cobra" 9 | ) 10 | 11 | // nalignCmd represents the nalign command 12 | var nalignCmd = &cobra.Command{ 13 | Use: "nalign", 14 | Short: "Prints the number of alignments in the input file", 15 | Long: `Prints the number of alignments in the input file 16 | 17 | If the input file is in Fasta format, it should be 1 18 | Otherwize, it may be > 1 19 | 20 | Example: 21 | 22 | goalign stats nalign -i align.ph -p 23 | 24 | `, 25 | RunE: func(cmd *cobra.Command, args []string) (err error) { 26 | var aligns *align.AlignChannel 27 | 28 | if aligns, err = readalign(infile); err != nil { 29 | io.LogError(err) 30 | return 31 | } 32 | 33 | naligns := 0 34 | for range aligns.Achan { 35 | naligns++ 36 | } 37 | fmt.Println(naligns) 38 | 39 | if aligns.Err != nil { 40 | err = aligns.Err 41 | io.LogError(err) 42 | } 43 | return 44 | }, 45 | } 46 | 47 | func init() { 48 | statsCmd.AddCommand(nalignCmd) 49 | } 50 | -------------------------------------------------------------------------------- /cmd/nexus.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/evolbioinfo/goalign/io/utils" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | // nexusCmd represents the nexus command 11 | var nexusCmd = &cobra.Command{ 12 | Use: "nexus", 13 | Short: "Reformats an input alignment into nexus", 14 | Long: `Reformats an alignment into nexus format. 15 | It may take a Phylip of Fasta input alignment. 
16 | 17 | If the input alignment contains several alignments, will take all of them 18 | 19 | Example of usage: 20 | 21 | goalign reformat nexus -i align.phylip -p 22 | goalign reformat nexus -i align.fasta 23 | 24 | `, 25 | RunE: func(cmd *cobra.Command, args []string) (err error) { 26 | var aligns *align.AlignChannel 27 | var f utils.StringWriterCloser 28 | 29 | if aligns, err = readalign(infile); err != nil { 30 | io.LogError(err) 31 | return 32 | } 33 | if f, err = utils.OpenWriteFile(reformatOutput); err != nil { 34 | io.LogError(err) 35 | return 36 | } 37 | defer utils.CloseWriteFile(f, reformatOutput) 38 | 39 | for al := range aligns.Achan { 40 | if reformatCleanNames { 41 | al.CleanNames(nil) 42 | } 43 | writeAlignNexus(al, f) 44 | } 45 | 46 | if aligns.Err != nil { 47 | err = aligns.Err 48 | io.LogError(err) 49 | } 50 | return 51 | }, 52 | } 53 | 54 | func init() { 55 | reformatCmd.AddCommand(nexusCmd) 56 | } 57 | -------------------------------------------------------------------------------- /cmd/nseq.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/evolbioinfo/goalign/align" 7 | "github.com/evolbioinfo/goalign/io" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | // nseqCmd represents the nseq command 12 | var nseqCmd = &cobra.Command{ 13 | Use: "nseq", 14 | Short: "Prints the number of sequences in the alignment", 15 | Long: `Prints the number of sequences in the alignment. 16 | May take a Phylip of Fasta input alignment. 
17 | 18 | If the input alignment contains several alignments, will process all of them 19 | 20 | Example of usages: 21 | 22 | goalign stats nseq -i align.phylip -p 23 | goalign stats nseq -i align.fasta 24 | `, 25 | RunE: func(cmd *cobra.Command, args []string) (err error) { 26 | 27 | if unaligned { 28 | var seqs align.SeqBag 29 | 30 | if seqs, err = readsequences(infile); err != nil { 31 | io.LogError(err) 32 | return 33 | } 34 | fmt.Println(seqs.NbSequences()) 35 | } else { 36 | var aligns *align.AlignChannel 37 | 38 | if aligns, err = readalign(infile); err != nil { 39 | io.LogError(err) 40 | return 41 | } 42 | 43 | for al := range aligns.Achan { 44 | fmt.Println(al.NbSequences()) 45 | } 46 | 47 | if aligns.Err != nil { 48 | err = aligns.Err 49 | io.LogError(err) 50 | } 51 | } 52 | return 53 | }, 54 | } 55 | 56 | func init() { 57 | nseqCmd.PersistentFlags().BoolVar(&unaligned, "unaligned", false, "Considers sequences as unaligned and format fasta (phylip, nexus,... options are ignored)") 58 | statsCmd.AddCommand(nseqCmd) 59 | } 60 | -------------------------------------------------------------------------------- /cmd/orf.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/evolbioinfo/goalign/io/utils" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var orfOutput string 11 | var orfreverse bool 12 | 13 | // orfCmd represents the orf command 14 | var orfCmd = &cobra.Command{ 15 | Use: "orf", 16 | Short: "Find the longest orf in all given sequences in forward strand", 17 | Long: `Find the longest orf in all given sequences in forward strand. 18 | 19 | If input sequences are not nucleotidic, then returns an error. 20 | If input sequences are aligned (contain '-'), then they are unaligned first. 21 | 22 | Output is in fasta format.
23 | `, 24 | RunE: func(cmd *cobra.Command, args []string) (err error) { 25 | var f utils.StringWriterCloser 26 | var reforf align.SeqBag 27 | var inseqs align.SeqBag 28 | var orf align.Sequence 29 | 30 | if f, err = utils.OpenWriteFile(orfOutput); err != nil { 31 | io.LogError(err) 32 | return 33 | } 34 | defer utils.CloseWriteFile(f, orfOutput) 35 | 36 | if inseqs, err = readsequences(infile); err != nil { 37 | io.LogError(err) 38 | return 39 | } 40 | 41 | inseqs = inseqs.Unalign() 42 | 43 | if orf, err = inseqs.LongestORF(orfreverse); err != nil { 44 | io.LogError(err) 45 | return 46 | } 47 | reforf = align.NewSeqBag(align.UNKNOWN) 48 | reforf.AddSequenceChar(orf.Name(), orf.SequenceChar(), orf.Comment()) 49 | reforf.AutoAlphabet() 50 | writeSequences(reforf, f) 51 | 52 | return 53 | }, 54 | } 55 | 56 | func init() { 57 | RootCmd.AddCommand(orfCmd) 58 | orfCmd.PersistentFlags().StringVarP(&orfOutput, "output", "o", "stdout", "ORF Output Fasta File") 59 | orfCmd.PersistentFlags().BoolVar(&orfreverse, "reverse", false, "Search for the longest ORF ALSO in the reverse strand") 60 | } 61 | -------------------------------------------------------------------------------- /cmd/paml.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/evolbioinfo/goalign/io/utils" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | // pamlCmd represents the paml command 11 | var pamlCmd = &cobra.Command{ 12 | Use: "paml", 13 | Short: "Reformats an input alignment into input data for PAML", 14 | Long: `Reformats an alignment into input data for PAML. 15 | It may take a Phylip of Fasta input alignment.
16 | 17 | If the input alignment contains several alignments, will take the first one only 18 | 19 | Example of usage: 20 | 21 | goalign reformat paml -i align.phylip -p 22 | goalign reformat paml -i align.fasta 23 | `, 24 | RunE: func(cmd *cobra.Command, args []string) (err error) { 25 | var aligns *align.AlignChannel 26 | var f utils.StringWriterCloser 27 | 28 | if aligns, err = readalign(infile); err != nil { 29 | io.LogError(err) 30 | return 31 | } 32 | if f, err = utils.OpenWriteFile(reformatOutput); err != nil { 33 | io.LogError(err) 34 | return 35 | } 36 | defer utils.CloseWriteFile(f, reformatOutput) 37 | 38 | for al := range aligns.Achan { 39 | if reformatCleanNames { 40 | al.CleanNames(nil) 41 | } 42 | writeAlignPaml(al, f) 43 | } 44 | 45 | if aligns.Err != nil { 46 | err = aligns.Err 47 | io.LogError(err) 48 | } 49 | return 50 | }, 51 | } 52 | 53 | func init() { 54 | reformatCmd.AddCommand(pamlCmd) 55 | 56 | } 57 | -------------------------------------------------------------------------------- /cmd/phylip.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/evolbioinfo/goalign/io/utils" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | // phylipCmd represents the phylip command 11 | var phylipCmd = &cobra.Command{ 12 | Use: "phylip", 13 | Short: "Reformats an input alignment into Phylip", 14 | Long: `Reformats an alignment into Phylip. 15 | It may take a Phylip of Fasta input alignment. 
16 | 17 | If the input alignment contains several alignments, will take all of them 18 | 19 | Example of usage: 20 | 21 | goalign reformat phylip -i align.phylip -p 22 | goalign reformat phylip -i align.fasta 23 | 24 | `, 25 | RunE: func(cmd *cobra.Command, args []string) (err error) { 26 | var aligns *align.AlignChannel 27 | var f utils.StringWriterCloser 28 | 29 | if aligns, err = readalign(infile); err != nil { 30 | io.LogError(err) 31 | return 32 | } 33 | if f, err = utils.OpenWriteFile(reformatOutput); err != nil { 34 | io.LogError(err) 35 | return 36 | } 37 | defer utils.CloseWriteFile(f, reformatOutput) 38 | 39 | for al := range aligns.Achan { 40 | if reformatCleanNames { 41 | al.CleanNames(nil) 42 | } 43 | writeAlignPhylip(al, f) 44 | } 45 | 46 | if aligns.Err != nil { 47 | err = aligns.Err 48 | io.LogError(err) 49 | } 50 | return 51 | }, 52 | } 53 | 54 | func init() { 55 | reformatCmd.AddCommand(phylipCmd) 56 | } 57 | -------------------------------------------------------------------------------- /cmd/random.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/evolbioinfo/goalign/io/utils" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var randomLength, randomSize int 11 | var randomAA bool 12 | var randomOutput string 13 | 14 | // randomCmd represents the random command 15 | var randomCmd = &cobra.Command{ 16 | Use: "random", 17 | Short: "Generate random sequences", 18 | Long: `Generate random sequences. 
19 | `, 20 | RunE: func(cmd *cobra.Command, args []string) (err error) { 21 | var f utils.StringWriterCloser 22 | var a align.Alignment 23 | 24 | if f, err = utils.OpenWriteFile(randomOutput); err != nil { 25 | io.LogError(err) 26 | return 27 | } 28 | defer utils.CloseWriteFile(f, randomOutput) 29 | 30 | if !randomAA { 31 | if a, err = align.RandomAlignment(align.NUCLEOTIDS, randomLength, randomSize); err != nil { 32 | io.LogError(err) 33 | return 34 | } 35 | } else { 36 | if a, err = align.RandomAlignment(align.AMINOACIDS, randomLength, randomSize); err != nil { 37 | io.LogError(err) 38 | return 39 | } 40 | } 41 | writeAlign(a, f) 42 | 43 | return 44 | }, 45 | } 46 | 47 | func init() { 48 | RootCmd.AddCommand(randomCmd) 49 | randomCmd.PersistentFlags().IntVarP(&randomLength, "length", "l", 100, "Length of sequences to generate") 50 | randomCmd.PersistentFlags().IntVarP(&randomSize, "nb-seqs", "n", 10, "Number of sequences to generate") 51 | randomCmd.PersistentFlags().BoolVarP(&randomAA, "amino-acids", "a", false, "Aminoacid sequences (otherwise, nucleotides)") 52 | randomCmd.PersistentFlags().StringVarP(&randomOutput, "out-align", "o", "stdout", "Random alignment output file") 53 | } 54 | -------------------------------------------------------------------------------- /cmd/reformat.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | var reformatOutput string 8 | var reformatCleanNames bool 9 | 10 | // reformatCmd represents the reformat command 11 | var reformatCmd = &cobra.Command{ 12 | Use: "reformat", 13 | Short: "Reformats input alignment into phylip of fasta format", 14 | Long: `Reformats input alignment into phylip of fasta format. 15 | 16 | Allows to convert formats between phylip, fasta and nexus, but also reformats 17 | fasta and phylip, such as 60 characters per line, etc. 
18 | 19 | `, 20 | } 21 | 22 | func init() { 23 | RootCmd.AddCommand(reformatCmd) 24 | reformatCmd.PersistentFlags().StringVarP(&reformatOutput, "output", "o", "stdout", "Reformated alignment output file") 25 | reformatCmd.PersistentFlags().BoolVar(&reformatCleanNames, "clean-names", false, "Replaces special characters (tabs, spaces, newick characters) with '-' from input sequence names before writing output alignment") 26 | } 27 | -------------------------------------------------------------------------------- /cmd/sample.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | // sampleCmd represents the sample command 8 | var sampleCmd = &cobra.Command{ 9 | Use: "sample", 10 | Short: "Samples sequences or sites from an input alignment", 11 | Long: `Samples sequences or sites from an input alignment. For example: 12 | 13 | Randomly sampling 10 sequences from the alignment: 14 | goalign sample seqs -i align.fa -n 10 > sample.fa 15 | 16 | Randomly sampling 10 subsequences with length 20 from the input alignment: 17 | goalign sample seqs -n 10 -i align.fa -l 20 -o subalign_ 18 | 19 | `, 20 | } 21 | 22 | func init() { 23 | RootCmd.AddCommand(sampleCmd) 24 | } 25 | -------------------------------------------------------------------------------- /cmd/seq.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/evolbioinfo/goalign/io/utils" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var trimFromStart bool 11 | 12 | // seqCmd represents the seq command 13 | var seqCmd = &cobra.Command{ 14 | Use: "seq", 15 | Short: "Trims sequences of the alignment", 16 | Long: `Trims sequences of the alignemnt 17 | 18 | It trims n (--nb-char, -n) characters from the beginning (--from-start, -s) or from the end (default) of the 
input alignment. 19 | 20 | Example: 21 | goalign trim seq -i align.fa -o trimed.fa -s -n 10 22 | 23 | `, 24 | RunE: func(cmd *cobra.Command, args []string) (err error) { 25 | var aligns *align.AlignChannel 26 | var f utils.StringWriterCloser 27 | 28 | if aligns, err = readalign(infile); err != nil { 29 | io.LogError(err) 30 | return 31 | } 32 | if f, err = utils.OpenWriteFile(trimAlignOut); err != nil { 33 | io.LogError(err) 34 | return 35 | } 36 | defer utils.CloseWriteFile(f, trimAlignOut) 37 | 38 | for al := range aligns.Achan { 39 | if err = al.TrimSequences(trimNb, trimFromStart); err != nil { 40 | io.LogError(err) 41 | return 42 | } else { 43 | writeAlign(al, f) 44 | } 45 | } 46 | 47 | if aligns.Err != nil { 48 | err = aligns.Err 49 | io.LogError(err) 50 | } 51 | return 52 | }, 53 | } 54 | 55 | func init() { 56 | trimCmd.AddCommand(seqCmd) 57 | seqCmd.PersistentFlags().IntVarP(&trimNb, "nb-char", "n", 1, "Number of characters to trim from sequences") 58 | seqCmd.PersistentFlags().BoolVarP(&trimFromStart, "from-start", "s", false, "If true: trims n char from the start, else from the end") 59 | } 60 | -------------------------------------------------------------------------------- /cmd/seqs.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/evolbioinfo/goalign/io/utils" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | // seqsCmd represents the seqs command 11 | var seqsCmd = &cobra.Command{ 12 | Use: "seqs", 13 | Short: "Shuffles sequence order in alignment", 14 | Long: `Shuffle sequence order in alignment. 15 | 16 | It may take a Fasta or Phylip alignment as input. 17 | 18 | If the input alignment contains several alignments, will process all of them 19 | 20 | Output a randomly reordered alignment. It does not 21 | change the biological meaning of the alignment. 
22 | 23 | Example of usage: 24 | 25 | goalign shuffle seqs -i align.phylip -p 26 | goalign shuffle seqs -i align.fasta 27 | 28 | `, 29 | RunE: func(cmd *cobra.Command, args []string) (err error) { 30 | var f utils.StringWriterCloser 31 | 32 | if f, err = utils.OpenWriteFile(shuffleOutput); err != nil { 33 | io.LogError(err) 34 | return 35 | } 36 | defer utils.CloseWriteFile(f, shuffleOutput) 37 | 38 | if unaligned { 39 | var seqs align.SeqBag 40 | if seqs, err = readsequences(infile); err != nil { 41 | io.LogError(err) 42 | return 43 | } 44 | seqs.ShuffleSequences() 45 | writeSequences(seqs, f) 46 | } else { 47 | var aligns *align.AlignChannel 48 | 49 | if aligns, err = readalign(infile); err != nil { 50 | io.LogError(err) 51 | return 52 | } 53 | 54 | for al := range aligns.Achan { 55 | al.ShuffleSequences() 56 | writeAlign(al, f) 57 | } 58 | 59 | if aligns.Err != nil { 60 | err = aligns.Err 61 | io.LogError(err) 62 | } 63 | } 64 | return 65 | }, 66 | } 67 | 68 | func init() { 69 | seqsCmd.PersistentFlags().BoolVar(&unaligned, "unaligned", false, "Considers sequences as unaligned and format fasta (phylip, nexus,... options are ignored)") 70 | shuffleCmd.AddCommand(seqsCmd) 71 | } 72 | -------------------------------------------------------------------------------- /cmd/shuffle.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | var shuffleOutput string 8 | 9 | // shuffleCmd represents the shuffle command 10 | var shuffleCmd = &cobra.Command{ 11 | Use: "shuffle", 12 | Short: "A set of commands to shuffle an alignment", 13 | Long: `A set of commands to shuffle an alignment. 14 | 15 | It takes a Fasta of Phylip alignment in input. 
16 | 17 | It is possible to: 18 | 1 - Shuffle n sites vertically: It takes n sites of the input 19 | alignment and reassign the characters to different sequences; 20 | 2 - Shuffle sequence order in the alignment; 21 | 3 - Recombine n sequences together. 22 | 23 | `, 24 | } 25 | 26 | func init() { 27 | RootCmd.AddCommand(shuffleCmd) 28 | 29 | shuffleCmd.PersistentFlags().StringVarP(&shuffleOutput, "output", "o", "stdout", "Shuffled alignment output file") 30 | } 31 | -------------------------------------------------------------------------------- /cmd/sort.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/evolbioinfo/goalign/io/utils" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var sortOutput string 11 | 12 | // reformatCmd represents the reformat command 13 | var sortCmd = &cobra.Command{ 14 | Use: "sort", 15 | Short: "sorts input alignment by sequence name", 16 | Long: `sorts input algignment by sequence name. 
17 | `, 18 | RunE: func(cmd *cobra.Command, args []string) (err error) { 19 | var f utils.StringWriterCloser 20 | 21 | if f, err = utils.OpenWriteFile(sortOutput); err != nil { 22 | io.LogError(err) 23 | return 24 | } 25 | defer utils.CloseWriteFile(f, sortOutput) 26 | 27 | if unaligned { 28 | var seqs align.SeqBag 29 | if seqs, err = readsequences(infile); err != nil { 30 | io.LogError(err) 31 | return 32 | } 33 | seqs.Sort() 34 | writeSequences(seqs, f) 35 | } else { 36 | var aligns *align.AlignChannel 37 | 38 | if aligns, err = readalign(infile); err != nil { 39 | io.LogError(err) 40 | return 41 | } 42 | 43 | for al := range aligns.Achan { 44 | al.Sort() 45 | writeAlign(al, f) 46 | } 47 | 48 | if aligns.Err != nil { 49 | err = aligns.Err 50 | io.LogError(err) 51 | } 52 | } 53 | return 54 | }, 55 | } 56 | 57 | func init() { 58 | sortCmd.PersistentFlags().BoolVar(&unaligned, "unaligned", false, "Considers sequences as unaligned and format fasta (phylip, nexus,... options are ignored)") 59 | sortCmd.PersistentFlags().StringVarP(&sortOutput, "output", "o", "stdout", "Sorted alignment output file") 60 | RootCmd.AddCommand(sortCmd) 61 | } 62 | -------------------------------------------------------------------------------- /cmd/split.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/spf13/cobra" 7 | 8 | "github.com/evolbioinfo/goalign/align" 9 | "github.com/evolbioinfo/goalign/io" 10 | "github.com/evolbioinfo/goalign/io/utils" 11 | ) 12 | 13 | var splitpartition *align.PartitionSet 14 | var splitpartitionstr string 15 | var splitprefix string 16 | 17 | // seqbootCmd represents the bootstrap command 18 | var splitCmd = &cobra.Command{ 19 | Use: "split", 20 | Short: "Splits an input alignment given a partition file", 21 | Long: `Splits an input alignment given a partition file. 
22 | 23 | Output alignment files will be in the same format as input alignment, 24 | with file names corresponding to partition names. 25 | 26 | Example of usage: 27 | goalign split -i align.phylip --partition partition.txt 28 | `, 29 | RunE: func(cmd *cobra.Command, args []string) (err error) { 30 | var aligns *align.AlignChannel 31 | var splitAligns []align.Alignment 32 | 33 | var f utils.StringWriterCloser 34 | 35 | if aligns, err = readalign(infile); err != nil { 36 | io.LogError(err) 37 | return 38 | } 39 | 40 | align := <-aligns.Achan 41 | if aligns.Err != nil { 42 | err = aligns.Err 43 | io.LogError(err) 44 | return 45 | } 46 | 47 | if splitpartitionstr != "none" { 48 | if splitpartition, err = parsePartition(splitpartitionstr, align.Length()); err != nil { 49 | io.LogError(err) 50 | return 51 | } 52 | if err = splitpartition.CheckSites(); err != nil { 53 | io.LogError(err) 54 | return 55 | } 56 | } else { 57 | err = fmt.Errorf("partition file must be provided") 58 | io.LogError(err) 59 | return 60 | } 61 | 62 | if splitAligns, err = align.Split(splitpartition); err != nil { 63 | io.LogError(err) 64 | return 65 | } 66 | 67 | for i, a := range splitAligns { 68 | name := splitprefix + splitpartition.PartitionName(i) + alignExtension() 69 | if f, err = utils.OpenWriteFile(name); err != nil { 70 | io.LogError(err) 71 | return 72 | } 73 | writeAlign(a, f) 74 | f.Close() 75 | } 76 | 77 | return 78 | }, 79 | } 80 | 81 | func init() { 82 | RootCmd.AddCommand(splitCmd) 83 | 84 | splitCmd.PersistentFlags().StringVarP(&splitprefix, "out-prefix", "o", "", "Prefix of output files") 85 | splitCmd.PersistentFlags().StringVar(&splitpartitionstr, "partition", "none", "File containing definition of the partitions") 86 | } 87 | -------------------------------------------------------------------------------- /cmd/stats_alphabet.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | 6 | 
"github.com/evolbioinfo/goalign/align" 7 | "github.com/evolbioinfo/goalign/io" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | // charCmd represents the char command 12 | var alphabetCmd = &cobra.Command{ 13 | Use: "alphabet", 14 | Short: "Prints the alphabet detected for the input alignment", 15 | Long: `Prints the alphabet detected for the input alignment. 16 | 17 | `, 18 | RunE: func(cmd *cobra.Command, args []string) (err error) { 19 | var aligns *align.AlignChannel 20 | var seqs align.SeqBag 21 | 22 | if unaligned { 23 | if seqs, err = readsequences(infile); err != nil { 24 | io.LogError(err) 25 | return 26 | } 27 | fmt.Println(seqs.AlphabetStr()) 28 | } else { 29 | 30 | if aligns, err = readalign(infile); err != nil { 31 | io.LogError(err) 32 | return 33 | } else { 34 | 35 | al := <-aligns.Achan 36 | if aligns.Err != nil { 37 | err = aligns.Err 38 | io.LogError(err) 39 | return 40 | } 41 | fmt.Println(al.AlphabetStr()) 42 | } 43 | } 44 | return 45 | }, 46 | } 47 | 48 | func init() { 49 | alphabetCmd.PersistentFlags().BoolVar(&unaligned, "unaligned", false, "Considers sequences as unaligned and format fasta (phylip, nexus,... options are ignored)") 50 | statsCmd.AddCommand(alphabetCmd) 51 | } 52 | -------------------------------------------------------------------------------- /cmd/swap.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/evolbioinfo/goalign/align" 5 | "github.com/evolbioinfo/goalign/io" 6 | "github.com/evolbioinfo/goalign/io/utils" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var swapRate float64 11 | var swapPos float64 12 | 13 | // swapCmd represents the swap command 14 | var swapCmd = &cobra.Command{ 15 | Use: "swap", 16 | Short: "Swap portion of sequences in the input alignment", 17 | Long: `Swap portion of sequences in the input alignment. 18 | It may take Fasta or Phylip input alignment. 
19 | 20 | If the input alignment contains several alignments, will process all of them 21 | 22 | It will exchange sequences from one seq to another of the alignment. 23 | if rate>=0 and rate<=1 then it takes rate/2 sequences and exchanges sequences 24 | with rate/2 other sequences, from a random position. 25 | 26 | If given pos >=0 and <=1 then take this position (relative to align length) 27 | instead of a random one. 28 | 29 | A rate of 0.5 will swap 25% of the sequences with 30 | other 25% of the sequences at a random position. 31 | 32 | swap 50% of sequences: 33 | 34 | s1 CCCCCCCCCCCCCC s1 CCCCCCCCCCCCCC 35 | s2 AAAAAAAAAAAAAA => s2 AAAAAATTTTTTTT 36 | s3 GGGGGGGGGGGGGG s3 GGGGGGGGGGGGGG 37 | s4 TTTTTTTTTTTTTT s4 TTTTTTAAAAAAAA 38 | 39 | Example of usage: 40 | 41 | goalign shuffle swap -i align.phylip -p -r 0.5 42 | goalign shuffle swap -i align.fasta -r 0.5 43 | 44 | `, 45 | RunE: func(cmd *cobra.Command, args []string) (err error) { 46 | var aligns *align.AlignChannel 47 | var f utils.StringWriterCloser 48 | 49 | if aligns, err = readalign(infile); err != nil { 50 | io.LogError(err) 51 | return 52 | } 53 | if f, err = utils.OpenWriteFile(shuffleOutput); err != nil { 54 | io.LogError(err) 55 | return 56 | } 57 | defer utils.CloseWriteFile(f, shuffleOutput) 58 | 59 | for al := range aligns.Achan { 60 | if err = al.Swap(swapRate, swapPos); err != nil { 61 | io.LogError(err) 62 | return 63 | } 64 | writeAlign(al, f) 65 | } 66 | 67 | if aligns.Err != nil { 68 | err = aligns.Err 69 | io.LogError(err) 70 | } 71 | return 72 | }, 73 | } 74 | 75 | func init() { 76 | shuffleCmd.AddCommand(swapCmd) 77 | 78 | swapCmd.PersistentFlags().Float64VarP(&swapRate, "rate", "r", 0.5, "Rate of Swap sequences (>=0 and <=1)") 79 | swapCmd.PersistentFlags().Float64Var(&swapPos, "pos", -1, "Position of the break point (0 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package cmd 16 | 17 | import ( 18 | "github.com/spf13/cobra" 19 | ) 20 | 21 | // trimCmd represents the trim command 22 | var trimAlignOut string 23 | var trimNb int 24 | 25 | var trimCmd = &cobra.Command{ 26 | Use: "trim", 27 | Short: "This command trims names of sequences or sequences themselves", 28 | Long: `This command trims names of sequences or sequences themselves. 29 | 30 | With "names" subcommand, you can trim names to n characters. In this case, it will 31 | also output mapping between old names and new names into a map file as well as the 32 | new alignment. 33 | 34 | With "seq" subcommand, you can trim sequences from start or from end, by n characters. 35 | `, 36 | } 37 | 38 | func init() { 39 | RootCmd.AddCommand(trimCmd) 40 | trimCmd.PersistentFlags().StringVarP(&trimAlignOut, "out-align", "o", "stdout", "Renamed alignment output file") 41 | } 42 | -------------------------------------------------------------------------------- /cmd/unalign.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/evolbioinfo/goalign/align" 7 | "github.com/evolbioinfo/goalign/io" 8 | "github.com/evolbioinfo/goalign/io/utils" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | var unalignOutput string 13 | 14 | // unalignCmd represents the unalign command 15 | var unalignCmd = &cobra.Command{ 16 | Use: "unalign", 17 | Short: "Unaligns input alignment", 18 | Long: `Unaligns an input alignment, by removing indels. 
19 | 20 | The output is in Fasta format, whatever the input format is (Fasta or Phylip). 21 | 22 | Output sequences are free from all indel charachers "-". 23 | 24 | As there may be several alignments in the input alignment (phylip format), 25 | output files are prefixed with argument given to "--output-prefix", and 26 | suffixed with an index and the extension ".fa". 27 | 28 | If --output-prefix is set to "-" or "stdout", all sequences are printed on stdout 29 | 30 | Example: 31 | 32 | goalign unalign -i align.ph -p -o seq_ 33 | 34 | If align contains 3 alignments, this will generate 3 files: 35 | * seq_000001.fa 36 | * seq_000002.fa 37 | * seq_000003.fa 38 | `, 39 | RunE: func(cmd *cobra.Command, args []string) (err error) { 40 | var aligns *align.AlignChannel 41 | var f utils.StringWriterCloser 42 | 43 | if aligns, err = readalign(infile); err != nil { 44 | io.LogError(err) 45 | return 46 | } 47 | 48 | i := 1 49 | filename := unalignOutput 50 | for al := range aligns.Achan { 51 | if filename != "stdout" && filename != "-" { 52 | filename = fmt.Sprintf("%s_%.6d.fa", unalignOutput, i) 53 | } 54 | if f, err = utils.OpenWriteFile(filename); err != nil { 55 | io.LogError(err) 56 | return 57 | } 58 | writeSequences(al.Unalign(), f) 59 | utils.CloseWriteFile(f, filename) 60 | i++ 61 | } 62 | 63 | if aligns.Err != nil { 64 | err = aligns.Err 65 | io.LogError(err) 66 | } 67 | return 68 | }, 69 | } 70 | 71 | func init() { 72 | RootCmd.AddCommand(unalignCmd) 73 | unalignCmd.PersistentFlags().StringVarP(&unalignOutput, "output-prefix", "o", "stdout", "Unaligned alignment output file prefix") 74 | } 75 | -------------------------------------------------------------------------------- /cmd/version.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/evolbioinfo/goalign/version" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | // versionCmd represents the version command 11 | var 
versionCmd = &cobra.Command{ 12 | Use: "version", 13 | Short: "Prints the current version of goalign", 14 | Long: `Prints the current version of goalign`, 15 | Run: func(cmd *cobra.Command, args []string) { 16 | fmt.Println(version.Version) 17 | }, 18 | } 19 | 20 | func init() { 21 | RootCmd.AddCommand(versionCmd) 22 | } 23 | -------------------------------------------------------------------------------- /cmd/weightboot.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/evolbioinfo/goalign/align" 7 | "github.com/evolbioinfo/goalign/distance/dna" 8 | "github.com/evolbioinfo/goalign/io" 9 | "github.com/evolbioinfo/goalign/io/utils" 10 | "github.com/spf13/cobra" 11 | ) 12 | 13 | var weightbootOutput string 14 | var weightbootnb int 15 | 16 | // weightbootCmd represents the weightboot command 17 | var weightbootCmd = &cobra.Command{ 18 | Use: "weightboot", 19 | Short: "generate continous weights for all positions of the input alignment", 20 | Long: `generate continous weights for all positions of the input alignment 21 | 22 | If the input alignment contains several alignments, will process the first one only. 
23 | 24 | Weights follow a Dirichlet distribtion D(n;1,...,1) 25 | 26 | `, 27 | RunE: func(cmd *cobra.Command, args []string) (err error) { 28 | var f utils.StringWriterCloser 29 | var alignChan *align.AlignChannel 30 | 31 | if alignChan, err = readalign(infile); err != nil { 32 | io.LogError(err) 33 | return 34 | } 35 | al, _ := <-alignChan.Achan 36 | if alignChan.Err != nil { 37 | err = alignChan.Err 38 | io.LogError(err) 39 | return 40 | } 41 | 42 | if f, err = utils.OpenWriteFile(weightbootOutput); err != nil { 43 | io.LogError(err) 44 | return 45 | } 46 | defer utils.CloseWriteFile(f, weightbootOutput) 47 | 48 | for i := 0; i < weightbootnb; i++ { 49 | var weights []float64 = nil 50 | weights = dna.BuildWeightsDirichlet(al) 51 | for i, w := range weights { 52 | if i > 0 { 53 | f.WriteString("\t") 54 | } 55 | f.WriteString(fmt.Sprintf("%f", w)) 56 | } 57 | f.WriteString("\n") 58 | } 59 | return 60 | }, 61 | } 62 | 63 | func init() { 64 | buildCmd.AddCommand(weightbootCmd) 65 | weightbootCmd.PersistentFlags().StringVarP(&weightbootOutput, "output", "o", "stdout", "Weight vectors output file") 66 | weightbootCmd.PersistentFlags().IntVarP(&weightbootnb, "nboot", "n", 1, "Number of bootstrap replicates to build") 67 | } 68 | -------------------------------------------------------------------------------- /distance/distance.go: -------------------------------------------------------------------------------- 1 | package distance 2 | -------------------------------------------------------------------------------- /distance/dna/f81.go: -------------------------------------------------------------------------------- 1 | package dna 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | 7 | "github.com/evolbioinfo/goalign/align" 8 | ) 9 | 10 | type F81Model struct { 11 | pi []float64 // Vector of nt stationary proba 12 | b1 float64 // Parameter for distance computation 13 | numSites float64 // Number of selected sites (no gaps) 14 | selectedSites []bool // true for selected sites 15 
| removegaps bool // If true, we will remove posision with >=1 gaps 16 | gamma bool 17 | alpha float64 18 | sequenceCodes [][]uint8 // Sequences converted into int codes 19 | } 20 | 21 | func NewF81Model(removegaps bool) *F81Model { 22 | return &F81Model{ 23 | nil, 24 | 0, 25 | 0, 26 | nil, 27 | removegaps, 28 | false, 29 | 0., 30 | nil, 31 | } 32 | } 33 | 34 | // Distance computes F81 distance between 2 sequences 35 | func (m *F81Model) Distance(seq1 []uint8, seq2 []uint8, weights []float64) (float64, error) { 36 | var dist float64 37 | 38 | diff, total := countDiffs(seq1, seq2, m.selectedSites, weights, false) 39 | diff = diff / total 40 | 41 | if m.gamma { 42 | dist = 1. * m.b1 * m.alpha * (math.Pow(1.-diff/m.b1, -1./m.alpha) - 1.) 43 | } else { 44 | dist = -1. * m.b1 * math.Log(1.-diff/m.b1) 45 | } 46 | if dist > 0 { 47 | return dist, nil 48 | } 49 | return 0, nil 50 | } 51 | 52 | func (m *F81Model) InitModel(al align.Alignment, weights []float64, gamma bool, alpha float64) (err error) { 53 | m.gamma = gamma 54 | m.alpha = alpha 55 | m.numSites, m.selectedSites = selectedSites(al, weights, m.removegaps) 56 | if m.sequenceCodes, err = alignmentToCodes(al); err != nil { 57 | return 58 | } 59 | m.b1 = 0.0 60 | m.pi, err = probaNt(m.sequenceCodes, m.selectedSites, weights) 61 | if err == nil { 62 | for i := range m.pi { 63 | m.b1 += m.pi[i] * m.pi[i] 64 | } 65 | m.b1 = 1 - m.b1 66 | } 67 | return 68 | } 69 | 70 | // Sequence returns the ith sequence of the alignment 71 | // encoded in int 72 | func (m *F81Model) Sequence(i int) (seq []uint8, err error) { 73 | if i < 0 || i >= len(m.sequenceCodes) { 74 | err = fmt.Errorf("This sequence does not exist: %d", i) 75 | return 76 | } 77 | seq = m.sequenceCodes[i] 78 | return 79 | } 80 | -------------------------------------------------------------------------------- /distance/dna/f84.go: -------------------------------------------------------------------------------- 1 | package dna 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 
| 7 | "github.com/evolbioinfo/goalign/align" 8 | ) 9 | 10 | type F84Model struct { 11 | pi []float64 // Vector of nt stationary proba 12 | a, b, c float64 // Parameters for distance computation 13 | numSites float64 // Number of selected sites (no gaps) 14 | selectedSites []bool // true for selected sites 15 | removegaps bool // If true, we will remove posision with >=1 gaps 16 | gamma bool 17 | alpha float64 18 | sequenceCodes [][]uint8 // Sequences converted to codes 19 | } 20 | 21 | func NewF84Model(removegaps bool) *F84Model { 22 | return &F84Model{ 23 | nil, 24 | 0, 0, 0, 25 | 0, 26 | nil, 27 | removegaps, 28 | false, 29 | 0., 30 | nil, 31 | } 32 | } 33 | 34 | /* computes F84 distance between 2 sequences */ 35 | func (m *F84Model) Distance(seq1 []uint8, seq2 []uint8, weights []float64) (float64, error) { 36 | var dist float64 37 | 38 | trS, trV, _, _, total := countMutations(seq1, seq2, m.selectedSites, weights) 39 | trS, trV = trS/total, trV/total 40 | if m.gamma { 41 | dist = 2.0 * m.alpha * (m.a*math.Pow((1.0-trS/(2.0*m.a)-(m.a-m.b)*trV/(2.0*m.a*m.c)), -1./m.alpha) + 42 | (m.b+m.c-m.a)*math.Pow((1-trV/(2.0*m.c)), -1./m.alpha) - 43 | m.b - m.c) 44 | } else { 45 | dist = -2.0*m.a*math.Log(1.0-trS/(2.0*m.a)-(m.a-m.b)*trV/(2.0*m.a*m.c)) + 2.0*(m.a-m.b-m.c)*math.Log(1-trV/(2.0*m.c)) 46 | } 47 | 48 | return dist, nil 49 | } 50 | 51 | func (m *F84Model) InitModel(al align.Alignment, weights []float64, gamma bool, alpha float64) (err error) { 52 | m.gamma = gamma 53 | m.alpha = alpha 54 | m.numSites, m.selectedSites = selectedSites(al, weights, m.removegaps) 55 | if m.sequenceCodes, err = alignmentToCodes(al); err != nil { 56 | return 57 | } 58 | m.pi, err = probaNt(m.sequenceCodes, m.selectedSites, weights) 59 | if err == nil { 60 | m.a = m.pi[0]*m.pi[2]/(m.pi[0]+m.pi[2]) + m.pi[1]*m.pi[3]/(m.pi[1]+m.pi[3]) 61 | m.b = m.pi[0]*m.pi[2] + m.pi[1]*m.pi[3] 62 | m.c = (m.pi[0] + m.pi[2]) * (m.pi[1] + m.pi[3]) 63 | } 64 | return 65 | } 66 | 67 | // Sequence returns the 
ith sequence of the alignment 68 | // encoded in int 69 | func (m *F84Model) Sequence(i int) (seq []uint8, err error) { 70 | if i < 0 || i >= len(m.sequenceCodes) { 71 | err = fmt.Errorf("This sequence does not exist: %d", i) 72 | return 73 | } 74 | seq = m.sequenceCodes[i] 75 | return 76 | } 77 | -------------------------------------------------------------------------------- /distance/dna/jc.go: -------------------------------------------------------------------------------- 1 | package dna 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | 7 | "github.com/evolbioinfo/goalign/align" 8 | ) 9 | 10 | type JCModel struct { 11 | numSites float64 // Number of selected sites (no gaps) 12 | selectedSites []bool // true for selected sites 13 | removegaps bool // If true, we will remove posision with >=1 gaps 14 | gamma bool 15 | alpha float64 16 | sequenceCodes [][]uint8 // Sequences converted into int codes 17 | } 18 | 19 | func NewJCModel(removegaps bool) *JCModel { 20 | return &JCModel{ 21 | 0, 22 | nil, 23 | removegaps, 24 | false, 25 | 0., 26 | nil, 27 | } 28 | } 29 | 30 | // Distance computes JC69 distance between 2 sequences 31 | func (m *JCModel) Distance(seq1 []uint8, seq2 []uint8, weights []float64) (float64, error) { 32 | var dist float64 33 | diff, total := countDiffs(seq1, seq2, m.selectedSites, weights, false) 34 | diff = diff / total 35 | b := 1. - 4.*diff/3. 36 | if m.gamma { 37 | dist = .75 * m.alpha * (math.Pow(b, -1./m.alpha) - 1.) 
38 | } else { 39 | dist = -.75 * math.Log(b) 40 | } 41 | if dist > 0 { 42 | return dist, nil 43 | } else { 44 | return 0, nil 45 | } 46 | } 47 | 48 | func (m *JCModel) InitModel(al align.Alignment, weights []float64, gamma bool, alpha float64) (err error) { 49 | m.gamma = gamma 50 | m.alpha = alpha 51 | m.numSites, m.selectedSites = selectedSites(al, weights, m.removegaps) 52 | m.sequenceCodes, err = alignmentToCodes(al) 53 | return 54 | } 55 | 56 | // Sequence returns the ith sequence of the alignment 57 | // encoded in int 58 | func (m *JCModel) Sequence(i int) (seq []uint8, err error) { 59 | if i < 0 || i >= len(m.sequenceCodes) { 60 | err = fmt.Errorf("This sequence does not exist: %d", i) 61 | return 62 | } 63 | seq = m.sequenceCodes[i] 64 | return 65 | } 66 | -------------------------------------------------------------------------------- /distance/dna/k2p.go: -------------------------------------------------------------------------------- 1 | package dna 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | 7 | "github.com/evolbioinfo/goalign/align" 8 | ) 9 | 10 | type K2PModel struct { 11 | numSites float64 // Number of selected sites (no gaps) 12 | selectedSites []bool // true for selected sites 13 | removegaps bool // If true, we will remove posision with >=1 gaps 14 | gamma bool 15 | alpha float64 16 | sequenceCodes [][]uint8 // Sequences converted into int codes 17 | } 18 | 19 | func NewK2PModel(removegaps bool) *K2PModel { 20 | return &K2PModel{ 21 | 0, 22 | nil, 23 | removegaps, 24 | false, 25 | 0., 26 | nil, 27 | } 28 | } 29 | 30 | /* computes K2P distance between 2 sequences */ 31 | func (m *K2PModel) Distance(seq1 []uint8, seq2 []uint8, weights []float64) (float64, error) { 32 | var dist float64 33 | 34 | trS, trV, _, _, total := countMutations(seq1, seq2, m.selectedSites, weights) 35 | trS, trV = trS/total, trV/total 36 | 37 | if m.gamma { 38 | dist = m.alpha * (.5*math.Pow(1.-2.*trS-trV, -1./m.alpha) + .25*math.Pow(1.-2.*trV, -1./m.alpha) - .75) 39 | } else 
// RawDistModel is like pdist, but without normalization by the number
// of sites: its Distance method returns a raw count of differences.
type RawDistModel struct {
	numSites      float64 // Number of selected sites (no gaps)
	selectedSites []bool  // true for selected sites
	removegaps    bool    // If true, we will remove positions with >=1 gaps
	// countgapmut selects how Distance counts a gap facing a character
	// ('-' vs 'A'):
	//   GAP_COUNT_NONE (0):     not counted as a mutation (default)
	//   GAP_COUNT_INTERNAL (1): counted via countDiffsWithInternalGaps
	//                           (presumably internal gaps only — confirm)
	//   GAP_COUNT_ALL (2):      counted via countDiffsWithGaps
	countgapmut   int
	sequenceCodes [][]uint8 // Sequences converted into int codes
}
SetCountGapMutations(countgapmut int) (err error) { 41 | if countgapmut < 0 || countgapmut > 2 { 42 | err = fmt.Errorf("Gap count mode not available : %d", countgapmut) 43 | } else { 44 | m.countgapmut = countgapmut 45 | } 46 | return 47 | } 48 | 49 | // Distance computes the number of differences between 2 sequences 50 | // These differences include gaps vs. nt 51 | func (m *RawDistModel) Distance(seq1 []uint8, seq2 []uint8, weights []float64) (diff float64, err error) { 52 | switch m.countgapmut { 53 | case GAP_COUNT_ALL: 54 | diff, _ = countDiffsWithGaps(seq1, seq2, m.selectedSites, weights, false) 55 | case GAP_COUNT_INTERNAL: 56 | diff, _ = countDiffsWithInternalGaps(seq1, seq2, m.selectedSites, weights, false) 57 | default: 58 | diff, _ = countDiffs(seq1, seq2, m.selectedSites, weights, false) 59 | } 60 | return 61 | } 62 | 63 | func (m *RawDistModel) InitModel(al align.Alignment, weights []float64, gamma bool, alpha float64) (err error) { 64 | m.numSites, m.selectedSites = selectedSites(al, weights, m.removegaps) 65 | m.sequenceCodes, err = alignmentToCodes(al) 66 | return 67 | } 68 | 69 | // Sequence returns the ith sequence of the alignment 70 | // encoded in int 71 | func (m *RawDistModel) Sequence(i int) (seq []uint8, err error) { 72 | if i < 0 || i >= len(m.sequenceCodes) { 73 | err = fmt.Errorf("This sequence does not exist: %d", i) 74 | return 75 | } 76 | seq = m.sequenceCodes[i] 77 | return 78 | } 79 | -------------------------------------------------------------------------------- /distance/dna/tn82.go: -------------------------------------------------------------------------------- 1 | //+ build ignore 2 | 3 | package dna 4 | 5 | import ( 6 | "fmt" 7 | "math" 8 | 9 | "github.com/evolbioinfo/goalign/align" 10 | ) 11 | 12 | type TN82Model struct { 13 | /* Vector of nt proba */ 14 | pi []float64 // proba of each nt 15 | numSites float64 // Number of selected sites (no gaps) 16 | selectedSites []bool // true for selected sites 17 | removegaps bool // 
If true, we will remove posision with >=1 gaps 18 | sequenceCodes [][]uint8 // Sequences converted into int codes 19 | } 20 | 21 | func NewTN82Model(removegaps bool) *TN82Model { 22 | return &TN82Model{ 23 | nil, 24 | 0, 25 | nil, 26 | removegaps, 27 | nil, 28 | } 29 | } 30 | 31 | // Distance computes TN82 distance between 2 sequences 32 | func (m *TN82Model) Distance(seq1 []uint8, seq2 []uint8, weights []float64) (float64, error) { 33 | diff, total := countDiffs(seq1, seq2, m.selectedSites, weights, false) 34 | diff = diff / total 35 | 36 | psi := init2DFloat(4, 4) 37 | totalPairs, err := countNtPairs2Seq(seq1, seq2, m.selectedSites, weights, psi) 38 | if err != nil { 39 | return 0.0, err 40 | } 41 | for i := 0; i < 4; i++ { 42 | for j := 0; j < 4; j++ { 43 | psi[i][j] = psi[i][j] / totalPairs 44 | } 45 | } 46 | denom := 0.0 47 | for i := 0; i < 4; i++ { 48 | for j := i + 1; j < 4; j++ { 49 | denom += psi[i][j] * psi[i][j] / (2 * m.pi[i] * m.pi[j]) 50 | } 51 | } 52 | b1 := diff * diff / denom 53 | dist := -1.0 * b1 * math.Log(1.0-diff/b1) 54 | if dist > 0 { 55 | return dist, nil 56 | } else { 57 | return 0, nil 58 | } 59 | } 60 | 61 | func (m *TN82Model) InitModel(al align.Alignment, weights []float64, gamma bool, alpha float64) (err error) { 62 | m.numSites, m.selectedSites = selectedSites(al, weights, m.removegaps) 63 | if m.sequenceCodes, err = alignmentToCodes(al); err != nil { 64 | return 65 | } 66 | m.pi, err = probaNt(m.sequenceCodes, m.selectedSites, weights) 67 | return 68 | } 69 | 70 | // Sequence returns the ith sequence of the alignment 71 | // encoded in int 72 | func (m *TN82Model) Sequence(i int) (seq []uint8, err error) { 73 | if i < 0 || i >= len(m.sequenceCodes) { 74 | err = fmt.Errorf("This sequence does not exist: %d", i) 75 | return 76 | } 77 | seq = m.sequenceCodes[i] 78 | return 79 | } 80 | -------------------------------------------------------------------------------- /docs/api/append.md: 
-------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### append 6 | 7 | Append alignments to an input alignment. 8 | 9 | This command adds the sequences of a set of alignments to a reference alignment 10 | specified by -i. 11 | 12 | If sequences do not have the same length as the reference alignment, an error is returned. 13 | 14 | If format is phylip, it may contain several alignments in one file. 15 | Then we can append all of them at once: 16 | goalign append -i refalign.phy aligns.phy 17 | 18 | If format is Fasta, several alignments may be given in the form: 19 | goalign append -i align.fasta others*.fasta 20 | 21 | ```go 22 | package main 23 | 24 | import ( 25 | "bufio" 26 | "fmt" 27 | "io" 28 | 29 | "github.com/evolbioinfo/goalign/align" 30 | "github.com/evolbioinfo/goalign/io/fasta" 31 | "github.com/evolbioinfo/goalign/io/utils" 32 | ) 33 | 34 | func main() { 35 | var fi, fi2 io.Closer 36 | var r, r2 *bufio.Reader 37 | var err error 38 | var a1, a2 align.Alignment 39 | 40 | /* First alignment */ 41 | /* Get reader (plain text or gzip) */ 42 | if fi, r, err = utils.GetReader("align_1.fa"); err != nil { 43 | panic(err) 44 | } 45 | /* Parse Fasta */ 46 | if a1, err = fasta.NewParser(r).Parse(); err != nil { 47 | panic(err) 48 | } 49 | fi.Close() 50 | 51 | /* Second alignment */ 52 | /* Get reader (plain text or gzip) */ 53 | if fi2, r2, err = utils.GetReader("align_2.fa"); err != nil { 54 | panic(err) 55 | } 56 | /* Parse Fasta */ 57 | if a2, err = fasta.NewParser(r2).Parse(); err != nil { 58 | panic(err) 59 | } 60 | fi2.Close() 61 | 62 | /* Printing new alignment */ 63 | if err = a1.Append(a2); err != nil { 64 | panic(err) 65 | } 66 | fmt.Println(fasta.WriteAlignment(a1)) 67 | } 68 | ``` 69 | -------------------------------------------------------------------------------- /docs/api/codonalign.md: 
-------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### codonalign 6 | 7 | Aligns a given nt fasta file using a corresponding aa alignment. 8 | 9 | ```go 10 | package main 11 | 12 | import ( 13 | "bufio" 14 | "fmt" 15 | "io" 16 | 17 | "github.com/evolbioinfo/goalign/align" 18 | "github.com/evolbioinfo/goalign/io/fasta" 19 | "github.com/evolbioinfo/goalign/io/utils" 20 | ) 21 | 22 | func main() { 23 | var fi, fi2 io.Closer 24 | var r, r2 *bufio.Reader 25 | var err error 26 | var aa, codonaligned align.Alignment 27 | var nt align.SeqBag 28 | 29 | /* Amino Acid Alignment */ 30 | 31 | /* Get reader (plain text or gzip) */ 32 | if fi, r, err = utils.GetReader("align_aa.fa"); err != nil { 33 | panic(err) 34 | } 35 | 36 | /* Parse Fasta */ 37 | if aa, err = fasta.NewParser(r).Parse(); err != nil { 38 | panic(err) 39 | } 40 | fi.Close() 41 | 42 | /* Nt fasta sequences */ 43 | 44 | /* Get reader (plain text or gzip) */ 45 | if fi2, r2, err = utils.GetReader("align_nt.fa"); err != nil { 46 | panic(err) 47 | } 48 | 49 | /* Parse Fasta */ 50 | if nt, err = fasta.NewParser(r2).ParseUnalign(); err != nil { 51 | panic(err) 52 | } 53 | fi2.Close() 54 | 55 | /* Printing codon-aligned nt sequences */ 56 | if codonaligned, err = aa.CodonAlign(nt); err != nil { 57 | panic(err) 58 | } 59 | fmt.Println(fasta.WriteAlignment(codonaligned)) 60 | } 61 | ``` 62 | -------------------------------------------------------------------------------- /docs/api/compress.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### compress 6 | 7 | Remove identical patterns/sites 8 | 9 | ```go 10 | package main 11 | 12 | import ( 13 | "bufio" 14 | "fmt" 15 | "io" 16 | 17 | "github.com/evolbioinfo/goalign/align" 18 | "github.com/evolbioinfo/goalign/io/fasta" 19 | 
"github.com/evolbioinfo/goalign/io/utils" 20 | ) 21 | 22 | func main() { 23 | var fi io.Closer 24 | var r *bufio.Reader 25 | var err error 26 | var al align.Alignment 27 | var weights []int 28 | 29 | /* Get reader (plain text or gzip) */ 30 | fi, r, err = utils.GetReader("align.fa") 31 | if err != nil { 32 | panic(err) 33 | } 34 | 35 | /* Parse Fasta */ 36 | al, err = fasta.NewParser(r).Parse() 37 | if err != nil { 38 | panic(err) 39 | } 40 | fi.Close() 41 | 42 | /* Compress */ 43 | weights = al.Compress() 44 | fmt.Println(fasta.WriteAlignment(al)) 45 | for _, w := range weights { 46 | fmt.Println(w) 47 | } 48 | } 49 | ``` 50 | -------------------------------------------------------------------------------- /docs/api/consensus.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### consensus 6 | 7 | ```go 8 | package main 9 | 10 | import ( 11 | "bufio" 12 | "fmt" 13 | "io" 14 | 15 | "github.com/evolbioinfo/goalign/align" 16 | "github.com/evolbioinfo/goalign/io/fasta" 17 | "github.com/evolbioinfo/goalign/io/utils" 18 | ) 19 | 20 | func main() { 21 | var fi io.Closer 22 | var r *bufio.Reader 23 | var err error 24 | var al, cons align.Alignment 25 | var ignoreGaps bool = false 26 | var ignoreNs bool = false 27 | 28 | /* Get reader (plain text or gzip) */ 29 | fi, r, err = utils.GetReader("align.fa") 30 | if err != nil { 31 | panic(err) 32 | } 33 | 34 | /* Parse Fasta */ 35 | al, err = fasta.NewParser(r).Parse() 36 | if err != nil { 37 | panic(err) 38 | } 39 | fi.Close() 40 | 41 | /* Consensus */ 42 | cons = al.Consensus(ignoreGaps, ignoreNs) 43 | 44 | fmt.Println(fasta.WriteAlignment(cons)) 45 | } 46 | ``` -------------------------------------------------------------------------------- /docs/api/dedup.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 
4 | 5 | ### dedup 6 | 7 | Deduplicate sequences 8 | 9 | ```go 10 | package main 11 | 12 | import ( 13 | "bufio" 14 | "fmt" 15 | "io" 16 | 17 | "github.com/evolbioinfo/goalign/align" 18 | "github.com/evolbioinfo/goalign/io/fasta" 19 | "github.com/evolbioinfo/goalign/io/utils" 20 | ) 21 | 22 | func main() { 23 | var fi io.Closer 24 | var r *bufio.Reader 25 | var err error 26 | var al align.Alignment 27 | 28 | /* Get reader (plain text or gzip) */ 29 | fi, r, err = utils.GetReader("align.fa") 30 | if err != nil { 31 | panic(err) 32 | } 33 | 34 | /* Parse Fasta */ 35 | al, err = fasta.NewParser(r).Parse() 36 | if err != nil { 37 | panic(err) 38 | } 39 | fi.Close() 40 | 41 | /* Deduplicate */ 42 | if _,err = al.Deduplicate(false); err != nil { 43 | panic(err) 44 | } else { 45 | fmt.Println(fasta.WriteAlignment(al)) 46 | } 47 | } 48 | ``` 49 | -------------------------------------------------------------------------------- /docs/api/diff.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### diff 6 | 7 | Replace all identical characters with "." 
8 | 9 | ```go 10 | package main 11 | 12 | import ( 13 | "bufio" 14 | "fmt" 15 | "io" 16 | 17 | "github.com/evolbioinfo/goalign/align" 18 | "github.com/evolbioinfo/goalign/io/fasta" 19 | "github.com/evolbioinfo/goalign/io/utils" 20 | ) 21 | 22 | func main() { 23 | var fi io.Closer 24 | var r *bufio.Reader 25 | var err error 26 | var al align.Alignment 27 | 28 | /* Get reader (plain text or gzip) */ 29 | fi, r, err = utils.GetReader("align.fa") 30 | if err != nil { 31 | panic(err) 32 | } 33 | 34 | /* Parse Fasta */ 35 | al, err = fasta.NewParser(r).Parse() 36 | if err != nil { 37 | panic(err) 38 | } 39 | fi.Close() 40 | 41 | /* Diff */ 42 | al.DiffWithFirst() 43 | fmt.Println(fasta.WriteAlignment(al)) 44 | } 45 | ``` 46 | -------------------------------------------------------------------------------- /docs/api/draw.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### draw 6 | 7 | Drawing an alignment in an HTML file using [BioJS](http://msa.biojs.net/). 
8 | 9 | ```go 10 | package main 11 | 12 | import ( 13 | "bufio" 14 | "io" 15 | "os" 16 | 17 | "github.com/evolbioinfo/goalign/align" 18 | "github.com/evolbioinfo/goalign/draw" 19 | "github.com/evolbioinfo/goalign/io/fasta" 20 | "github.com/evolbioinfo/goalign/io/utils" 21 | ) 22 | 23 | func main() { 24 | var infile io.Closer 25 | var outfile *os.File 26 | 27 | var r *bufio.Reader 28 | var err error 29 | 30 | var al align.Alignment 31 | var l draw.AlignLayout 32 | 33 | /* Get reader (plain text or gzip) */ 34 | infile, r, err = utils.GetReader("align.fa") 35 | if err != nil { 36 | panic(err) 37 | } 38 | 39 | /* Parse Fasta */ 40 | al, err = fasta.NewParser(r).Parse() 41 | if err != nil { 42 | panic(err) 43 | } 44 | infile.Close() 45 | 46 | outfile, err = os.Create("align.html") 47 | w := bufio.NewWriter(outfile) 48 | l = draw.NewBioJSLayout(w) 49 | l.DrawAlign(al) 50 | w.Flush() 51 | outfile.Close() 52 | } 53 | ``` 54 | -------------------------------------------------------------------------------- /docs/api/identical.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### identical 6 | 7 | Telling whether two alignments are identical 8 | 9 | ```go 10 | package main 11 | 12 | import ( 13 | "bufio" 14 | "fmt" 15 | "io" 16 | 17 | "github.com/evolbioinfo/goalign/align" 18 | "github.com/evolbioinfo/goalign/io/fasta" 19 | "github.com/evolbioinfo/goalign/io/utils" 20 | ) 21 | 22 | func main() { 23 | var fi, fi2 io.Closer 24 | var r, r2 *bufio.Reader 25 | var err error 26 | var al, al2 align.Alignment 27 | 28 | /* First Alignment*/ 29 | 30 | /* Get reader (plain text or gzip) */ 31 | if fi, r, err = utils.GetReader("align.fa"); err != nil { 32 | panic(err) 33 | } 34 | 35 | /* Parse Fasta */ 36 | if al, err = fasta.NewParser(r).Parse(); err != nil { 37 | panic(err) 38 | } 39 | fi.Close() 40 | 41 | /* Second Alignments */ 42 | 43 | /* Get reader (plain text or 
gzip) */ 44 | if fi2, r2, err = utils.GetReader("align2.fa"); err != nil { 45 | panic(err) 46 | } 47 | 48 | /* Parse Fasta */ 49 | if al2, err = fasta.NewParser(r2).Parse(); err != nil { 50 | panic(err) 51 | } 52 | fi2.Close() 53 | 54 | /* Printing whether the two alignments are identical */ 55 | fmt.Println(al.Identical(al2)) 56 | } 57 | ``` 58 | -------------------------------------------------------------------------------- /docs/api/mask.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### mask 6 | 7 | Mask part of an input fasta alignment 8 | 9 | ```go 10 | package main 11 | 12 | import ( 13 | "bufio" 14 | "fmt" 15 | "io" 16 | 17 | "github.com/evolbioinfo/goalign/align" 18 | "github.com/evolbioinfo/goalign/io/fasta" 19 | "github.com/evolbioinfo/goalign/io/utils" 20 | ) 21 | 22 | func main() { 23 | var fi io.Closer 24 | var r *bufio.Reader 25 | var err error 26 | var al align.Alignment 27 | 28 | /* First Alignment*/ 29 | 30 | /* Get reader (plain text or gzip) */ 31 | if fi, r, err = utils.GetReader("align.fa"); err != nil { 32 | panic(err) 33 | } 34 | defer fi.Close() 35 | 36 | /* Parse Fasta */ 37 | if al, err = fasta.NewParser(r).Parse(); err != nil { 38 | panic(err) 39 | } 40 | 41 | if err = al.Mask(0, 2, ""); err != nil { 42 | panic(err) 43 | } 44 | 45 | fmt.Println(fasta.WriteAlignment(al)) 46 | } 47 | ``` 48 | -------------------------------------------------------------------------------- /docs/api/orf.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### Orf 6 | 7 | Detect the longest orf in an input dataset 8 | 9 | ```go 10 | package main 11 | 12 | import ( 13 | "bufio" 14 | "fmt" 15 | "io" 16 | 17 | "github.com/evolbioinfo/goalign/align" 18 | "github.com/evolbioinfo/goalign/io/fasta" 19 | 
"github.com/evolbioinfo/goalign/io/utils" 20 | ) 21 | 
22 | func main() { 23 | var fi io.Closer 24 | var r *bufio.Reader 25 | var err error 26 | var seqs align.SeqBag 27 | var orf align.Sequence 28 | 29 | // Get reader (plain text or gzip) 30 | if fi, r, err = utils.GetReader("seqs.fa"); err != nil { 31 | panic(err) 32 | } 33 | defer fi.Close() 34 | 35 | // Parse Fasta unaligned sequence file 36 | if seqs, err = fasta.NewParser(r).ParseUnalign(); err != nil { 37 | panic(err) 38 | } 39 | // Removing '-' 40 | seqs = seqs.Unalign() 41 | // Search for the longest orf 42 | if orf, err = seqs.LongestORF(); err != nil { 43 | panic(err) 44 | } 45 | // Print sequence 46 | fmt.Println(orf.Sequence()) 47 | } 48 | ``` 49 | -------------------------------------------------------------------------------- /docs/api/phase.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### phase 6 | 7 | Printing amino acid sequence "phased" version of an input file 8 | 9 | ```go 10 | package main 11 | 12 | import ( 13 | "bufio" 14 | "fmt" 15 | "io" 16 | 17 | "github.com/evolbioinfo/goalign/align" 18 | "github.com/evolbioinfo/goalign/io/fasta" 19 | "github.com/evolbioinfo/goalign/io/utils" 20 | ) 21 | 22 | func main() { 23 | var fi, fi2 io.Closer 24 | var r, r2 *bufio.Reader 25 | var err error 26 | var seqs, refs align.SeqBag 27 | var phaser align.Phaser 28 | var phased chan align.PhasedSequence 29 | 30 | /* Get ref sequences reader (plain text or gzip) */ 31 | if fi, r, err = utils.GetReader("refs.fa"); err != nil { 32 | panic(err) 33 | } 34 | defer fi.Close() 35 | 36 | /* Parse Reference orfs (nt or aa) */ 37 | if refs, err = fasta.NewParser(r).ParseUnalign(); err != nil { 38 | panic(err) 39 | } 40 | 41 | /* Get sequences reader (plain text or gzip) */ 42 | if fi2, r2, err = utils.GetReader("align.fa"); err != nil { 43 | panic(err) 44 | } 45 | defer fi2.Close() 46 | 47 | /* Parse Fasta sequences */ 48 | seqs, err = 
fasta.NewParser(r2).ParseUnalign() 49 | if err != nil { 50 | panic(err) 51 | } 52 | 53 | /* Initialize phaser */ 54 | phaser = align.NewPhaser() 55 | phaser.SetLenCutoff(0.8) 56 | phaser.SetMatchCutoff(0.8) 57 | phaser.SetReverse(true) 58 | phaser.SetCutEnd(true) 59 | phaser.SetCpus(1) 60 | phaser.SetTranslate(true) 61 | 62 | /* Phase */ 63 | if phased, err = phaser.Phase(refs, seqs); err != nil { 64 | panic(err) 65 | } 66 | 67 | for p := range phased { 68 | if p.Err != nil { 69 | panic(p.Err) 70 | } 71 | if p.Removed { 72 | fmt.Printf("%s: Removed\n", p.NtSeq.Name()) 73 | } else { 74 | fmt.Printf("Nt Sequence %s : %s\n", p.NtSeq.Name(), p.NtSeq.Sequence()) 75 | fmt.Printf("Aa Sequence %s : %s\n", p.AaSeq.Name(), p.AaSeq.Sequence()) 76 | fmt.Printf("Position: %d\n", p.Position) 77 | fmt.Printf("Length: %d\n", p.AaSeq.Length()) 78 | } 79 | } 80 | } 81 | ``` 82 | -------------------------------------------------------------------------------- /docs/api/reformat.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### reformat 6 | 7 | Reformat a fasta input alignment into different formats 8 | 9 | ```go 10 | package main 11 | 12 | import ( 13 | "bufio" 14 | "fmt" 15 | "io" 16 | 17 | "github.com/evolbioinfo/goalign/align" 18 | "github.com/evolbioinfo/goalign/io/fasta" 19 | "github.com/evolbioinfo/goalign/io/nexus" 20 | "github.com/evolbioinfo/goalign/io/paml" 21 | "github.com/evolbioinfo/goalign/io/phylip" 22 | "github.com/evolbioinfo/goalign/io/clustal" 23 | "github.com/evolbioinfo/goalign/io/utils" 24 | ) 25 | 26 | func main() { 27 | var fi io.Closer 28 | var r *bufio.Reader 29 | var err error 30 | var al align.Alignment 31 | 32 | /* Get reader (plain text or gzip) */ 33 | fi, r, err = utils.GetReader("align.fa") 34 | if err != nil { 35 | panic(err) 36 | } 37 | 38 | /* Parse Fasta */ 39 | al, err = fasta.NewParser(r).Parse() 40 | if err != nil { 41 | 
panic(err) 42 | } 43 | fi.Close() 44 | 45 | /* Printing FASTA alignment */ 46 | fmt.Println(fasta.WriteAlignment(al)) 47 | /* Printing PHYLIP alignment */ 48 | fmt.Println(phylip.WriteAlignment(al, false)) 49 | /* Printing NEXUS alignment */ 50 | fmt.Println(nexus.WriteAlignment(al)) 51 | /* Printing PAML format alignment */ 52 | fmt.Println(paml.WriteAlignment(al)) 53 | /* Printing Clustal format alignment */ 54 | fmt.Println(clustal.WriteAlignment(al)) 55 | } 56 | ``` 57 | -------------------------------------------------------------------------------- /docs/api/replace.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### replace 6 | 7 | Replace characters in sequences of input alignment using a regex. 8 | 9 | ```go 10 | package main 11 | 12 | import ( 13 | "bufio" 14 | "fmt" 15 | "io" 16 | 17 | "github.com/evolbioinfo/goalign/align" 18 | "github.com/evolbioinfo/goalign/io/fasta" 19 | "github.com/evolbioinfo/goalign/io/utils" 20 | ) 21 | 22 | func main() { 23 | var fi io.Closer 24 | var r *bufio.Reader 25 | var err error 26 | var al align.SeqBag 27 | // or var al align.Alignment if aligned sequences 28 | 29 | /* First Alignment*/ 30 | 31 | /* Get reader (plain text or gzip) */ 32 | if fi, r, err = utils.GetReader("align.fa"); err != nil { 33 | panic(err) 34 | } 35 | defer fi.Close() 36 | 37 | /* Parse Fasta */ 38 | if al, err = fasta.NewParser(r).ParseUnalign(); err != nil { 39 | /* for aligned seqs: if al, err = fasta.NewParser(r).Parse(); err != nil {*/ 40 | panic(err) 41 | } 42 | 43 | if err = al.Replace("GA.", "---", true); err != nil { 44 | panic(err) 45 | } 46 | 47 | fmt.Println(fasta.WriteAlignment(al)) 48 | } 49 | ``` 50 | 51 | Replace characters in sequences of input alignment using its position+sequence name. 
52 | 53 | ```go 54 | package main 55 | 56 | import ( 57 | "bufio" 58 | "fmt" 59 | "io" 60 | 61 | "github.com/evolbioinfo/goalign/align" 62 | "github.com/evolbioinfo/goalign/io/fasta" 63 | "github.com/evolbioinfo/goalign/io/utils" 64 | ) 65 | 66 | func main() { 67 | var fi io.Closer 68 | var r *bufio.Reader 69 | var err error 70 | var al align.SeqBag 71 | // or var al align.Alignment if aligned sequences 72 | 73 | /* First Alignment*/ 74 | 75 | /* Get reader (plain text or gzip) */ 76 | if fi, r, err = utils.GetReader("align.fa"); err != nil { 77 | panic(err) 78 | } 79 | defer fi.Close() 80 | 81 | /* Parse Fasta */ 82 | if al, err = fasta.NewParser(r).Parse(); err != nil { 83 | /* for aligned seqs: if al, err = fasta.NewParser(r).Parse(); err != nil {*/ 84 | panic(err) 85 | } 86 | 87 | // Will write a G at position 9 (0 based) of sequence "Seq001" 88 | if err = al.ReplaceChar("Seq001", 9, "G"); err != nil { 89 | panic(err) 90 | } 91 | 92 | fmt.Println(fasta.WriteAlignment(al)) 93 | } 94 | ``` 95 | -------------------------------------------------------------------------------- /docs/api/revcomp.md: -------------------------------------------------------------------------------- 1 | 2 | # Goalign: toolkit and api for alignment manipulation 3 | 4 | ## API 5 | 6 | ### revcomp 7 | 8 | This command reverse complements an input alignment. 
9 | 10 | ```go 11 | package main 12 | 13 | import ( 14 | "bufio" 15 | "fmt" 16 | "io" 17 | 18 | "github.com/evolbioinfo/goalign/align" 19 | "github.com/evolbioinfo/goalign/io/fasta" 20 | "github.com/evolbioinfo/goalign/io/utils" 21 | ) 22 | 23 | func main() { 24 | var fi io.Closer 25 | var r *bufio.Reader 26 | var err error 27 | var al align.Alignment 28 | 29 | /* Get reader (plain text or gzip) */ 30 | fi, r, err = utils.GetReader("align.fa") 31 | if err != nil { 32 | panic(err) 33 | } 34 | 35 | /* Parse Fasta */ 36 | al, err = fasta.NewParser(r).Parse() 37 | if err != nil { 38 | panic(err) 39 | } 40 | fi.Close() 41 | 42 | /* Reverse complement */ 43 | if err = al.ReverseComplement(); err != nil { 44 | panic(err) 45 | } else { 46 | fmt.Println(fasta.WriteAlignment(al)) 47 | } 48 | } 49 | ``` 50 | -------------------------------------------------------------------------------- /docs/api/split.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### split 6 | 7 | Splitting an input alignment according to a partition file: 8 | 9 | ```go 10 | package main 11 | 12 | import ( 13 | "bufio" 14 | "fmt" 15 | "io" 16 | 17 | "github.com/evolbioinfo/goalign/align" 18 | "github.com/evolbioinfo/goalign/io/fasta" 19 | "github.com/evolbioinfo/goalign/io/partition" 20 | "github.com/evolbioinfo/goalign/io/utils" 21 | ) 22 | 23 | func main() { 24 | var fi io.Closer 25 | var partitionReader *bufio.Reader 26 | var alignmentReader *bufio.Reader 27 | var partitionParser *partition.Parser 28 | var splitAligns []align.Alignment 29 | var ps *align.PartitionSet 30 | var err error 31 | var al align.Alignment 32 | 33 | /* Parse Fasta */ 34 | if fi, alignmentReader, err = utils.GetReader("align.fa"); err != nil { 35 | panic(err) 36 | } 37 | defer fi.Close() 38 | 39 | if al, err = fasta.NewParser(alignmentReader).Parse(); err != nil { 40 | panic(err) 41 | } 42 | defer fi.Close() 43 | 44 | 
/* Parse 
Partitions */ 45 | if fi, partitionReader, err = utils.GetReader("partitions.txt"); err != nil { 46 | panic(err) 47 | } 48 | defer fi.Close() 49 | partitionParser = partition.NewParser(partitionReader) 50 | if ps, err = partitionParser.Parse(al.Length()); err != nil { 51 | panic(err) 52 | } 53 | if err = ps.CheckSites(); err != nil { 54 | panic(err) 55 | } 56 | 57 | /* Split alignment per partition and write them on std out */ 58 | if splitAligns, err = al.Split(ps); err != nil { 59 | panic(err) 60 | } 61 | 62 | for i, a := range splitAligns { 63 | fmt.Printf("Alignment %d\n", i) 64 | fmt.Println(fasta.WriteAlignment(a)) 65 | } 66 | } 67 | ``` 68 | -------------------------------------------------------------------------------- /docs/api/stats.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### stats 6 | 7 | Printing statistics about an input alignment: 8 | 9 | ```go 10 | package main 11 | 12 | import ( 13 | "bufio" 14 | "fmt" 15 | "io" 16 | 17 | "github.com/evolbioinfo/goalign/align" 18 | "github.com/evolbioinfo/goalign/io/fasta" 19 | "github.com/evolbioinfo/goalign/io/utils" 20 | ) 21 | 22 | func main() { 23 | var fi io.Closer 24 | var r *bufio.Reader 25 | var err error 26 | var al align.Alignment 27 | 28 | /* Get reader (plain text or gzip) */ 29 | fi, r, err = utils.GetReader("align.fa") 30 | if err != nil { 31 | panic(err) 32 | } 33 | 34 | /* Parse Fasta */ 35 | al, err = fasta.NewParser(r).Parse() 36 | if err != nil { 37 | panic(err) 38 | } 39 | fi.Close() 40 | 41 | /* Print alignment length*/ 42 | fmt.Printf("Length=%d\n", al.Length()) 43 | /* Print number of sequences */ 44 | fmt.Printf("#Seqs=%d\n", al.NbSequences()) 45 | /* Print avg alleles per site */ 46 | fmt.Printf("Avg alleles/sites=%f\n", al.AvgAllelesPerSite()) 47 | /* Print number of occurrences of each character */ 48 | for nt, nb := range al.CharStats() { 49 | fmt.Printf("%c = 
%d\n", nt, nb) 50 | } 51 | /* Print alphabet */ 52 | fmt.Printf("ALphabet=%s\n", al.AlphabetStr()) 53 | } 54 | ``` 55 | -------------------------------------------------------------------------------- /docs/api/subseq.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### subseq 6 | 7 | Extracting sub-alignment (position 10 0-based inclusive, and with length 15) from an input alignment: 8 | 9 | ```go 10 | package main 11 | 12 | import ( 13 | "bufio" 14 | "fmt" 15 | "io" 16 | 17 | "github.com/evolbioinfo/goalign/align" 18 | "github.com/evolbioinfo/goalign/io/fasta" 19 | "github.com/evolbioinfo/goalign/io/utils" 20 | ) 21 | 22 | func main() { 23 | var fi io.Closer 24 | var r *bufio.Reader 25 | var err error 26 | var al align.Alignment 27 | var subalign align.Alignment = nil 28 | 29 | /* Get reader (plain text or gzip) */ 30 | fi, r, err = utils.GetReader("align.fa") 31 | if err != nil { 32 | panic(err) 33 | } 34 | 35 | /* Parse Fasta */ 36 | al, err = fasta.NewParser(r).Parse() 37 | if err != nil { 38 | panic(err) 39 | } 40 | fi.Close() 41 | 42 | /* Subalignment from position 10 (0-based inclusive), 43 | and with length 15 */ 44 | subalign, err = al.SubAlign(10, 15) 45 | if err != nil { 46 | panic(err) 47 | } 48 | 49 | /* Printing alignment in Fasta */ 50 | fmt.Print(fasta.WriteAlignment(subalign)) 51 | } 52 | ``` 53 | -------------------------------------------------------------------------------- /docs/api/subset.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### subset 6 | 7 | Extracting sequences "Seq0001" and "Seq0002" from an input alignment 8 | 9 | ```go 10 | package main 11 | 12 | import ( 13 | "bufio" 14 | "fmt" 15 | "io" 16 | 17 | "github.com/evolbioinfo/goalign/align" 18 | "github.com/evolbioinfo/goalign/io/fasta" 19 | 
"github.com/evolbioinfo/goalign/io/utils" 20 | ) 21 | 22 | func main() { 23 | var fi io.Closer 24 | var r *bufio.Reader 25 | var err error 26 | var al align.Alignment 27 | var filtered align.Alignment = nil 28 | var subset map[string]bool 29 | 30 | /* Sequence names to keep */ 31 | subset = make(map[string]bool) 32 | for _, name := range []string{"Seq0001", "Seq0002"} { 33 | subset[name] = true 34 | } 35 | 36 | /* Get reader (plain text or gzip) */ 37 | fi, r, err = utils.GetReader("align.fa") 38 | if err != nil { 39 | panic(err) 40 | } 41 | 42 | /* Parse Fasta */ 43 | al, err = fasta.NewParser(r).Parse() 44 | if err != nil { 45 | panic(err) 46 | } 47 | fi.Close() 48 | 49 | /* Iterate over alignment sequences */ 50 | al.Iterate(func(name string, sequence string) { 51 | if filtered == nil { 52 | filtered = align.NewAlign(al.Alphabet()) 53 | } 54 | /* Adding only the desired one to the filtered alignment */ 55 | if _, ok := subset[name]; ok { 56 | filtered.AddSequence(name, sequence, "") 57 | } 58 | }) 59 | 60 | fmt.Print(fasta.WriteAlignment(filtered)) 61 | } 62 | ``` 63 | -------------------------------------------------------------------------------- /docs/api/sw.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### Smith & Waterman 6 | 7 | ```go 8 | package main 9 | 10 | import ( 11 | "fmt" 12 | 13 | "github.com/evolbioinfo/goalign/align" 14 | "github.com/evolbioinfo/goalign/io/fasta" 15 | ) 16 | 17 | func main() { 18 | seq1 := align.NewSequence("seq1", []rune("CTGGGGTTTAACCAGCCATGCCAGTGCAGGTTTAAGAACCGATCCGTACTCTGGGTTACTGATGAAGGATGGGCCGTATCGCCCCCTTGCGACGTTTCCA"), "") 19 | seq2 := align.NewSequence("seq2", []rune("TATTATCGTATCGTTTGCATAGACCCGTTATGCCAGCAGATACAGCGTCACAAACTTAGGCTGTAGGGCGTTAGCGGCGCTCCATGTTTAGACTCACGCC"), "") 20 | aligner := align.NewPwAligner(seq1, seq2, align.ALIGN_ALGO_SW) 21 | aligner.SetGapOpenScore(-10.0) 22 | 
aligner.SetGapExtendScore(-0.5) 23 | if al, err := aligner.Alignment(); err != nil { 24 | panic(err) 25 | } else { 26 | fmt.Println(fasta.WriteAlignment(al)) 27 | } 28 | } 29 | ``` 30 | -------------------------------------------------------------------------------- /docs/api/tolower.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### To Lower case 6 | 7 | ```go 8 | package main 9 | 10 | import ( 11 | "bufio" 12 | "fmt" 13 | "io" 14 | 15 | "github.com/evolbioinfo/goalign/align" 16 | "github.com/evolbioinfo/goalign/io/fasta" 17 | "github.com/evolbioinfo/goalign/io/utils" 18 | ) 19 | 20 | func main() { 21 | var fi io.Closer 22 | var alignmentReader *bufio.Reader 23 | var err error 24 | var al align.Alignment 25 | 26 | /* Parse Fasta */ 27 | if fi, alignmentReader, err = utils.GetReader("align.fa"); err != nil { 28 | panic(err) 29 | } 30 | defer fi.Close() 31 | 32 | if al, err = fasta.NewParser(alignmentReader).Parse(); err != nil { 33 | panic(err) 34 | } 35 | defer fi.Close() 36 | 37 | al.ToLower() 38 | 39 | fmt.Println(fasta.WriteAlignment(al)) 40 | } 41 | ``` 42 | -------------------------------------------------------------------------------- /docs/api/toupper.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### To Upper case 6 | 7 | ```go 8 | package main 9 | 10 | import ( 11 | "bufio" 12 | "fmt" 13 | "io" 14 | 15 | "github.com/evolbioinfo/goalign/align" 16 | "github.com/evolbioinfo/goalign/io/fasta" 17 | "github.com/evolbioinfo/goalign/io/utils" 18 | ) 19 | 20 | func main() { 21 | var fi io.Closer 22 | var alignmentReader *bufio.Reader 23 | var err error 24 | var al align.Alignment 25 | 26 | /* Parse Fasta */ 27 | if fi, alignmentReader, err = utils.GetReader("align.fa"); err != nil { 28 | panic(err) 29 | } 30 | defer fi.Close() 31 
| 32 | if al, err = fasta.NewParser(alignmentReader).Parse(); err != nil { 33 | panic(err) 34 | } 35 | defer fi.Close() 36 | 37 | al.ToUpper() 38 | 39 | fmt.Println(fasta.WriteAlignment(al)) 40 | } 41 | ``` 42 | -------------------------------------------------------------------------------- /docs/api/translate.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### translate 6 | 7 | This command translates an input sequence into amino acids while removing first character. 8 | 9 | 10 | ```go 11 | package main 12 | 13 | import ( 14 | "bufio" 15 | "fmt" 16 | "io" 17 | 18 | "github.com/evolbioinfo/goalign/align" 19 | "github.com/evolbioinfo/goalign/io/fasta" 20 | "github.com/evolbioinfo/goalign/io/utils" 21 | ) 22 | 23 | func main() { 24 | var fi io.Closer 25 | var r *bufio.Reader 26 | var err error 27 | var al align.Alignment 28 | 29 | /* Get reader (plain text or gzip) */ 30 | fi, r, err = utils.GetReader("align.fa") 31 | if err != nil { 32 | panic(err) 33 | } 34 | 35 | /* Parse Fasta */ 36 | al, err = fasta.NewParser(r).Parse() 37 | if err != nil { 38 | panic(err) 39 | } 40 | fi.Close() 41 | 42 | /* Translate */ 43 | if err = al.Translate(1,align.GENETIC_CODE_STANDARD); err != nil { 44 | panic(err) 45 | } else { 46 | fmt.Println(fasta.WriteAlignment(al)) 47 | } 48 | } 49 | ``` 50 | -------------------------------------------------------------------------------- /docs/api/transpose.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### transpose 6 | 7 | This command transposes an input alignment 8 | 9 | 10 | ```go 11 | package main 12 | 13 | import ( 14 | "bufio" 15 | "fmt" 16 | "io" 17 | 18 | "github.com/evolbioinfo/goalign/align" 19 | "github.com/evolbioinfo/goalign/io/fasta" 20 | "github.com/evolbioinfo/goalign/io/utils" 21 | ) 22 | 23 | 
func main() { 24 | var fi io.Closer 25 | var r *bufio.Reader 26 | var err error 27 | var al, trAl align.Alignment 28 | 29 | /* Get reader (plain text or gzip) */ 30 | fi, r, err = utils.GetReader("align.fa") 31 | if err != nil { 32 | panic(err) 33 | } 34 | 35 | /* Parse Fasta */ 36 | al, err = fasta.NewParser(r).Parse() 37 | if err != nil { 38 | panic(err) 39 | } 40 | fi.Close() 41 | 42 | /* Transpose */ 43 | if trAl, err = al.Transpose(); err != nil { 44 | panic(err) 45 | } else { 46 | fmt.Println(fasta.WriteAlignment(trAl)) 47 | } 48 | } 49 | ``` 50 | -------------------------------------------------------------------------------- /docs/api/unalign.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## API 4 | 5 | ### unalign 6 | 7 | Printing unaligned version of an input alignment 8 | 9 | ```go 10 | package main 11 | 12 | import ( 13 | "bufio" 14 | "fmt" 15 | "io" 16 | 17 | "github.com/evolbioinfo/goalign/align" 18 | "github.com/evolbioinfo/goalign/io/fasta" 19 | "github.com/evolbioinfo/goalign/io/utils" 20 | ) 21 | 22 | func main() { 23 | var fi io.Closer 24 | var r *bufio.Reader 25 | var err error 26 | var al align.Alignment 27 | 28 | /* Get reader (plain text or gzip) */ 29 | fi, r, err = utils.GetReader("align.fa") 30 | if err != nil { 31 | panic(err) 32 | } 33 | 34 | /* Parse Fasta */ 35 | al, err = fasta.NewParser(r).Parse() 36 | if err != nil { 37 | panic(err) 38 | } 39 | fi.Close() 40 | 41 | /* Printing unaligned sequences */ 42 | fmt.Println(fasta.WriteSequences(al)) 43 | } 44 | ``` 45 | -------------------------------------------------------------------------------- /docs/commands/addid.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## Commands 4 | 5 | ### addid 6 | This command adds an identifier (string) to all sequences of an input alignment.
The string may be added to the left or to the right of each sequence name. 7 | 8 | By default the string is added to the left of each name. 9 | 10 | #### Usage 11 | 12 | General command 13 | ``` 14 | Usage: 15 | goalign addid [flags] 16 | 17 | Flags: 18 | -n, --name string String to add to sequence names (default "none") 19 | -o, --out-align string Renamed alignment output file (default "stdout") 20 | -r, --right Adds the String on the right of sequence names (otherwise, adds to left) 21 | 22 | Global Flags: 23 | -i, --align string Alignment input file (default "stdin") 24 | -p, --phylip Alignment is in phylip? False=Fasta 25 | --input-strict Strict phylip input format (only used with -p) 26 | --output-strict Strict phylip output format (only used with -p) 27 | ``` 28 | 29 | #### Examples 30 | 31 | * Generate a random alignment with 5 sequences, adding "prefix_" as prefix and "_suffix" as suffix to each sequence name. 32 | 33 | ``` 34 | goalign random --seed 10 -n 5 | goalign addid -n prefix_ | goalign addid -n _suffix -r 35 | ``` 36 | 37 | Should give 38 | 39 | ``` 40 | >prefix_Seq0000_suffix 41 | GATTAATTTGCCGTAGGCCAGAATCTGAAGATCGAACACTTTAAGTTTTCACTTCTAATGGAGAGGACTAGTTCATACTT 42 | TTTAAACACTTTTACATCGA 43 | >prefix_Seq0001_suffix 44 | TGTCGGACCTAAGTATTGAGTACAACGGTGTATTCCAGCGGTGGAGAGGTCTATTTTTCCGGTTGAAGGACTCTAGAGCT 45 | GTAAAGGGTATGGCCATGTG 46 | >prefix_Seq0002_suffix 47 | CTAAGCGCGGGCGGATTGCTGTTGGAGCAAGGTTAAATACTCGGCAATGCCCCATGATCCCCCAAGGACAATAAGAGCGA 48 | AGTTAGAACAAATGAACCCC 49 | >prefix_Seq0003_suffix 50 | GAGTGGAGGCTTTATGGCACAAGGTATTAGAGACTGAGGGGCACCCCGGCATGGTAAGCAGGAGCCATCGCGAAGGCTTC 51 | AGGTATCTTCCTGTGTTACC 52 | >prefix_Seq0004_suffix 53 | CATAGCCCCTGATGCCCTGACCCGTGTCGCGGCAACGTCTACATTTCACGATAAATACTCCGCTGCTAGTCGGCTCTAGA 54 | TGCTTTTCTTCCAGATCTGG 55 | ``` 56 | -------------------------------------------------------------------------------- /docs/commands/append.md: -------------------------------------------------------------------------------- 1 | # Goalign: 
toolkit and api for alignment manipulation 2 | 3 | ## Commands 4 | 5 | ### append 6 | Append alignments to an input alignment by inserting new sequences. 7 | 8 | This command adds the sequences of a set of alignments to a reference alignment specified by `-i`. 9 | 10 | If sequences do not have the same length as the reference alignment, then an error is returned. 11 | 12 | If format is phylip, it may contain several alignments in one file and then we can append all of them at once: 13 | 14 | ``` 15 | goalign append -i refalign.phy aligns.phy 16 | ``` 17 | 18 | If format is Fasta, several alignments may be given in the form: 19 | 20 | ``` 21 | goalign append -i align.fasta others*.fasta 22 | ``` 23 | 24 | #### Usage 25 | 26 | General command 27 | 28 | ``` 29 | Usage: 30 | goalign append [flags] 31 | 32 | Flags: 33 | -h, --help help for append 34 | -o, --output string Alignment output file (default "stdout") 35 | 36 | Global Flags: 37 | -i, --align string Alignment input file (default "stdin") 38 | --auto-detect Auto detects input format (overrides -p, -x and -u) 39 | -u, --clustal Alignment is in clustal? default fasta 40 | --ignore-identical int Ignore duplicated sequences that have the same name and same sequences 41 | --input-strict Strict phylip input format (only used with -p) 42 | -x, --nexus Alignment is in nexus? default fasta 43 | --no-block Write Phylip sequences without space separated blocks (only used with -p) 44 | --one-line Write Phylip sequences on 1 line (only used with -p) 45 | --output-strict Strict phylip output format (only used with -p) 46 | -p, --phylip Alignment is in phylip? 
default fasta 47 | ``` 48 | 49 | #### Examples 50 | 51 | * Append 3 alignments 52 | 53 | ``` 54 | cat > input.1 <<EOF 55 | >s1 56 | ACGACGACGACC 57 | >2 58 | ATCTT-TTTTTC 59 | >3 60 | ATCTT-TTTTTT 61 | EOF 62 | 63 | cat > input.2 <<EOF 64 | >s4 65 | ACGACGACGACC 66 | >5 67 | ATCTT-TTTTTC 68 | >6 69 | ATCTT-TTTTTT 70 | EOF 71 | 72 | cat > input.3 <<EOF 73 | >s7 74 | ACGACGACGACC 75 | >8 76 | ATCTT-TTTTTC 77 | >9 78 | ATCTT-TTTTTT 79 | EOF 80 | 81 | goalign append -i input.1 input.2 input.3 82 | 83 | >s1 84 | ACGACGACGACC 85 | >2 86 | ATCTT-TTTTTC 87 | >3 88 | ATCTT-TTTTTT 89 | >s4 90 | ACGACGACGACC 91 | >5 92 | ATCTT-TTTTTC 93 | >6 94 | ATCTT-TTTTTT 95 | >s7 96 | ACGACGACGACC 97 | >8 98 | ATCTT-TTTTTC 99 | >9 100 | ATCTT-TTTTTT 101 | 102 | ``` 103 | -------------------------------------------------------------------------------- /docs/commands/codonalign.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## Commands 4 | 5 | ### codonalign 6 | 7 | Aligns a given nt fasta file using a corresponding aa alignment. 8 | 9 | If the input alignment is not amino acid, then returns an error. 10 | If the given fasta file is not nucleotides then returns an error. 11 | 12 | Warning: It does not check that the amino acid sequence is a good 13 | translation of the nucleotide sequence, but just adds gaps to the 14 | nucleotide sequence where needed. 15 | 16 | Once gaps are added, if the nucleotide alignment length does not match 17 | the protein alignment length * 3, returns an error. 
18 | 19 | 20 | 21 | #### Usage 22 | ``` 23 | Usage: 24 | goalign codonalign [flags] 25 | 26 | Flags: 27 | -f, --fasta string Input nucleotide Fasta file to be codon aligned (default "stdin") 28 | -h, --help help for codonalign 29 | -o, --output string Output codon aligned file (default "stdout") 30 | 31 | Global Flags: 32 | -i, --align string Alignment input file (default "stdin") 33 | --auto-detect Auto detects input format (overrides -p, -x and -u) 34 | -u, --clustal Alignment is in clustal? default fasta 35 | --input-strict Strict phylip input format (only used with -p) 36 | -x, --nexus Alignment is in nexus? default fasta 37 | --output-strict Strict phylip output format (only used with -p) 38 | -p, --phylip Alignment is in phylip? default fasta 39 | ``` 40 | 41 | #### Examples 42 | 43 | input_aa.fa 44 | 45 | ``` 46 | >Seq0000 47 | D*-AVGQNLK 48 | >Seq0001 49 | IE-FKF-LLM 50 | >Seq0002 51 | ERTSSYFLNT 52 | ``` 53 | 54 | input_nt.fa 55 | ``` 56 | >Seq0000 57 | GATTAAGCCGTAGGCCAGAATCTGAAG 58 | >Seq0001 59 | ATCGAATTTAAGTTTCTTCTAATG 60 | >Seq0002 61 | GAGAGGACTAGTTCATACTTTTTAAACACT 62 | ``` 63 | 64 | ``` 65 | goalign codonalign -i input_aa.fa -f input_nt.fa 66 | ``` 67 | 68 | should give 69 | 70 | ``` 71 | >Seq0000 72 | GATTAA---GCCGTAGGCCAGAATCTGAAG 73 | >Seq0001 74 | ATCGAA---TTTAAGTTT---CTTCTAATG 75 | >Seq0002 76 | GAGAGGACTAGTTCATACTTTTTAAACACT 77 | EOF 78 | ``` 79 | -------------------------------------------------------------------------------- /docs/commands/completion.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## Commands 4 | 5 | ### completion 6 | This command generates auto-completion commands for bash or zsh. 
7 | 8 | #### Usage 9 | 10 | ``` 11 | Usage: 12 | goalign completion SHELL 13 | ``` 14 | 15 | #### Bash 16 | * Install bash-completion: 17 | ``` 18 | # MacOS brew 19 | brew install bash-completion 20 | # MacOS port (do not forget to change 21 | # the path to bash command in terminal 22 | # preferences to /opt/local/bin/bash -l) 23 | sudo port install bash-completion 24 | # Linux 25 | yum install bash-completion -y 26 | apt-get install bash-completion 27 | ``` 28 | 29 | * Activate goalign bash completion 30 | ``` 31 | # Once 32 | source <(goalign completion bash) 33 | # Permanently 34 | mkdir ~/.goalign 35 | goalign completion bash > ~/.goalign/completion.bash.inc 36 | printf " 37 | # goalign shell completion 38 | source '$HOME/.goalign/completion.bash.inc' 39 | " >> $HOME/.bashrc 40 | ``` 41 | 42 | #### Zsh (not tested) 43 | 44 | ``` 45 | # Once 46 | source <(goalign completion zsh) 47 | # Permanently 48 | goalign completion zsh > "${fpath[1]}/_goalign" 49 | ``` 50 | -------------------------------------------------------------------------------- /docs/commands/compress.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## Commands 4 | 5 | ### compress 6 | This command removes identical patterns/sites from an input alignment 7 | 8 | #### Usage 9 | ``` 10 | Usage: 11 | goalign compress [flags] 12 | 13 | Flags: 14 | -o, --output string Compressed output alignment file (default "stdout") 15 | --weight-out string Pattern weight output file (default "none") 16 | 17 | Global Flags: 18 | -i, --align string Alignment input file (default "stdin") 19 | --auto-detect Auto detects input format (overrides -p, -x and -u) 20 | -u, --clustal Alignment is in clustal? default fasta 21 | --input-strict Strict phylip input format (only used with -p) 22 | -x, --nexus Alignment is in nexus? 
default fasta 23 | --no-block Write Phylip sequences without space separated blocks (only used with -p) 24 | --one-line Write Phylip sequences on 1 line (only used with -p) 25 | --output-strict Strict phylip output format (only used with -p) 26 | -p, --phylip Alignment is in phylip? default fasta 27 | ``` 28 | 29 | #### Examples 30 | 31 | ``` 32 | cat > input < alignment.phy 32 | goalign concat -i alignment.phy -p | goalign stats -p 33 | ``` 34 | 35 | It should give the following statistics: 36 | ``` 37 | length 1500 38 | nseqs 5 39 | avgalleles 1.3220 40 | char nb freq 41 | A 1894 0.252533 42 | C 1898 0.253067 43 | G 1788 0.238400 44 | T 1920 0.256000 45 | ``` 46 | -------------------------------------------------------------------------------- /docs/commands/consensus.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## Commands 4 | 5 | ### consensus 6 | This command generates a basic "majority consensus" sequence, _i.e._ a single sequence whose sites correspond to the majority character at each position. 7 | 8 | If '-' is the most abundant character, then '-' will be in the consensus, except if `--ignore-gaps` is specified. If `--ignore-gaps` is specified, then the majority is computed on non-gap characters, except if the column is only made of gaps. 9 | If 'N' is the most abundant character, then 'N' will be in the consensus, except if `--ignore-n` is specified. If `--ignore-n` is specified, then the majority is computed on non N/n characters (X/x for proteins), except if the column is only made of N/n (X/x). 
10 | 11 | 12 | #### Usage 13 | ``` 14 | Usage: 15 | goalign consensus [flags] 16 | 17 | Flags: 18 | --exclude-gaps Exclude gaps in the majority computation 19 | -h, --help help for consensus 20 | -o, --output string Alignment output file (default "stdout") 21 | 22 | Global Flags: 23 | -i, --align string Alignment input file (default "stdin") 24 | --auto-detect Auto detects input format (overrides -p, -x and -u) 25 | -u, --clustal Alignment is in clustal? default fasta 26 | --ignore-identical int Ignore duplicated sequences that have the same name and same sequences 27 | --ignore-gaps Ignore gaps (except if only gaps on the column) 28 | --ignore-n Ignore Ns (except if only N on the column) 29 | --input-strict Strict phylip input format (only used with -p) 30 | -x, --nexus Alignment is in nexus? default fasta 31 | --no-block Write Phylip sequences without space separated blocks (only used with -p) 32 | --one-line Write Phylip sequences on 1 line (only used with -p) 33 | --output-strict Strict phylip output format (only used with -p) 34 | -p, --phylip Alignment is in phylip? default fasta 35 | ``` 36 | 37 | #### Examples 38 | 39 | * Consensus of 3 sequences: 40 | 41 | Input alignment `al.fa`: 42 | ``` 43 | >s1 44 | ACGACGACGACC 45 | >2 46 | ATCTT-TTTTTC 47 | >3 48 | ATCTT-TTTTTT 49 | ``` 50 | 51 | ``` 52 | $ goalign consensus -i al.fa 53 | 54 | >consensus 55 | ATCTT-TTTTTC 56 | ``` 57 | -------------------------------------------------------------------------------- /docs/commands/dedup.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## Commands 4 | 5 | ### dedup 6 | This command deduplicates sequences. To do so it removes identical sequences. 
7 | 8 | If -l is specified, then identical sequences are printed in the given file 9 | with the following format: 10 | 11 | ``` 12 | seq1,seq2 13 | seq3,seq4 14 | ``` 15 | 16 | This means that seq1 is identical to seq2 and seq3 is identical to seq4. 17 | 18 | if `--n-as-gap` is specified: X/N (depending on alphabet) are considered identical to GAPS for identifying identical sequences. Only the first sequence appears in the output alignment. 19 | 20 | #### Usage 21 | ``` 22 | Usage: 23 | goalign dedup [flags] 24 | 25 | Flags: 26 | -h, --help help for dedup 27 | -l, --log string Deduplicated output log file (default "none") 28 | --n-as-gap Considers N/X identical to GAPS for identifying identical sequences 29 | --name Deduplicate by name instead of sequence event if sequences are different (only the first appears in the output file) 30 | -o, --output string Deduplicated output alignment file (default "stdout") 31 | --unaligned Considers sequences as unaligned and format fasta (phylip, nexus,... options are ignored) 32 | 33 | Global Flags: 34 | -i, --align string Alignment input file (default "stdin") 35 | --auto-detect Auto detects input format (overrides -p, -x and -u) 36 | -u, --clustal Alignment is in clustal? default fasta 37 | --input-strict Strict phylip input format (only used with -p) 38 | -x, --nexus Alignment is in nexus? default fasta 39 | --output-strict Strict phylip output format (only used with -p) 40 | -p, --phylip Alignment is in phylip? 
default fasta 41 | ``` 42 | 43 | #### Examples 44 | 45 | ``` 46 | cat > input.phy <Seq0000 23 | GATTAA---GCCGTAGGCCAGAATCTGAAG 24 | >Seq0001 25 | ATCGAA---TTTAAGTTT---CTTCTAATG 26 | >Seq0002 27 | GAGAGGACTAGTTCATACTTTTTAAACACT 28 | ``` 29 | 30 | align2.fa 31 | 32 | ``` 33 | >Seq0001 34 | ATCGAA---TTTAAGTTT---CTTCTAATG 35 | >Seq0000 36 | GATTAA---GCCGTAGGCCAGAATCTGAAG 37 | >Seq0002 38 | GAGAGGACTAGTTCATACTTTTTAAACACT 39 | ``` 40 | 41 | ``` 42 | goalign identical -i align1.fa -c align2.fa 43 | ``` 44 | 45 | should print: 46 | 47 | ``` 48 | true 49 | ``` 50 | 51 | 52 | #### Usage 53 | ``` 54 | Usage: 55 | goalign identical [flags] 56 | 57 | Flags: 58 | -c, --compared string Compared alignment file (default "none") 59 | 60 | Global Flags: 61 | -i, --align string Alignment input file (default "stdin") 62 | --auto-detect Auto detects input format (overrides -p, -x and -u) 63 | -u, --clustal Alignment is in clustal? default fasta 64 | --input-strict Strict phylip input format (only used with -p) 65 | -x, --nexus Alignment is in nexus? default fasta 66 | --output-strict Strict phylip output format (only used with -p) 67 | -p, --phylip Alignment is in phylip? default fasta 68 | ``` 69 | -------------------------------------------------------------------------------- /docs/commands/orf.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## Commands 4 | 5 | ### orf 6 | Find the longest orf in all given sequences in forward strand. 7 | 8 | If input sequences are not nucleotidic, then returns an error. 9 | 10 | If input sequences are aligned (contain '-'), then they are unaligned first. 11 | 12 | Output is in fasta format (format options such as -p and -x are ignored). 
13 | 14 | #### Usage 15 | ``` 16 | Usage: 17 | goalign orf [flags] 18 | 19 | Flags: 20 | -h, --help help for orf 21 | -o, --output string ORF Output Fasta File (default "stdout") 22 | 23 | Global Flags: 24 | -i, --align string Alignment input file (default "stdin") 25 | --auto-detect Auto detects input format (overrides -p, -x and -u) 26 | -u, --clustal Alignment is in clustal? default fasta 27 | --input-strict Strict phylip input format (only used with -p) 28 | -x, --nexus Alignment is in nexus? default fasta 29 | --no-block Write Phylip sequences without space separated blocks (only used with -p) 30 | --one-line Write Phylip sequences on 1 line (only used with -p) 31 | --output-strict Strict phylip output format (only used with -p) 32 | -p, --phylip Alignment is in phylip? default fasta 33 | ``` 34 | 35 | #### Examples 36 | 37 | -------------------------------------------------------------------------------- /docs/commands/random.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## Commands 4 | 5 | ### random 6 | This command generates a random alignment with uniform distribution of nucleotides or amino acids. It is intended for testing purposes, as no evolutionary information is taken into account. 7 | 8 | #### Usage 9 | ``` 10 | Usage: 11 | goalign random [flags] 12 | 13 | Flags: 14 | -a, --amino-acids Aminoacid sequences (otherwise, nucleotides) 15 | -l, --length int Length of sequences to generate (default 100) 16 | -n, --nb-seqs int Number of sequences to generate (default 10) 17 | -o, --out-align string Random alignment output file (default "stdout") 18 | --seed int Random Seed: -1 = nano seconds since 1970/01/01 00:00:00 (default -1) 19 | 20 | Global Flags: 21 | -i, --align string Alignment input file (default "stdin") 22 | -p, --phylip Alignment is in phylip? 
False=Fasta 23 | --input-strict Strict phylip input format (only used with -p) 24 | --output-strict Strict phylip output format (only used with -p) 25 | ``` 26 | 27 | #### Examples 28 | 29 | * Generating a random alignment with 100 sequences and 1000 nucleotides: 30 | ``` 31 | goalign random -n 100 -l 1000 --seed 10 | goalign stats 32 | ``` 33 | 34 | Should give the following statistics: 35 | ``` 36 | length 1000 37 | nseqs 100 38 | avgalleles 4.0000 39 | char nb freq 40 | A 24899 0.248990 41 | C 25032 0.250320 42 | G 24888 0.248880 43 | T 25181 0.251810 44 | ``` 45 | -------------------------------------------------------------------------------- /docs/commands/revcomp.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## Commands 4 | 5 | ### revcomp 6 | Reverse complements an input alignment. 7 | 8 | If the input alignment is not nucleotides, then returns an error. 9 | 10 | If `--unaligned` is specified, then input sequences may be unaligned. 11 | 12 | IUPAC codes are taken into account. 13 | 14 | If sequence names are given in the command line (e.g. goalign revcomp -i al.fasta s1 s2 s3), 15 | only given sequences are reverse-complemented, if they exist in the alignment. 16 | 17 | #### Usage 18 | ``` 19 | Usage: 20 | goalign revcomp [flags] 21 | 22 | Flags: 23 | -h, --help help for revcomp 24 | -o, --output string Output reverse complement alignment file (default "stdout") 25 | --unaligned Considers sequences as unaligned and format fasta (phylip, nexus,... options are ignored) 26 | 27 | Global Flags: 28 | -i, --align string Alignment input file (default "stdin") 29 | --auto-detect Auto detects input format (overrides -p, -x and -u) 30 | -u, --clustal Alignment is in clustal? 
default fasta 31 | --ignore-identical int Ignore duplicated sequences that have the same name and same sequences 32 | --input-strict Strict phylip input format (only used with -p) 33 | -x, --nexus Alignment is in nexus? default fasta 34 | --no-block Write Phylip sequences without space separated blocks (only used with -p) 35 | --one-line Write Phylip sequences on 1 line (only used with -p) 36 | --output-strict Strict phylip output format (only used with -p) 37 | -p, --phylip Alignment is in phylip? default fasta 38 | ``` 39 | 40 | 41 | #### Examples 42 | * Reverse complement: 43 | 44 | seq.fa 45 | ``` 46 | >Seq0000 47 | CTTTCGCAAA 48 | >Seq0001 49 | GTGCAGTCCG 50 | >Seq0002 51 | TGAGTTTAGT 52 | >Seq0003 53 | CATTCACTCG 54 | >Seq0004 55 | CGGTCTGATC 56 | >Seq0005 57 | CCCTACAGTT 58 | >Seq0006 59 | TGCAGACGTG 60 | >Seq0007 61 | TAGGTGCTAA 62 | >Seq0008 63 | TCCCCTCTTG 64 | >Seq0009 65 | GAGTATATCG 66 | ``` 67 | 68 | 69 | ``` 70 | goalign revcomp -i seq.fa 71 | ``` 72 | 73 | Should output: 74 | ``` 75 | >Seq0000 76 | TTTGCGAAAG 77 | >Seq0001 78 | CGGACTGCAC 79 | >Seq0002 80 | ACTAAACTCA 81 | >Seq0003 82 | CGAGTGAATG 83 | >Seq0004 84 | GATCAGACCG 85 | >Seq0005 86 | AACTGTAGGG 87 | >Seq0006 88 | CACGTCTGCA 89 | >Seq0007 90 | TTAGCACCTA 91 | >Seq0008 92 | CAAGAGGGGA 93 | >Seq0009 94 | CGATATACTC 95 | ``` 96 | -------------------------------------------------------------------------------- /docs/commands/sort.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## Commands 4 | 5 | ### sort 6 | This command sort an input alignment by sequence names. 
7 | 8 | #### Usage 9 | ``` 10 | Usage: 11 | goalign sort [flags] 12 | 13 | Flags: 14 | -o, --output string Sorted alignment output file (default "stdout") 15 | 16 | Global Flags: 17 | -i, --align string Alignment input file (default "stdin") 18 | --input-strict Strict phylip input format (only used with -p) 19 | -x, --nexus Alignment is in nexus? default fasta 20 | --output-strict Strict phylip output format (only used with -p) 21 | -p, --phylip Alignment is in phylip? default fasta 22 | ``` 23 | 24 | #### Examples 25 | 26 | * Sorting a random alignment 27 | ``` 28 | goalign random --seed 10 -l 10 -n 5 | goalign shuffle seqs | goalign sort 29 | ``` 30 | 31 | It should give the following alignment: 32 | ``` 33 | >Seq0000 34 | GATTAATTTG 35 | >Seq0001 36 | CCGTAGGCCA 37 | >Seq0002 38 | GAATCTGAAG 39 | >Seq0003 40 | ATCGAACACT 41 | >Seq0004 42 | TTAAGTTTTC 43 | ``` 44 | -------------------------------------------------------------------------------- /docs/commands/split.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## Commands 4 | 5 | ### split 6 | This command splits an input alignment according to partitions given as input. 7 | 8 | The partitions are defined as in [RAxML](https://cme.h-its.org/exelixis/web/software/raxml/index.html). 9 | 10 | #### Usage 11 | ``` 12 | goalign split -i align.phylip --partition partition.txt 13 | 14 | 15 | Usage: 16 | goalign split [flags] 17 | 18 | Flags: 19 | -h, --help help for split 20 | -o, --out-prefix string Prefix of output files 21 | --partition string File containing definition of the partitions (default "none") 22 | 23 | Global Flags: 24 | -i, --align string Alignment input file (default "stdin") 25 | --auto-detect Auto detects input format (overrides -p, -x and -u) 26 | -u, --clustal Alignment is in clustal? 
default fasta 27 | --ignore-identical int Ignore duplicated sequences that have the same name and same sequences 28 | --input-strict Strict phylip input format (only used with -p) 29 | -x, --nexus Alignment is in nexus? default fasta 30 | --no-block Write Phylip sequences without space separated blocks (only used with -p) 31 | --one-line Write Phylip sequences on 1 line (only used with -p) 32 | --output-strict Strict phylip output format (only used with -p) 33 | -p, --phylip Alignment is in phylip? default fasta 34 | ``` 35 | 36 | #### Examples 37 | 38 | * Splitting an alignment 39 | 40 | input.fa 41 | ``` 42 | >s1 43 | AAAACCCCCGG 44 | >2 45 | AAAACCCCCGG 46 | >3 47 | AAAACCCCCGG 48 | >4 49 | AAAACCCCCGG 50 | >5 51 | AAAACCCCCGG 52 | ``` 53 | 54 | partition.txt 55 | ``` 56 | M1,p1=1-4,10-11 57 | M2,p2=5-9 58 | ``` 59 | 60 | This command: 61 | ``` 62 | goalign split -i input.fa --partition partition.txt --out-prefix ./ 63 | ``` 64 | 65 | Should produce: 66 | 67 | p1.fa 68 | ``` 69 | >s1 70 | AAAAGG 71 | >2 72 | AAAAGG 73 | >3 74 | AAAAGG 75 | >4 76 | AAAAGG 77 | >5 78 | AAAAGG 79 | ``` 80 | 81 | and p2.fa 82 | ``` 83 | >s1 84 | CCCCC 85 | >2 86 | CCCCC 87 | >3 88 | CCCCC 89 | >4 90 | CCCCC 91 | >5 92 | CCCCC 93 | ``` 94 | -------------------------------------------------------------------------------- /docs/commands/tolower.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## Commands 4 | 5 | ### tolower 6 | This command replaces upper case characters by lower case characters. 7 | 8 | #### Usage 9 | ``` 10 | Usage: 11 | goalign tolower [flags] 12 | 13 | Flags: 14 | -h, --help help for tolower 15 | -o, --output string Output lower case alignment file (default "stdout") 16 | --unaligned Considers sequences as unaligned and format fasta (phylip, nexus,... 
options are ignored) 17 | 18 | Global Flags: 19 | -i, --align string Alignment input file (default "stdin") 20 | --alphabet string Alignment/Sequences alphabet: auto (default), aa, or nt (default "auto") 21 | --auto-detect Auto detects input format (overrides -p, -x and -u) 22 | -u, --clustal Alignment is in clustal? default fasta 23 | --ignore-identical int Ignore duplicated sequences that have the same name and potentially have same sequences, 0 : Does not ignore anything, 1: Ignore sequences having the same name (keep the first one whatever their sequence), 2: Ignore sequences having the same name and the same sequence 24 | --input-strict Strict phylip input format (only used with -p) 25 | -x, --nexus Alignment is in nexus? default fasta 26 | --no-block Write Phylip sequences without space separated blocks (only used with -p) 27 | --one-line Write Phylip sequences on 1 line (only used with -p) 28 | --output-strict Strict phylip output format (only used with -p) 29 | -p, --phylip Alignment is in phylip? default fasta 30 | ``` 31 | 32 | #### Examples 33 | 34 | ``` 35 | goalign random --seed 10 -l 10 -n 5 | goalign tolower 36 | ``` 37 | 38 | It should give the following alignment: 39 | ``` 40 | >Seq0000 41 | gattaatttg 42 | >Seq0001 43 | ccgtaggcca 44 | >Seq0002 45 | gaatctgaag 46 | >Seq0003 47 | atcgaacact 48 | >Seq0004 49 | ttaagttttc 50 | ``` 51 | -------------------------------------------------------------------------------- /docs/commands/toupper.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## Commands 4 | 5 | ### toupper 6 | This command replaces lower case characters by upper case characters. 
7 | 8 | #### Usage 9 | ``` 10 | Usage: 11 | goalign toupper [flags] 12 | 13 | Flags: 14 | -h, --help help for toupper 15 | -o, --output string Output upper case alignment file (default "stdout") 16 | --unaligned Considers sequences as unaligned and format fasta (phylip, nexus,... options are ignored) 17 | 18 | Global Flags: 19 | -i, --align string Alignment input file (default "stdin") 20 | --alphabet string Alignment/Sequences alphabet: auto (default), aa, or nt (default "auto") 21 | --auto-detect Auto detects input format (overrides -p, -x and -u) 22 | -u, --clustal Alignment is in clustal? default fasta 23 | --ignore-identical int Ignore duplicated sequences that have the same name and potentially have same sequences, 0 : Does not ignore anything, 1: Ignore sequences having the same name (keep the first one whatever their sequence), 2: Ignore sequences having the same name and the same sequence 24 | --input-strict Strict phylip input format (only used with -p) 25 | -x, --nexus Alignment is in nexus? default fasta 26 | --no-block Write Phylip sequences without space separated blocks (only used with -p) 27 | --one-line Write Phylip sequences on 1 line (only used with -p) 28 | --output-strict Strict phylip output format (only used with -p) 29 | -p, --phylip Alignment is in phylip? 
default fasta 30 | ``` 31 | 32 | #### Examples 33 | 34 | ``` 35 | echo ">Seq0000 36 | gattaatttg 37 | >Seq0001 38 | ccgtaggcca 39 | >Seq0002 40 | gaatctgaag 41 | >Seq0003 42 | atcgaacact 43 | >Seq0004 44 | ttaagttttc" | ./goalign toupper 45 | ``` 46 | 47 | It should give the following alignment: 48 | ``` 49 | >Seq0000 50 | GATTAATTTG 51 | >Seq0001 52 | CCGTAGGCCA 53 | >Seq0002 54 | GAATCTGAAG 55 | >Seq0003 56 | ATCGAACACT 57 | >Seq0004 58 | TTAAGTTTTC 59 | ``` 60 | -------------------------------------------------------------------------------- /docs/commands/transpose.md: -------------------------------------------------------------------------------- 1 | # Goalign: toolkit and api for alignment manipulation 2 | 3 | ## Commands 4 | 5 | ### transpose 6 | Transposes an input alignment such that the sequences become the sites and the sites become the sequence. 7 | 8 | Each sequence of the output alignment is one site of the input alignment, whose name is the site index (starting from 0). 9 | 10 | #### Usage 11 | ``` 12 | Usage: 13 | goalign transpose [flags] 14 | 15 | Flags: 16 | -h, --help help for transpose 17 | -o, --output string Output transposed alignment (default "stdout") 18 | 19 | Global Flags: 20 | -i, --align string Alignment input file (default "stdin") 21 | --auto-detect Auto detects input format (overrides -p, -x and -u) 22 | -u, --clustal Alignment is in clustal? default fasta 23 | --ignore-identical int Ignore duplicated sequences that have the same name and same sequences 24 | --input-strict Strict phylip input format (only used with -p) 25 | -x, --nexus Alignment is in nexus? default fasta 26 | --no-block Write Phylip sequences without space separated blocks (only used with -p) 27 | --one-line Write Phylip sequences on 1 line (only used with -p) 28 | --output-strict Strict phylip output format (only used with -p) 29 | -p, --phylip Alignment is in phylip? 
default fasta 30 | ``` 31 | 32 | 33 | #### Examples 34 | * Transposing an input alignment: 35 | 36 | seq.fa 37 | ``` 38 | >Seq0000 39 | CTTTC 40 | >Seq0001 41 | GCAAA 42 | >Seq0002 43 | GTGCA 44 | >Seq0003 45 | GTCCG 46 | >Seq0004 47 | TGAGT 48 | ``` 49 | 50 | 51 | ``` 52 | goalign transpose -i seq.fa 53 | ``` 54 | 55 | Should output: 56 | ``` 57 | >0 58 | CGGGT 59 | >1 60 | TCTTG 61 | >2 62 | TAGCA 63 | >3 64 | TACCG 65 | >4 66 | CAAGT 67 | ``` 68 | -------------------------------------------------------------------------------- /draw/biojs.go: -------------------------------------------------------------------------------- 1 | package draw 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | 7 | "github.com/evolbioinfo/goalign/align" 8 | "github.com/evolbioinfo/goalign/io/fasta" 9 | ) 10 | 11 | type bioJSLayout struct { 12 | writer *bufio.Writer 13 | } 14 | 15 | func NewBioJSLayout(writer *bufio.Writer) AlignLayout { 16 | return &bioJSLayout{writer} 17 | } 18 | 19 | /* 20 | Draw the alignment on the specific drawer. Does not close the file. The caller must do it. 21 | */ 22 | func (layout *bioJSLayout) DrawAlign(a align.Alignment) (err error) { 23 | var biojs string 24 | 25 | if biojs, err = biojsDepString(); err != nil { 26 | return 27 | } 28 | 29 | layout.writer.WriteString(` 30 | 31 | 32 | 37 | 44 | 45 | 46 | 52 |
53 |
/*
AlignLayout is the interface shared by the alignment drawing backends of this
package: the constructors NewBioJSLayout and NewPngLayout both return their
layout as an AlignLayout.
*/
type AlignLayout interface {
	// DrawAlign draws the alignment a and returns an error value.
	DrawAlign(a align.Alignment) error
}
{102, 0, 0, 255}, 'P': {82, 82, 82, 255}, 26 | 'R': {0, 0, 124, 255}, 'F': {83, 76, 66, 255}, 'Q': {255, 76, 76, 255}, 'Y': {140, 112, 76, 255}, 27 | 'H': {112, 112, 255, 255}, 'C': {255, 255, 112, 255}, 'M': {184, 160, 66, 255}, 'W': {79, 70, 0, 255}, 28 | 'B': {255, 0, 255, 255}, 'Z': {255, 0, 255, 255}, 'X': {184, 184, 184, 255}, '-': {0, 0, 0, 255}, 29 | '.': {0, 0, 0, 255}, 30 | } 31 | 32 | type pngLayout struct { 33 | writer *bufio.Writer 34 | } 35 | 36 | func NewPngLayout(writer *bufio.Writer) AlignLayout { 37 | return &pngLayout{writer} 38 | } 39 | 40 | func (layout *pngLayout) DrawAlign(a align.Alignment) (err error) { 41 | 42 | colors := nucleotideColors 43 | if a.Alphabet() == align.AMINOACIDS { 44 | colors = aminoAcidColors 45 | } 46 | 47 | height := a.NbSequences() 48 | width := a.Length() 49 | 50 | img := image.NewRGBA(image.Rect(0, 0, width, height)) 51 | 52 | for j, seq := range a.Sequences() { 53 | for i, char := range seq.Sequence() { 54 | c, ok := colors[char] 55 | if !ok { 56 | // Set unkown character to black 57 | c = color.RGBA{0, 0, 0, 0} 58 | } 59 | img.Set(i, j, c) 60 | } 61 | } 62 | 63 | png.Encode(layout.writer, img) 64 | 65 | return nil 66 | } 67 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/evolbioinfo/goalign 2 | 3 | go 1.21.6 4 | 5 | require ( 6 | github.com/armon/go-radix v1.0.0 7 | github.com/fredericlemoine/cobrashell v0.0.0-20180921081141-49c72f93426c 8 | github.com/spf13/cobra v1.5.0 9 | github.com/ulikunitz/xz v0.5.10 10 | gonum.org/v1/gonum v0.9.3 11 | ) 12 | 13 | require ( 14 | github.com/abiosoft/ishell v2.0.0+incompatible // indirect 15 | github.com/abiosoft/readline v0.0.0-20180607040430-155bce2042db // indirect 16 | github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect 17 | github.com/fatih/color v1.7.0 // indirect 18 | github.com/flynn-archive/go-shlex 
// ContainsRune reports whether the byte e occurs in s. When ignoreCase is
// true, two bytes are also considered equal if unicode.ToLower maps both of
// them (converted to runes) to the same value.
func ContainsRune(s []uint8, e uint8, ignoreCase bool) bool {
	if !ignoreCase {
		for idx := 0; idx < len(s); idx++ {
			if s[idx] == e {
				return true
			}
		}
		return false
	}

	// Identical bytes always have identical lower-case forms, so comparing
	// the folded values covers the exact-match case as well.
	wanted := unicode.ToLower(rune(e))
	for idx := 0; idx < len(s); idx++ {
		if unicode.ToLower(rune(s[idx])) == wanted {
			return true
		}
	}
	return false
}
| NUMERIC // Number of taxa and length of sequences 16 | ) 17 | 18 | func isEndOfLine(ch rune) bool { 19 | return ch == '\n' || ch == '\r' 20 | } 21 | 22 | func isCR(ch rune) bool { 23 | return ch == '\r' 24 | } 25 | 26 | func isNL(ch rune) bool { 27 | return ch == '\n' 28 | } 29 | 30 | func isIdent(ch rune) bool { 31 | return ch != '\n' && ch != ' ' && ch != '\r' 32 | } 33 | 34 | func isWhitespace(ch rune) bool { 35 | return ch == ' ' || ch == '\t' 36 | } 37 | -------------------------------------------------------------------------------- /io/clustal/writer.go: -------------------------------------------------------------------------------- 1 | package clustal 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | 7 | "github.com/evolbioinfo/goalign/align" 8 | "github.com/evolbioinfo/goalign/version" 9 | ) 10 | 11 | const ( 12 | CLUSTAL_LINE = 50 13 | ) 14 | 15 | func min_int(a int, b int) int { 16 | if a < b { 17 | return a 18 | } 19 | return b 20 | } 21 | 22 | func WriteAlignment(al align.Alignment) string { 23 | var buf bytes.Buffer 24 | cursize := 0 25 | 26 | // Get length of the longest name 27 | maxnamelength := 0 28 | al.IterateChar(func(name string, seq []uint8) bool { 29 | if len(name) > maxnamelength { 30 | maxnamelength = len(name) 31 | } 32 | return false 33 | }) 34 | 35 | buf.WriteString(fmt.Sprintf("CLUSTAL W (goalign version %s)\n\n", version.Version)) 36 | for cursize < al.Length() { 37 | if cursize > 0 { 38 | buf.WriteRune('\n') 39 | } 40 | end := 0 41 | al.IterateChar(func(name string, seq []uint8) bool { 42 | buf.WriteString(name) 43 | for i := len(name); i < maxnamelength+3; i++ { 44 | buf.WriteRune(' ') 45 | } 46 | 47 | end = min_int(cursize+CLUSTAL_LINE, len(seq)) 48 | for j := cursize; j < end; j++ { 49 | buf.WriteByte(seq[j]) 50 | } 51 | buf.WriteRune(' ') 52 | buf.WriteString(fmt.Sprintf("%d", end)) 53 | buf.WriteRune('\n') 54 | return false 55 | }) 56 | // Conservation line 57 | // White spaces 58 | for i := 0; i < maxnamelength+3; i++ { 59 | 
buf.WriteRune(' ') 60 | } 61 | // Each position in the line 62 | for pos := cursize; pos < end; pos++ { 63 | conservation, _ := al.SiteConservation(pos) 64 | switch conservation { 65 | case align.POSITION_IDENTICAL: 66 | buf.WriteRune('*') 67 | case align.POSITION_CONSERVED: 68 | buf.WriteRune(':') 69 | case align.POSITION_SEMI_CONSERVED: 70 | buf.WriteRune('.') 71 | default: 72 | buf.WriteRune(' ') 73 | } 74 | } 75 | 76 | buf.WriteRune('\n') 77 | cursize += CLUSTAL_LINE 78 | } 79 | return buf.String() 80 | } 81 | -------------------------------------------------------------------------------- /io/countprofile/countprofile.go: -------------------------------------------------------------------------------- 1 | package countprofile 2 | 3 | import ( 4 | "bufio" 5 | "compress/gzip" 6 | "fmt" 7 | "os" 8 | "strconv" 9 | "strings" 10 | 11 | "github.com/evolbioinfo/goalign/align" 12 | "github.com/evolbioinfo/goalign/io/utils" 13 | ) 14 | 15 | // FromFile Parses a "profile" file constisting of a number of occurences of each character 16 | // per site, tab separated, in the form: 17 | // site - A B C D G H K M N R... 18 | // 0 1 2 3 4 0 ... 
19 | func FromFile(file string) (p *align.CountProfile, err error) { 20 | var f *os.File 21 | var r *bufio.Reader 22 | var gr *gzip.Reader 23 | var l string 24 | var i int 25 | var field string 26 | 27 | p = align.NewCountProfile() 28 | 29 | if file == "stdin" || file == "-" { 30 | f = os.Stdin 31 | } else { 32 | if f, err = os.Open(file); err != nil { 33 | return 34 | } 35 | } 36 | 37 | if strings.HasSuffix(file, ".gz") { 38 | if gr, err = gzip.NewReader(f); err != nil { 39 | return 40 | } 41 | r = bufio.NewReader(gr) 42 | } else { 43 | r = bufio.NewReader(f) 44 | } 45 | 46 | // We parse the header 47 | if l, err = utils.Readln(r); err != nil { 48 | return 49 | } 50 | headslice := strings.Split(l, "\t") 51 | header := make([]uint8, 0, len(headslice)-1) 52 | for i, field = range headslice { 53 | if i > 0 { 54 | r := []uint8(field) 55 | if len(r) != 1 { 56 | err = fmt.Errorf("Character name Should be One character") 57 | return 58 | } 59 | header = append(header, r[0]) 60 | } 61 | } 62 | p.SetHeader(header) 63 | 64 | // Then the counts 65 | var count int = 0 66 | l, err = utils.Readln(r) 67 | for err == nil { 68 | for i, field = range strings.Split(l, "\t") { 69 | if i > 0 { 70 | if count, err = strconv.Atoi(field); err != nil { 71 | return 72 | } 73 | p.AppendCount(i-1, count) 74 | } 75 | } 76 | l, err = utils.Readln(r) 77 | } 78 | if err.Error() == "EOF" { 79 | err = nil 80 | } 81 | return 82 | } 83 | -------------------------------------------------------------------------------- /io/error.go: -------------------------------------------------------------------------------- 1 | package io 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "runtime" 7 | "strings" 8 | ) 9 | 10 | const ( 11 | EXIT_SUCCESS = 0 12 | EXIT_FAILURE = 1 13 | ) 14 | 15 | func ExitWithMessage(err error) { 16 | _, fn, line, _ := runtime.Caller(1) 17 | 18 | cols := strings.Split(fn, "goalign/") 19 | name := cols[len(cols)-1] 20 | fmt.Fprintf(os.Stderr, "[Error] in %s (line %d), message: %v\n", name, 
line, err) 21 | os.Exit(EXIT_FAILURE) 22 | } 23 | 24 | func PrintMessage(message string) { 25 | _, fn, line, _ := runtime.Caller(1) 26 | 27 | cols := strings.Split(fn, "goalign/") 28 | name := cols[len(cols)-1] 29 | fmt.Fprintf(os.Stderr, "[Warning] in %s (line %d), message: %s\n", name, line, message) 30 | } 31 | 32 | func PrintSimpleMessage(message string) { 33 | fmt.Fprintf(os.Stderr, "%s\n", message) 34 | } 35 | 36 | func LogError(err error) { 37 | _, fn, line, _ := runtime.Caller(1) 38 | cols := strings.Split(fn, "goalign/") 39 | name := cols[len(cols)-1] 40 | fmt.Fprintf(os.Stderr, "[Error] in %s (line %d), message: %v\n", name, line, err) 41 | } 42 | -------------------------------------------------------------------------------- /io/fasta/lexer.go: -------------------------------------------------------------------------------- 1 | package fasta 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "io" 7 | ) 8 | 9 | // Scanner represents a lexical scanner. 10 | type Scanner struct { 11 | r *bufio.Reader 12 | } 13 | 14 | // NewScanner returns a new instance of Scanner. 15 | func NewScanner(r io.Reader) *Scanner { 16 | return &Scanner{r: bufio.NewReader(r)} 17 | } 18 | 19 | // read reads the next rune from the bufferred reader. 20 | // Returns the rune(0) if an error occurs (or io.EOF is returned). 21 | func (s *Scanner) read() rune { 22 | ch, _, err := s.r.ReadRune() 23 | if err != nil { 24 | return eof 25 | } 26 | return ch 27 | } 28 | 29 | // unread places the previously read rune back on the reader. 30 | func (s *Scanner) unread() { 31 | _ = s.r.UnreadRune() 32 | } 33 | 34 | // Scan returns the next token and literal value. 35 | func (s *Scanner) Scan() (tok Token, lit string) { 36 | // Read the next rune. 37 | ch := s.read() 38 | 39 | // If we see whitespace then consume all contiguous whitespace. 40 | // If we see a letter then consume as an ident or reserved word. 
41 | if isEndOfLine(ch) { 42 | s.unread() 43 | return s.scanEndOfLine() 44 | } 45 | 46 | switch ch { 47 | case eof: 48 | return EOF, "" 49 | case '>': 50 | return STARTIDENT, string(ch) 51 | } 52 | 53 | s.unread() 54 | return s.scanIdent() 55 | } 56 | 57 | // scanEndOfLine consumes the current rune and all contiguous \n\r. 58 | func (s *Scanner) scanEndOfLine() (tok Token, lit string) { 59 | // Create a buffer and read the current character into it. 60 | var buf bytes.Buffer 61 | buf.WriteRune(s.read()) 62 | 63 | // Read every subsequent whitespace character into the buffer. 64 | // Non-whitespace characters and EOF will cause the loop to exit. 65 | for { 66 | if ch := s.read(); ch == eof { 67 | break 68 | } else if !isEndOfLine(ch) { 69 | s.unread() 70 | break 71 | } else { 72 | buf.WriteRune(ch) 73 | } 74 | } 75 | 76 | return ENDOFLINE, buf.String() 77 | } 78 | 79 | // scanIdent consumes the current rune and all contiguous ident runes. 80 | func (s *Scanner) scanIdent() (tok Token, lit string) { 81 | // Create a buffer and read the current character into it. 82 | var buf bytes.Buffer 83 | buf.WriteRune(s.read()) 84 | 85 | // Read every subsequent ident character into the buffer. 86 | // Non-ident characters and EOF will cause the loop to exit. 
87 | for { 88 | if ch := s.read(); ch == eof { 89 | break 90 | } else if !isIdent(ch) { 91 | s.unread() 92 | break 93 | } else { 94 | _, _ = buf.WriteRune(ch) 95 | } 96 | } 97 | 98 | return IDENTIFIER, buf.String() 99 | } 100 | -------------------------------------------------------------------------------- /io/fasta/parser_test.go: -------------------------------------------------------------------------------- 1 | package fasta 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "strings" 7 | "testing" 8 | ) 9 | 10 | var fastastring string = ">s1\nACGATCGATTACTACTGAC\nACGACTGATCGATCG" 11 | var fastastring2 string = " >s1\nACGATCGATTACTACTGAC\nACGACTGATCGATCG" 12 | var fastastring3 string = ">s1\nACGATCGATTACTACTGAC\nACGACTGATCGATCG\n>s2\nACGATCGATTACTACTGAC\nACGACTGATCGATCG\n" 13 | var fastastring4 string = ">s1\nACGATCGATTACTACTGAC\nACGACTGATCGATCG\n>s2\nACGATCGATTACTACTGAC\nACGACTGATCGATC\n" 14 | var seq = "AACGTACGTACAGCTAGCTATGTACTGATCATGCTAGCTGC\nACCAGCATGCTACTACTAGCTCGATGCATCGCATATGCAC\n" 15 | 16 | func TestParse(t *testing.T) { 17 | align, err := NewParser(strings.NewReader(fastastring)).Parse() 18 | 19 | if err != nil { 20 | t.Error(err) 21 | } 22 | if align.Length() != 34 { 23 | t.Errorf("Alignment length is not 34 %d", align.Length()) 24 | } 25 | if align.NbSequences() != 1 { 26 | t.Errorf("There is not 1 sequence in the alignment %d", align.NbSequences()) 27 | } 28 | 29 | _, err2 := NewParser(strings.NewReader(fastastring2)).Parse() 30 | 31 | if err2 == nil { 32 | t.Errorf("There should be an error while parsing fastastring2") 33 | } 34 | 35 | align3, err3 := NewParser(strings.NewReader(fastastring3)).Parse() 36 | 37 | if err3 != nil { 38 | t.Error(err3) 39 | } 40 | 41 | if align3.NbSequences() != 2 { 42 | t.Errorf("There are not 2 sequence in the alignment" + fmt.Sprintf("%d", align3.NbSequences())) 43 | } 44 | 45 | _, err4 := NewParser(strings.NewReader(fastastring4)).Parse() 46 | 47 | if err4 == nil { 48 | t.Errorf("There should be an error while parsing 
fastastring4, which has different length sequences") 49 | } 50 | 51 | var fasta bytes.Buffer 52 | for i := 0; i < 1000; i++ { 53 | fasta.WriteString(fmt.Sprintf(">s%d\n%s", i, seq)) 54 | } 55 | align5, err5 := NewParser(strings.NewReader(fasta.String())).Parse() 56 | if err5 != nil { 57 | t.Error(err5) 58 | } 59 | if align5.NbSequences() != 1000 { 60 | t.Errorf("Alignment has not 1000 sequences : %d", align5.NbSequences()) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /io/fasta/tokens.go: -------------------------------------------------------------------------------- 1 | package fasta 2 | 3 | type Token int64 4 | 5 | var eof = rune(0) 6 | 7 | const ( 8 | ILLEGAL Token = iota 9 | STARTIDENT // > start of ident line 10 | IDENTIFIER // Identifier of sequence or part of sequence 11 | ENDOFLINE // End of line token 12 | EOF // End of File 13 | ) 14 | 15 | func isEndOfLine(ch rune) bool { 16 | return ch == '\n' || ch == '\r' 17 | } 18 | 19 | func isIdent(ch rune) bool { 20 | return ch != '\n' && ch != '\r' 21 | } 22 | -------------------------------------------------------------------------------- /io/fasta/utils.go: -------------------------------------------------------------------------------- 1 | package fasta 2 | 3 | const ( 4 | FASTA_LINE_LEN = 80 // max line length for fasta output 5 | FASTA_INITIAL_NB = 2 // max line length for fasta output 6 | ) 7 | -------------------------------------------------------------------------------- /io/fasta/writer.go: -------------------------------------------------------------------------------- 1 | package fasta 2 | 3 | import ( 4 | "bytes" 5 | 6 | "github.com/evolbioinfo/goalign/align" 7 | ) 8 | 9 | const ( 10 | FASTA_LINE = 80 11 | ) 12 | 13 | func min_int(a int, b int) int { 14 | if a < b { 15 | return a 16 | } 17 | return b 18 | } 19 | 20 | func WriteAlignment(sb align.SeqBag) string { 21 | var buf bytes.Buffer 22 | sb.IterateChar(func(name string, seq []uint8) bool { 
23 | buf.WriteString(">") 24 | buf.WriteString(name) 25 | buf.WriteString("\n") 26 | for i := 0; i < len(seq); i++ { 27 | if i%FASTA_LINE == 0 && i > 0 { 28 | buf.WriteString("\n") 29 | } 30 | buf.WriteByte(seq[i]) 31 | } 32 | buf.WriteRune('\n') 33 | return false 34 | }) 35 | return buf.String() 36 | } 37 | 38 | // Write input alignment as standard fasta sequences 39 | // It removes "-" characters. 40 | func WriteSequences(sb align.SeqBag) string { 41 | var buf bytes.Buffer 42 | 43 | sb.IterateChar(func(name string, seq []uint8) bool { 44 | buf.WriteString(">") 45 | buf.WriteString(name) 46 | buf.WriteString("\n") 47 | nbchar := 0 48 | for i := 0; i < len(seq); i++ { 49 | if seq[i] != '-' { 50 | buf.WriteByte(seq[i]) 51 | nbchar++ 52 | if nbchar == FASTA_LINE { 53 | buf.WriteString("\n") 54 | nbchar = 0 55 | } 56 | } 57 | } 58 | if nbchar != 0 { 59 | buf.WriteString("\n") 60 | } 61 | return false 62 | }) 63 | return buf.String() 64 | } 65 | -------------------------------------------------------------------------------- /io/nexus/nexus_parser_test.go: -------------------------------------------------------------------------------- 1 | package nexus_test 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/evolbioinfo/goalign/io/nexus" 8 | ) 9 | 10 | // Ensure the parser can parse strings into Statement ASTs. 
11 | func TestParser_ParseTree(t *testing.T) { 12 | goodnexus := [...]string{ 13 | `#NEXUS 14 | BEGIN TAXA; 15 | TaxLabels fish frog snake mouse; 16 | END; 17 | 18 | BEGIN CHARACTERS; 19 | Dimensions NChar=40; 20 | Format DataType=DNA; 21 | Matrix 22 | fish ACATA GAGGG TACCT CTAAA 23 | fish ACATA GAGGG TACCT CTAAG 24 | 25 | frog ACATA GAGGG TACCT CTAAC 26 | frog CCATA GAGGG TACCT CTAAG 27 | 28 | snake ACATA GAGGG TACCT CTAAG 29 | snake GCATA GAGGG TACCT CTAAG 30 | 31 | mouse ACATA GAGGG TACCT CTAAT 32 | mouse TCATA GAGGG TACCT CTAAG 33 | ; 34 | END; 35 | EOF 36 | `, 37 | } 38 | 39 | for i, innexus := range goodnexus { 40 | align, err := nexus.NewParser(strings.NewReader(innexus)).Parse() 41 | if err != nil { 42 | t.Errorf("Nexus parser error %d ERROR: %s\n", i, err.Error()) 43 | } else { 44 | if align.NbSequences() != 4 { 45 | t.Errorf("Alignment should have 4 sequences but has %d\n", align.NbSequences()) 46 | } 47 | if align.Length() != 40 { 48 | t.Errorf("Alignment should be 40 nt long, but is %d\n", align.Length()) 49 | } 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /io/nexus/nexus_token.go: -------------------------------------------------------------------------------- 1 | package nexus 2 | 3 | type Token int64 4 | 5 | var eof = rune(0) 6 | 7 | const ( 8 | ILLEGAL Token = iota 9 | EOF 10 | WS 11 | IDENT // Name of Node, or comment, or keyword 12 | NUMERIC // Any numerical value 13 | OPENBRACK // [ : For comment 14 | CLOSEBRACK // ] : For comment 15 | ENDOFCOMMAND // ; End of command 16 | ENDOFLINE // \r \n 17 | 18 | // Keywords 19 | NEXUS // #NEXUS : Start of nexus file 20 | EQUAL // '=' between keyword and value 21 | BEGIN // Begin 22 | DATA // Begin data -> Alignment 23 | TAXA // Begin taxa -> Definition of taxa 24 | TAXLABELS // Begin taxa : list of taxlabels 25 | TREES // Begin trees -> Definition of trees 26 | TREE // A specific tree in the BEGIN TREES section 27 | 28 | DIMENSIONS // Dimensions 
// isIdent reports whether ch can be part of an identifier: it is false for
// the brackets '[' and ']', for ';', for '=', for the line terminators '\r'
// and '\n', and for the two characters accepted by isWhitespace (' ' and
// '\t'); it is true for every other rune.
func isIdent(ch rune) bool {
	switch ch {
	case '[', ']', ';', '=', '\r', '\n', ' ', '\t':
		return false
	}
	return true
}
// min_int returns the smaller of the two integers a and b.
func min_int(a int, b int) int {
	if b <= a {
		return b
	}
	return a
}
21 | // Returns the rune(0) if an error occurs (or io.EOF is returned). 22 | func (s *Scanner) read() rune { 23 | ch, _, err := s.r.ReadRune() 24 | if err != nil { 25 | return eof 26 | } 27 | return ch 28 | } 29 | 30 | // unread places the previously read rune back on the reader. 31 | func (s *Scanner) unread() { 32 | _ = s.r.UnreadRune() 33 | } 34 | 35 | // Scan returns the next token and literal value. 36 | func (s *Scanner) Scan() (tok Token, lit string) { 37 | // Read the next rune. 38 | ch := s.read() 39 | 40 | if isEndOfLine(ch) { 41 | if isCR(ch) { 42 | ch := s.read() 43 | if !isNL(ch) { 44 | s.unread() 45 | } 46 | } 47 | return ENDOFLINE, "" 48 | } 49 | 50 | for isWhiteSpace(ch) { 51 | ch = s.read() 52 | } 53 | 54 | switch ch { 55 | case eof: 56 | return EOF, "" 57 | case ',': 58 | return SEPARATOR, string(ch) 59 | case '=': 60 | return EQUAL, string(ch) 61 | case '-': 62 | return RANGE, string(ch) 63 | case '/': 64 | return MODULO, string(ch) 65 | } 66 | 67 | s.unread() 68 | tok, ident := s.scanIdent() 69 | 70 | _, err := strconv.ParseInt(ident, 10, 64) 71 | if err != nil { 72 | return IDENTIFIER, ident 73 | } else { 74 | return DECIMAL, ident 75 | } 76 | } 77 | 78 | // scanIdent consumes the current rune and all contiguous ident runes. 79 | func (s *Scanner) scanIdent() (tok Token, lit string) { 80 | // Create a buffer and read the current character into it. 81 | var buf bytes.Buffer 82 | buf.WriteRune(s.read()) 83 | 84 | // Read every subsequent ident character into the buffer. 85 | // Non-ident characters and EOF will cause the loop to exit. 
// isIdent reports whether ch can be part of an identifier: every rune except
// the line terminators, the blank, and the characters ',', '-', '/' and '='.
func isIdent(ch rune) bool {
	for _, stop := range [...]rune{'\n', '\r', ',', '-', '/', '=', ' '} {
		if ch == stop {
			return false
		}
	}
	return true
}
| } 24 | 25 | func isNL(ch rune) bool { 26 | return ch == '\n' 27 | } 28 | 29 | func isIdent(ch rune) bool { 30 | return ch != '\n' && ch != ' ' && ch != '\r' 31 | } 32 | 33 | func isWhitespace(ch rune) bool { 34 | return ch == ' ' || ch == '\t' 35 | } 36 | -------------------------------------------------------------------------------- /io/phylip/utils.go: -------------------------------------------------------------------------------- 1 | package phylip 2 | 3 | const ( 4 | FASTA_LINE_LEN = 80 // max line length for fasta output 5 | FASTA_INITIAL_NB = 2 // max line length for fasta output 6 | ) 7 | -------------------------------------------------------------------------------- /io/phylip/writer.go: -------------------------------------------------------------------------------- 1 | package phylip 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | 7 | "github.com/evolbioinfo/goalign/align" 8 | ) 9 | 10 | const ( 11 | PHYLIP_LINE = 60 12 | PHYLIP_BLOCK = 10 13 | ) 14 | 15 | func min_int(a int, b int) int { 16 | if a < b { 17 | return a 18 | } 19 | return b 20 | } 21 | 22 | func WriteAlignment(al align.Alignment, strict, oneline, noblock bool) string { 23 | var buf bytes.Buffer 24 | var header bool = true 25 | var line_length = PHYLIP_LINE 26 | var block_length = PHYLIP_BLOCK 27 | 28 | cursize := 0 29 | buf.WriteString(fmt.Sprintf(" %d %d\n", al.NbSequences(), al.Length())) 30 | 31 | if oneline { 32 | line_length = al.Length() 33 | } 34 | if noblock { 35 | block_length = line_length 36 | } 37 | 38 | for cursize < al.Length() { 39 | if cursize > 0 { 40 | buf.WriteString("\n") 41 | } 42 | al.IterateChar(func(name string, seq []uint8) bool { 43 | if header { 44 | if strict { 45 | buf.WriteString(fmt.Sprintf("%-10s", name[:min_int(10, len(name))])) 46 | } else { 47 | buf.WriteString(name) 48 | buf.WriteString(" ") 49 | } 50 | } 51 | 52 | for i := cursize; i < cursize+line_length && i < len(seq); i += block_length { 53 | if i > cursize { 54 | buf.WriteString(" ") 55 | } else if 
!header { 56 | if strict { 57 | buf.WriteString(" ") 58 | } else { 59 | buf.WriteString(" ") 60 | } 61 | } 62 | end := min_int(i+block_length, len(seq)) 63 | for j := i; j < end; j++ { 64 | buf.WriteByte(seq[j]) 65 | } 66 | } 67 | buf.WriteString("\n") 68 | return false 69 | }) 70 | cursize += line_length 71 | header = false 72 | } 73 | return buf.String() 74 | } 75 | -------------------------------------------------------------------------------- /io/stockholm/stockholm_token.go: -------------------------------------------------------------------------------- 1 | package stockholm 2 | 3 | type Token int64 4 | 5 | var eof = rune(0) 6 | 7 | const ( 8 | ILLEGAL Token = iota 9 | EOF 10 | WS 11 | IDENT // Name of Node, or comment, or keyword 12 | NUMERIC // Any numerical value 13 | ENDOFLINE // \r \n 14 | 15 | // Keywords 16 | STOCKHOLM // STOCKHOLM : Start of Stockholm file 17 | MARKUP // # 18 | END // // 19 | TREE // A specific tree in the BEGIN TREES section 20 | ) 21 | 22 | func isWhitespace(ch rune) bool { 23 | return ch == ' ' || ch == '\t' 24 | } 25 | 26 | func isIdent(ch rune) bool { 27 | return ch != '[' && ch != ']' && ch != ';' && ch != '=' && ch != '\r' && ch != '\n' && !isWhitespace(ch) 28 | } 29 | 30 | func isEndOfLine(ch rune) bool { 31 | return ch == '\n' || ch == '\r' 32 | } 33 | 34 | func isCR(ch rune) bool { 35 | return ch == '\r' 36 | } 37 | 38 | func isNL(ch rune) bool { 39 | return ch == '\n' 40 | } 41 | -------------------------------------------------------------------------------- /io/stockholm/writer.go: -------------------------------------------------------------------------------- 1 | package stockholm 2 | 3 | import ( 4 | "bytes" 5 | 6 | "github.com/evolbioinfo/goalign/align" 7 | ) 8 | 9 | func WriteAlignment(al align.Alignment) string { 10 | var buf bytes.Buffer 11 | 12 | buf.WriteString("# STOCKHOLM 1.0\n") 13 | buf.WriteString("#=GF ID Goalign generated alignment\n") 14 | al.Iterate(func(name string, seq string) bool { 15 | 
buf.WriteString(name) 16 | buf.WriteString("\t") 17 | buf.WriteString(seq) 18 | buf.WriteRune('\n') 19 | return false 20 | }) 21 | buf.WriteString("//") 22 | 23 | return buf.String() 24 | } 25 | -------------------------------------------------------------------------------- /io/utils/writefiles.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "bufio" 5 | "compress/gzip" 6 | "io" 7 | "os" 8 | "strings" 9 | 10 | "github.com/ulikunitz/xz" 11 | ) 12 | 13 | type StringWriterCloser interface { 14 | io.Writer 15 | io.Closer 16 | io.StringWriter 17 | } 18 | 19 | type gzstringwritercloser struct { 20 | f *os.File 21 | gw *gzip.Writer 22 | buf *bufio.Writer 23 | } 24 | 25 | type xzstringwritercloser struct { 26 | f *os.File 27 | xw *xz.Writer 28 | buf *bufio.Writer 29 | } 30 | 31 | func (gswc *gzstringwritercloser) Close() (err error) { 32 | if err = gswc.buf.Flush(); err != nil { 33 | return 34 | } 35 | if err = gswc.gw.Close(); err != nil { 36 | return 37 | } 38 | return gswc.f.Close() 39 | } 40 | 41 | func (gswc *gzstringwritercloser) Write(p []byte) (nn int, err error) { 42 | return gswc.buf.Write(p) 43 | } 44 | 45 | func (gswc *gzstringwritercloser) WriteString(s string) (nn int, err error) { 46 | return gswc.buf.WriteString(s) 47 | } 48 | 49 | func (xswc *xzstringwritercloser) Close() (err error) { 50 | if err = xswc.buf.Flush(); err != nil { 51 | return 52 | } 53 | if err = xswc.xw.Close(); err != nil { 54 | return 55 | } 56 | return xswc.f.Close() 57 | } 58 | 59 | func (xswc *xzstringwritercloser) Write(p []byte) (nn int, err error) { 60 | return xswc.buf.Write(p) 61 | } 62 | 63 | func (xswc *xzstringwritercloser) WriteString(s string) (nn int, err error) { 64 | return xswc.buf.WriteString(s) 65 | } 66 | 67 | func OpenWriteFile(file string) (f StringWriterCloser, err error) { 68 | if file == "stdout" || file == "-" { 69 | f = os.Stdout 70 | } else if file == "none" { 71 | f, err = 
os.OpenFile(os.DevNull, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0666) 72 | } else if strings.HasSuffix(file, ".gz") { 73 | var fi *os.File 74 | if fi, err = os.Create(file); err != nil { 75 | return 76 | } 77 | gw := gzip.NewWriter(fi) 78 | buf := bufio.NewWriter(gw) 79 | f = &gzstringwritercloser{f: fi, gw: gw, buf: buf} 80 | } else if strings.HasSuffix(file, ".xz") { 81 | var fi *os.File 82 | if fi, err = os.Create(file); err != nil { 83 | return 84 | } 85 | xw, _ := xz.NewWriter(fi) 86 | buf := bufio.NewWriter(xw) 87 | f = &xzstringwritercloser{f: fi, xw: xw, buf: buf} 88 | } else { 89 | f, err = os.Create(file) 90 | } 91 | return 92 | } 93 | 94 | func CloseWriteFile(f io.Closer, filename string) { 95 | if filename != "-" && filename != "stdout" && filename != "none" { 96 | f.Close() 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | // Main entry point for goalign command line 2 | package main 3 | 4 | import "github.com/evolbioinfo/goalign/cmd" 5 | 6 | func main() { 7 | cmd.Execute() 8 | } 9 | -------------------------------------------------------------------------------- /models/dna/f81.go: -------------------------------------------------------------------------------- 1 | package dna 2 | 3 | import ( 4 | "fmt" 5 | 6 | "gonum.org/v1/gonum/mat" 7 | ) 8 | 9 | type F81Model struct { 10 | // Parameters (for eigen values/vectors computation) 11 | // See https://en.wikipedia.org/wiki/Models_of_DNA_evolution#F81_model_(Felsenstein_1981) 12 | qmatrix *mat.Dense 13 | leigenvect *mat.Dense 14 | val []float64 15 | reigenvect *mat.Dense 16 | } 17 | 18 | func NewF81Model() *F81Model { 19 | return &F81Model{ 20 | nil, 21 | nil, 22 | nil, 23 | nil, 24 | } 25 | } 26 | 27 | func (m *F81Model) InitModel(piA, piC, piG, piT float64) (err error) { 28 | m.qmatrix = mat.NewDense(4, 4, []float64{ 29 | -(piC + piG + piT), piC, piG, piT, 30 | 
piA, -(piA + piG + piT), piG, piT, 31 | piA, piC, -(piA + piC + piT), piT, 32 | piA, piC, piG, -(piA + piC + piG), 33 | }) 34 | // Normalization of Q 35 | norm := -piA*m.qmatrix.At(0, 0) - 36 | piC*m.qmatrix.At(1, 1) - 37 | piG*m.qmatrix.At(2, 2) - 38 | piT*m.qmatrix.At(3, 3) 39 | //norm := 1. / (2 * (piA*piC + piA*piG + piA*piT + piC*piG + piC*piT + piG*piT)) 40 | m.qmatrix.Apply(func(i, j int, v float64) float64 { return v / norm }, m.qmatrix) 41 | 42 | //fmt.Printf("Q=%v\n", mat.Formatted(m.qmatrix, mat.Prefix(" "), mat.Squeeze())) 43 | 44 | err = m.computeEigens() 45 | 46 | return 47 | } 48 | 49 | func (m *F81Model) computeEigens() (err error) { 50 | var u mat.CDense 51 | // Compute eigen values, left and right eigenvectors of Q 52 | eigen := &mat.Eigen{} 53 | if ok := eigen.Factorize(m.qmatrix, mat.EigenRight); !ok { 54 | err = fmt.Errorf("Problem during matrix decomposition") 55 | return 56 | } 57 | 58 | val := make([]float64, 4) 59 | for i, b := range eigen.Values(nil) { 60 | val[i] = real(b) 61 | } 62 | eigen.VectorsTo(&u) 63 | reigenvect := mat.NewDense(4, 4, nil) 64 | leigenvect := mat.NewDense(4, 4, nil) 65 | reigenvect.Apply(func(i, j int, val float64) float64 { return real(u.At(i, j)) }, reigenvect) 66 | leigenvect.Inverse(reigenvect) 67 | 68 | m.leigenvect = leigenvect 69 | m.reigenvect = reigenvect 70 | m.val = val 71 | 72 | return 73 | } 74 | 75 | func (m *F81Model) Eigens() (val []float64, leftvectors, rightvectors *mat.Dense, err error) { 76 | leftvectors = m.leigenvect 77 | rightvectors = m.reigenvect 78 | val = m.val 79 | 80 | return 81 | } 82 | 83 | func (m *F81Model) Pij(i, j int, l float64) float64 { 84 | return -1 85 | } 86 | 87 | func (m *F81Model) Analytical() bool { 88 | return false 89 | } 90 | 91 | func (m *F81Model) NState() int { 92 | return 4 93 | } 94 | -------------------------------------------------------------------------------- /models/dna/f84.go: -------------------------------------------------------------------------------- 
// F84Model holds the parameters used to compute the eigen values/vectors of
// the model: the four base frequencies and kappa.
// See https://en.wikipedia.org/wiki/Models_of_DNA_evolution#HKY85_model_(Hasegawa,_Kishino_and_Yano_1985)
type F84Model struct {
	piA, piC, piG, piT float64
	kappa              float64
}

// NewF84Model returns a model with uniform base frequencies (1/4 each) and
// kappa set to 1.
func NewF84Model() *F84Model {
	const uniform = 1. / 4.
	return &F84Model{
		piA:   uniform,
		piC:   uniform,
		piG:   uniform,
		piT:   uniform,
		kappa: 1.0,
	}
}

// InitModel records kappa and the base frequencies on the model.
//
// The rate matrix itself is never built; for reference, with piR = piA+piG
// and piY = piC+piT, it would be:
//
//	-(piC + (1+kappa/piR)*piG + piT), piC, (1 + kappa/piR) * piG, piT,
//	piA, -(piA + piG + (1+kappa/piY)*piT), piG, (1 + kappa/piY) * piT,
//	(1 + kappa/piR) * piA, piC, -((1+kappa/piR)*piA + piC + piT), piT,
//	piA, (1 + kappa/piY) * piC, piG, -(piA + (1+kappa/piY)*piC + piG),
func (m *F84Model) InitModel(kappa, piA, piC, piG, piT float64) {
	m.piA, m.piC, m.piG, m.piT = piA, piC, piG, piT
	m.kappa = kappa
}
/ (1 - m.piA*m.piA - m.piC*m.piC - m.piG*m.piG - m.piT*m.piT + 2.*m.kappa*(m.piC*m.piT/piY+m.piA*m.piG/piR)) 41 | 42 | val = []float64{ 43 | 0, 44 | -norm * (1 + m.kappa), 45 | -norm * (1 + m.kappa), 46 | -norm, 47 | } 48 | 49 | leftvectors = mat.NewDense(4, 4, []float64{ 50 | m.piA, m.piC, m.piG, m.piT, 51 | 0., m.piT / piY, 0., -m.piT / piY, 52 | m.piG / piR, 0., -m.piG / piR, 0., 53 | m.piA * piY / piR, -m.piC, m.piG * piY / piR, -m.piT, 54 | }) 55 | 56 | rightvectors = mat.NewDense(4, 4, []float64{ 57 | 1., 0., 1., 1., 58 | 1., 1., 0., -piR / piY, 59 | 1., 0., -m.piA / m.piG, 1., 60 | 1., -m.piC / m.piT, 0., -piR / piY, 61 | }) 62 | 63 | return 64 | } 65 | 66 | func (m *F84Model) Pij(i, j int, l float64) float64 { 67 | return -1 68 | } 69 | 70 | func (m *F84Model) Analytical() bool { 71 | return false 72 | } 73 | 74 | func (m *F84Model) NState() int { 75 | return 4 76 | } 77 | -------------------------------------------------------------------------------- /models/dna/gtr.go: -------------------------------------------------------------------------------- 1 | package dna 2 | 3 | import ( 4 | "fmt" 5 | 6 | "gonum.org/v1/gonum/mat" 7 | ) 8 | 9 | type GTRModel struct { 10 | qmatrix *mat.Dense 11 | leigenvect *mat.Dense 12 | val []float64 13 | reigenvect *mat.Dense 14 | } 15 | 16 | func NewGTRModel() *GTRModel { 17 | return >RModel{ 18 | nil, 19 | nil, 20 | nil, 21 | nil, 22 | } 23 | } 24 | 25 | // / \ 26 | // | * d f b | 27 | // | d * e a | 28 | // | f e * c | 29 | // | b a c * | 30 | // \ / 31 | func (m *GTRModel) InitModel(d, f, b, e, a, c, piA, piC, piG, piT float64) (err error) { 32 | m.qmatrix = mat.NewDense(4, 4, []float64{ 33 | -(d*piC + f*piG + b*piT), d * piC, f * piG, b * piT, 34 | d * piA, -(d*piA + e*piG + a*piT), e * piG, a * piT, 35 | f * piA, e * piC, -(f*piA + e*piC + c*piT), c * piT, 36 | b * piA, a * piC, c * piG, -(b*piA + a*piC + c*piG), 37 | }) 38 | // Normalization of Q 39 | norm := -piA*m.qmatrix.At(0, 0) - 40 | piC*m.qmatrix.At(1, 1) - 41 | 
piG*m.qmatrix.At(2, 2) - 42 | piT*m.qmatrix.At(3, 3) 43 | m.qmatrix.Apply(func(i, j int, v float64) float64 { return v / norm }, m.qmatrix) 44 | err = m.computeEigens() 45 | 46 | return 47 | } 48 | 49 | func (m *GTRModel) computeEigens() (err error) { 50 | var u mat.CDense 51 | 52 | // Compute eigen values, left and right eigenvectors of Q 53 | eigen := &mat.Eigen{} 54 | if ok := eigen.Factorize(m.qmatrix, mat.EigenRight); !ok { 55 | err = fmt.Errorf("Problem during matrix decomposition") 56 | return 57 | } 58 | 59 | val := make([]float64, 4) 60 | for i, b := range eigen.Values(nil) { 61 | val[i] = real(b) 62 | } 63 | eigen.VectorsTo(&u) 64 | reigenvect := mat.NewDense(4, 4, nil) 65 | leigenvect := mat.NewDense(4, 4, nil) 66 | reigenvect.Apply(func(i, j int, val float64) float64 { return real(u.At(i, j)) }, reigenvect) 67 | leigenvect.Inverse(reigenvect) 68 | 69 | m.leigenvect = leigenvect 70 | m.reigenvect = reigenvect 71 | m.val = val 72 | 73 | return 74 | } 75 | 76 | func (m *GTRModel) Eigens() (val []float64, leftvectors, rightvectors *mat.Dense, err error) { 77 | leftvectors = m.leigenvect 78 | rightvectors = m.reigenvect 79 | val = m.val 80 | 81 | return 82 | } 83 | 84 | func (m *GTRModel) Pij(i, j int, l float64) float64 { 85 | return -1 86 | } 87 | 88 | func (m *GTRModel) Analytical() bool { 89 | return false 90 | } 91 | 92 | func (m *GTRModel) NState() int { 93 | return 4 94 | } 95 | -------------------------------------------------------------------------------- /models/dna/jc.go: -------------------------------------------------------------------------------- 1 | package dna 2 | 3 | import ( 4 | "math" 5 | 6 | "gonum.org/v1/gonum/mat" 7 | ) 8 | 9 | type JCModel struct { 10 | } 11 | 12 | func NewJCModel() *JCModel { 13 | return &JCModel{} 14 | } 15 | 16 | func (m *JCModel) InitModel() (err error) { 17 | return 18 | } 19 | 20 | // Left vectors and right vectors are given in column-major format 21 | func (m *JCModel) Eigens() (val []float64, leftvectors, 
rightvectors *mat.Dense, err error) { 22 | val = []float64{ 23 | 0, 24 | -4. / 3., 25 | -4. / 3., 26 | -4. / 3., 27 | } 28 | 29 | leftvectors = mat.NewDense(4, 4, []float64{ 30 | 1. / 4., 1. / 4., 1. / 4., 1. / 4., 31 | -1. / 4., -1. / 4., 3. / 4., -1. / 4., 32 | -1. / 4., 3. / 4., -1. / 4., -1. / 4., 33 | 3. / 4., -1. / 4., -1. / 4., -1. / 4., 34 | }) 35 | 36 | rightvectors = mat.NewDense(4, 4, []float64{ 37 | 1., 0., 0., 1., 38 | 1., 0., 1., 0., 39 | 1., 1., 0., 0., 40 | 1., -1., -1., -1., 41 | }) 42 | return 43 | } 44 | 45 | func (m *JCModel) Pij(i, j int, l float64) float64 { 46 | p := 0.25 * (1.0 - math.Exp(-4.0/3.0*l)) 47 | if i != j { 48 | return p 49 | } 50 | return p + math.Exp(-4.0/3.0*l) 51 | } 52 | 53 | func (m *JCModel) Analytical() bool { 54 | return true 55 | } 56 | 57 | func (m *JCModel) NState() int { 58 | return 4 59 | } 60 | -------------------------------------------------------------------------------- /models/dna/k2p.go: -------------------------------------------------------------------------------- 1 | package dna 2 | 3 | import ( 4 | "math" 5 | 6 | "gonum.org/v1/gonum/mat" 7 | ) 8 | 9 | type K2PModel struct { 10 | // Parameters (for eigen values/vectors computation) 11 | // Default 1.0 12 | // See https://en.wikipedia.org/wiki/Models_of_DNA_evolution#K80_model_(Kimura_1980) 13 | kappa float64 14 | } 15 | 16 | func NewK2PModel() *K2PModel { 17 | return &K2PModel{ 18 | 1., 19 | } 20 | } 21 | 22 | // For Eigen values/vectors computation 23 | // 24 | func (m *K2PModel) InitModel(kappa float64) { 25 | m.kappa = kappa 26 | } 27 | 28 | func (m *K2PModel) Eigens() (val []float64, leftvectors, rightvectors *mat.Dense, err error) { 29 | val = []float64{ 30 | 0, 31 | -2 * (1 + m.kappa) / (m.kappa + 2), 32 | -2 * (1 + m.kappa) / (m.kappa + 2), 33 | -4 / (m.kappa + 2), 34 | } 35 | 36 | leftvectors = mat.NewDense(4, 4, []float64{ 37 | 1. / 4., 1. / 4., 1. / 4., 1. / 4., 38 | 0, 1. / 2., 0, -1. / 2., 39 | 1. / 2., 0, -1. / 2., 0, 40 | 1. / 4., -1. 
/ 4., 1. / 4., -1. / 4., 41 | }) 42 | 43 | rightvectors = mat.NewDense(4, 4, []float64{ 44 | 1., 0., 1., 1., 45 | 1., 1., 0., -1., 46 | 1., 0., -1., 1., 47 | 1., -1., 0., -1., 48 | }) 49 | 50 | return 51 | } 52 | 53 | func (m *K2PModel) Pij(i, j int, l float64) float64 { 54 | k := 0.5 * m.kappa 55 | pts := (0.25 - 0.5*math.Exp(-(2.0*k+1.0)/(k+1)*l) + 0.25*math.Exp(-2.0/(k+1.0)*l)) 56 | ptr := 0.5 * (0.5 - 0.5*math.Exp(-2.0/(k+1)*l)) 57 | if (i == 0 && j == 2) || (i == 1 && j == 3) || 58 | (i == 2 && j == 0) || (i == 3 && j == 1) { 59 | // Transition 60 | return pts 61 | } else if i == j { 62 | // Same i and j 63 | return 1.0 - (pts + 2.0*ptr) 64 | } else { 65 | // Transversion 66 | return ptr 67 | } 68 | } 69 | 70 | func (m *K2PModel) Analytical() bool { 71 | return true 72 | } 73 | 74 | func (m *K2PModel) NState() int { 75 | return 4 76 | } 77 | -------------------------------------------------------------------------------- /models/dna/model.go: -------------------------------------------------------------------------------- 1 | package dna 2 | 3 | import ( 4 | "gonum.org/v1/gonum/mat" 5 | ) 6 | 7 | const ( 8 | DBL_MIN = 2.2250738585072014e-308 9 | ) 10 | 11 | type DNAModel interface { 12 | Eigens() (val []float64, leftvectors, rightvectors *mat.Dense, err error) 13 | Analytical() bool // returns true if analytical pij computation is possible and implemented 14 | Pij(i, j int, l float64) float64 // Returns -1 if not possible to compute it anatically without eigens (or not yet implemented) 15 | } 16 | -------------------------------------------------------------------------------- /models/dna/tn93.go: -------------------------------------------------------------------------------- 1 | package dna 2 | 3 | import ( 4 | "fmt" 5 | 6 | "gonum.org/v1/gonum/mat" 7 | ) 8 | 9 | type TN93Model struct { 10 | // Parameters (for eigen values/vectors computation) 11 | // See https://en.wikipedia.org/wiki/Models_of_DNA_evolution#F81_model_(Felsenstein_1981) 12 | qmatrix 
*mat.Dense 13 | leigenvect *mat.Dense 14 | val []float64 15 | reigenvect *mat.Dense 16 | } 17 | 18 | func NewTN93Model() *TN93Model { 19 | return &TN93Model{ 20 | nil, 21 | nil, 22 | nil, 23 | nil, 24 | } 25 | } 26 | 27 | func (m *TN93Model) InitModel(kappa1, kappa2, piA, piC, piG, piT float64) (err error) { 28 | m.qmatrix = mat.NewDense(4, 4, []float64{ 29 | -(piC + kappa1*piG + piT), piC, kappa1 * piG, piT, 30 | piA, -(piA + piG + kappa2*piT), piG, kappa2 * piT, 31 | kappa1 * piA, piC, -(kappa1*piA + piC + piT), piT, 32 | piA, kappa2 * piC, piG, -(piA + kappa2*piC + piG), 33 | }) 34 | // Normalization of Q 35 | norm := -piA*m.qmatrix.At(0, 0) - 36 | piC*m.qmatrix.At(1, 1) - 37 | piG*m.qmatrix.At(2, 2) - 38 | piT*m.qmatrix.At(3, 3) 39 | m.qmatrix.Apply(func(i, j int, v float64) float64 { return v / norm }, m.qmatrix) 40 | err = m.computeEigens() 41 | return 42 | } 43 | 44 | func (m *TN93Model) computeEigens() (err error) { 45 | var u mat.CDense 46 | 47 | // Compute eigen values, left and right eigenvectors of Q 48 | eigen := &mat.Eigen{} 49 | if ok := eigen.Factorize(m.qmatrix, mat.EigenRight); !ok { 50 | err = fmt.Errorf("Problem during matrix decomposition") 51 | return 52 | } 53 | 54 | val := make([]float64, 4) 55 | for i, b := range eigen.Values(nil) { 56 | val[i] = real(b) 57 | } 58 | eigen.VectorsTo(&u) 59 | reigenvect := mat.NewDense(4, 4, nil) 60 | leigenvect := mat.NewDense(4, 4, nil) 61 | reigenvect.Apply(func(i, j int, val float64) float64 { return real(u.At(i, j)) }, reigenvect) 62 | leigenvect.Inverse(reigenvect) 63 | 64 | m.leigenvect = leigenvect 65 | m.reigenvect = reigenvect 66 | m.val = val 67 | return 68 | } 69 | 70 | func (m *TN93Model) Eigens() (val []float64, leftvectors, rightvectors *mat.Dense, err error) { 71 | leftvectors = m.leigenvect 72 | rightvectors = m.reigenvect 73 | val = m.val 74 | return 75 | } 76 | 77 | func (m *TN93Model) Pij(i, j int, l float64) float64 { 78 | return -1.0 79 | } 80 | 81 | func (m *TN93Model) Analytical() bool 
{ 82 | return false 83 | } 84 | 85 | func (m *TN93Model) NState() int { 86 | return 4 87 | } 88 | -------------------------------------------------------------------------------- /models/model.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "math" 5 | 6 | "gonum.org/v1/gonum/mat" 7 | ) 8 | 9 | type Model interface { 10 | NState() int 11 | Eigens() (val []float64, leftvectors, rightvectors *mat.Dense, err error) 12 | Analytical() bool 13 | Pij(i, j int, l float64) float64 14 | } 15 | 16 | // Probability matrix 17 | type Pij struct { 18 | length float64 // branch length / t 19 | model Model // Model 20 | pij *mat.Dense 21 | expt []float64 // tmp array 22 | uexpt *mat.Dense // tmp Dense 23 | } 24 | 25 | func NewPij(m Model, l float64) (pij *Pij, err error) { 26 | pij = &Pij{DBL_MIN, 27 | m, 28 | mat.NewDense(m.NState(), m.NState(), nil), 29 | make([]float64, m.NState()), 30 | mat.NewDense(m.NState(), m.NState(), nil), 31 | } 32 | err = pij.SetLength(l) 33 | return 34 | } 35 | 36 | func (pij *Pij) SetLength(l float64) (err error) { 37 | if pij.length != l && !pij.model.Analytical() { 38 | var i, j, k int 39 | var val []float64 40 | var left, right *mat.Dense 41 | ns := pij.model.NState() 42 | 43 | if val, left, right, err = pij.model.Eigens(); err != nil { 44 | return 45 | } 46 | 47 | for i = 0; i < ns; i++ { 48 | pij.expt[i] = float64(math.Exp(val[i] * l)) 49 | } 50 | for i = 0; i < ns; i++ { 51 | for j = 0; j < ns; j++ { 52 | pij.uexpt.Set(i, j, right.At(i, j)*pij.expt[j]) 53 | } 54 | } 55 | v := 0.0 56 | for i = 0; i < ns; i++ { 57 | for j = 0; j < ns; j++ { 58 | v = 0.0 59 | for k = 0; k < ns; k++ { 60 | v += pij.uexpt.At(i, k) * left.At(k, j) 61 | } 62 | if v < DBL_MIN { 63 | v = DBL_MIN 64 | } 65 | pij.pij.Set(i, j, v) 66 | } 67 | } 68 | } 69 | pij.length = l 70 | 71 | return 72 | } 73 | 74 | func (pij *Pij) Pij(i, j int) float64 { 75 | if pij.model.Analytical() { 76 | return 
(pij.model.Pij(i, j, pij.length)) 77 | } 78 | return pij.pij.At(i, j) 79 | } 80 | -------------------------------------------------------------------------------- /stats/dirichlet.go: -------------------------------------------------------------------------------- 1 | package stats 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "slices" 7 | ) 8 | 9 | // Dirichlet returns a set of random numbers from dirichlet distribution 10 | // (See https://en.wikipedia.org/wiki/Dirichlet_distribution#Random_variate_generation) 11 | // Factor: multiplying factor to apply to all values 12 | func Dirichlet(factor float64, alpha ...float64) (sample []float64, err error) { 13 | if len(alpha) <= 2 { 14 | err = fmt.Errorf("alpha parameter vector for Dirichlet sample contains less than 2 values") 15 | return 16 | } 17 | sample = make([]float64, len(alpha)) 18 | sum := 0.0 19 | for i, a := range alpha { 20 | if a <= 0.0 { 21 | err = fmt.Errorf("invalid parameter alpha %.2f", a) 22 | return 23 | } 24 | sample[i] = gamma(a, 1) 25 | sum += sample[i] 26 | } 27 | for i, _ := range alpha { 28 | sample[i] = factor * sample[i] / sum 29 | } 30 | return 31 | } 32 | 33 | // Dirichlet1 returns a set of random numbers from dirichlet distribution 34 | // When each alpha equals 1 (see https://en.wikipedia.org/wiki/Dirichlet_distribution#When_each_alpha_is_1) 35 | func Dirichlet1(factor float64, nvalues int) (sample []float64, err error) { 36 | if nvalues <= 2 { 37 | err = fmt.Errorf("nvalues should be > 2") 38 | return 39 | } 40 | sample = make([]float64, nvalues) 41 | intervals := make([]float64, nvalues+1) 42 | intervals[0] = 0.0 43 | intervals[1] = 1.0 44 | for i := 2; i < nvalues+1; i++ { 45 | intervals[i] = rand.Float64() 46 | } 47 | slices.Sort(intervals) 48 | for i := 1; i < nvalues+1; i++ { 49 | sample[i-1] = factor * (intervals[i] - intervals[i-1]) 50 | } 51 | return 52 | } 53 | -------------------------------------------------------------------------------- /stats/gamma.go: 
-------------------------------------------------------------------------------- 1 | package stats 2 | 3 | // Inspired from https://github.com/leesper/go_rng/blob/master/gamma.go 4 | 5 | import ( 6 | "errors" 7 | "fmt" 8 | "math" 9 | "math/rand" 10 | 11 | "github.com/evolbioinfo/goalign/io" 12 | ) 13 | 14 | // Gamma returns a random number of gamma distribution (alpha > 0.0 and beta > 0.0) 15 | func Gamma(alpha, beta float64) float64 { 16 | if !(alpha > 0.0) || !(beta > 0.0) { 17 | io.ExitWithMessage(errors.New(fmt.Sprintf("Invalid parameter alpha %.2f beta %.2f", alpha, beta))) 18 | } 19 | return gamma(alpha, beta) 20 | } 21 | 22 | // inspired by random.py 23 | func gamma(alpha, beta float64) float64 { 24 | var MAGIC_CONST float64 = 4 * math.Exp(-0.5) / math.Sqrt(2.0) 25 | if alpha > 1.0 { 26 | // Use R.C.H Cheng "The generation of Gamma variables with 27 | // non-integral shape parameters", Applied Statistics, (1977), 26, No. 1, p71-74 28 | 29 | ainv := math.Sqrt(2.0*alpha - 1.0) 30 | bbb := alpha - math.Log(4.0) 31 | ccc := alpha + ainv 32 | 33 | for { 34 | u1 := rand.Float64() 35 | if !(1e-7 < u1 && u1 < .9999999) { 36 | continue 37 | } 38 | u2 := 1.0 - rand.Float64() 39 | v := math.Log(u1/(1.0-u1)) / ainv 40 | x := alpha * math.Exp(v) 41 | z := u1 * u1 * u2 42 | r := bbb + ccc*v - x 43 | if r+MAGIC_CONST-4.5*z >= 0.0 || r >= math.Log(z) { 44 | return x * beta 45 | } 46 | } 47 | } else if alpha == 1.0 { 48 | u := rand.Float64() 49 | for u <= 1e-7 { 50 | u = rand.Float64() 51 | } 52 | return -math.Log(u) * beta 53 | } else { // alpha between 0.0 and 1.0 (exclusive) 54 | // Uses Algorithm of Statistical Computing - kennedy & Gentle 55 | var x float64 56 | for { 57 | u := rand.Float64() 58 | b := (math.E + alpha) / math.E 59 | p := b * u 60 | if p <= 1.0 { 61 | x = math.Pow(p, 1.0/alpha) 62 | } else { 63 | x = -math.Log((b - p) / alpha) 64 | } 65 | u1 := rand.Float64() 66 | if p > 1.0 { 67 | if u1 <= math.Pow(x, alpha-1.0) { 68 | break 69 | } 70 | } else if u1 <= 
math.Exp(-x) { 71 | break 72 | } 73 | } 74 | return x * beta 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /stats/stats_test.go: -------------------------------------------------------------------------------- 1 | package stats 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "os" 7 | "testing" 8 | ) 9 | 10 | func TestDirichlet(t *testing.T) { 11 | 12 | size := 1000 13 | alpha := make([]float64, size) 14 | for i := 0; i < size; i++ { 15 | alpha[i] = 1.0 16 | } 17 | sample, _ := Dirichlet(1.0, alpha...) 18 | 19 | if len(sample) != size { 20 | t.Error("Size of sample is different from alpha slice") 21 | } 22 | 23 | for _, a := range sample { 24 | if a < 0 { 25 | t.Error("Dirichlet Sample should be positive") 26 | } 27 | fmt.Fprintf(os.Stdout, "\t%f", a) 28 | } 29 | } 30 | 31 | func TestDirichlet1(t *testing.T) { 32 | 33 | size := 1000 34 | factor := float64(size) 35 | sample, _ := Dirichlet1(factor, size) 36 | 37 | if len(sample) != size { 38 | t.Error("Size of sample is different from alpha slice") 39 | } 40 | 41 | sum := 0.0 42 | for _, a := range sample { 43 | sum += a 44 | if a < 0 { 45 | t.Error("Dirichlet Sample should be positive") 46 | } 47 | fmt.Fprintf(os.Stdout, "\t%f", a) 48 | } 49 | if math.Abs(sum-factor) > 0.00000000001 { 50 | t.Errorf("Dirichlet sum %f != %f", sum, factor) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /tests/data/test_bz.bz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evolbioinfo/goalign/f4a9aaba0c41e1fd188cc906f6514e8d849d8f72/tests/data/test_bz.bz -------------------------------------------------------------------------------- /tests/data/test_distance.phy.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evolbioinfo/goalign/f4a9aaba0c41e1fd188cc906f6514e8d849d8f72/tests/data/test_distance.phy.gz 
// Version is the version string of the program. Its default value is "Unset".
// NOTE(review): presumably overridden at build time (e.g. through linker
// flags) — confirm against the Makefile.
var Version string = "Unset"