├── .eslintrc.json ├── .gitignore ├── .prettierrc.json ├── LICENSE ├── README.md ├── jest.config.js ├── jest.js ├── package-lock.json ├── package.json ├── release.sh ├── src ├── cli.ts ├── examples │ ├── benchling │ │ ├── benchling-empty.json │ │ ├── benchling1.json │ │ └── benchling2.json │ ├── biobrick │ │ └── iGEM.BioBrick.xml │ ├── fasta │ │ ├── Benchling.Fasta.fasta │ │ ├── JPUB_000128.fasta │ │ ├── JPUB_test.fasta │ │ ├── R0010_AB.gb │ │ ├── example.fas │ │ ├── multi_test.fas │ │ ├── multisequence.fas │ │ ├── oldstyle.fas │ │ ├── pBbE0c-RFP.fasta │ │ ├── pBbS0c-RFP.fasta │ │ ├── pBbS0c-RFP_no_name.fasta │ │ ├── pBbS0c-RFP_no_name.txt │ │ └── pBbS8c_RFP.fas │ ├── genbank │ │ ├── Benchling.Genbank.gb │ │ ├── CCR5_multifrag_insert1.gb │ │ ├── Ecoli_DERA_Implicitly_Circular.gb │ │ ├── Ecoli_DERA_Implicitly_Linear.gb │ │ ├── Geneious.Genbank.gb │ │ ├── NC_011521.gb │ │ ├── RTO4_16460_individual_exons.gb │ │ ├── RTO4_16460_joined_feature.gb │ │ ├── SBOL_no_preserve_example.gb │ │ ├── SBOL_preserve_example.gb │ │ ├── breakingGenbank.gb │ │ ├── featNameIsNumber.gb │ │ ├── gbFileWithTagsOnParts.gb │ │ ├── gen_bank_ex.gb │ │ ├── genbankThatBrokeSbolImport.gb │ │ ├── multi-seq-genbank.gb │ │ ├── pBbE0c-RFP.gb │ │ ├── pBbE0c-RFP_1.gb │ │ ├── pBbS0c-RFP.gb │ │ ├── pBbS0c-RFP_no_name.gb │ │ ├── pBbS8c_RFP.gb │ │ ├── pProSrfp.gb │ │ ├── pUC_+_insert.gb │ │ ├── pj5_00002.gb │ │ ├── pj5_00028.gb │ │ ├── proteinTestSeq1.gp │ │ ├── rhaBp-Pfu-pUN_alt.gb │ │ ├── sequence.gp │ │ ├── testGenbankFile.2.gb │ │ ├── testGenbankFile.gb │ │ └── testPart.snapgene.gb │ ├── jbei │ │ ├── pBbE0c-RFP.linear.seq │ │ ├── pBbE0c-RFP.seq │ │ └── pBbS0c-RFP.seq │ ├── sbol │ │ ├── v1 │ │ │ ├── SBOL1and2Test.xml │ │ │ ├── example.xml │ │ │ ├── j5.SBOL.xml │ │ │ ├── pBbE0c-RFP.xml │ │ │ ├── pBbS0c-RFP.xml │ │ │ └── signal_peptide_SBOL.xml │ │ └── v2 │ │ │ ├── A1.xml │ │ │ ├── BBa_I0462.xml │ │ │ ├── BBa_I0462_orig.xml │ │ │ ├── BBa_T9002.xml │ │ │ ├── BBa_T9002_orig.xml │ │ │ ├── 
ComponentDefinitionOutput.xml │ │ │ ├── CreateAndRemoveModel.xml │ │ │ ├── CutExample.xml │ │ │ ├── ModuleDefinitionOutput.xml │ │ │ ├── RepressionModel.xml │ │ │ ├── SequenceOutput.xml │ │ │ ├── SimpleComponentDefinitionExample.xml │ │ │ ├── attachment_pointers.xml │ │ │ ├── eukaryotic_promoters.xml │ │ │ ├── eukaryotic_promoters_enumerated.xml │ │ │ ├── eukaryotic_promoters_sampled.xml │ │ │ ├── eukaryotic_transcriptional_unit.xml │ │ │ ├── eukaryotic_transcriptional_unit_enumerated.xml │ │ │ ├── eukaryotic_transcriptional_unit_sampled.xml │ │ │ ├── gfp_reporter_template.xml │ │ │ ├── gfp_reporter_template_enumerated.xml │ │ │ ├── gfp_reporter_template_sampled.xml │ │ │ ├── igem1.xml │ │ │ ├── igem2.xml │ │ │ ├── igem3.xml │ │ │ ├── labhost_All.xml │ │ │ ├── labhost_All_orig.xml │ │ │ ├── labhost_Aspergillus_nidulans.xml │ │ │ ├── labhost_Aspergillus_nidulans_orig.xml │ │ │ ├── labhost_Bacillus_subtilis.xml │ │ │ ├── labhost_Bacillus_subtilis_orig.xml │ │ │ ├── labhost_Drosophila_melanogaster.xml │ │ │ ├── labhost_Drosophila_melanogaster_orig.xml │ │ │ ├── labhost_Escherichia_Coli.xml │ │ │ ├── labhost_Escherichia_Coli_orig.xml │ │ │ ├── labhost_Gram-negative_bacteria.xml │ │ │ ├── labhost_Gram-negative_bacteria_orig.xml │ │ │ ├── labhost_Insect_Cells.xml │ │ │ ├── labhost_Insect_Cells_orig.xml │ │ │ ├── labhost_Kluyveromyces_lactis.xml │ │ │ ├── labhost_Kluyveromyces_lactis_orig.xml │ │ │ ├── labhost_Mammalian_Cells.xml │ │ │ ├── labhost_Mammalian_Cells_orig.xml │ │ │ ├── labhost_Pichia_pastoris.xml │ │ │ ├── labhost_Pichia_pastoris_orig.xml │ │ │ ├── labhost_Plant_Cells.xml │ │ │ ├── labhost_Plant_Cells_orig.xml │ │ │ ├── labhost_Saccharomyces_cerevisiae.xml │ │ │ ├── labhost_Saccharomyces_cerevisiae_orig.xml │ │ │ ├── labhost_Schizosaccharomyces_pombe.xml │ │ │ ├── labhost_Schizosaccharomyces_pombe_orig.xml │ │ │ ├── labhost_Unspecified.xml │ │ │ ├── labhost_Unspecified_orig.xml │ │ │ ├── memberAnnotations.xml │ │ │ ├── multipleSequences.xml │ │ │ ├── 
pIKE_pTAK_cassettes 2.xml │ │ │ ├── pIKE_pTAK_cassettes 2_orig.xml │ │ │ ├── pIKE_pTAK_cassettes.xml │ │ │ ├── pIKE_pTAK_cassettes_orig.xml │ │ │ ├── pIKE_pTAK_left_right_cassettes.xml │ │ │ ├── pIKE_pTAK_left_right_cassettes_orig.xml │ │ │ ├── pIKE_pTAK_toggle_switches.xml │ │ │ ├── pIKE_pTAK_toggle_switches_orig.xml │ │ │ ├── partial_pIKE_left_cassette.xml │ │ │ ├── partial_pIKE_left_cassette_orig.xml │ │ │ ├── partial_pIKE_right_casette.xml │ │ │ ├── partial_pIKE_right_casette_orig.xml │ │ │ ├── partial_pIKE_right_cassette.xml │ │ │ ├── partial_pIKE_right_cassette_orig.xml │ │ │ ├── partial_pTAK_left_cassette.xml │ │ │ ├── partial_pTAK_left_cassette_orig.xml │ │ │ ├── partial_pTAK_right_cassette.xml │ │ │ ├── partial_pTAK_right_cassette_orig.xml │ │ │ ├── phoenix_plasmid_lib_collection.xml │ │ │ ├── sequence1.xml │ │ │ ├── sequence2.xml │ │ │ ├── sequence3.xml │ │ │ ├── sequence4.xml │ │ │ ├── singleSequence.xml │ │ │ └── toggle.xml │ ├── seqbuilder │ │ ├── Pombe_ch3_annotate.sbd │ │ └── pFA6a nmt41 3xflag AID kanMX6 annotated.sbd │ └── snapgene │ │ ├── RV027028.dna │ │ └── pBbB8c-GFP.dna ├── fetchFile.test.ts ├── fetchFile.ts ├── index.ts ├── parseFile.test.ts ├── parseFile.ts ├── parsers │ ├── benchling.ts │ ├── biobrick.test.ts │ ├── biobrick.ts │ ├── fasta.ts │ ├── genbank.ts │ ├── jbei.test.ts │ ├── jbei.ts │ ├── sbol.ts │ ├── sbol.v1.ts │ ├── sbol.v2.ts │ ├── seqbuilder.ts │ └── snapgene.ts ├── utils.test.ts └── utils.ts ├── tsconfig.json └── webpack.config.js /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "browser": true, 4 | "node": true, 5 | "es2021": true 6 | }, 7 | "extends": ["eslint:recommended", "plugin:@typescript-eslint/recommended"], 8 | "parser": "@typescript-eslint/parser", 9 | "parserOptions": { 10 | "ecmaVersion": "latest", 11 | "sourceType": "module" 12 | }, 13 | "plugins": ["@typescript-eslint", "sort-destructure-keys", "sort-keys-fix", "typescript-sort-keys"], 14 | 
"rules": { 15 | "@typescript-eslint/ban-ts-comment": "warn", 16 | "@typescript-eslint/padding-line-between-statements": [ 17 | "error", 18 | { 19 | "blankLine": "always", 20 | "prev": "*", 21 | "next": ["class", "export", "function", "interface", "type"] 22 | } 23 | ], 24 | "sort-destructure-keys/sort-destructure-keys": 2, 25 | "sort-keys-fix/sort-keys-fix": "error", 26 | "typescript-sort-keys/interface": "error", 27 | "typescript-sort-keys/string-enum": "error" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | #production 9 | /dist 10 | 11 | # testing 12 | /coverage 13 | 14 | # misc 15 | .DS_Store 16 | .env 17 | .env.local 18 | .env.development.local 19 | .env.test.local 20 | .env.production.local 21 | 22 | # IDE specific 23 | /.vscode 24 | .eslintcache 25 | 26 | npm-debug.log* 27 | yarn-debug.log* 28 | yarn-error.log* 29 | /.log/ 30 | -------------------------------------------------------------------------------- /.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "arrowParens": "avoid", 3 | "importOrder": ["^\\w", "^[./]"], 4 | "importOrderSeparation": true, 5 | "importOrderSortSpecifiers": true, 6 | "printWidth": 120 7 | } 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Lattice Automation 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | 
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # seqparse 2 | 3 | [![npm version](https://badge.fury.io/js/seqparse.svg)](https://www.npmjs.com/package/seqparse) 4 | 5 | Parse sequence files (GenBank, FASTA, JBEI, SnapGene, SBOL) or accession IDs (NCBI, iGEM) to a simple, common format: 6 | 7 | ```ts 8 | interface Seq { 9 | name: string; 10 | type: "dna" | "rna" | "aa" | "unknown"; 11 | seq: string; 12 | annotations: Annotation[]; 13 | } 14 | 15 | interface Annotation { 16 | name: string; 17 | start: number; 18 | end: number; 19 | direction?: number; 20 | color?: string; 21 | type?: string; 22 | } 23 | ``` 24 | 25 | ## Installation 26 | 27 | ```bash 28 | npm i seqparse 29 | ``` 30 | 31 | To install the CLI globally: 32 | 33 | ```bash 34 | npm i -g seqparse 35 | ``` 36 | 37 | ## Examples 38 | 39 | ### Library 40 | 41 | ```ts 42 | import seqparse from "seqparse"; 43 | 44 | const { name, type, seq, annotations } = await seqparse(file); 45 | ``` 46 | 47 | ### CLI 48 | 49 | Example outputs are 
truncated for clarity. 50 | 51 | ```bash 52 | # parse files 53 | $ seqparse pBbE0c-RFP.gb 54 | { 55 | "name": "pBbE0c-RFP", 56 | "type": "dna", 57 | "seq": "cagctagctcagtcctaggtactgtgctagctacta...", 58 | "annotations": [ 59 | { 60 | "name": "colE1 origin", 61 | "start": 1234, 62 | "end": 1917, 63 | "direction": -1, 64 | "type": "rep_origin" 65 | }, 66 | ... 67 | 68 | # parse files from stdin 69 | $ cat pBbE0c-RFP.fasta | seqparse 70 | { 71 | "name": "pBbE0c-RFP.1", 72 | "type": "dna", 73 | "seq": "cagctagctcagtcctagg...", 74 | "annotations": [] 75 | } 76 | 77 | # parse files then use jq to get seqs alone 78 | $ seqparse j5.SBOL.xml | jq -r '.seq' 79 | ggcagcaaggtctacggcaaggaacagtttttgcggatgcgccagagcatgttccccgatcgc 80 | 81 | # fetch and parse remote sequence files from NCBI 82 | $ seqparse NC_011521 83 | { 84 | "name": "NC_011521", 85 | "type": "dna", 86 | "seq": "cccatcttaagacttcacaagactt...", 87 | "annotations": [ 88 | { 89 | "name": "HS566_RS00005", 90 | "start": 6, 91 | "end": 285, 92 | "direction": -1, 93 | "type": "gene" 94 | }, 95 | ... 
96 | ``` 97 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | preset: "ts-jest", 3 | roots: ["<rootDir>/src"], 4 | setupFiles: ["./jest.js"], 5 | testEnvironment: "jsdom", 6 | }; 7 | -------------------------------------------------------------------------------- /jest.js: -------------------------------------------------------------------------------- 1 | require("jest-fetch-mock").enableMocks(); 2 | fetchMock.dontMock(); 3 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "seqparse", 3 | "version": "0.2.1", 4 | "description": "Parse sequence files (GenBank, FASTA, SnapGene, SBOL) and accession IDs (NCBI, iGEM) to a common format", 5 | "main": "dist/index.js", 6 | "types": "dist/index.d.ts", 7 | "bin": { 8 | "seqparse": "./dist/cli.js" 9 | }, 10 | "scripts": { 11 | "build": "rm -rf ./dist && webpack && chmod +x dist/cli.js", 12 | "fix": "prettier ./src/** --write && eslint src --ext ts,tsx --fix", 13 | "lint": "prettier ./src/** --check && eslint src --ext ts,tsx --quiet", 14 | "major": "./release.sh major", 15 | "minor": "./release.sh minor", 16 | "patch": "./release.sh patch", 17 | "test": "jest", 18 | "test:watch": "jest --watch" 19 | }, 20 | "files": [ 21 | "dist", 22 | "LICENSE", 23 | "README.md" 24 | ], 25 | "repository": { 26 | "type": "git", 27 | "url": "git+https://github.com/Lattice-Automation/seqparse.git" 28 | }, 29 | "keywords": [ 30 | "accession", 31 | "cli", 32 | "dna", 33 | "fasta", 34 | "genbank", 35 | "igem", 36 | "ncbi", 37 | "parsing", 38 | "rna" 39 | ], 40 | "author": "", 41 | "license": "MIT", 42 | "bugs": { 43 | "url": "https://github.com/Lattice-Automation/seqparse/issues" 44 | }, 45 | "homepage": 
"https://github.com/Lattice-Automation/seqparse#readme", 46 | "dependencies": { 47 | "buffer": "^6.0.3", 48 | "fast-xml-parser": "^4.0.13", 49 | "node-fetch": "^2.6.7", 50 | "stream": "^0.0.2", 51 | "timers": "^0.1.1" 52 | }, 53 | "devDependencies": { 54 | "@trivago/prettier-plugin-sort-imports": "^3.2.0", 55 | "@types/jest": "^28.1.8", 56 | "@types/node": "^18.7.15", 57 | "@types/node-fetch": "^2.6.2", 58 | "@types/xml2js": "^0.4.9", 59 | "@typescript-eslint/eslint-plugin": "^5.10.1", 60 | "@typescript-eslint/parser": "^5.10.1", 61 | "eslint": "^8.8.0", 62 | "eslint-plugin-sort-destructure-keys": "^1.4.0", 63 | "eslint-plugin-sort-keys-fix": "^1.1.2", 64 | "eslint-plugin-typescript-sort-keys": "^2.1.0", 65 | "jest": "^28.0.0", 66 | "jest-environment-jsdom": "^29.0.2", 67 | "jest-fetch-mock": "^3.0.3", 68 | "path-browserify": "^1.0.1", 69 | "prettier": "^2.5.1", 70 | "source-map-loader": "^1.1.3", 71 | "stream-browserify": "^3.0.0", 72 | "timers-browserify": "^2.0.12", 73 | "ts-jest": "^28.0.0", 74 | "ts-loader": "^8.3.0", 75 | "typescript": "^4.5.2", 76 | "url": "^0.11.0", 77 | "webpack": "^5.73.0", 78 | "webpack-cli": "^4.10.0", 79 | "webpack-node-externals": "^3.0.0" 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [ "$(git rev-parse --abbrev-ref HEAD)" != "main" ]; then 6 | echo 'wrong branch: not on main'; 7 | exit 1; 8 | fi 9 | 10 | # run tests 11 | npm run test 12 | 13 | # run lint 14 | npm run lint 15 | 16 | # build the package 17 | npm run build 18 | 19 | # bump the package version 20 | npm version "$1" --git-tag-version 21 | 22 | # git commit the version bump 23 | git add . 
24 | git commit --amend -C HEAD 25 | 26 | # publish to npm 27 | npm publish 28 | git push 29 | 30 | # create a github release + tag 31 | version="$(jq -r '.version' < 'package.json')" 32 | release="$(gh release create "$version" --title "$version" --generate-notes --target main)" 33 | open "$release" 34 | -------------------------------------------------------------------------------- /src/cli.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | import { existsSync, readFileSync } from "fs"; 3 | 4 | import seqparse, { ParseOptions } from "."; 5 | 6 | /** a crappy but dependency-free log implementation */ 7 | const LOG_LEVEL = process.env.LOG_LEVEL || "info"; 8 | const debug = (msg: string) => { 9 | if (LOG_LEVEL.toLowerCase() !== "debug") { 10 | return; 11 | } 12 | console.log(`[DEBUG] ${msg}`); 13 | }; 14 | 15 | /** bail, log an example */ 16 | const exit = () => { 17 | console.error(`# Example USAGE: 18 | 19 | # parse a file 20 | seqparse gene.fa 21 | 22 | # parse a file from stdin 23 | cat gene.fa | seqparse 24 | 25 | # fetch and parse a file from NCBI or iGEM by accession 26 | seqparse FJ172221`); 27 | process.exit(1); 28 | }; 29 | 30 | /** input can be a file name as first arg or stdin */ 31 | const parseOptions = {} as ParseOptions; 32 | 33 | let input: string | null = null; 34 | if (process.argv[2]) { 35 | input = process.argv[2]; 36 | debug("reading from arg"); 37 | } else { 38 | debug("reading from stdin"); 39 | try { 40 | parseOptions.source = readFileSync(process.stdin.fd); 41 | parseOptions.fileName = "Unknown"; 42 | input = (parseOptions.source as Buffer).toString("utf-8"); 43 | debug("successfully read stdin"); 44 | } catch (err) { 45 | // only a debug here because am assuming the user just didn't pass anything 46 | debug("failed to read stdin"); 47 | exit(); 48 | } 49 | } 50 | 51 | /** throw, no input detected */ 52 | if (!input || !input.length) { 53 | console.error("no input detected"); 54 
| exit(); 55 | process.exit(1); 56 | } 57 | 58 | /** check if file, if so, read */ 59 | const isFile = existsSync(input); 60 | 61 | let fileContents: string | null = null; 62 | if (isFile) { 63 | parseOptions.fileName = input; 64 | try { 65 | debug("attempting to read file"); 66 | parseOptions.source = readFileSync(input); 67 | fileContents = (parseOptions.source as Buffer).toString("utf-8"); 68 | debug("successfully read file"); 69 | } catch (err) { 70 | console.error("failed to read file", err); 71 | exit(); 72 | } 73 | } 74 | 75 | /** parse, write to stdout */ 76 | debug("parsing"); 77 | seqparse(fileContents || input, parseOptions) 78 | .then(r => { 79 | debug("successfully parsed"); 80 | console.log(JSON.stringify(r, null, 2)); 81 | }) 82 | .catch(err => { 83 | console.error("failed to parse input", err); 84 | exit(); 85 | }); 86 | -------------------------------------------------------------------------------- /src/examples/benchling/benchling-empty.json: -------------------------------------------------------------------------------- 1 | { 2 | "aliases": [], 3 | "annotations": [ 4 | { 5 | "color": "#F58A5E", 6 | "end": 2344, 7 | "name": "Mlp84B protein_bind", 8 | "start": 2334, 9 | "strand": -1, 10 | "type": "protein_bind" 11 | }, 12 | { 13 | "color": "#F8D3A9", 14 | "end": 2946, 15 | "name": "Mlp84B 5'UTR", 16 | "start": 2867, 17 | "strand": 0, 18 | "type": "5'UTR" 19 | }, 20 | { 21 | "color": "#9EAFD2", 22 | "end": 4655, 23 | "name": "Mlp84B mRNA", 24 | "start": 2867, 25 | "strand": 1, 26 | "type": "mRNA" 27 | }, 28 | { 29 | "color": "#9EAFD2", 30 | "end": 4964, 31 | "name": "Mlp84B protein_bind", 32 | "start": 4954, 33 | "type": "protein_bind" 34 | }, 35 | { 36 | "color": "#D6B295", 37 | "end": 4638, 38 | "name": "Mlp84B protein_bind", 39 | "start": 4628, 40 | "strand": 1, 41 | "type": "protein_bind" 42 | }, 43 | { 44 | "color": "#F58A5E", 45 | "end": 1055, 46 | "name": "Mlp84B mRNA", 47 | "start": 1000, 48 | "strand": 1, 49 | "type": "mRNA" 50 | }, 51 | 
{ 52 | "color": "#84B0DC", 53 | "end": 1234, 54 | "name": "Mlp84B protein_bind", 55 | "start": 1224, 56 | "strand": 1, 57 | "type": "protein_bind" 58 | }, 59 | { 60 | "color": "#FAAC61", 61 | "end": 175, 62 | "name": "Mlp84B protein_bind", 63 | "start": 165, 64 | "strand": 1, 65 | "type": "protein_bind" 66 | }, 67 | { 68 | "color": "#75C6A9", 69 | "end": 4655, 70 | "name": "Mlp84B 3'UTR", 71 | "start": 4434, 72 | "strand": 1, 73 | "type": "3'UTR" 74 | }, 75 | { 76 | "color": "#D59687", 77 | "end": 1016, 78 | "name": "Mlp84B protein_bind", 79 | "start": 1006, 80 | "strand": 1, 81 | "type": "protein_bind" 82 | }, 83 | { 84 | "color": "#FF9CCD", 85 | "end": 4434, 86 | "name": "Mlp84B CDS", 87 | "start": 2946, 88 | "strand": 1, 89 | "type": "CDS" 90 | }, 91 | { 92 | "color": "#F8D3A9", 93 | "end": 5010, 94 | "name": "Mlp84B protein_bind", 95 | "start": 5000, 96 | "strand": 1, 97 | "type": "protein_bind" 98 | }, 99 | { 100 | "color": "#F58A5E", 101 | "end": 1588, 102 | "name": "Mlp84B protein_bind", 103 | "start": 1578, 104 | "strand": 1, 105 | "type": "protein_bind" 106 | }, 107 | { 108 | "color": "#F8D3A9", 109 | "end": 5010, 110 | "name": "Mlp84B gene", 111 | "start": 165, 112 | "strand": 1, 113 | "type": "gene" 114 | }, 115 | { 116 | "color": "#75C6A9", 117 | "end": 1333, 118 | "name": "Mlp84B protein_bind", 119 | "start": 1323, 120 | "strand": 1, 121 | "type": "protein_bind" 122 | }, 123 | { 124 | "color": "#84B0DC", 125 | "end": 0, 126 | "name": "source", 127 | "start": 0, 128 | "strand": 1, 129 | "type": "source" 130 | }, 131 | { 132 | "color": "#F58A5E", 133 | "end": 1055, 134 | "name": "Mlp84B 5'UTR", 135 | "start": 1000, 136 | "strand": 1, 137 | "type": "5'UTR" 138 | }, 139 | { 140 | "color": "#F58A5E", 141 | "end": 1330, 142 | "name": "Mlp84B protein_bind", 143 | "start": 1320, 144 | "strand": 1, 145 | "type": "protein_bind" 146 | } 147 | ], 148 | "bases": "", 149 | "circular": false, 150 | "color": "#F7977A", 151 | "createdAt": 
"2018-02-01T15:49:23.330046+00:00", 152 | "creator": { 153 | "avatarUrl": "/static/img/no_avatar.jpg", 154 | "handle": "nikhilrao2381", 155 | "id": "ent_LWM7EN9J", 156 | "name": "Nikhil Rao" 157 | }, 158 | "description": "", 159 | "editURL": "/nikhilrao2381/f/P63LFfte-brca2-knockout/seq-5Qc7YAKV-af090832/edit", 160 | "folder": { 161 | "id": "lib_P63LFfte", 162 | "name": "BRCA2 Knockout" 163 | }, 164 | "id": "seq_5Qc7YAKV", 165 | "length": 5086, 166 | "modifiedAt": "2018-02-01T15:49:24.502392+00:00", 167 | "name": "AF090832", 168 | "notes": [ 169 | { 170 | "created_at": "2018-02-01T15:49:23.330046+00:00", 171 | "creator": "ent_LWM7EN9J", 172 | "text": "Imported using the Genbank importer." 173 | }, 174 | { 175 | "created_at": "2018-02-01T15:49:23.330046+00:00", 176 | "creator": "ent_LWM7EN9J", 177 | "text": "File name: AF090832.gb" 178 | }, 179 | { 180 | "created_at": "2018-02-01T15:49:23.330046+00:00", 181 | "creator": "ent_LWM7EN9J", 182 | "text": "Drosophila melanogaster muscle LIM protein at 84B (Mlp84B) gene, complete cds." 183 | } 184 | ], 185 | "primers": [], 186 | "registryId": null, 187 | "tagSchema": null, 188 | "tags": [ 189 | { 190 | "name": "ACCESSION", 191 | "url": "", 192 | "value": "AF090832" 193 | }, 194 | { 195 | "name": "ORGANISM", 196 | "url": "", 197 | "value": "Drosophila melanogaster Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Ephydroidea; Drosophilidae; Drosophila; Sophophora." 
198 | }, 199 | { 200 | "name": "SOURCE", 201 | "url": "", 202 | "value": "Drosophila melanogaster (fruit fly)" 203 | }, 204 | { 205 | "name": "VERSION", 206 | "url": "", 207 | "value": "AF090832.1" 208 | } 209 | ] 210 | } 211 | -------------------------------------------------------------------------------- /src/examples/benchling/benchling1.json: -------------------------------------------------------------------------------- 1 | { 2 | "aliases": [], 3 | "annotations": [ 4 | { 5 | "color": "#F58A5E", 6 | "end": 2344, 7 | "name": "Mlp84B protein_bind", 8 | "start": 2334, 9 | "strand": -1, 10 | "type": "protein_bind" 11 | }, 12 | { 13 | "color": "#F8D3A9", 14 | "end": 2946, 15 | "name": "Mlp84B 5'UTR", 16 | "start": 2867, 17 | "strand": 0, 18 | "type": "5'UTR" 19 | }, 20 | { 21 | "color": "#9EAFD2", 22 | "end": 4655, 23 | "name": "Mlp84B mRNA", 24 | "start": 2867, 25 | "strand": 1, 26 | "type": "mRNA" 27 | }, 28 | { 29 | "color": "#9EAFD2", 30 | "end": 4964, 31 | "name": "Mlp84B protein_bind", 32 | "start": 4954, 33 | "type": "protein_bind" 34 | } 35 | ], 36 | "bases": "tgatcaaacctaaagagtgggacagagagtactactatattcgtttcactcgccaaaagttttgaac", 37 | "circular": false, 38 | "color": "#F7977A", 39 | "createdAt": "2018-02-01T15:49:23.330046+00:00", 40 | "creator": { 41 | "avatarUrl": "/static/img/no_avatar.jpg", 42 | "handle": "nikhilrao2381", 43 | "id": "ent_LWM7EN9J", 44 | "name": "Nikhil Rao" 45 | }, 46 | "description": "", 47 | "editURL": "/nikhilrao2381/f/P63LFfte-brca2-knockout/seq-5Qc7YAKV-af090832/edit", 48 | "folder": { 49 | "id": "lib_P63LFfte", 50 | "name": "BRCA2 Knockout" 51 | }, 52 | "id": "seq_5Qc7YAKV", 53 | "length": 5086, 54 | "modifiedAt": "2018-02-01T15:49:24.502392+00:00", 55 | "name": "AF090832", 56 | "notes": [ 57 | { 58 | "created_at": "2018-02-01T15:49:23.330046+00:00", 59 | "creator": "ent_LWM7EN9J", 60 | "text": "Imported using the Genbank importer." 
61 | }, 62 | { 63 | "created_at": "2018-02-01T15:49:23.330046+00:00", 64 | "creator": "ent_LWM7EN9J", 65 | "text": "File name: AF090832.gb" 66 | }, 67 | { 68 | "created_at": "2018-02-01T15:49:23.330046+00:00", 69 | "creator": "ent_LWM7EN9J", 70 | "text": "Drosophila melanogaster muscle LIM protein at 84B (Mlp84B) gene, complete cds." 71 | } 72 | ], 73 | "primers": [], 74 | "registryId": null, 75 | "tagSchema": null, 76 | "tags": [ 77 | { 78 | "name": "ACCESSION", 79 | "url": "", 80 | "value": "AF090832" 81 | }, 82 | { 83 | "name": "ORGANISM", 84 | "url": "", 85 | "value": "Drosophila melanogaster Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Ephydroidea; Drosophilidae; Drosophila; Sophophora." 86 | }, 87 | { 88 | "name": "SOURCE", 89 | "url": "", 90 | "value": "Drosophila melanogaster (fruit fly)" 91 | }, 92 | { 93 | "name": "VERSION", 94 | "url": "", 95 | "value": "AF090832.1" 96 | } 97 | ] 98 | } 99 | -------------------------------------------------------------------------------- /src/examples/biobrick/iGEM.BioBrick.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 151 8 | BBa_B0034 9 | B0034 10 | RBS (Elowitz 1999) -- defines RBS efficiency 11 | RBS 12 | Released HQ 2013 13 | In stock 14 | Works 15 | 16 | 1 17 | http://parts.igem.org/Part:BBa_B0034 18 | 2003-01-31 19 | 20 | Vinay S Mahajan, Voichita D. Marinescu, Brian Chow, Alexander D Wissner-Gross and Peter Carr IAP, 2003. 21 | 22 | 23 | 24 | 25 | 26 | aaagaggagaaa 27 | 28 | 29 | 30 | 23325 31 | 32 | <type>conserved</type> 33 | <direction>forward</direction> 34 | <startpos>5</startpos> 35 | <endpos>8</endpos> 36 | </feature> 37 | </features> 38 | <parameters> 39 | <!-- 40 | NOTE: Currently, each parameter name can have only one value. 41 | --> 42 | <!-- 43 | This will change as we fully support the context of a parameter. 
RDR 4/2010 44 | --> 45 | <parameter> 46 | <name>efficiency</name> 47 | <value>1</value> 48 | <units/> 49 | <url/> 50 | <id>2480</id> 51 | <m_date>2008-11-29 13:15:14</m_date> 52 | <user_id>24</user_id> 53 | <user_name>registry</user_name> 54 | </parameter> 55 | <parameter> 56 | <name>biology</name> 57 | <value>NA</value> 58 | <units/> 59 | <url/> 60 | <id>3314</id> 61 | <m_date>2008-11-29 13:15:14</m_date> 62 | <user_id>24</user_id> 63 | <user_name>registry</user_name> 64 | </parameter> 65 | </parameters> 66 | <categories> 67 | <category>//chassis/prokaryote/ecoli</category> 68 | <category>//direction/forward</category> 69 | <category>//function/coliroid</category> 70 | <category>//rbs/prokaryote/constitutive/community</category> 71 | <category>//regulation/constitutive</category> 72 | <category>//ribosome/prokaryote/ecoli</category> 73 | </categories> 74 | <twins> 75 | <twin>BBa_J34801</twin> 76 | <twin>BBa_J70591</twin> 77 | <twin>BBa_K773001</twin> 78 | <twin>BBa_K783051</twin> 79 | <twin>BBa_K1529997</twin> 80 | <twin>BBa_K1325011</twin> 81 | <twin>BBa_K1583061</twin> 82 | <twin>BBa_K2149018</twin> 83 | </twins> 84 | <samples> 85 | <!-- Samples have been turned off for now - rdr 2013 --> 86 | </samples> 87 | <references> 88 | <!-- References are not available yet - rdr 2013 --> 89 | </references> 90 | <groups> 91 | <!-- 92 | Group access information is not yet available - rdr 2013 93 | --> 94 | </groups> 95 | </part> 96 | </part_list> 97 | </rsbpml> -------------------------------------------------------------------------------- /src/examples/fasta/Benchling.Fasta.fasta: -------------------------------------------------------------------------------- 1 | >Nox Plasmid final 3 6/8 2 | tactagtagcggccgctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaaga 3 | ttacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctc 4 | actcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaa 5 | 
ggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcga 6 | cgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgct 7 | ctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctc 8 | acgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgac 9 | cgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactg 10 | gtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactag 11 | aagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaa 12 | caaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatc 13 | ctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaa 14 | aaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtct 15 | gacagttaccaatgcttaatcagtgaggcacctatctcagcgatctgtctatttcgttcatccatagttgcctgactcc 16 | ccgtcgtgtagataactacgatacgggagggcttaccatctggccccagtgctgcaatgataccgcgagacccacgctc 17 | accggctccagatttatcagcaataaaccagccagccggaagggccgagcgcagaagtggtcctgcaactttatccgcc 18 | tccatccagtctattaattgttgccgggaagctagagtaagtagttcgccagttaatagtttgcgcaacgttgttgcca 19 | ttgctacaggcatcgtggtgtcacgctcgtcgtttggtatggcttcattcagctccggttcccaacgatcaaggcgagt 20 | tacatgatcccccatgttgtgcaaaaaagcggttagctccttcggtcctccgatcgttgtcagaagtaagttggccgca 21 | gtgttatcactcatggttatggcagcactgcataattctcttactgtcatgccatccgtaagatgcttttctgtgactg 22 | gtgagtactcaaccaagtcattctgagaatagtgtatgcggcgaccgagttgctcttgcccggcgtcaatacgggataa 23 | taccgcgccacatagcagaactttaaaagtgctcatcattggaaaacgttcttcggggcgaaaactctcaaggatctta 24 | ccgctgttgagatccagttcgatataacccactcgtgcacccaactgatcttcagcatcttttactttcaccagcgttt 25 | ctgggtgagcaaaaacaggaaggcaaaatgccgcaaaaaagggaataagggcgacacggaaatgttgaatactcatact 26 | cttcctttttcaatattattgaagcatttatcagggttattgtctcatgagcggatacatatttgaatgtatttagaaa 27 | aataaacaaataggggttccgcgcacatttccccgaaaagtgccacctgacgtctaagaaaccattattatcatgacat 28 | 
taacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctg 29 | gaattcgcggccgcttctagagttttttgatctcaatcaataaagtcgcctatcttttcagcaacaaaacttgattaac 30 | atcaattttggtatgaccaatgcaccattcatgttattctcaatagcgaagaacattccaggaaagagagccatccATG 31 | AGTAAAATCGTTGTAGTTGGTGCAAACCACGCAGGTACAGCGTGTATTAATACAATGTTGGACAATTATGGTGCAGAAA 32 | ACGAAGTAGTTATTTTTGACCAAAACTCAAACATTTCATTCTTGGCATGCGGAATGGCCCTTTGGATTGGACAGCAAAT 33 | CAGCAAGCCAGATGGACTTTTTTATGCAGATAAAGAAACTTTTGAAGCAAAAGGCGCAAAAGTTTATATGAACTCACCA 34 | GTTGAGTCAATTGATTATGATGCGAAAAAAGTTACAGCTATTGTTGATGGCAAAGAGCATGTAGAGTCATATGACAAAT 35 | TGATTTTGGCAACAGGTTCTCAGCCAATTCTGCCTCCGATTAAAGGTGCTGAGATGGATCCTAACAGTCGTGAATTTAA 36 | GTCAACCTTGGAAAATCTGCAGTTTGTTAAATTGTATCAAAATGCTGCTGATGTTATCGAGAAATTGCAAGACAAGAGC 37 | AAACATATCGAACGTGTGGCAGTAGTAGGAGCTGGTTACATTGGGGTTGAGTTAGCAGAAGCCTTCAAACGCCTTGGTA 38 | AAGAAGTGATTCTGATTGACGTTGTGGATACTTGCTTGGCGGGTTACTATGACCACGACTTGTCTGAAATGATGCGTCA 39 | AAATCTTGAAGACAATGGTGTTCAATTAGCCTTTGGCCAAACTGTTCAAGCTATTGAAGGTGAAAACAAGGTAGAACGC 40 | ATAGTAACGGATAAAGCTAGCTATGATGTAGATATGGTCGTCTTGGCAGTTGGCTTCCGTCCAAACACTGGTCTTGGTG 41 | CTGGCAAGTTGGAAACATTCCGCAATGGTGCTTTCTTGGTGGATAAGAAGCAAGAAACTAGCATTAAAGATGTTTATGC 42 | AATCGGTGACTGTGCGACTGTCTACGATAACTCTATCAATGATACAAACTACATCGCCTTGGCATCTAACGCCCTGCGC 43 | TCTGGTATTGTAGCAGCTCATAATGCTTGCGGTCATGAATTGGAGTCAAACGGTGTTCAAGGTTCTAACGGTATCGAAA 44 | TCTTTGGTTTGAAGATGGTTTCAACTGGTCTGACTGAAGAAAAAGCTAAACGCTTTGGTTACAGCCCAGCTGTAGTTGA 45 | GTTTAAAGACACTCAAAAACCAACTTTCCTTGAAAAGGTTGAGCATCATGATGTTACAATTAAGATTGTCTATGATAAG 46 | GATACACGTGTAGTTCTTGGAGCTCAGATGGTTTCTAGAGAAGATATGTCTATGGGTATTCACATGTTCTCATTGGCTA 47 | TTCAGGAAAAGGTTACAATTGATAGATTGGCCTTGCTGGATCTTTTCTTCCTGCCACACTTCAACAAACCGTACAACTA 48 | CATTACTCAAGCAGCTTTGAAAGCAAAATAAccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttc 49 | gttttatctgttgtttgtcggtgaacgctctctactagagccaggcatcaaataaaacgaaaggctcagtcgaaagact 50 | gggcctttcgttttatctgttgtttgtcggtgaacgctctc 51 | -------------------------------------------------------------------------------- 
/src/examples/fasta/JPUB_000128.fasta: -------------------------------------------------------------------------------- 1 | >org.jbei|pBbB8c-GFP.1| 2 | gacgtcttatgacaacttgacggctacatcattcactttttcttcacaaccggcacggaactcgctcgggctggccccgg 3 | tgcattttttaaatacccgcgagaaatagagttgatcgtcaaaaccaacattgcgaccgacggtggcgataggcatccgg 4 | gtggtgctcaaaagcagcttcgcctggctgatacgttggtcctcgcgccagcttaagacgctaatccctaactgctggcg 5 | gaaaagatgtgacagacgcgacggcgacaagcaaacatgctgtgcgacgctggcgatatcaaaattgctgtctgccaggt 6 | gatcgctgatgtactgacaagcctcgcgtacccgattatccatcggtggatggagcgactcgttaatcgcttccatgcgc 7 | cgcagtaacaattgctcaagcagatttatcgccagcagctccgaatagcgcccttccccttgcccggcgttaatgatttg 8 | cccaaacaggtcgctgaaatgcggctggtgcgcttcatccgggcgaaagaaccccgtattggcaaatattgacggccagt 9 | taagccattcatgccagtaggcgcgcggacgaaagtaaacccactggtgataccattcgcgagcctccggatgacgaccg 10 | tagtgatgaatctctcctggcgggaacagcaaaatatcacccggtcggcaaacaaattctcgtccctgatttttcaccac 11 | cccctgaccgcgaatggtgagattgagaatataacctttcattcccagcggtcggtcgataaaaaaatcgagataaccgt 12 | tggcctcaatcggcgttaaacccgccaccagatgggcattaaacgagtatcccggcagcaggggatcattttgcgcttca 13 | gccatacttttcatactcccgccattcagagaagaaaccaattgtccatattgcatcagacattgccgtcactgcgtctt 14 | ttactggctcttctcgctaaccaaaccggtaaccccgcttattaaaagcattctgtaacaaagcgggaccaaagccatga 15 | caaaaacgcgtaacaaaagtgtctataatcacggcagaaaagtccacattgattatttgcacggcgtcacactttgctat 16 | gccatagcatttttatccataagattagcggattctacctgacgctttttatcgcaactctctactgtttctccataccc 17 | gtttttttgggaattcaaaagatcttttaagaaggagatatacatatgagtaaaggagaagaacttttcactggagttgt 18 | cccaattcttgttgaattagatggtgatgttaatgggcacaaattttctgtcagtggagagggtgaaggtgatgcaacat 19 | acggaaaacttacccttaaatttatttgcactactggaaaactacctgttccgtggccaacacttgtcactactttctct 20 | tatggtgttcaatgcttttcccgttatccggatcacatgaaacggcatgactttttcaagagtgccatgcccgaaggtta 21 | tgtacaggaacgcactatatctttcaaagatgacgggaactacaagacgcgtgctgaagtcaagtttgaaggtgataccc 22 | ttgttaatcgtatcgagttaaaaggtattgattttaaagaagatggaaacattctcggacacaaactggagtacaactat 23 | 
aactcacacaatgtatacatcacggcagacaaacaaaagaatggaatcaaagctaacttcaaaattcgccacaacattga 24 | agatggctccgttcaactagcagaccattatcaacaaaatactccaattggcgatggccctgtccttttaccagacaacc 25 | attacctgtccacacaatctgccctttcgaaagatcccaacgaaaagcgtgaccacatggtccttcttgagtttgtaact 26 | gctgctgggattacacatggcatggatgagctctacaaataaggatccaaactcgagtaaggatctccaggcatcaaata 27 | aaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacac 28 | tggctcaccttcgggtgggcctttctgcgtttatacctaggctacagccgatagtctggaacagcgcacttacgggttgc 29 | tgcgcaacccaagtgctaccggcgcggcagcgtgacccgtgtcggcggctccaacggctcgccatcgtccagaaaacacg 30 | gctcatcgggcatcggcaggcgctgctgcccgcgccgttcccattcctccgtttcggtcaaggctggcaggtctggttcc 31 | atgcccggaatgccgggctggctgggcggctcctcgccggggccggtcggtagttgctgctcgcccggatacagggtcgg 32 | gatgcggcgcaggtcgccatgccccaacagcgattcgtcctggtcgtcgtgatcaaccaccacggcggcactgaacaccg 33 | acaggcgcaactggtcgcggggctggccccacgccacgcggtcattgaccacgtaggccaacacggtgccggggccgttg 34 | agcttcacgacggagatccagcgctcggccaccaagtccttgactgcgtattggaccgtccgcaaagaacgtccgatgag 35 | cttggaaagtgtcttctggctgaccaccacggcgttctggtggcccatctgcgccacgaggtgatgcagcagcattgccg 36 | ccgtgggtttcctcgcaataagcccggcccacgcctcatgcgctttgcgttccgtttgcacccagtgaccgggcttgttc 37 | ttggcttgaatgccgatttctctggactgcgtggccatgcttatctccatgcggtaggggtgccgcacggttgcggcacc 38 | atgcgcaatcagctgcaacttttcggcagcgcgacaacaattatgcgttgcgtaaaagtggcagtcaattacagattttc 39 | tttaacctacgcaatgagctattgcggggggtgccgcaatgagctgttgcgtaccccccttttttaagttgttgattttt 40 | aagtctttcgcatttcgccctatatctagttctttggtgcccaaagaagggcacccctgcggggttcccccacgccttcg 41 | gcgcggctccccctccggcaaaaagtggcccctccggggcttgttgatcgactgcgcggccttcggccttgcccaaggtg 42 | gcgctgcccccttggaacccccgcactcgccgccgtgaggctcggggggcaggcgggcgggcttcgcccttcgactgccc 43 | ccactcgcataggcttgggtcgttccaggcgcgtcaaggccaagccgctgcgcggtcgctgcgcgagccttgacccgcct 44 | tccacttggtgtccaaccggcaagcgaagcgcgcaggccgcaggccggaggcactagtgcttggattctcaccaataaaa 45 | aacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacaggagtcc 46 | 
aagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgaca 47 | tggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcc 48 | catggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattgg 49 | ctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaa 50 | tatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaac 51 | ggtgtaacaagggtgaacactatcccatatcaccagctcaccgtctttcattgccatacgaaattccggatgagcattca 52 | tcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaata 53 | tccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattggga 54 | tatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaa 55 | atacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgccgatcaacgtctcattttcgccag 56 | atatc 57 | -------------------------------------------------------------------------------- /src/examples/fasta/JPUB_test.fasta: -------------------------------------------------------------------------------- 1 | >test:pBbB8c-GFP.1 2 | gacgtcttatgacaacttgacggctacatcattcactttttcttcacaaccggcacggaactcgctcgggctggccccgg 3 | tgcattttttaaatacccgcgagaaatagagttgatcgtcaaaaccaacattgcgaccgacggtggcgataggcatccgg 4 | gtggtgctcaaaagcagcttcgcctggctgatacgttggtcctcgcgccagcttaagacgctaatccctaactgctggcg 5 | gaaaagatgtgacagacgcgacggcgacaagcaaacatgctgtgcgacgctggcgatatcaaaattgctgtctgccaggt 6 | gatcgctgatgtactgacaagcctcgcgtacccgattatccatcggtggatggagcgactcgttaatcgcttccatgcgc 7 | cgcagtaacaattgctcaagcagatttatcgccagcagctccgaatagcgcccttccccttgcccggcgttaatgatttg 8 | cccaaacaggtcgctgaaatgcggctggtgcgcTtcatccgggcgaaagaaccccgtattggcaaatattgacggccagt 9 | taagccattcatgccagtaggcgcgcggacgaaagtaaacccactggtgataccattcgcgagcctccggatgacgaccg 10 | tagtgatgaatctctcctggcgggaacagcaaaatatcacccggtcggcaaacaaattctcgtccctgatttttcaccac 11 | cccctgaccgcgaatggtgagattgagaatataacctttcattcccagcggtcggtcgataaaaaaatcgagataaccgt 12 | 
tggcctcaatcggcgttaaacccgccaccagatgggcattaaacgagtatcccggcagcaggggatcattttgcgcttca 13 | gccatacttttcatactcccgccattcagagaagaaaccaattgtccatattgcatcagacattgccgtcactgcgtctt 14 | ttactggctcttctcgctaaccaaaccggtaaccccgcttattaaaagcattctgtaacaaagcgggaccaaagccatga 15 | caaaaacgcgtaacaaaagtgtctataatcacggcagaaaagtccacattgattatttgcacggcgtcacactttgctat 16 | gtttttttgggaattcaaaagatcttttaagaaggagatatacatatgagtaaaggagaagaacttttcactggagttgt 17 | cccaattcttgttgaattagatggtgatgttaaGgggcacaaattttctgtcagtggagagggtgaaggtgatgcaacat 18 | acggaaaacttacccttaaatttatttgcactactggaaaactacctgttccgtggccaacacttgtcactactttctct 19 | tatggtgttcaatgcttttcccgttatccggatcacatgaaacggcatgactttttcaagagtgccatgcccgaaggtta 20 | tgtacaggaacgcactatatctttcaaagatgacgggaactacaagacgcgtgctgaagtcaagtttgaaggtgataccc 21 | ttgttaatcgtatcgagttaaaaggtattgattttaaagaagatggaaacattctcggacacaaactggagtacaactat 22 | aactcacacaatgtatacatcacggcagacaaacaaaagaatggaatcaaagctaacttcaaaattcgccacaacattga 23 | agatggctccgttcaactagcagaccattatcaacaaaatactccaattggcgatggccctgtccttttaccagacaacc 24 | attacctgtccacacaatctgccctttcgaaagatcccaacgaaaagcgtgaccacatggtccttcttgagtttgtaact 25 | gctgctgggattacacatggcatggatgagctctacaaataaggatccaaactcgagtaaggatctccaggcatcaaata 26 | aaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacac 27 | tggctcaccttcgggtgggcctttctgcgtttatacctaggctacagccgatagtctggaacagcgcacttacgggttgc 28 | tgcgcaacccaagtgctaccggcgcggcagcgtgacccgtgtcggcggctccaacggctcgccatcgtccagaaaacacg 29 | gctcatcgggcatcggcaggcgctgctgcccgcgccgttcccattcctccgtttcggtcaaggctggcaggtctggttcc 30 | atgcccggaatgccgggctggctgggcggctcctcgccggggccggtcggtagttgctgctcgcccggatacagggtcgg 31 | gatgcggcgcaggtcgccatgccccaacagcgattcgtcctggtcgtcgtgatcaaccaccacggcggcactgaacaccg 32 | acaggcgcaactggtcgcggggctggccccacgccacgcggtcattgaccacgtaggccaacacggtgccggggccgttg 33 | cttggaaagtgtcttctggctgaccaccacggcgttctggtggcccatctgcgccacgaggtgatgcagcagcattgccg 34 | ccgtgggtttcctcgcaataagcccggcccacgcctcatgcgctttgcgttccgtttgcacccagtgaccgggcttgttc 35 | 
ttggcttgaatgccgatttctctggactgcgtggccatgcttatctccatgcggtaggggtgccgcacggttgcggcacc 36 | atgcgcaatcagctgcaacttttcggcagcgcgacaacaattatgcgttgcgtaaaagtggcagtcaattacagattttc 37 | tttaacctacgcaatgagctattgcggggggtgccgcaatgagctgttgcgtaccccccttttttaagttgttgattttt 38 | aagtctttcgcatttcgccctatatctagttctttggtgcccaaagaagggcacccctgcggggttcccccacgccttcg 39 | gcgcggctccccctccggcaaaaagtggcccctccggggcttgttgatcgactgcgcggccttcggccttgcccaaggtg 40 | gcgctgcccccttggaacccccgcactcgccgccgtgaggctcggggggcaggcgggcgggcttcgcccttcgactgccc 41 | ccactcgcataggcttgggtcgttccaggcgcgtcaaggccaagccgctgcgcggtcgctgcgcgagccttgacccgcct 42 | tccacttggtgtccaaccggcaagcgaagcgcgcaggccgcaggccggaggcactagtgcttggattctcaccaataaaa 43 | aacgcccggcggcaaccgagcgttctgaacaaaCccagatggagttctgaggtcattactggatctatcaacaggagtcc 44 | aagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgaca 45 | tggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcc 46 | catggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattgg 47 | ctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaa 48 | tatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaac 49 | ggtgtaacaagggtgaacactatcccatatcaccagctcaccgtctttcattgccatacgaaattccggatgagcattca 50 | tcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaata 51 | tccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattggga 52 | tatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaa 53 | atacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgccgatcaacgtctcattttcgccag 54 | atatc 55 | -------------------------------------------------------------------------------- /src/examples/fasta/R0010_AB.gb: -------------------------------------------------------------------------------- 1 | >NC_011521.1:4419-5135 DVA_EA, complete sequence 2 | 
tgagacccactagtctctagaagcggccgcgaattccagaaatcatccttagcgaaagctaaggattttttttatctgaaattctgcctcgtgatacgcctatttttataggttaatgtcatgataataatggtttcttagacgtcaggtggcacttttcggggaaatgtgcgcggaacccctatttgtttatttttctaaatacattcaaatatgtatccgctcatgagacaataaccctgataaatgcttcaataatattgaaaaaggaagagtatgagtattcaacatttccgtgtcgcccttattcccttttttgcggcattttgccttcctgtttttgctcacccagaaacgctggtgaaagtaaaagatgctgaagatcagttgggtgcacgagtgggttacatcgaactggatctcaacagcggtaagatccttgagagttttcgccccgaagaacgttttccaatgatgagcacttttaaagttctgctatgtggcgcggtattatcccgtattgacgccgggcaagagcaactcggtcgccgcatacactattctcagaatgacttggttgagtactcaccagtcacagaaaagcatcttacggatggcatgacagtaagagaattatgcagtgctgccataaccatgagtgataacactgcggccaacttacttctgacaacgatcggaggaccgaaggagctaaccgcttttttgcacaacatgggggatcatgtaactcgccttgatcgttgggaaccggagctgaatgaagccataccaaacgacgagcgtgacaccacgatgcctgtagcaatggcaacaacgttgcgcaaactattaactggcgaactacttactctagcttcccggcaacaattaatagactggatggaggcggataaagttgcaggaccacttctgcgctcggcccttccggctggctggtttattgctgataaatctggagccggtgagcgtgggtcgcgcggtatcattgcagcactggggccagatggtaagccctcccgtatcgtagttatctacacgacggggagtcaggcaactatggatgaacgaaatagacagatcgctgagataggtgcctcactgattaagcattggtaactgtcagaccaagtttactcatatatactttagattgatttaaaacttcatttttaatttaaaaggatctaggtgaagatcctttttgataatctcatgaccaaaatcccttaacgtgagttttcgttccactgagcgtcagaccccgtagaaaagatcaaaggatcttcttgagatcctttttttctgcgcgtaatctgctgcttgcaaacaaaaaaaccaccgctaccagcggtggtttgtttgccggatcaagagctaccaactctttttccgaaggtaactggcttcagcagagcgcagataccaaatactgttcttctagtgtagccgtagttaggccaccacttcaagaactctgtagcaccgcctacatacctcgctctgctaatcctgttaccagtggctgctgccagtggcgataagtcgtgtcttaccgggttggactcaagacgatagttaccggataaggcgcagcggtcgggctgaacggggggttcgtgcacacagcccagcttggagcgaacgacctacaccgaactgagatacctacagcgtgagctatgagaaagcgccacgcttcccgaagggagaaaggcggacaggtatccggtaagcggcagggtcggaacaggagagcgcacgagggagcttccagggggaaacgcctggtatctttatagtcctgtcgggtttcgccacctctgacttgagcgtcgatttttgtgatgctcgtcaggggggcggagcctgtggaaaaacgccagcaacgcggcctttttacggttcctggccttttgctggccttttgctcacatgttctttcctgcgttatcccctgattctgtggataaccgtattac
cgcctttgagtgagctgataccgctcgccgcagccgaacgaccgagcgcagcgagtcagtgagcgaggaagcctgcagcggccgctactagtaggtctctAAGCacgtcttccccgcgcgttggccgattcattaatgcagctggcacgacaggtttcccgactggaaagcgggcagtgagcgcaacgcaattaatgtgagttagctcactcattaggcaccccaggctttacactttatgcttccggctcgtatgttgtgtggaattgtgagcggataacaatttcacacaggaaacagctatgaccatgattacgccaagcttgcatgcctgcaggtcgactctagaggatccccgggtaccgagctcgaattcactggccgtcgttttacaacgtcgtgactgggaaaaccctggcgttacccaacttaatcgccttgcagcacatccccctttcgccagctggcgtaatagcgaagaggcccgcaccgatcgcccttcccaacagttgcgcagcctgaatggcgaatggcgcctgatgcggtattttctccttacgcatctgtgcggtatttcacaccgcatatggtgcagaagacatCTCC 3 | -------------------------------------------------------------------------------- /src/examples/fasta/example.fas: -------------------------------------------------------------------------------- 1 | >ssrA_tag_enhance 2 | GTAgagattagatag 3 | AGT -------------------------------------------------------------------------------- /src/examples/fasta/multi_test.fas: -------------------------------------------------------------------------------- 1 | >Sequence_1 2 | ACTGCCCCCCCCC 3 | 4 | >Sequence_2 5 | GTCAgggggggggg 6 | -------------------------------------------------------------------------------- /src/examples/fasta/multisequence.fas: -------------------------------------------------------------------------------- 1 | >fasta1 2 | atccgacgtctac 3 | 4 | >fasta2 5 | atggtgaggccp 6 | 7 | >fasta3 8 | atccgacgtctac 9 | 10 | >fasta3 11 | atccgacgtctac 12 | 13 | >fasta5 14 | atccacatacccccccccccccccccc 15 | 16 | >fasta2 17 | atggtgaggccp 18 | 19 | >fasta3 20 | atggtgaggccp -------------------------------------------------------------------------------- /src/examples/fasta/oldstyle.fas: -------------------------------------------------------------------------------- 1 | ;my|NAME 2 | ;my comment 3 | actGacgata -------------------------------------------------------------------------------- /src/examples/fasta/pBbE0c-RFP.fasta: 
-------------------------------------------------------------------------------- 1 | >pBbE0c-RFP.1 2 | cagctagctcagtcctaggtactgtgctagctactagtgaaagaggagaaatactagatggcttcctccgaagacgttat 3 | caaagagttcatgcgtttcaaagttcgtatggaaggttccgttaacggtcacgagttcgaaatcgaaggtgaaggtgaag 4 | gtcgtccgtacgaaggtacccagaccgctaaactgaaagttaccaaaggtggtccgctgccgttcgcttgggacatcctg 5 | tccccgcagttccagtacggttccaaagcttacgttaaacacccggctgacatcccggactacctgaaactgtccttccc 6 | ggaaggtttcaaatgggaacgtgttatgaacttcgaagacggtggtgttgttaccgttacccaggactcctccctgcaag 7 | acggtgagttcatctacaaagttaaactgcgtggtaccaacttcccgtccgacggtccggttatgcagaaaaaaaccatg 8 | ggttgggaagcttccaccgaacgtatgtacccggaagacggtgctctgaaaggtgaaatcaaaatgcgtctgaaactgaa 9 | agacggtggtcactacgacgctgaagttaaaaccacctacatggctaaaaaaccggttcagctgccgggtgcttacaaaa 10 | ccgacatcaaactggacatcacctcccacaacgaagactacaccatcgttgaacagtacgaacgtgctgaaggtcgtcac 11 | tccaccggtgcttaataacgctgatagtgctagtgtagatcgctactagagccaggcatcaaataaaacgaaaggctcag 12 | tcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcggg 13 | tgggcctttctgcgtttatatactagaagcggccgggatcctaactcgagtaaggatctccaggcatcaaataaaacgaa 14 | aggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctca 15 | ccttcgggtgggcctttctgcgtttatacctagggcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaat 16 | acggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaa 17 | aggccgcgttgctggcgtttttccataggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtgg 18 | cgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgcc 19 | gcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagtt 20 | cggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaac 21 | tatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgag 22 | gtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaaggacagtatttggtatctgcg 23 | ctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggt 24 | 
ttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctga 25 | cgctcagtggaacgaaaactcacgttaagggattttggtcatgactagtgcttggattctcaccaataaaaaacgcccgg 26 | cggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacaggagtccaagcgagct 27 | cgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgacatggaagcca 28 | tcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaa 29 | aacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacga 30 | aaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgt 31 | agaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaaca 32 | agggtgaacactatcccatatcaccagctcaccgtctttcattgccatacgaaattccggatgagcattcatcaggcggg 33 | caagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctga 34 | acggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaac 35 | ggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccg 36 | gtagtgatcttatttcattatggtgaaagttggaacctcttacgtgccgatcaacgtctcattttcgccagatatcgaat 37 | tcatgagatctttga 38 | -------------------------------------------------------------------------------- /src/examples/fasta/pBbS0c-RFP.fasta: -------------------------------------------------------------------------------- 1 | >org.jbei|pBbS0c-RFP.1| 2 | ttgacagctagctcagtcctaggtactgtgctagctactagtgaaagaggagaaatactagatggcttcctccgaagacg 3 | ttatcaaagagttcatgcgtttcaaagttcgtatggaaggttccgttaacggtcacgagttcgaaatcgaaggtgaaggt 4 | gaaggtcgtccgtacgaaggtacccagaccgctaaactgaaagttaccaaaggtggtccgctgccgttcgcttgggacat 5 | cctgtccccgcagttccagtacggttccaaagcttacgttaaacacccggctgacatcccggactacctgaaactgtcct 6 | tcccggaaggtttcaaatgggaacgtgttatgaacttcgaagacggtggtgttgttaccgttacccaggactcctccctg 7 | caagacggtgagttcatctacaaagttaaactgcgtggtaccaacttcccgtccgacggtccggttatgcagaaaaaaac 8 | catgggttgggaagcttccaccgaacgtatgtacccggaagacggtgctctgaaaggtgaaatcaaaatgcgtctgaaac 9 | 
tgaaagacggtggtcactacgacgctgaagttaaaaccacctacatggctaaaaaaccggttcagctgccgggtgcttac 10 | aaaaccgacatcaaactggacatcacctcccacaacgaagactacaccatcgttgaacagtacgaacgtgctgaaggtcg 11 | tcactccaccggtgcttaataacgctgatagtgctagtgtagatcgctactagagccaggcatcaaataaaacgaaaggc 12 | tcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcacctt 13 | cgggtgggcctttctgcgtttatatactagaagcggccgggatcctaactcgagtaaggatctccaggcatcaaataaaa 14 | cgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactgg 15 | ctcaccttcgggtgggcctttctgcgtttatacctagggtacgggttttgctgcccgcaaacgggctgttctggtgttgc 16 | tagtttgttatcagaatcgcagatccggcttcagccggtttgccggctgaaagcgctatttcttccagaattgccatgat 17 | tttttccccacgggaggcgtcactggctcccgtgttgtcggcagctttgattcgataagcagcatcgcctgtttcaggct 18 | gtctatgtgtgactgttgagctgtaacaagttgtctcaggtgttcaatttcatgttctagttgctttgttttactggttt 19 | cacctgttctattaggtgttacatgctgttcatctgttacattgtcgatctgttcatggtgaacagctttgaatgcacca 20 | aaaactcgtaaaagctctgatgtatctatcttttttacaccgttttcatctgtgcatatggacagttttccctttgatat 21 | gtaacggtgaacagttgttctacttttgtttgttagtcttgatgcttcactgatagatacaagagccataagaacctcag 22 | atccttccgtatttagccagtatgttctctagtgtggttcgttgtttttgcgtgagccatgagaacgaaccattgagatc 23 | atacttactttgcatgtcactcaaaaattttgcctcaaaactggtgagctgaatttttgcagttaaagcatcgtgtagtg 24 | tttttcttagtccgttatgtaggtaggaatctgatgtaatggttgttggtattttgtcaccattcatttttatctggttg 25 | ttctcaagttcggttacgagatccatttgtctatctagttcaacttggaaaatcaacgtatcagtcgggcggcctcgctt 26 | atcaaccaccaatttcatattgctgtaagtgtttaaatctttacttattggtttcaaaacccattggttaagccttttaa 27 | actcatggtagttattttcaagcattaacatgaacttaaattcatcaaggctaatctctatatttgccttgtgagttttc 28 | ttttgtgttagttcttttaataaccactcataaatcctcatagagtatttgttttcaaaagacttaacatgttccagatt 29 | atattttatgaatttttttaactggaaaagataaggcaatatctcttcactaaaaactaattctaatttttcgcttgaga 30 | acttggcatagtttgtccactggaaaatctcaaagcctttaaccaaaggattcctgatttccacagttctcgtcatcagc 31 | tctctggttgctttagctaatacaccataagcattttccctactgatgttcatcatctgagcgtattggttataagtgaa 32 | 
cgataccgtccgttctttccttgtagggttttcaatcgtggggttgagtagtgccacacagcataaaattagcttggttt 33 | catgctccgttaagtcatagcgactaatcgctagttcatttgctttgaaaacaactaattcagacatacatctcaattgg 34 | tctaggtgattttaatcactataccaattgagatgggctagtcaatgataattactagtccttttcccgggtgatctggg 35 | tatctgtaaattctgctagacctttgctggaaaacttgtaaattctgctagaccctctgtaaattccgctagacctttgt 36 | gtgttttttttgtttatattcaagtggttataatttatagaataaagaaagaataaaaaaagataaaaagaatagatccc 37 | agccctgtgtataactcactactttagtcagttccgcagtattacaaaaggatgtcgcaaacgctgtttgctcctctaca 38 | aaacagaccttaaaaccctaaaggcttaagtagcaccctcgcaagctcgggcaaatcgctgaatattccttttgtctccg 39 | accatcaggcacctgagtcgctgtctttttcgtgacattcagttcgctgcgctcacggctctggcagtgaatgggggtaa 40 | atggcactacaggcgccttttatggattcatgcaaggaaactacccataatacaagaaaagcccgtcacgggcttctcag 41 | ggcgttttatggcgggtctgctatgtggtgctatctgactttttgctgttcagcagttcctgccctctgattttccagtc 42 | tgaccacttcggattatcccgtgacaggtcattcagactggctaatgcacccagtaaggcagcggtatcatcaacaggct 43 | tacccgtcttactgtccctagtgcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatcc 44 | agatggagttctgaggtcattactggatctatcaacaggagtccaagcgagctcgatatcaaattacgccccgccctgcc 45 | actcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatc 46 | gccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatatt 47 | ggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttag 48 | ggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtat 49 | tcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaacaagggtgaacactatcccatatcaccag 50 | ctcaccgtctttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaa 51 | acttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagca 52 | actgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctc 53 | cattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaa 54 | agttggaacctcttacgtgccgatcaacgtctcattttcgccagatatcgaattcatgagatct 55 | 
-------------------------------------------------------------------------------- /src/examples/fasta/pBbS0c-RFP_no_name.fasta: -------------------------------------------------------------------------------- 1 | ttgacagctagctcagtcctaggtactgtgctagctactagtgaaagaggagaaatactagatggcttcctccgaagacg 2 | ttatcaaagagttcatgcgtttcaaagttcgtatggaaggttccgttaacggtcacgagttcgaaatcgaaggtgaaggt 3 | gaaggtcgtccgtacgaaggtacccagaccgctaaactgaaagttaccaaaggtggtccgctgccgttcgcttgggacat 4 | cctgtccccgcagttccagtacggttccaaagcttacgttaaacacccggctgacatcccggactacctgaaactgtcct 5 | tcccggaaggtttcaaatgggaacgtgttatgaacttcgaagacggtggtgttgttaccgttacccaggactcctccctg 6 | caagacggtgagttcatctacaaagttaaactgcgtggtaccaacttcccgtccgacggtccggttatgcagaaaaaaac 7 | catgggttgggaagcttccaccgaacgtatgtacccggaagacggtgctctgaaaggtgaaatcaaaatgcgtctgaaac 8 | tgaaagacggtggtcactacgacgctgaagttaaaaccacctacatggctaaaaaaccggttcagctgccgggtgcttac 9 | aaaaccgacatcaaactggacatcacctcccacaacgaagactacaccatcgttgaacagtacgaacgtgctgaaggtcg 10 | tcactccaccggtgcttaataacgctgatagtgctagtgtagatcgctactagagccaggcatcaaataaaacgaaaggc 11 | tcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcacctt 12 | cgggtgggcctttctgcgtttatatactagaagcggccgggatcctaactcgagtaaggatctccaggcatcaaataaaa 13 | cgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactgg 14 | ctcaccttcgggtgggcctttctgcgtttatacctagggtacgggttttgctgcccgcaaacgggctgttctggtgttgc 15 | tagtttgttatcagaatcgcagatccggcttcagccggtttgccggctgaaagcgctatttcttccagaattgccatgat 16 | tttttccccacgggaggcgtcactggctcccgtgttgtcggcagctttgattcgataagcagcatcgcctgtttcaggct 17 | gtctatgtgtgactgttgagctgtaacaagttgtctcaggtgttcaatttcatgttctagttgctttgttttactggttt 18 | cacctgttctattaggtgttacatgctgttcatctgttacattgtcgatctgttcatggtgaacagctttgaatgcacca 19 | aaaactcgtaaaagctctgatgtatctatcttttttacaccgttttcatctgtgcatatggacagttttccctttgatat 20 | gtaacggtgaacagttgttctacttttgtttgttagtcttgatgcttcactgatagatacaagagccataagaacctcag 21 | 
atccttccgtatttagccagtatgttctctagtgtggttcgttgtttttgcgtgagccatgagaacgaaccattgagatc 22 | atacttactttgcatgtcactcaaaaattttgcctcaaaactggtgagctgaatttttgcagttaaagcatcgtgtagtg 23 | tttttcttagtccgttatgtaggtaggaatctgatgtaatggttgttggtattttgtcaccattcatttttatctggttg 24 | ttctcaagttcggttacgagatccatttgtctatctagttcaacttggaaaatcaacgtatcagtcgggcggcctcgctt 25 | atcaaccaccaatttcatattgctgtaagtgtttaaatctttacttattggtttcaaaacccattggttaagccttttaa 26 | actcatggtagttattttcaagcattaacatgaacttaaattcatcaaggctaatctctatatttgccttgtgagttttc 27 | ttttgtgttagttcttttaataaccactcataaatcctcatagagtatttgttttcaaaagacttaacatgttccagatt 28 | atattttatgaatttttttaactggaaaagataaggcaatatctcttcactaaaaactaattctaatttttcgcttgaga 29 | acttggcatagtttgtccactggaaaatctcaaagcctttaaccaaaggattcctgatttccacagttctcgtcatcagc 30 | tctctggttgctttagctaatacaccataagcattttccctactgatgttcatcatctgagcgtattggttataagtgaa 31 | cgataccgtccgttctttccttgtagggttttcaatcgtggggttgagtagtgccacacagcataaaattagcttggttt 32 | catgctccgttaagtcatagcgactaatcgctagttcatttgctttgaaaacaactaattcagacatacatctcaattgg 33 | tctaggtgattttaatcactataccaattgagatgggctagtcaatgataattactagtccttttcccgggtgatctggg 34 | tatctgtaaattctgctagacctttgctggaaaacttgtaaattctgctagaccctctgtaaattccgctagacctttgt 35 | gtgttttttttgtttatattcaagtggttataatttatagaataaagaaagaataaaaaaagataaaaagaatagatccc 36 | agccctgtgtataactcactactttagtcagttccgcagtattacaaaaggatgtcgcaaacgctgtttgctcctctaca 37 | aaacagaccttaaaaccctaaaggcttaagtagcaccctcgcaagctcgggcaaatcgctgaatattccttttgtctccg 38 | accatcaggcacctgagtcgctgtctttttcgtgacattcagttcgctgcgctcacggctctggcagtgaatgggggtaa 39 | atggcactacaggcgccttttatggattcatgcaaggaaactacccataatacaagaaaagcccgtcacgggcttctcag 40 | ggcgttttatggcgggtctgctatgtggtgctatctgactttttgctgttcagcagttcctgccctctgattttccagtc 41 | tgaccacttcggattatcccgtgacaggtcattcagactggctaatgcacccagtaaggcagcggtatcatcaacaggct 42 | tacccgtcttactgtccctagtgcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatcc 43 | agatggagttctgaggtcattactggatctatcaacaggagtccaagcgagctcgatatcaaattacgccccgccctgcc 44 | 
actcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatc 45 | gccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatatt 46 | ggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttag 47 | ggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtat 48 | tcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaacaagggtgaacactatcccatatcaccag 49 | ctcaccgtctttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaa 50 | acttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagca 51 | actgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctc 52 | cattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaa 53 | agttggaacctcttacgtgccgatcaacgtctcattttcgccagatatcgaattcatgagatct 54 | -------------------------------------------------------------------------------- /src/examples/fasta/pBbS0c-RFP_no_name.txt: -------------------------------------------------------------------------------- 1 | ttgacagctagctcagtcctaggtactgtgctagctactagtgaaagaggagaaatactagatggcttcctccgaagacg 2 | ttatcaaagagttcatgcgtttcaaagttcgtatggaaggttccgttaacggtcacgagttcgaaatcgaaggtgaaggt 3 | gaaggtcgtccgtacgaaggtacccagaccgctaaactgaaagttaccaaaggtggtccgctgccgttcgcttgggacat 4 | cctgtccccgcagttccagtacggttccaaagcttacgttaaacacccggctgacatcccggactacctgaaactgtcct 5 | tcccggaaggtttcaaatgggaacgtgttatgaacttcgaagacggtggtgttgttaccgttacccaggactcctccctg 6 | caagacggtgagttcatctacaaagttaaactgcgtggtaccaacttcccgtccgacggtccggttatgcagaaaaaaac 7 | catgggttgggaagcttccaccgaacgtatgtacccggaagacggtgctctgaaaggtgaaatcaaaatgcgtctgaaac 8 | tgaaagacggtggtcactacgacgctgaagttaaaaccacctacatggctaaaaaaccggttcagctgccgggtgcttac 9 | aaaaccgacatcaaactggacatcacctcccacaacgaagactacaccatcgttgaacagtacgaacgtgctgaaggtcg 10 | tcactccaccggtgcttaataacgctgatagtgctagtgtagatcgctactagagccaggcatcaaataaaacgaaaggc 11 | tcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcacctt 12 | 
cgggtgggcctttctgcgtttatatactagaagcggccgggatcctaactcgagtaaggatctccaggcatcaaataaaa 13 | cgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactgg 14 | ctcaccttcgggtgggcctttctgcgtttatacctagggtacgggttttgctgcccgcaaacgggctgttctggtgttgc 15 | tagtttgttatcagaatcgcagatccggcttcagccggtttgccggctgaaagcgctatttcttccagaattgccatgat 16 | tttttccccacgggaggcgtcactggctcccgtgttgtcggcagctttgattcgataagcagcatcgcctgtttcaggct 17 | gtctatgtgtgactgttgagctgtaacaagttgtctcaggtgttcaatttcatgttctagttgctttgttttactggttt 18 | cacctgttctattaggtgttacatgctgttcatctgttacattgtcgatctgttcatggtgaacagctttgaatgcacca 19 | aaaactcgtaaaagctctgatgtatctatcttttttacaccgttttcatctgtgcatatggacagttttccctttgatat 20 | gtaacggtgaacagttgttctacttttgtttgttagtcttgatgcttcactgatagatacaagagccataagaacctcag 21 | atccttccgtatttagccagtatgttctctagtgtggttcgttgtttttgcgtgagccatgagaacgaaccattgagatc 22 | atacttactttgcatgtcactcaaaaattttgcctcaaaactggtgagctgaatttttgcagttaaagcatcgtgtagtg 23 | tttttcttagtccgttatgtaggtaggaatctgatgtaatggttgttggtattttgtcaccattcatttttatctggttg 24 | ttctcaagttcggttacgagatccatttgtctatctagttcaacttggaaaatcaacgtatcagtcgggcggcctcgctt 25 | atcaaccaccaatttcatattgctgtaagtgtttaaatctttacttattggtttcaaaacccattggttaagccttttaa 26 | actcatggtagttattttcaagcattaacatgaacttaaattcatcaaggctaatctctatatttgccttgtgagttttc 27 | ttttgtgttagttcttttaataaccactcataaatcctcatagagtatttgttttcaaaagacttaacatgttccagatt 28 | atattttatgaatttttttaactggaaaagataaggcaatatctcttcactaaaaactaattctaatttttcgcttgaga 29 | acttggcatagtttgtccactggaaaatctcaaagcctttaaccaaaggattcctgatttccacagttctcgtcatcagc 30 | tctctggttgctttagctaatacaccataagcattttccctactgatgttcatcatctgagcgtattggttataagtgaa 31 | cgataccgtccgttctttccttgtagggttttcaatcgtggggttgagtagtgccacacagcataaaattagcttggttt 32 | catgctccgttaagtcatagcgactaatcgctagttcatttgctttgaaaacaactaattcagacatacatctcaattgg 33 | tctaggtgattttaatcactataccaattgagatgggctagtcaatgataattactagtccttttcccgggtgatctggg 34 | tatctgtaaattctgctagacctttgctggaaaacttgtaaattctgctagaccctctgtaaattccgctagacctttgt 35 | 
gtgttttttttgtttatattcaagtggttataatttatagaataaagaaagaataaaaaaagataaaaagaatagatccc 36 | agccctgtgtataactcactactttagtcagttccgcagtattacaaaaggatgtcgcaaacgctgtttgctcctctaca 37 | aaacagaccttaaaaccctaaaggcttaagtagcaccctcgcaagctcgggcaaatcgctgaatattccttttgtctccg 38 | accatcaggcacctgagtcgctgtctttttcgtgacattcagttcgctgcgctcacggctctggcagtgaatgggggtaa 39 | atggcactacaggcgccttttatggattcatgcaaggaaactacccataatacaagaaaagcccgtcacgggcttctcag 40 | ggcgttttatggcgggtctgctatgtggtgctatctgactttttgctgttcagcagttcctgccctctgattttccagtc 41 | tgaccacttcggattatcccgtgacaggtcattcagactggctaatgcacccagtaaggcagcggtatcatcaacaggct 42 | tacccgtcttactgtccctagtgcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatcc 43 | agatggagttctgaggtcattactggatctatcaacaggagtccaagcgagctcgatatcaaattacgccccgccctgcc 44 | actcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatc 45 | gccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatatt 46 | ggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttag 47 | ggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtat 48 | tcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaacaagggtgaacactatcccatatcaccag 49 | ctcaccgtctttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaa 50 | acttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagca 51 | actgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctc 52 | cattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaa 53 | agttggaacctcttacgtgccgatcaacgtctcattttcgccagatatcgaattcatgagatct 54 | -------------------------------------------------------------------------------- /src/examples/fasta/pBbS8c_RFP.fas: -------------------------------------------------------------------------------- 1 | >pBbS8c-RFP 2 | 
gacgtcttatgacaacttgacggctacatcattcactttttcttcacaaccggcacggaactcgctcgggctggccccggtgcattttttaaatacccgcgagaaatagagttgatcgtcaaaaccaacattgcgaccgacggtggcgataggcatccgggtggtgctcaaaagcagcttcgcctggctgatacgttggtcctcgcgccagcttaagacgctaatccctaactgctggcggaaaagatgtgacagacgcgacggcgacaagcaaacatgctgtgcgacgctggcgatatcaaaattgctgtctgccaggtgatcgctgatgtactgacaagcctcgcgtacccgattatccatcggtggatggagcgactcgttaatcgcttccatgcgccgcagtaacaattgctcaagcagatttatcgccagcagctccgaatagcgcccttccccttgcccggcgttaatgatttgcccaaacaggtcgctgaaatgcggctggtgcgcttcatccgggcgaaagaaccccgtattggcaaatattgacggccagttaagccattcatgccagtaggcgcgcggacgaaagtaaacccactggtgataccattcgcgagcctccggatgacgaccgtagtgatgaatctctcctggcgggaacagcaaaatatcacccggtcggcaaacaaattctcgtccctgatttttcaccaccccctgaccgcgaatggtgagattgagaatataacctttcattcccagcggtcggtcgataaaaaaatcgagataaccgttggcctcaatcggcgttaaacccgccaccagatgggcattaaacgagtatcccggcagcaggggatcattttgcgcttcagccatacttttcatactcccgccattcagagaagaaaccaattgtccatattgcatcagacattgccgtcactgcgtcttttactggctcttctcgctaaccaaaccggtaaccccgcttattaaaagcattctgtaacaaagcgggaccaaagccatgacaaaaacgcgtaacaaaagtgtctataatcacggcagaaaagtccacattgattatttgcacggcgtcacactttgctatgccatagcatttttatccataagattagcggattctacctgacgctttttatcgcaactctctactgtttctccatacccgtttttttgggaattcaaaagatcttttaagaaggagatatacatatggcgagtagcgaagacgttatcaaagagttcatgcgtttcaaagttcgtatggaaggttccgttaacggtcacgagttcgaaatcgaaggtgaaggtgaaggtcgtccgtacgaaggtacccagaccgctaaactgaaagttaccaaaggtggtccgctgccgttcgcttgggacatcctgtccccgcagttccagtacggttccaaagcttacgttaaacacccggctgacatcccggactacctgaaactgtccttcccggaaggtttcaaatgggaacgtgttatgaacttcgaagacggtggtgttgttaccgttacccaggactcctccctgcaagacggtgagttcatctacaaagttaaactgcgtggtaccaacttcccgtccgacggtccggttatgcagaaaaaaaccatgggttgggaagcttccaccgaacgtatgtacccggaagacggtgctctgaaaggtgaaatcaaaatgcgtctgaaactgaaagacggtggtcactacgacgctgaagttaaaaccacctacatggctaaaaaaccggttcagctgccgggtgcttacaaaaccgacatcaaactggacatcacctcccacaacgaagactacaccatcgttgaacagtacgaacgtgctgaaggtcgtcactccaccggtgcttaaggatccaaactcgagtaaggatctccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgtttt
atctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttatacctagggtacgggttttgctgcccgcaaacgggctgttctggtgttgctagtttgttatcagaatcgcagatccggcttcagccggtttgccggctgaaagcgctatttcttccagaattgccatgattttttccccacgggaggcgtcactggctcccgtgttgtcggcagctttgattcgataagcagcatcgcctgtttcaggctgtctatgtgtgactgttgagctgtaacaagttgtctcaggtgttcaatttcatgttctagttgctttgttttactggtttcacctgttctattaggtgttacatgctgttcatctgttacattgtcgatctgttcatggtgaacagctttgaatgcaccaaaaactcgtaaaagctctgatgtatctatcttttttacaccgttttcatctgtgcatatggacagttttccctttgatatgtaacggtgaacagttgttctacttttgtttgttagtcttgatgcttcactgatagatacaagagccataagaacctcagatccttccgtatttagccagtatgttctctagtgtggttcgttgtttttgcgtgagccatgagaacgaaccattgagatcatacttactttgcatgtcactcaaaaattttgcctcaaaactggtgagctgaatttttgcagttaaagcatcgtgtagtgtttttcttagtccgttatgtaggtaggaatctgatgtaatggttgttggtattttgtcaccattcatttttatctggttgttctcaagttcggttacgagatccatttgtctatctagttcaacttggaaaatcaacgtatcagtcgggcggcctcgcttatcaaccaccaatttcatattgctgtaagtgtttaaatctttacttattggtttcaaaacccattggttaagccttttaaactcatggtagttattttcaagcattaacatgaacttaaattcatcaaggctaatctctatatttgccttgtgagttttcttttgtgttagttcttttaataaccactcataaatcctcatagagtatttgttttcaaaagacttaacatgttccagattatattttatgaatttttttaactggaaaagataaggcaatatctcttcactaaaaactaattctaatttttcgcttgagaacttggcatagtttgtccactggaaaatctcaaagcctttaaccaaaggattcctgatttccacagttctcgtcatcagctctctggttgctttagctaatacaccataagcattttccctactgatgttcatcatctgagcgtattggttataagtgaacgataccgtccgttctttccttgtagggttttcaatcgtggggttgagtagtgccacacagcataaaattagcttggtttcatgctccgttaagtcatagcgactaatcgctagttcatttgctttgaaaacaactaattcagacatacatctcaattggtctaggtgattttaatcactataccaattgagatgggctagtcaatgataattactagtccttttcccgggtgatctgggtatctgtaaattctgctagacctttgctggaaaacttgtaaattctgctagaccctctgtaaattccgctagacctttgtgtgttttttttgtttatattcaagtggttataatttatagaataaagaaagaataaaaaaagataaaaagaatagatcccagccctgtgtataactcactactttagtcagttccgcagtattacaaaaggatgtcgcaaacgctgtttgctcctctacaaaacagaccttaaaaccctaaaggcttaagtagcaccctcgcaagctcgggcaaatcgctgaatattccttttgtctccgaccatcaggcacctgagtcgctgtctttttcgtgac
attcagttcgctgcgctcacggctctggcagtgaatgggggtaaatggcactacaggcgccttttatggattcatgcaaggaaactacccataatacaagaaaagcccgtcacgggcttctcagggcgttttatggcgggtctgctatgtggtgctatctgactttttgctgttcagcagttcctgccctctgattttccagtctgaccacttcggattatcccgtgacaggtcattcagactggctaatgcacccagtaaggcagcggtatcatcaacaggcttacccgtcttactgtccctagtgcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacaggagtccaagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaacaagggtgaacactatcccatatcaccagctcaccgtctttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgccgatcaacgtctcattttcgccagatatc -------------------------------------------------------------------------------- /src/examples/genbank/Benchling.Genbank.gb: -------------------------------------------------------------------------------- 1 | LOCUS J23102_AB 2143 bp ds-DNA circular 21-NOV-2017 2 | DEFINITION . 
3 | KEYWORDS "Antibiotic:Ampicillin (AMP)" "Backbone:DVA_AB" "Cloned by 4 | (Date):SI 4/6/2014" "Fusion site(s):AB" "Glycerol:--" "Part 5 | Source:CIDAR" "Part subtype:Constitutive" "Part type:Promoter" 6 | "Plasmid:--" "Seq Source:parts.igem.org/Part\:Bba_J23102" "Seq 7 | Verify:--" 8 | FEATURES Location/Qualifiers 9 | source 1..2143 10 | /organism="synthetic DNA construct" 11 | /mol_type="other DNA" 12 | promoter 5..39 13 | /label=J23102 14 | primer_bind 5..39 15 | /label=Primer 1 16 | primer_bind complement(11..40) 17 | /label=Primer 1 18 | misc_feature 44..2143 19 | /label=DVA 20 | primer_bind complement(179..209) 21 | /label=Primer 2 22 | CDS complement(1007..1867) 23 | /label=bla 24 | ORIGIN 25 | 1 ggagttgaca gctagctcag tcctaggtac tgtgctagct actagagacc tactagtagc 26 | 61 ggccgctgca ggcttcctcg ctcactgact cgctgcgctc ggtcgttcgg ctgcggcgag 27 | 121 cggtatcagc tcactcaaag gcggtaatac ggttatccac agaatcaggg gataacgcag 28 | 181 gaaagaacat gtgagcaaaa ggccagcaaa aggccaggaa ccgtaaaaag gccgcgttgc 29 | 241 tggcgttttt ccacaggctc cgcccccctg acgagcatca caaaaatcga cgctcaagtc 30 | 301 agaggtggcg aaacccgaca ggactataaa gataccaggc gtttccccct ggaagctccc 31 | 361 tcgtgcgctc tcctgttccg accctgccgc ttaccggata cctgtccgcc tttctccctt 32 | 421 cgggaagcgt ggcgctttct catagctcac gctgtaggta tctcagttcg gtgtaggtcg 33 | 481 ttcgctccaa gctgggctgt gtgcacgaac cccccgttca gcccgaccgc tgcgccttat 34 | 541 ccggtaacta tcgtcttgag tccaacccgg taagacacga cttatcgcca ctggcagcag 35 | 601 ccactggtaa caggattagc agagcgaggt atgtaggcgg tgctacagag ttcttgaagt 36 | 661 ggtggcctaa ctacggctac actagaagaa cagtatttgg tatctgcgct ctgctgaagc 37 | 721 cagttacctt cggaaaaaga gttggtagct cttgatccgg caaacaaacc accgctggta 38 | 781 gcggtggttt ttttgtttgc aagcagcaga ttacgcgcag aaaaaaagga tctcaagaag 39 | 841 atcctttgat cttttctacg gggtctgacg ctcagtggaa cgaaaactca cgttaaggga 40 | 901 ttttggtcat gagattatca aaaaggatct tcacctagat ccttttaaat taaaaatgaa 41 | 961 gttttaaatc aatctaaagt atatatgagt aaacttggtc tgacagttac caatgcttaa 42 | 1021 tcagtgaggc 
acctatctca gcgatctgtc tatttcgttc atccatagtt gcctgactcc 43 | 1081 ccgtcgtgta gataactacg atacgggagg gcttaccatc tggccccagt gctgcaatga 44 | 1141 taccgcgcga cccacgctca ccggctccag atttatcagc aataaaccag ccagccggaa 45 | 1201 gggccgagcg cagaagtggt cctgcaactt tatccgcctc catccagtct attaattgtt 46 | 1261 gccgggaagc tagagtaagt agttcgccag ttaatagttt gcgcaacgtt gttgccattg 47 | 1321 ctacaggcat cgtggtgtca cgctcgtcgt ttggtatggc ttcattcagc tccggttccc 48 | 1381 aacgatcaag gcgagttaca tgatccccca tgttgtgcaa aaaagcggtt agctccttcg 49 | 1441 gtcctccgat cgttgtcaga agtaagttgg ccgcagtgtt atcactcatg gttatggcag 50 | 1501 cactgcataa ttctcttact gtcatgccat ccgtaagatg cttttctgtg actggtgagt 51 | 1561 actcaaccaa gtcattctga gaatagtgta tgcggcgacc gagttgctct tgcccggcgt 52 | 1621 caatacggga taataccgcg ccacatagca gaactttaaa agtgctcatc attggaaaac 53 | 1681 gttcttcggg gcgaaaactc tcaaggatct taccgctgtt gagatccagt tcgatgtaac 54 | 1741 ccactcgtgc acccaactga tcttcagcat cttttacttt caccagcgtt tctgggtgag 55 | 1801 caaaaacagg aaggcaaaat gccgcaaaaa agggaataag ggcgacacgg aaatgttgaa 56 | 1861 tactcatact cttccttttt caatattatt gaagcattta tcagggttat tgtctcatga 57 | 1921 gcggatacat atttgaatgt atttagaaaa ataaacaaat aggggttccg cgcacatttc 58 | 1981 cccgaaaagt gccacctgac gtctaagaaa ccattattat catgacatta acctataaaa 59 | 2041 ataggcgtat cacgaggcag aatttcagat aaaaaaaatc cttagctttc gctaaggatg 60 | 2101 atttctggaa ttcgcggccg cttctagaga ctagtgggtc tca 61 | // 62 | -------------------------------------------------------------------------------- /src/examples/genbank/CCR5_multifrag_insert1.gb: -------------------------------------------------------------------------------- 1 | LOCUS Exported 947 bp ds-DNA linear UNA 25-JUL-2014 2 | DEFINITION natural linear DNA 3 | ACCESSION . 4 | VERSION . 
5 | KEYWORDS CCR5_multifrag_insert1 6 | SOURCE natural DNA sequence 7 | ORGANISM unspecified 8 | REFERENCE 1 (bases 1 to 947) 9 | AUTHORS Clontech 10 | TITLE Direct Submission 11 | JOURNAL Exported Friday, Apr 29, 2016 from SnapGene 3.1.2 12 | http://www.snapgene.com 13 | FEATURES Location/Qualifiers 14 | source 1..947 15 | /organism="unspecified" 16 | /mol_type="genomic DNA" 17 | misc_feature 1..12 18 | /note="20bp overlap" 19 | misc_feature 1..10 20 | /note="15bp overlap" 21 | misc_feature 1..8 22 | /note="MCS1" 23 | /note="pUC18/19 multiple cloning site" 24 | misc_feature 9..947 25 | /note="L-arm" 26 | misc_feature 941..947 27 | /note="20bp overlap" 28 | misc_feature 943..947 29 | /note="15bp overlap" 30 | ORIGIN 31 | 1 ggggatccat ggtcttgtct tttaattctc ttttcgagga ctgagaggga gggtagcatg 32 | 61 gtagttaaga gtgcaggctt cccgcattca aaatcggttg cttactagct gtgtggcttt 33 | 121 gagcaagtta ctcaccctct ctgtgcttca aggtccttgt ctgcaaaatg tgaaaaatat 34 | 181 ttcctgcctc ataaggttgc cctaaggatt aaatgaatga atgggtatga tgcttagaac 35 | 241 agtgattggc atccagtatg tgccctcgag gcctcttaat tattactggc ttgctcatag 36 | 301 tgcatgttct ttgtgggcta actctagcgt caataaaaat gttaagactg agttgcagcc 37 | 361 gggcatggtg gctcatgcct gtaatcccag cattctagga ggctgaggca ggaggatcgc 38 | 421 ttgagcccag gagttcgaga ccagcctggg caacatagtg tgatcttgta tctataaaaa 39 | 481 taaacaaaat tagcttggtg tggtggcgcc tgtagtcccc agccacttgg aggggtgagg 40 | 541 tgagaggatt gcttgagccc gggatggtcc aggctgcagt gagccatgat cgtgccactg 41 | 601 cactccagcc tgggcgacag agtgagaccc tgtctcacaa caacaacaac aacaacaaaa 42 | 661 aggctgagct gcaccatgct tgacccagtt tcttaaaatt gttgtcaaag cttcattcac 43 | 721 tccatggtgc tatagagcac aagattttat ttggtgagat ggtgctttca tgaattcccc 44 | 781 caacagagcc aagctctcca tctagtggac agggaagcta gcagcaaacc ttcccttcac 45 | 841 tacaaaactt cattgcttgg ccaaaaagag agttaattca atgtagacat ctatgtaggc 46 | 901 aattaaaaac ctattgatgt ataaaacagt ttgcattcat ggagggc 47 | // 48 | 
-------------------------------------------------------------------------------- /src/examples/genbank/Ecoli_DERA_Implicitly_Circular.gb: -------------------------------------------------------------------------------- 1 | LOCUS E.coli DERA 1024 bp DNA 01-SEP-2015 2 | FEATURES Location/Qualifiers 3 | misc_feature 1..40 4 | /label="UNS7" 5 | protein_bind 41..62 6 | /label="CRP-cAMP binding site" 7 | /ApEinfo_revcolor=#d59687 8 | /ApEinfo_fwdcolor=#d59687 9 | promoter 411..183 10 | /label="rhaBADp" 11 | /ApEinfo_revcolor=#b1ff67 12 | /ApEinfo_fwdcolor=#b1ff67 13 | protein_bind 71..92 14 | /label="CRP-cAMP binding site" 15 | /ApEinfo_revcolor=#ffef86 16 | /ApEinfo_fwdcolor=#ffef86 17 | protein_bind 93..109 18 | /label="RhaS binding site" 19 | /ApEinfo_revcolor=#faac61 20 | /ApEinfo_fwdcolor=#faac61 21 | protein_bind 126..142 22 | /label="RhaS binding site" 23 | /ApEinfo_revcolor=#d59687 24 | /ApEinfo_fwdcolor=#d59687 25 | -35_signal 141..146 26 | /label="-35 region" 27 | /ApEinfo_revcolor=#faac61 28 | /ApEinfo_fwdcolor=#faac61 29 | misc_signal 146..151 30 | /label="EcoRI site" 31 | /ApEinfo_revcolor=#d59687 32 | /ApEinfo_fwdcolor=#d59687 33 | misc_feature 147..147 34 | /label="mutation" 35 | -10_signal 162..167 36 | /label="-10 region" 37 | /ApEinfo_revcolor=#ffef86 38 | /ApEinfo_fwdcolor=#ffef86 39 | misc_signal 174..174 40 | /label="TSS" 41 | /ApEinfo_revcolor=#9eafd2 42 | /ApEinfo_fwdcolor=#9eafd2 43 | RBS 184..217 44 | /label="syn-RBS-66K" 45 | start 218..220 46 | /label="start codon" 47 | misc_feature 221..244 48 | /label="8His" 49 | misc_feature 245..1024 50 | /label="E. 
coli MG1655 DERA" 51 | ORIGIN 52 | 1 caagacgctg gctctgacat ttccgctact gaactactcg cggtgagcat cacatcacca 53 | 61 caattcagca aattgtgaac atcatcacgt tcatctttcc ctggttgcca atggcccatt 54 | 121 ttcctgtcag taacgagaag gtcgcgtatt caggcgcttt ttagactggt cgtaatgaaa 55 | 181 ttcTAGGGCT ACGAGCCGAC ATTAGGCGAG GAAGAAAatg catcatcatc accatcatca 56 | 241 tcatATGACT GATCTGAAAG CAAGCAGCCT GCGTGCACTG AAATTGATGG ACCTGACCAC 57 | 301 CCTGAATGAC GACGACACCG ACGAGAAAGT GATCGCCCTG TGTCATCAGG CCAAAACTCC 58 | 361 GGTCGGCAAT ACCGCCGCTA TCTGTATCTA TCCTCGCTTT ATCCCGATTG CTCGCAAAAC 59 | 421 TCTGAAAGAG CAGGGCACCC CGGAAATCCG TATCGCTACG GTAACCAACT TCCCACACGG 60 | 481 TAACGACGAC ATCGACATCG CGCTGGCAGA AACCCGTGCG GCAATCGCCT ACGGTGCTGA 61 | 541 TGAAGTTGAC GTTGTGTTCC CGTACCGCGC GCTGATGGCG GGTAACGAGC AGGTTGGTTT 62 | 601 TGACCTGGTG AAAGCCTGTA AAGAGGCTTG CGCGGCAGCG AATGTACTGC TGAAAGTGAT 63 | 661 CATCGAAACC GGCGAACTGA AAGACGAAGC GCTGATCCGT AAAGCGTCTG AAATCTCCAT 64 | 721 CAAAGCGGGT GCGGACTTCA TCAAAACCTC TACCGGTAAA GTGGCTGTGA ACGCGACGCC 65 | 781 GGAAAGCGCG CGCATCATGA TGGAAGTGAT CCGTGATATG GGCGTAGAAA AAACCGTTGG 66 | 841 TTTCAAACCG GCGGGCGGCG TGCGTACTGC GGAAGATGCG CAGAAATATC TCGCCATTGC 67 | 901 AGATGAACTG TTCGGTGCTG ACTGGGCAGA TGCGCGTCAC TACCGCTTTG GCGCTTCCAG 68 | 961 CCTGCTGGCA AGCCTGCTGA AAGCGCTGGG TCACGGCGAC GGTAAGAGCG CCAGCAGCTA 69 | 1021 CTAA 70 | // -------------------------------------------------------------------------------- /src/examples/genbank/Ecoli_DERA_Implicitly_Linear.gb: -------------------------------------------------------------------------------- 1 | LOCUS E.coli DERA 1024 bp DNA 01-SEP-2015 2 | FEATURES Location/Qualifiers 3 | misc_feature 1..40 4 | /label="UNS7" 5 | protein_bind 41..62 6 | /label="CRP-cAMP binding site" 7 | /ApEinfo_revcolor=#d59687 8 | /ApEinfo_fwdcolor=#d59687 9 | promoter 41..183 10 | /label="rhaBADp" 11 | /ApEinfo_revcolor=#b1ff67 12 | /ApEinfo_fwdcolor=#b1ff67 13 | protein_bind 71..92 14 | /label="CRP-cAMP binding site" 15 | /ApEinfo_revcolor=#ffef86 16 | 
/ApEinfo_fwdcolor=#ffef86 17 | protein_bind 93..109 18 | /label="RhaS binding site" 19 | /ApEinfo_revcolor=#faac61 20 | /ApEinfo_fwdcolor=#faac61 21 | protein_bind 126..142 22 | /label="RhaS binding site" 23 | /ApEinfo_revcolor=#d59687 24 | /ApEinfo_fwdcolor=#d59687 25 | -35_signal 141..146 26 | /label="-35 region" 27 | /ApEinfo_revcolor=#faac61 28 | /ApEinfo_fwdcolor=#faac61 29 | misc_signal 146..151 30 | /label="EcoRI site" 31 | /ApEinfo_revcolor=#d59687 32 | /ApEinfo_fwdcolor=#d59687 33 | misc_feature 147..147 34 | /label="mutation" 35 | -10_signal 162..167 36 | /label="-10 region" 37 | /ApEinfo_revcolor=#ffef86 38 | /ApEinfo_fwdcolor=#ffef86 39 | misc_signal 174..174 40 | /label="TSS" 41 | /ApEinfo_revcolor=#9eafd2 42 | /ApEinfo_fwdcolor=#9eafd2 43 | RBS 184..217 44 | /label="syn-RBS-66K" 45 | start 218..220 46 | /label="start codon" 47 | misc_feature 221..244 48 | /label="8His" 49 | misc_feature 245..1024 50 | /label="E. coli MG1655 DERA" 51 | ORIGIN 52 | 1 caagacgctg gctctgacat ttccgctact gaactactcg cggtgagcat cacatcacca 53 | 61 caattcagca aattgtgaac atcatcacgt tcatctttcc ctggttgcca atggcccatt 54 | 121 ttcctgtcag taacgagaag gtcgcgtatt caggcgcttt ttagactggt cgtaatgaaa 55 | 181 ttcTAGGGCT ACGAGCCGAC ATTAGGCGAG GAAGAAAatg catcatcatc accatcatca 56 | 241 tcatATGACT GATCTGAAAG CAAGCAGCCT GCGTGCACTG AAATTGATGG ACCTGACCAC 57 | 301 CCTGAATGAC GACGACACCG ACGAGAAAGT GATCGCCCTG TGTCATCAGG CCAAAACTCC 58 | 361 GGTCGGCAAT ACCGCCGCTA TCTGTATCTA TCCTCGCTTT ATCCCGATTG CTCGCAAAAC 59 | 421 TCTGAAAGAG CAGGGCACCC CGGAAATCCG TATCGCTACG GTAACCAACT TCCCACACGG 60 | 481 TAACGACGAC ATCGACATCG CGCTGGCAGA AACCCGTGCG GCAATCGCCT ACGGTGCTGA 61 | 541 TGAAGTTGAC GTTGTGTTCC CGTACCGCGC GCTGATGGCG GGTAACGAGC AGGTTGGTTT 62 | 601 TGACCTGGTG AAAGCCTGTA AAGAGGCTTG CGCGGCAGCG AATGTACTGC TGAAAGTGAT 63 | 661 CATCGAAACC GGCGAACTGA AAGACGAAGC GCTGATCCGT AAAGCGTCTG AAATCTCCAT 64 | 721 CAAAGCGGGT GCGGACTTCA TCAAAACCTC TACCGGTAAA GTGGCTGTGA ACGCGACGCC 65 | 781 GGAAAGCGCG CGCATCATGA TGGAAGTGAT CCGTGATATG 
GGCGTAGAAA AAACCGTTGG 66 | 841 TTTCAAACCG GCGGGCGGCG TGCGTACTGC GGAAGATGCG CAGAAATATC TCGCCATTGC 67 | 901 AGATGAACTG TTCGGTGCTG ACTGGGCAGA TGCGCGTCAC TACCGCTTTG GCGCTTCCAG 68 | 961 CCTGCTGGCA AGCCTGCTGA AAGCGCTGGG TCACGGCGAC GGTAAGAGCG CCAGCAGCTA 69 | 1021 CTAA 70 | // -------------------------------------------------------------------------------- /src/examples/genbank/Geneious.Genbank.gb: -------------------------------------------------------------------------------- 1 | LOCUS Construct1 3681 bp DNA circular UNA 01-NOV-2017 2 | DEFINITION Golden Gate ligation product. 3 | ACCESSION urn.local...j6-83tb8rk 4 | VERSION urn.local...j6-83tb8rk 5 | KEYWORDS . 6 | SOURCE 7 | ORGANISM . 8 | FEATURES Location/Qualifiers 9 | misc_feature 1..138 10 | /note="Geneious type: Concatenated sequence" 11 | /standard_name="I13453_AB (concatenated sequence 1)" 12 | misc_feature 1..4 13 | /note="Geneious type: ligation" 14 | /standard_name="Ligation" 15 | promoter 5..134 16 | /standard_name="I13453" 17 | misc_feature 135..138 18 | /note="Geneious type: ligation" 19 | /standard_name="Ligation" 20 | misc_feature 135..164 21 | /note="Geneious type: Concatenated sequence" 22 | /standard_name="B0032m_BC (concatenated sequence 2)" 23 | RBS 139..160 24 | /standard_name="B0032m" 25 | misc_feature 161..164 26 | /note="Geneious type: ligation" 27 | /standard_name="Ligation" 28 | misc_feature 161..1321 29 | /note="Geneious type: Concatenated sequence" 30 | /standard_name="C0012m_CD (concatenated sequence 3)" 31 | CDS 165..1317 32 | /standard_name="C0012" 33 | misc_feature 1318..1321 34 | /note="Geneious type: ligation" 35 | /standard_name="Ligation" 36 | misc_feature 1318..1454 37 | /note="Geneious type: Concatenated sequence" 38 | /standard_name="B0015_DE (concatenated sequence 4)" 39 | terminator 1322..1450 40 | /standard_name="B0015" 41 | misc_feature 1451..1454 42 | /note="Geneious type: ligation" 43 | /standard_name="Ligation" 44 | misc_feature 1451..4 45 | /note="Geneious type: 
Concatenated sequence" 46 | /standard_name="DVK_AE (concatenated sequence 5)" 47 | CDS complement(2560..3372) 48 | /standard_name="KanR" 49 | ORIGIN 50 | 1 ggagacattg attatttgca cggcgtcaca ctttgctatg ccatagcatt tttatccata 51 | 61 agattagcgg atcctacctg acgcttttta tcgcaactct ctactgtttc tccataccgt 52 | 121 ttttttgggc tagctactag agtcacacag gaaagtacta aatgatggtg aatgtgaaac 53 | 181 cagtaacgtt atacgatgtc gcagagtatg ccggtgtctc ttatcagacc gtttcccgcg 54 | 241 tggtgaacca ggccagccac gtttctgcga aaacgcggga aaaagtggaa gcggcgatgg 55 | 301 cggagctgaa ttacattccc aaccgcgtgg cacaacaact ggcgggcaaa cagtcgttgc 56 | 361 tgattggcgt tgccacctcc agtctggccc tgcacgcgcc gtcgcaaatt gtcgcggcga 57 | 421 ttaaatctcg cgccgatcaa ctgggtgcca gcgtggtggt gtcgatggta gaacgaagcg 58 | 481 gcgtcgaagc ctgtaaagcg gcggtgcaca atcttctcgc gcaacgcgtc agtgggctga 59 | 541 tcattaacta tccgctggat gaccaggatg ccattgctgt ggaagctgcc tgcactaatg 60 | 601 ttccggcgtt atttcttgat gtctctgacc agacacccat caacagtatt attttctccc 61 | 661 atgaggacgg tacgcgactg ggcgtggagc atctggtcgc attgggtcac cagcaaatcg 62 | 721 cgctgttagc gggcccatta agttctgtct cggcgcgtct gcgtctggct ggctggcata 63 | 781 aatatctcac tcgcaatcaa attcagccga tagcggaacg ggaaggcgac tggagtgcca 64 | 841 tgtccggttt tcaacaaacc atgcaaatgc tgaatgaggg catcgttccc actgcgatgc 65 | 901 tggttgccaa cgatcagatg gcgctgggcg caatgcgcgc cattaccgag tccgggctgc 66 | 961 gcgttggtgc ggatatctcg gtagtgggat acgacgatac cgaagatagc tcatgttata 67 | 1021 tcccgccgtt aaccaccatc aaacaggatt ttcgcctgct ggggcaaacc agcgtggacc 68 | 1081 gcttgctgca actctctcag ggccaggcgg tgaagggcaa tcagctgttg cccgtctcac 69 | 1141 tggtgaaaag aaaaaccacc ctggcgccca atacgcaaac cgcctctccc cgcgcgttgg 70 | 1201 ccgattcatt aatgcagctg gcacgacagg tttcccgact ggaaagcggg caggctgcaa 71 | 1261 acgacgaaaa ctacgcttta gtagcttaat aactctgata gtgctagtgt agatctcagg 72 | 1321 tccaggcatc aaataaaacg aaaggctcag tcgaaagact gggcctttcg ttttatctgt 73 | 1381 tgtttgtcgg tgaacgctct ctactagagt cacactggct caccttcggg tgggcctttc 74 | 1441 tgcgtttata gcttatgtct tctactagta 
gcggccgctg cagtccggca aaaaagggca 75 | 1501 aggtgtcacc accctgccct ttttctttaa aaccgaaaag attacttcgc gttatgcagg 76 | 1561 cttcctcgct cactgactcg ctgcgctcgg tcgttcggct gcggcgagcg gtatcagctc 77 | 1621 actcaaaggc ggtaatacgg ttatccacag aatcagggga taacgcagga aagaacatgt 78 | 1681 gagcaaaagg ccagcaaaag gccaggaacc gtaaaaaggc cgcgttgctg gcgtttttcc 79 | 1741 acaggctccg cccccctgac gagcatcaca aaaatcgacg ctcaagtcag aggtggcgaa 80 | 1801 acccgacagg actataaaga taccaggcgt ttccccctgg aagctccctc gtgcgctctc 81 | 1861 ctgttccgac cctgccgctt accggatacc tgtccgcctt tctcccttcg ggaagcgtgg 82 | 1921 cgctttctca tagctcacgc tgtaggtatc tcagttcggt gtaggtcgtt cgctccaagc 83 | 1981 tgggctgtgt gcacgaaccc cccgttcagc ccgaccgctg cgccttatcc ggtaactatc 84 | 2041 gtcttgagtc caacccggta agacacgact tatcgccact ggcagcagcc actggtaaca 85 | 2101 ggattagcag agcgaggtat gtaggcggtg ctacagagtt cttgaagtgg tggcctaact 86 | 2161 acggctacac tagaagaaca gtatttggta tctgcgctct gctgaagcca gttaccttcg 87 | 2221 gaaaaagagt tggtagctct tgatccggca aacaaaccac cgctggtagc ggtggttttt 88 | 2281 ttgtttgcaa gcagcagatt acgcgcagaa aaaaaggatc tcaagaagat cctttgatct 89 | 2341 tttctacggg gtctgacgct cagtggaacg aaaactcacg ttaagggatt ttggtcatga 90 | 2401 gattatcaaa aaggatcttc acctagatcc ttttaaatta aaaatgaagt tttaaatcaa 91 | 2461 tctaaagtat atatgagtaa acttggtctg acagctcgag tcccgtcaag tcagcgtaat 92 | 2521 gctctgccag tgttacaacc aattaaccaa ttctgattag aaaaactcat cgagcatcaa 93 | 2581 atgaaactgc aatttattca tatcaggatt atcaatacca tatttttgaa aaagccgttt 94 | 2641 ctgtaatgaa ggagaaaact caccgaggca gttccatagg atggcaagat cctggtatcg 95 | 2701 gtctgcgatt ccgactcgtc caacatcaat acaacctatt aatttcccct cgtcaaaaat 96 | 2761 aaggttatca agtgagaaat caccatgagt gacgactgaa tccggtgaga atggcaaaag 97 | 2821 cttatgcatt tctttccaga cttgttcaac aggccagcca ttacgctcgt catcaaaatc 98 | 2881 actcgcatca accaaaccgt tattcattcg tgattgcgcc tgagcgagac gaaatacgcg 99 | 2941 atcgctgtta aaaggacaat tacaaacagg aatcgaatgc aaccggcgca ggaacactgc 100 | 3001 cagcgcatca acaatatttt cacctgaatc aggatattct tctaatacct 
ggaatgctgt 101 | 3061 tttcccgggg atcgcagtgg tgagtaacca tgcatcatca ggagtacgga taaaatgctt 102 | 3121 gatggtcgga agaggcataa attccgtcag ccagtttagt ctgaccatct catctgtaac 103 | 3181 atcattggca acgctacctt tgccatgttt cagaaacaac tctggcgcat cgggcttccc 104 | 3241 atacaatcga tagattgtcg cacctgattg cccgacatta tcgcgagccc atttataccc 105 | 3301 atataaatca gcatccatgt tggaatttaa tcgcggcctg gagcaagacg tttcccgttg 106 | 3361 aatatggctc ataacacccc ttgtattact gtttatgtaa gcagacagtt ttattgttca 107 | 3421 tgatgatata tttttatctt gtgcaatgta acatcagaga ttttgagaca caacgtggct 108 | 3481 ttgttgaata aatcgaactt ttgctgagtt gaaggatcag ctcgagtgcc acctgacgtc 109 | 3541 taagaaacca ttattatcat gacattaacc tataaaaata ggcgtatcac gaggcagaat 110 | 3601 ttcagataaa aaaaatcctt agctttcgct aaggatgatt tctggaattc gcggccgctt 111 | 3661 ctagagacta gtggaagaca t 112 | // 113 | -------------------------------------------------------------------------------- /src/examples/genbank/NC_011521.gb: -------------------------------------------------------------------------------- 1 | LOCUS NC_011521 717 bp DNA linear CON 16-DEC-2014 2 | DEFINITION Neisseria gonorrhoeae plasmid pCmGFP, complete sequence. 3 | ACCESSION NC_011521 REGION: 4419..5135 4 | VERSION NC_011521.1 5 | DBLINK BioProject: PRJNA178868 6 | KEYWORDS RefSeq. 7 | SOURCE Neisseria gonorrhoeae 8 | ORGANISM Neisseria gonorrhoeae 9 | Bacteria; Proteobacteria; Betaproteobacteria; Neisseriales; 10 | Neisseriaceae; Neisseria. 11 | REFERENCE 1 (bases 1 to 717) 12 | AUTHORS Srikhanta,Y.N., Dowideit,S.J., Edwards,J.L., Falsetta,M.L., 13 | Wu,H.J., Harrison,O.B., Fox,K.L., Seib,K.L., Maguire,T.L., 14 | Wang,A.H., Maiden,M.C., Grimmond,S.M., Apicella,M.A. and 15 | Jennings,M.P. 16 | TITLE Phasevarions mediate random switching of gene expression in 17 | pathogenic Neisseria 18 | JOURNAL PLoS Pathog. 
5 (4), E1000400 (2009) 19 | PUBMED 19390608 20 | REFERENCE 2 (bases 1 to 717) 21 | CONSRTM NCBI Genome Project 22 | TITLE Direct Submission 23 | JOURNAL Submitted (04-NOV-2008) National Center for Biotechnology 24 | Information, NIH, Bethesda, MD 20894, USA 25 | REFERENCE 3 (bases 1 to 717) 26 | AUTHORS Dowideit,S.J., Eckert,A.M. and Jennings,M.P. 27 | TITLE Direct Submission 28 | JOURNAL Submitted (30-AUG-2008) School of Molecular and Microbial Sciences, 29 | The University of Queensland, 11 Cooper Rd, St Lucia Campus, St 30 | Lucia, QLD 4072, Australia 31 | COMMENT PROVISIONAL REFSEQ: This record has not yet been subject to final 32 | NCBI review. The reference sequence was derived from FJ172221. 33 | COMPLETENESS: full length. 34 | FEATURES Location/Qualifiers 35 | source 1..717 36 | /organism="Neisseria gonorrhoeae" 37 | /mol_type="genomic DNA" 38 | /db_xref="taxon:485" 39 | /plasmid="pCmGFP" 40 | gene 1..717 41 | /gene="gfp" 42 | /locus_tag="pCmGFP_001" 43 | /db_xref="GeneID:7011691" 44 | CDS 1..717 45 | /gene="gfp" 46 | /locus_tag="pCmGFP_001" 47 | /codon_start=1 48 | /transl_table=11 49 | /product="green fluorescent protein" 50 | /protein_id="YP_002302326.1" 51 | /db_xref="GeneID:7011691" 52 | /translation="MSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTL 53 | KFICTTGKLPVPWPTLVTTFGYGVQCFARYPDHMKQHDFFKSAMPEGYVQERTIFFKD 54 | DGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKNG 55 | IKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHM 56 | VLLEFVTAAGITHGMDELYK" 57 | primer_bind 1..22 58 | /label=Primer 1 59 | primer_bind complement(697..717) 60 | /label=Primer 2 61 | CONTIG join(FJ172221.1:1..6062) 62 | ORIGIN 63 | 1 atgagtaaag gagaagaact tttcactgga gttgtcccaa ttcttgttga attagatggt 64 | 61 gatgttaatg ggcacaaatt ttctgtcagt ggagagggtg aaggtgatgc aacatacgga 65 | 121 aaacttaccc ttaaatttat ttgcactact ggaaaactac ctgttccatg gccaacactt 66 | 181 gtcactactt tcggttatgg tgttcaatgc tttgcgagat acccagatca tatgaaacag 67 | 241 catgactttt tcaagagtgc catgcctgaa ggttatgtac 
aggaaagaac tatatttttc 68 | 301 aaagatgacg ggaactacaa gacacgtgct gaagtcaagt ttgaaggtga tacccttgtt 69 | 361 aatagaatcg agttaaaagg tattgatttt aaagaagatg gaaacattct tggacacaaa 70 | 421 ttggaataca actataactc acacaatgta tacatcatgg cagacaaaca aaagaatgga 71 | 481 atcaaagtta acttcaaaat tagacacaac attgaagatg gaagcgttca actagcagac 72 | 541 cattatcaac aaaatactcc aattggcgat ggccctgtcc ttttaccaga caaccattac 73 | 601 ctgtccacac aatctgccct ttcgaaagat cccaacgaaa agagagacca catggtcctt 74 | 661 cttgagtttg taacagctgc tgggattaca catggcatgg atgaactata caaataa 75 | // -------------------------------------------------------------------------------- /src/examples/genbank/RTO4_16460_individual_exons.gb: -------------------------------------------------------------------------------- 1 | LOCUS Exported 1856 bp ds-DNA linear PLN 18-DEC-2017 2 | DEFINITION R. toruloides protein ID RTO4_16460 coding sequence with introns. 3 | ACCESSION NoAccession 4 | VERSION . 5 | KEYWORDS . 6 | SOURCE Rhodosporidium toruloides IFO 0880 (aka Rhodotorula toruloides) 7 | ORGANISM Rhodosporidium toruloides IFO 0880 (aka Rhodotorula toruloides) 8 | REFERENCE 1 (bases 1 to 1856) 9 | AUTHORS . 
10 | TITLE Direct Submission 11 | JOURNAL Exported Dec 18, 2017 from SnapGene 4.1.2 12 | http://www.snapgene.com 13 | FEATURES Location/Qualifiers 14 | source 1..1856 15 | /organism="Rhodosporidium toruloides IFO 0880 (aka 16 | Rhodotorula toruloides)" 17 | /mol_type="genomic DNA" 18 | CDS 1..59 19 | /product="RTO4_16460" 20 | /label=RTO4_16460_exon_1 21 | CDS 168..272 22 | /product="RTO4_16460" 23 | /label=RTO4_16460_exon_2 24 | CDS 338..484 25 | /product="RTO4_16460" 26 | /label=RTO4_16460_exon_3 27 | CDS 571..580 28 | /product="RTO4_16460" 29 | /label=RTO4_16460_exon_4 30 | CDS 692..935 31 | /product="RTO4_16460" 32 | /label=RTO4_16460_exon_5 33 | CDS 1021..1184 34 | /product="RTO4_16460" 35 | /label=RTO4_16460_exon_6 36 | CDS 1250..1334 37 | /product="RTO4_16460" 38 | /label=RTO4_16460_exon_7 39 | CDS 1402..1431 40 | /product="RTO4_16460" 41 | /label=RTO4_16460_exon_8 42 | CDS 1509..1553 43 | /product="RTO4_16460" 44 | /label=RTO4_16460_exon_9 45 | CDS 1617..1707 46 | /product="RTO4_16460" 47 | /label=RTO4_16460_exon_10 48 | CDS 1790..1856 49 | /product="RTO4_16460" 50 | /label=RTO4_16460_exon_11 51 | ORIGIN 52 | 1 atgggccagc aggcgacgct cgaggagctg tacacacgct cagaaatctc caagatcaag 53 | 61 caagtcgagc cagctcttct cctcaccacc ccacaacata ccccgcagcc cacgacagct 54 | 121 ctcccacagc acccacagcc tgctgaccag ctcgagtgca tccacagatt tgcgcccttt 55 | 181 ggcgtcccgc ggtcgcgccg gctgcagacg ttctccgtct ttgcctggac gacggcactg 56 | 241 cccatcctac tcggcgtctt cttcctgctc tggtgcgtca ggcttggcgt ggattgggag 57 | 301 tagcgggcga ctcagctgac ttgcgcatcc gctccagctc gttcccaccg ctctggccgg 58 | 361 ccgtcatcgc ctacctcacc tgggtctttt tcattgacca ggcgccgact cacggtggac 59 | 421 gggcgcagtc ttggctgcgg aagagtcgga tatgggtctg gtttgcagga tattatcccg 60 | 481 tcaggtgcgt cgtcccgtct gttgcgcgtc ttgcgacctc gctcacggcc aactcgcccg 61 | 541 accggctacc tccgaacttc ccgccaacag cttgatcaag gttcgtccac ctttccttca 62 | 601 gcttgagtga tctgtagagg agctgcagga tcaagcccaa cccggggagg acctcggagg 63 | 661 acgacgccgc tgacttgctc tcctcctaca 
gagcgccgac ttgccgcctg accggaagta 64 | 721 cgtcttcggc tatcatccgc acggcgtcat aggcatgggc gccatcgcca acttcgcgac 65 | 781 cgacgcaacc ggcttctcga cactcttccc cggcttgaac cctcacctcc tcaccctcca 66 | 841 aagcaacttc aagctcccgc tctatcgcga gttgctgctc gccctcggca tctgctccgt 67 | 901 ctcgatgaag agctgccaga acatcctgcg gcaaggtgcg ccagtcattc cgaacgggcg 68 | 961 gtcgagcgtg aactctgggg atgggaagag ctgaccttct gcctcactcc atccatgcag 69 | 1021 gtcctggctc ggctctcacc atcgttgtcg gtggcgcagc cgagagcttg agtgcgcatc 70 | 1081 ccggaaccgc cgacctcacg ctcaagcgac gaaaaggatt catcaagctc gcgatccggc 71 | 1141 aaggcgccga cctcgtgccc gtcttttcgt tcggcgagaa cgacgtgcgt cctctgctcg 72 | 1201 acttccgcta gcgaagccct tcgctgacgc tcccggtttc ttcccccaga tcttcggcca 73 | 1261 gctgcgaaat gagcgaggga cgcggctgta caagttgcag aagcgtttcc agggcgtatt 74 | 1321 cggcttcact ctccgtacgt tgcgccgtgt cgcttcaatc tgtcgagcgt ccagtcgctc 75 | 1381 acgcagctac aactcccaca gctctcttct acggtcgggg actcttcaac tgtacgcccg 76 | 1441 agtctacgtg actagtctac cgtgggaggc actgaagagc acggctgacg tcccacctct 77 | 1501 ccgcgcagat aacgttggct tgatgccgta ccgccatccg atcgtctcgg tcggtgcgtc 78 | 1561 ccccctcgtc cctcctgacc tgcgggcttc agctaacaat tctcgacgac atctagtcgg 79 | 1621 tcgaccaatc tcggtgcagc agaaggacca cccaacgaca gcggatctcg aagaagtcca 80 | 1681 ggcgcggtat atcgcagaac tcaagcggtg cgttccagac gtctaccttt gcccgttgtc 81 | 1741 tcagactcgg taagacagat cactgacgct tcggtcactg gccgcgcagc atctgggaag 82 | 1801 actacaagga cgcctacgcc aaaagtcgca cgcgggagct caatattatc gcctga 83 | // 84 | -------------------------------------------------------------------------------- /src/examples/genbank/RTO4_16460_joined_feature.gb: -------------------------------------------------------------------------------- 1 | LOCUS Exported 1856 bp ds-DNA linear PLN 18-DEC-2017 2 | DEFINITION natural linear DNA 3 | ACCESSION No 4 | VERSION . 5 | KEYWORDS . 6 | SOURCE natural DNA sequence 7 | ORGANISM unspecified 8 | REFERENCE 1 (bases 1 to 1856) 9 | AUTHORS . 
10 | TITLE Direct Submission 11 | JOURNAL Exported Dec 18, 2017 from SnapGene 4.1.2 12 | http://www.snapgene.com 13 | FEATURES Location/Qualifiers 14 | source 1..1856 15 | /organism="unspecified" 16 | /mol_type="genomic DNA" 17 | CDS join(1..59,168..272,338..484,571..580,692..935,1021..1184, 18 | 1250..1334,1402..1431,1509..1553,1617..1707,1790..1856) 19 | /product="RTO4_16460 CDS" 20 | /label=RTO4_16460 CDS 21 | ORIGIN 22 | 1 atgggccagc aggcgacgct cgaggagctg tacacacgct cagaaatctc caagatcaag 23 | 61 caagtcgagc cagctcttct cctcaccacc ccacaacata ccccgcagcc cacgacagct 24 | 121 ctcccacagc acccacagcc tgctgaccag ctcgagtgca tccacagatt tgcgcccttt 25 | 181 ggcgtcccgc ggtcgcgccg gctgcagacg ttctccgtct ttgcctggac gacggcactg 26 | 241 cccatcctac tcggcgtctt cttcctgctc tggtgcgtca ggcttggcgt ggattgggag 27 | 301 tagcgggcga ctcagctgac ttgcgcatcc gctccagctc gttcccaccg ctctggccgg 28 | 361 ccgtcatcgc ctacctcacc tgggtctttt tcattgacca ggcgccgact cacggtggac 29 | 421 gggcgcagtc ttggctgcgg aagagtcgga tatgggtctg gtttgcagga tattatcccg 30 | 481 tcaggtgcgt cgtcccgtct gttgcgcgtc ttgcgacctc gctcacggcc aactcgcccg 31 | 541 accggctacc tccgaacttc ccgccaacag cttgatcaag gttcgtccac ctttccttca 32 | 601 gcttgagtga tctgtagagg agctgcagga tcaagcccaa cccggggagg acctcggagg 33 | 661 acgacgccgc tgacttgctc tcctcctaca gagcgccgac ttgccgcctg accggaagta 34 | 721 cgtcttcggc tatcatccgc acggcgtcat aggcatgggc gccatcgcca acttcgcgac 35 | 781 cgacgcaacc ggcttctcga cactcttccc cggcttgaac cctcacctcc tcaccctcca 36 | 841 aagcaacttc aagctcccgc tctatcgcga gttgctgctc gccctcggca tctgctccgt 37 | 901 ctcgatgaag agctgccaga acatcctgcg gcaaggtgcg ccagtcattc cgaacgggcg 38 | 961 gtcgagcgtg aactctgggg atgggaagag ctgaccttct gcctcactcc atccatgcag 39 | 1021 gtcctggctc ggctctcacc atcgttgtcg gtggcgcagc cgagagcttg agtgcgcatc 40 | 1081 ccggaaccgc cgacctcacg ctcaagcgac gaaaaggatt catcaagctc gcgatccggc 41 | 1141 aaggcgccga cctcgtgccc gtcttttcgt tcggcgagaa cgacgtgcgt cctctgctcg 42 | 1201 acttccgcta gcgaagccct tcgctgacgc tcccggtttc ttcccccaga tcttcggcca 
43 | 1261 gctgcgaaat gagcgaggga cgcggctgta caagttgcag aagcgtttcc agggcgtatt 44 | 1321 cggcttcact ctccgtacgt tgcgccgtgt cgcttcaatc tgtcgagcgt ccagtcgctc 45 | 1381 acgcagctac aactcccaca gctctcttct acggtcgggg actcttcaac tgtacgcccg 46 | 1441 agtctacgtg actagtctac cgtgggaggc actgaagagc acggctgacg tcccacctct 47 | 1501 ccgcgcagat aacgttggct tgatgccgta ccgccatccg atcgtctcgg tcggtgcgtc 48 | 1561 ccccctcgtc cctcctgacc tgcgggcttc agctaacaat tctcgacgac atctagtcgg 49 | 1621 tcgaccaatc tcggtgcagc agaaggacca cccaacgaca gcggatctcg aagaagtcca 50 | 1681 ggcgcggtat atcgcagaac tcaagcggtg cgttccagac gtctaccttt gcccgttgtc 51 | 1741 tcagactcgg taagacagat cactgacgct tcggtcactg gccgcgcagc atctgggaag 52 | 1801 actacaagga cgcctacgcc aaaagtcgca cgcgggagct caatattatc gcctga 53 | // 54 | -------------------------------------------------------------------------------- /src/examples/genbank/multi-seq-genbank.gb: -------------------------------------------------------------------------------- 1 | LOCUS sequence1 109 bp 2 | ORIGIN 3 | 1 ACTCCCCGTG CGCGCCCGGC CCGTAGCGTC CTCGTCGCCG CCCCTCGTCT CGCAGCCGCA 4 | 61 GCCCGCGTGG ACGCTCTCGC CTGAGCGCCG CGGACTAGCC CGGGTGGCC 5 | // 6 | LOCUS sequence2 171 bp 7 | ORIGIN 8 | 1 CAGTCCGGCA GCGCCGGGGT TAAGCGGCCC AAGTAAACGT AGCGCAGCGA TCGGCGCCGG 9 | 61 AGATTCGCGA ACCCGACACT CCGCGCCGCC CGCCGGCCAG GACCCGCGGC GCGATCGCGG 10 | 121 CGCCGCGCTA CAGCCAGCCT CACTGGCGCG CGGGCGAGCG CACGGGCGCT C 11 | // 12 | LOCUS sequence3 81 bp 13 | ORIGIN 14 | 1 CACGACAGGC CCGCTGAGGC TTGTGCCAGA CCTTGGAAAC CTCAGGTATA TACCTTTCCA 15 | 61 GACGCGGGAT CTCCCCTCCC C 16 | // 17 | LOCUS sequence4 150 bp 18 | ORIGIN 19 | 1 CAGCAGACAT CTGAATGAAG AAGAGGGTGC CAGCGGGTAT GAGGAGTGCA TTATCGTTAA 20 | 61 TGGGAACTTC AGTGACCAGT CCTCAGACAC GAAGGATGCT CCCTCACCCC CAGTCTTGGA 21 | 121 GGCAATCTGC ACAGAGCCAG TCTGCACACC 22 | // -------------------------------------------------------------------------------- /src/examples/genbank/pBbE0c-RFP.gb: -------------------------------------------------------------------------------- 1 
| LOCUS pBbE0c-RFP 2848 bp DNA circular 20-AUG-2015 2 | ACCESSION pBbE0c-RFP 3 | VERSION pBbE0c-RFP.1 4 | KEYWORDS . 5 | FEATURES Location/Qualifiers 6 | rep_origin complement(1235..1917) 7 | /label="colE1 origin" 8 | /note="REP_ORIGIN REP_ORIGIN [ColE1]" 9 | /note="REP_ORIGIN [ColE1]" 10 | /gene="ColE1 Origin" 11 | /vntifkey="33" 12 | terminator 1923..2028 13 | /label="T0" 14 | /gene="Terminator" 15 | /note="TERMINATOR [p15A KanR]" 16 | /note="TERMINATOR TERMINATOR [p15A KanR]" 17 | /vntifkey="43" 18 | gene complement(2044..2703) 19 | /label="CmR" 20 | /note="GENE [ZFP-GG destination LacUV5 p15A CmR]" 21 | /note="[ZFP-GG destination LacUV5 p15A CmR]" 22 | /vntifkey="22" 23 | /gene="CmR" 24 | gene 2845..948 25 | /label="RFP cassette" 26 | /note="GENE [pBbE0k-RFP]" 27 | /note="GENE GENE [pBbE0k-RFP]" 28 | /gene="RFP cassette" 29 | ORIGIN 30 | 1 cagctagctc agtcctaggt actgtgctag ctactagtga aagaggagaa atactagatg 31 | 61 gcttcctccg aagacgttat caaagagttc atgcgtttca aagttcgtat ggaaggttcc 32 | 121 gttaacggtc acgagttcga aatcgaaggt gaaggtgaag gtcgtccgta cgaaggtacc 33 | 181 cagaccgcta aactgaaagt taccaaaggt ggtccgctgg gggggggggg gggggggggg 34 | 241 ggggggccgt tcgcttggga catcckkkkk ktgtccccgc agttccagta cggttccaaa 35 | 301 gcttacgtta aacacccggc tgacatcccg gactacctga aactgtcctt cccggaaggt 36 | 361 ttcaaatggg aacgtgttat gaacttcgaa gacggtggtg ttgttaccgt tacccaggac 37 | 421 tcctccctgc aagacggtga gttcatctac aaagttaaac tgcgtggtac caacttcccg 38 | 481 tccgacggtc cggttatgca gaaaaaaacc atgggttggg aagcttccac cgaacgtatg 39 | 541 tacccggaag acggtgctct gaaaggtgaa atcaaaatgc gtctgaaact gaaagacggt 40 | 601 ggtcactacg acgctgaagt taaaaccacc tacatggcta aaaaaccggt tcagctgccg 41 | 661 ggtgcttaca aaaccgacat caaactggac atcacctccc acaacgaaga ctacaccatc 42 | 721 gttgaacagt acgaacgtgc tgaaggtcgt cactccaccg gtgcttaata acgctgatag 43 | 781 tgctagtgta gatcgctact agagccaggc atcaaataaa acgaaaggct cagtcgaaag 44 | 841 actgggcctt tcgttttatc tgttgtttgt cggtgaacgc tctctactag agtcacactg 45 | 901 gctcaccttc 
gggtgggcct ttctgcgttt atatactaga agcggccggg atcctaactc 46 | 961 gagtaaggat ctccaggcat caaataaaac gaaaggctca gtcgaaagac tgggcctttc 47 | 1021 gttttatctg ttgtttgtcg gtgaacgctc tctactagag tcacactggc tcaccttcgg 48 | 1081 gtgggccttt ctgcgtttat acctagggcg ttcggctgcg gcgagcggta tcagctcact 49 | 1141 caaaggcggt aatacggtta tccacagaat caggggataa cgcaggaaag aacatgtgag 50 | 1201 caaaaggcca gcaaaaggcc aggaaccgta aaaaggccgc gttgctggcg tttttccata 51 | 1261 ggctccgccc ccctgacgag catcacaaaa atcgacgctc aagtcagagg tggcgaaacc 52 | 1321 cgacaggact ataaagatac caggcgtttc cccctggaag ctccctcgtg cgctctcctg 53 | 1381 ttccgaccct gccgcttacc ggatacctgt ccgcctttct cccttcggga agcgtggcgc 54 | 1441 tttctcatag ctcacgctgt aggtatctca gttcggtgta ggtcgttcgc tccaagctgg 55 | 1501 gctgtgtgca cgaacccccc gttcagcccg accgctgcgc cttatccggt aactatcgtc 56 | 1561 ttgagtccaa cccggtaaga cacgacttat cgccactggc agcagccact ggtaacagga 57 | 1621 ttagcagagc gaggtatgta ggcggtgcta cagagttctt gaagtggtgg cctaactacg 58 | 1681 gctacactag aaggacagta tttggtatct gcgctctgct gaagccagtt accttcggaa 59 | 1741 aaagagttgg tagctcttga tccggcaaac aaaccaccgc tggtagcggt ggtttttttg 60 | 1801 tttgcaagca gcagattacg cgcagaaaaa aaggatctca agaagatcct ttgatctttt 61 | 1861 ctacggggtc tgacgctcag tggaacgaaa actcacgtta agggattttg gtcatgacta 62 | 1921 gtgcttggat tctcaccaat aaaaaacgcc cggcggcaac cgagcgttct gaacaaatcc 63 | 1981 agatggagtt ctgaggtcat tactggatct atcaacagga gtccaagcga gctcgatatc 64 | 2041 aaattacgcc ccgccctgcc actcatcgca gtactgttgt aattcattaa gcattctgcc 65 | 2101 gacatggaag ccatcacaaa cggcatgatg aacctgaatc gccagcggca tcagcacctt 66 | 2161 gtcgccttgc gtataatatt tgcccatggt gaaaacgggg gcgaagaagt tgtccatatt 67 | 2221 ggccacgttt aaatcaaaac tggtgaaact cacccaggga ttggctgaga cgaaaaacat 68 | 2281 attctcaata aaccctttag ggaaataggc caggttttca ccgtaacacg ccacatcttg 69 | 2341 cgaatatatg tgtagaaact gccggaaatc gtcgtggtat tcactccaga gcgatgaaaa 70 | 2401 cgtttcagtt tgctcatgga aaacggtgta acaagggtga acactatccc atatcaccag 71 | 2461 ctcaccgtct ttcattgcca tacgaaattc 
cggatgagca ttcatcaggc gggcaagaat 72 | 2521 gtgaataaag gccggataaa acttgtgctt atttttcttt acggtcttta aaaaggccgt 73 | 2581 aatatccagc tgaacggtct ggttataggt acattgagca actgactgaa atgcctcaaa 74 | 2641 atgttcttta cgatgccatt gggatatatc aacggtggta tatccagtga tttttttctc 75 | 2701 cattttagct tccttagctc ctgaaaatct cgataactca aaaaatacgc ccggtagtga 76 | 2761 tcttatttca ttatggtgaa agttggaacc tcttacgtgc cgatcaacgt ctcattttcg 77 | 2821 ccagatatcg aattcatgag atctttga 78 | // -------------------------------------------------------------------------------- /src/examples/genbank/pBbE0c-RFP_1.gb: -------------------------------------------------------------------------------- 1 | LOCUS pBbE0c-RFP 2815 bp DNA circular 20-AUG-2015 2 | ACCESSION pBbE0c-RFP 3 | VERSION pBbE0c-RFP.1 4 | KEYWORDS . 5 | FEATURES Location/Qualifiers 6 | rep_origin complement(1202..1884) 7 | /label="colE1 origin" 8 | /vntifkey="33" 9 | /gene="ColE1 Origin" 10 | /note="REP_ORIGIN REP_ORIGIN [ColE1]" 11 | /note="REP_ORIGIN [ColE1]" 12 | terminator 1890..1995 13 | /label="T0" 14 | /vntifkey="43" 15 | /note="TERMINATOR [p15A KanR]" 16 | /note="TERMINATOR TERMINATOR [p15A KanR]" 17 | /gene="Terminator" 18 | gene complement(2011..2670) 19 | /label="CmR" 20 | /gene="CmR" 21 | /vntifkey="22" 22 | /note="GENE [ZFP-GG destination LacUV5 p15A CmR]" 23 | /note="[ZFP-GG destination LacUV5 p15A CmR]" 24 | gene 2812..915 25 | /label="RFP cassette" 26 | /gene="RFP cassette" 27 | /note="GENE [pBbE0k-RFP]" 28 | /note="GENE GENE [pBbE0k-RFP]" 29 | ORIGIN 30 | 1 cagctagctc agtcctaggt actgtgctag ctactagtga aagaggagaa atactagatg 31 | 61 gcttcctccg aagacgttat caaagagttc atgcgtttca aagttcgtat ggaaggttcc 32 | 121 gttaacggtc acgagttcga aatcgaaggt gaaggtgaag gtcgtccgta cgaaggtacc 33 | 181 cagaccgcta aactgaaagt taccaaaggt ggtccgctgc cgttcgcttg ggacatcctg 34 | 241 tccccgcagt tccagtacgg ttccaaagct tacgttaaac acccggctga catcccggac 35 | 301 tacctgaaac tgtccttccc ggaaggtttc aaatgggaac gtgttatgaa cttcgaagac 36 | 361 ggtggtgttg ttaccgttac 
ccaggactcc tccctgcaag acggtgagtt catctacaaa 37 | 421 gttaaactgc gtggtaccaa cttcccgtcc gacggtccgg ttatgcagaa aaaaaccatg 38 | 481 ggttgggaag cttccaccga acgtatgtac ccggaagacg gtgctctgaa aggtgaaatc 39 | 541 aaaatgcgtc tgaaactgaa agacggtggt cactacgacg ctgaagttaa aaccacctac 40 | 601 atggctaaaa aaccggttca gctgccgggt gcttacaaaa ccgacatcaa actggacatc 41 | // -------------------------------------------------------------------------------- /src/examples/genbank/pBbS0c-RFP.gb: -------------------------------------------------------------------------------- 1 | LOCUS pBbS0c-RFP 4224 bp DNA circular 2 | ACCESSION pBbS0c-RFP 3 | VERSION pBbS0c-RFP.1 4 | KEYWORDS . 5 | FEATURES Location/Qualifiers 6 | ORIGIN 7 | 1 ttgacagcta gctcagtcct aggtactgtg ctagctacta gtgaaagagg agaaatacta 8 | 61 gatggcttcc tccgaagacg ttatcaaaga gttcatgcgt ttcaaagttc gtatggaagg 9 | 121 ttccgttaac ggtcacgagt tcgaaatcga aggtgaaggt gaaggtcgtc cgtacgaagg 10 | 181 tacccagacc gctaaactga aagttaccaa aggtggtccg ctgccgttcg cttgggacat 11 | 241 cctgtccccg cagttccagt acggttccaa agcttacgtt aaacacccgg ctgacatccc 12 | 301 ggactacctg aaactgtcct tcccggaagg tttcaaatgg gaacgtgtta tgaacttcga 13 | 361 agacggtggt gttgttaccg ttacccagga ctcctccctg caagacggtg agttcatcta 14 | 421 caaagttaaa ctgcgtggta ccaacttccc gtccgacggt ccggttatgc agaaaaaaac 15 | 481 catgggttgg gaagcttcca ccgaacgtat gtacccggaa gacggtgctc tgaaaggtga 16 | 541 aatcaaaatg cgtctgaaac tgaaagacgg tggtcactac gacgctgaag ttaaaaccac 17 | 601 ctacatggct aaaaaaccgg ttcagctgcc gggtgcttac aaaaccgaca tcaaactgga 18 | 661 catcacctcc cacaacgaag actacaccat cgttgaacag tacgaacgtg ctgaaggtcg 19 | 721 tcactccacc ggtgcttaat aacgctgata gtgctagtgt agatcgctac tagagccagg 20 | 781 catcaaataa aacgaaaggc tcagtcgaaa gactgggcct ttcgttttat ctgttgtttg 21 | 841 tcggtgaacg ctctctacta gagtcacact ggctcacctt cgggtgggcc tttctgcgtt 22 | 901 tatatactag aagcggccgg gatcctaact cgagtaagga tctccaggca tcaaataaaa 23 | 961 cgaaaggctc agtcgaaaga ctgggccttt cgttttatct gttgtttgtc ggtgaacgct 24 | 1021 ctctactaga 
gtcacactgg ctcaccttcg ggtgggcctt tctgcgttta tacctagggt 25 | 1081 acgggttttg ctgcccgcaa acgggctgtt ctggtgttgc tagtttgtta tcagaatcgc 26 | 1141 agatccggct tcagccggtt tgccggctga aagcgctatt tcttccagaa ttgccatgat 27 | 1201 tttttcccca cgggaggcgt cactggctcc cgtgttgtcg gcagctttga ttcgataagc 28 | 1261 agcatcgcct gtttcaggct gtctatgtgt gactgttgag ctgtaacaag ttgtctcagg 29 | 1321 tgttcaattt catgttctag ttgctttgtt ttactggttt cacctgttct attaggtgtt 30 | 1381 acatgctgtt catctgttac attgtcgatc tgttcatggt gaacagcttt gaatgcacca 31 | 1441 aaaactcgta aaagctctga tgtatctatc ttttttacac cgttttcatc tgtgcatatg 32 | 1501 gacagttttc cctttgatat gtaacggtga acagttgttc tacttttgtt tgttagtctt 33 | 1561 gatgcttcac tgatagatac aagagccata agaacctcag atccttccgt atttagccag 34 | 1621 tatgttctct agtgtggttc gttgtttttg cgtgagccat gagaacgaac cattgagatc 35 | 1681 atacttactt tgcatgtcac tcaaaaattt tgcctcaaaa ctggtgagct gaatttttgc 36 | 1741 agttaaagca tcgtgtagtg tttttcttag tccgttatgt aggtaggaat ctgatgtaat 37 | 1801 ggttgttggt attttgtcac cattcatttt tatctggttg ttctcaagtt cggttacgag 38 | 1861 atccatttgt ctatctagtt caacttggaa aatcaacgta tcagtcgggc ggcctcgctt 39 | 1921 atcaaccacc aatttcatat tgctgtaagt gtttaaatct ttacttattg gtttcaaaac 40 | 1981 ccattggtta agccttttaa actcatggta gttattttca agcattaaca tgaacttaaa 41 | 2041 ttcatcaagg ctaatctcta tatttgcctt gtgagttttc ttttgtgtta gttcttttaa 42 | 2101 taaccactca taaatcctca tagagtattt gttttcaaaa gacttaacat gttccagatt 43 | 2161 atattttatg aattttttta actggaaaag ataaggcaat atctcttcac taaaaactaa 44 | 2221 ttctaatttt tcgcttgaga acttggcata gtttgtccac tggaaaatct caaagccttt 45 | 2281 aaccaaagga ttcctgattt ccacagttct cgtcatcagc tctctggttg ctttagctaa 46 | 2341 tacaccataa gcattttccc tactgatgtt catcatctga gcgtattggt tataagtgaa 47 | 2401 cgataccgtc cgttctttcc ttgtagggtt ttcaatcgtg gggttgagta gtgccacaca 48 | 2461 gcataaaatt agcttggttt catgctccgt taagtcatag cgactaatcg ctagttcatt 49 | 2521 tgctttgaaa acaactaatt cagacataca tctcaattgg tctaggtgat tttaatcact 50 | 2581 ataccaattg agatgggcta gtcaatgata 
attactagtc cttttcccgg gtgatctggg 51 | 2641 tatctgtaaa ttctgctaga cctttgctgg aaaacttgta aattctgcta gaccctctgt 52 | 2701 aaattccgct agacctttgt gtgttttttt tgtttatatt caagtggtta taatttatag 53 | 2761 aataaagaaa gaataaaaaa agataaaaag aatagatccc agccctgtgt ataactcact 54 | 2821 actttagtca gttccgcagt attacaaaag gatgtcgcaa acgctgtttg ctcctctaca 55 | 2881 aaacagacct taaaacccta aaggcttaag tagcaccctc gcaagctcgg gcaaatcgct 56 | 2941 gaatattcct tttgtctccg accatcaggc acctgagtcg ctgtcttttt cgtgacattc 57 | 3001 agttcgctgc gctcacggct ctggcagtga atgggggtaa atggcactac aggcgccttt 58 | 3061 tatggattca tgcaaggaaa ctacccataa tacaagaaaa gcccgtcacg ggcttctcag 59 | 3121 ggcgttttat ggcgggtctg ctatgtggtg ctatctgact ttttgctgtt cagcagttcc 60 | 3181 tgccctctga ttttccagtc tgaccacttc ggattatccc gtgacaggtc attcagactg 61 | 3241 gctaatgcac ccagtaaggc agcggtatca tcaacaggct tacccgtctt actgtcccta 62 | 3301 gtgcttggat tctcaccaat aaaaaacgcc cggcggcaac cgagcgttct gaacaaatcc 63 | 3361 agatggagtt ctgaggtcat tactggatct atcaacagga gtccaagcga gctcgatatc 64 | 3421 aaattacgcc ccgccctgcc actcatcgca gtactgttgt aattcattaa gcattctgcc 65 | 3481 gacatggaag ccatcacaaa cggcatgatg aacctgaatc gccagcggca tcagcacctt 66 | 3541 gtcgccttgc gtataatatt tgcccatggt gaaaacgggg gcgaagaagt tgtccatatt 67 | 3601 ggccacgttt aaatcaaaac tggtgaaact cacccaggga ttggctgaga cgaaaaacat 68 | 3661 attctcaata aaccctttag ggaaataggc caggttttca ccgtaacacg ccacatcttg 69 | 3721 cgaatatatg tgtagaaact gccggaaatc gtcgtggtat tcactccaga gcgatgaaaa 70 | 3781 cgtttcagtt tgctcatgga aaacggtgta acaagggtga acactatccc atatcaccag 71 | 3841 ctcaccgtct ttcattgcca tacgaaattc cggatgagca ttcatcaggc gggcaagaat 72 | 3901 gtgaataaag gccggataaa acttgtgctt atttttcttt acggtcttta aaaaggccgt 73 | 3961 aatatccagc tgaacggtct ggttataggt acattgagca actgactgaa atgcctcaaa 74 | 4021 atgttcttta cgatgccatt gggatatatc aacggtggta tatccagtga tttttttctc 75 | 4081 cattttagct tccttagctc ctgaaaatct cgataactca aaaaatacgc ccggtagtga 76 | 4141 tcttatttca ttatggtgaa agttggaacc tcttacgtgc cgatcaacgt 
ctcattttcg 77 | 4201 ccagatatcg aattcatgag atct 78 | // 79 | -------------------------------------------------------------------------------- /src/examples/genbank/pBbS0c-RFP_no_name.gb: -------------------------------------------------------------------------------- 1 | LOCUS 4224 bp DNA circular 2 | ACCESSION pBbS0c-RFP 3 | VERSION pBbS0c-RFP.1 4 | KEYWORDS . 5 | FEATURES Location/Qualifiers 6 | rep_origin complement(1074..3302) 7 | /label=pSC101** 8 | /note="REP_ORIGIN REP_ORIGIN pSC101* aka pMPP6, gives plasm 9 | id number 3 -4 copies per cell, BglII site in pSC101* ori h 10 | as been dele ted by quick change agatcT changed to agatcA g 11 | iving pSC101* * pSC101* aka pMPP6, gives plasmid number 3-4 12 | copies p er cell, BglII site in pSC101* ori has been delet 13 | ed by quic k change agatcT changed to agatcA giving pSC101* 14 | * [pBbS0a-RFP]" 15 | /gene="SC101** Ori" 16 | /note="pSC101* aka pMPP6, gives plasmid number 3-4 copies p 17 | er cell, BglII site in pSC101* ori has been deleted by qui 18 | c k change agatcT changed to agatcA giving pSC101**" 19 | /vntifkey="33" 20 | gene 1..919 21 | /label=RFP cassette 22 | /note="GENE [pBbE0k-RFP]" 23 | /name="RFP cassette" 24 | /gene="RFP cassette" 25 | gene complement(3424..4083) 26 | /label=CmR 27 | /vntifkey="22" 28 | /gene="CmR" 29 | /note="[ZFP-GG destination LacUV5 p15A CmR]" 30 | terminator 944..1072 31 | /label=dbl term 32 | /gene="double term" 33 | /note="[p15A KanR]" 34 | /vntifkey="21" 35 | terminator 3303..3408 36 | /label=T0 37 | /note="TERMINATOR [p15A KanR]" 38 | /gene="Terminator" 39 | /vntifkey="43" 40 | ORIGIN 41 | 1 ttgacagcta gctcagtcct aggtactgtg ctagctacta gtgaaagagg agaaatacta 42 | 61 gatggcttcc tccgaagacg ttatcaaaga gttcatgcgt ttcaaagttc gtatggaagg 43 | 121 ttccgttaac ggtcacgagt tcgaaatcga aggtgaaggt gaaggtcgtc cgtacgaagg 44 | 181 tacccagacc gctaaactga aagttaccaa aggtggtccg ctgccgttcg cttgggacat 45 | 241 cctgtccccg cagttccagt acggttccaa agcttacgtt aaacacccgg ctgacatccc 46 | 301 ggactacctg 
aaactgtcct tcccggaagg tttcaaatgg gaacgtgtta tgaacttcga 47 | 361 agacggtggt gttgttaccg ttacccagga ctcctccctg caagacggtg agttcatcta 48 | 421 caaagttaaa ctgcgtggta ccaacttccc gtccgacggt ccggttatgc agaaaaaaac 49 | 481 catgggttgg gaagcttcca ccgaacgtat gtacccggaa gacggtgctc tgaaaggtga 50 | 541 aatcaaaatg cgtctgaaac tgaaagacgg tggtcactac gacgctgaag ttaaaaccac 51 | 601 ctacatggct aaaaaaccgg ttcagctgcc gggtgcttac aaaaccgaca tcaaactgga 52 | 661 catcacctcc cacaacgaag actacaccat cgttgaacag tacgaacgtg ctgaaggtcg 53 | 721 tcactccacc ggtgcttaat aacgctgata gtgctagtgt agatcgctac tagagccagg 54 | 781 catcaaataa aacgaaaggc tcagtcgaaa gactgggcct ttcgttttat ctgttgtttg 55 | 841 tcggtgaacg ctctctacta gagtcacact ggctcacctt cgggtgggcc tttctgcgtt 56 | 901 tatatactag aagcggccgg gatcctaact cgagtaagga tctccaggca tcaaataaaa 57 | 961 cgaaaggctc agtcgaaaga ctgggccttt cgttttatct gttgtttgtc ggtgaacgct 58 | 1021 ctctactaga gtcacactgg ctcaccttcg ggtgggcctt tctgcgttta tacctagggt 59 | 1081 acgggttttg ctgcccgcaa acgggctgtt ctggtgttgc tagtttgtta tcagaatcgc 60 | 1141 agatccggct tcagccggtt tgccggctga aagcgctatt tcttccagaa ttgccatgat 61 | 1201 tttttcccca cgggaggcgt cactggctcc cgtgttgtcg gcagctttga ttcgataagc 62 | 1261 agcatcgcct gtttcaggct gtctatgtgt gactgttgag ctgtaacaag ttgtctcagg 63 | 1321 tgttcaattt catgttctag ttgctttgtt ttactggttt cacctgttct attaggtgtt 64 | 1381 acatgctgtt catctgttac attgtcgatc tgttcatggt gaacagcttt gaatgcacca 65 | 1441 aaaactcgta aaagctctga tgtatctatc ttttttacac cgttttcatc tgtgcatatg 66 | 1501 gacagttttc cctttgatat gtaacggtga acagttgttc tacttttgtt tgttagtctt 67 | 1561 gatgcttcac tgatagatac aagagccata agaacctcag atccttccgt atttagccag 68 | 1621 tatgttctct agtgtggttc gttgtttttg cgtgagccat gagaacgaac cattgagatc 69 | 1681 atacttactt tgcatgtcac tcaaaaattt tgcctcaaaa ctggtgagct gaatttttgc 70 | 1741 agttaaagca tcgtgtagtg tttttcttag tccgttatgt aggtaggaat ctgatgtaat 71 | 1801 ggttgttggt attttgtcac cattcatttt tatctggttg ttctcaagtt cggttacgag 72 | 1861 atccatttgt ctatctagtt caacttggaa aatcaacgta 
tcagtcgggc ggcctcgctt 73 | 1921 atcaaccacc aatttcatat tgctgtaagt gtttaaatct ttacttattg gtttcaaaac 74 | 1981 ccattggtta agccttttaa actcatggta gttattttca agcattaaca tgaacttaaa 75 | 2041 ttcatcaagg ctaatctcta tatttgcctt gtgagttttc ttttgtgtta gttcttttaa 76 | 2101 taaccactca taaatcctca tagagtattt gttttcaaaa gacttaacat gttccagatt 77 | 2161 atattttatg aattttttta actggaaaag ataaggcaat atctcttcac taaaaactaa 78 | 2221 ttctaatttt tcgcttgaga acttggcata gtttgtccac tggaaaatct caaagccttt 79 | 2281 aaccaaagga ttcctgattt ccacagttct cgtcatcagc tctctggttg ctttagctaa 80 | 2341 tacaccataa gcattttccc tactgatgtt catcatctga gcgtattggt tataagtgaa 81 | 2401 cgataccgtc cgttctttcc ttgtagggtt ttcaatcgtg gggttgagta gtgccacaca 82 | 2461 gcataaaatt agcttggttt catgctccgt taagtcatag cgactaatcg ctagttcatt 83 | 2521 tgctttgaaa acaactaatt cagacataca tctcaattgg tctaggtgat tttaatcact 84 | 2581 ataccaattg agatgggcta gtcaatgata attactagtc cttttcccgg gtgatctggg 85 | 2641 tatctgtaaa ttctgctaga cctttgctgg aaaacttgta aattctgcta gaccctctgt 86 | 2701 aaattccgct agacctttgt gtgttttttt tgtttatatt caagtggtta taatttatag 87 | 2761 aataaagaaa gaataaaaaa agataaaaag aatagatccc agccctgtgt ataactcact 88 | 2821 actttagtca gttccgcagt attacaaaag gatgtcgcaa acgctgtttg ctcctctaca 89 | 2881 aaacagacct taaaacccta aaggcttaag tagcaccctc gcaagctcgg gcaaatcgct 90 | 2941 gaatattcct tttgtctccg accatcaggc acctgagtcg ctgtcttttt cgtgacattc 91 | 3001 agttcgctgc gctcacggct ctggcagtga atgggggtaa atggcactac aggcgccttt 92 | 3061 tatggattca tgcaaggaaa ctacccataa tacaagaaaa gcccgtcacg ggcttctcag 93 | 3121 ggcgttttat ggcgggtctg ctatgtggtg ctatctgact ttttgctgtt cagcagttcc 94 | 3181 tgccctctga ttttccagtc tgaccacttc ggattatccc gtgacaggtc attcagactg 95 | 3241 gctaatgcac ccagtaaggc agcggtatca tcaacaggct tacccgtctt actgtcccta 96 | 3301 gtgcttggat tctcaccaat aaaaaacgcc cggcggcaac cgagcgttct gaacaaatcc 97 | 3361 agatggagtt ctgaggtcat tactggatct atcaacagga gtccaagcga gctcgatatc 98 | 3421 aaattacgcc ccgccctgcc actcatcgca gtactgttgt aattcattaa gcattctgcc 
99 | 3481 gacatggaag ccatcacaaa cggcatgatg aacctgaatc gccagcggca tcagcacctt 100 | 3541 gtcgccttgc gtataatatt tgcccatggt gaaaacgggg gcgaagaagt tgtccatatt 101 | 3601 ggccacgttt aaatcaaaac tggtgaaact cacccaggga ttggctgaga cgaaaaacat 102 | 3661 attctcaata aaccctttag ggaaataggc caggttttca ccgtaacacg ccacatcttg 103 | 3721 cgaatatatg tgtagaaact gccggaaatc gtcgtggtat tcactccaga gcgatgaaaa 104 | 3781 cgtttcagtt tgctcatgga aaacggtgta acaagggtga acactatccc atatcaccag 105 | 3841 ctcaccgtct ttcattgcca tacgaaattc cggatgagca ttcatcaggc gggcaagaat 106 | 3901 gtgaataaag gccggataaa acttgtgctt atttttcttt acggtcttta aaaaggccgt 107 | 3961 aatatccagc tgaacggtct ggttataggt acattgagca actgactgaa atgcctcaaa 108 | 4021 atgttcttta cgatgccatt gggatatatc aacggtggta tatccagtga tttttttctc 109 | 4081 cattttagct tccttagctc ctgaaaatct cgataactca aaaaatacgc ccggtagtga 110 | 4141 tcttatttca ttatggtgaa agttggaacc tcttacgtgc cgatcaacgt ctcattttcg 111 | 4201 ccagatatcg aattcatgag atct 112 | // 113 | -------------------------------------------------------------------------------- /src/examples/genbank/sequence.gp: -------------------------------------------------------------------------------- 1 | LOCUS AEV91138 225 aa linear SYN 02-FEB-2012 2 | DEFINITION Rfp (plasmid) [synthetic construct]. 3 | ACCESSION AEV91138 4 | VERSION AEV91138.1 GI:359950697 5 | DBSOURCE accession GQ495894.1 6 | KEYWORDS . 7 | SOURCE synthetic construct 8 | ORGANISM synthetic construct 9 | other sequences; artificial sequences. 10 | REFERENCE 1 (residues 1 to 225) 11 | AUTHORS Krol,J.E., Penrod,J.T., McCaslin,H., Rogers,L.M., Yano,H., 12 | Stancik,A.D., Dejonghe,W., Brown,C.J., Parales,R.E., Wuertz,S. and 13 | Top,E.M. 14 | TITLE Role of IncP-1beta Plasmids pWDL7::rfp and pNB8c in Chloroaniline 15 | Catabolism as Determined by Genomic and Functional Analyses 16 | JOURNAL Appl. Environ. Microbiol. 
78 (3), 828-838 (2012) 17 | PUBMED 22101050 18 | REFERENCE 2 (residues 1 to 225) 19 | AUTHORS Krol,J.E., Penrod,J.T., McCaslin,H., Rogers,L.M., Yano,H., 20 | Dejonghe,W., Brown,C.J., Parales,R.E., Wuertz,S. and Top,E.M. 21 | TITLE Direct Submission 22 | JOURNAL Submitted (18-AUG-2009) Department of Biological Sciences, 23 | University of Idaho, PO Box 443051, Moscow, ID 83844-3051, USA 24 | COMMENT Method: conceptual translation. 25 | FEATURES Location/Qualifiers 26 | source 1..225 27 | /organism="synthetic construct" 28 | /db_xref="taxon:32630" 29 | /plasmid="pWDL7::rfp" 30 | /note="natural plasmid was isolated from Comamonas 31 | testosteroni WDL7 and marked with a mini-transposon 32 | containing rfp and kanamycin resistance genes; Form A; 33 | Form B differs in orientation of the backbone genes caused 34 | by recombination between two identical Tn6063 sequences; 35 | both forms are present in the cell in an equal ratio" 36 | Protein 1..225 37 | /product="Rfp" 38 | /name="red fluorescent protein" 39 | Region 6..222 40 | /region_name="GFP" 41 | /note="Green fluorescent protein; pfam01353" 42 | /db_xref="CDD:250555" 43 | CDS 1..225 44 | /gene="rfp" 45 | /coded_by="complement(GQ495894.1:82628..83305)" 46 | /transl_table=11 47 | ORIGIN 48 | 1 mrssknvike fmrfkvrmeg tvnghefeie gegegrpyeg hntvklkvtk ggplpfawdi 49 | 61 lspqfqygsk vyvkhpadip dykklsfpeg fkwervmnfe dggvvtvtqd sslqdgcfiy 50 | 121 kvkfigvnfp sdgpvmqkkt mgweasterl yprdgvlkge ihkalklkdg ghylvefksi 51 | 181 ymakkpvqlp gyyyvdskld itshnedyti veqyertegr hhlfl 52 | // 53 | 54 | -------------------------------------------------------------------------------- /src/examples/genbank/testGenbankFile.2.gb: -------------------------------------------------------------------------------- 1 | LOCUS pj5_00001 5299 bp DNA circular 11-JUL-2015 2 | DEFINITION promoter seq from pBAD33. 3 | ACCESSION unknown 4 | KEYWORDS ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; . 
5 | FEATURES Location/Qualifiers 6 | CDS complement(7..884) 7 | /label="araC" 8 | primer_bind 500 9 | /label="primer bind (test)" 10 | protein_bind 914..930 11 | /label="operator O2" 12 | promoter complement(1036..1063) 13 | /label="araC promoter" 14 | ORIGIN 15 | 1 gacgtcttat gacaacttga cggctacatc attcactttt tcttcacaac cggcacggaa 16 | 61 ctcgctcggg ctggccccgg tgcatttttt aaatacccgc gagaaataga gttgatcgtc 17 | 121 aaaaccaaca ttgcgaccga cggtggcgat aggcatccgg gtggtgctca aaagcagctt 18 | 181 cgcctggctg atacgttggt cctcgcgcca gcttaagacg ctaatcccta actgctggcg 19 | 241 gaaaagatgt gacagacgcg acggcgacaa gcaaacatgc tgtgcgacgc tggcgatatc 20 | 301 aaaattgctg tctgccaggt gatcgctgat gtactgacaa gcctcgcgta cccgattatc 21 | 361 catcggtgga tggagcgact cgttaatcgc ttccatgcgc cgcagtaaca attgctcaag 22 | 421 cagatttatc gccagcagct ccgaatagcg cccttcccct tgcccggcgt taatgatttg 23 | 481 cccaaacagg tcgctgaaat gcggctggtg cgcttcatcc gggcgaaaga accccgtatt 24 | 541 ggcaaatatt gacggccagt taagccattc atgccagtag gcgcgcggac gaaagtaaac 25 | 601 ccactggtga taccattcgc gagcctccgg atgacgaccg tagtgatgaa tctctcctgg 26 | 661 cgggaacagc aaaatatcac ccggtcggca aacaaattct cgtccctgat ttttcaccac 27 | 721 cccctgaccg cgaatggtga gattgagaat ataacctttc attcccagcg gtcggtcgat 28 | 781 aaaaaaatcg agataaccgt tggcctcaat cggcgttaaa cccgccacca gatgggcatt 29 | 841 aaacgagtat cccggcagca ggggatcatt ttgcgcttca gccatacttt tcatactccc 30 | 901 gccattcaga gaagaaacca attgtccata ttgcatcaga cattgccgtc actgcgtctt 31 | 961 ttactggctc ttctcgctaa ccaaaccggt aaccccgctt attaaaagca ttctgtaaca 32 | 1021 aagcgggacc aaagccatga caaaaacgcg taacaaaagt gtctataatc acggcagaaa 33 | 1081 agtccacatt gattatttgc acggcgtcac actttgctat gccatagcat ttttatccat 34 | 1141 aagattagcg gattctacct gacgcttttt atcgcaactc tctactgttt ctccataccc 35 | 1201 gtttttttgg gaatttttaa gaaggagata tacatatgag taaaggagaa gaacttttca 36 | 1261 ctggagttgt cccaattctt gttgaattag atggtgatgt taatgggcac aaattttctg 37 | 1321 tcagtggaga gggtgaaggt gatgcaacat acggaaaact tacccttaaa tttatttgca 38 | 1381 
ctactggaaa actacctgtt ccatggccaa cacttgtcac tactttctct tatggtgttc 39 | 1441 aatgcttttc ccgttatccg gatcatatga aacggcatga ctttttcaag agtgccatgc 40 | 1501 ccgaaggtta tgtacaggaa cgcactatat ctttcaaaga tgacgggaac tacaagacgc 41 | 1561 gtgctgaagt caagtttgaa ggtgataccc ttgttaatcg tatcgagtta aaaggtattg 42 | 1621 attttaaaga agatggaaac attctcggac acaaactcga atacaactat aactcacaca 43 | 1681 atgtatacat cacggcagac aaacaaaaga atggaatcaa agctaacttc aaaattcgcc 44 | 1741 acaacattga agatggatct gttcaactag cagaccatta tcaacaaaat actccaattg 45 | 1801 gcgatggccc tgtcctttta ccagacaacc attacctgtc gacacaatct gccctttcga 46 | 1861 aagatcccaa cgaaaagcgt gaccacatgg tccttcttga gtttgtaact gctgctggga 47 | 1921 ttacacatgg catggatgag ctcggcggcg gcggcagcaa ggtctacggc aaggaacagt 48 | 1981 ttttgcggat gcgccagagc atgttccccg atcgctaaat cgagtaagga tctccaggca 49 | 2041 tcaaataaaa cgaaaggctc agtcgaaaga ctgggccttt cgttttatct gttgtttgtc 50 | 2101 ggtgaacgct ctctactaga gtcacactgg ctcaccttcg ggtgggcctt tctgcgttta 51 | 2161 tacctagggt acgggttttg ctgcccgcaa acgggctgtt ctggtgttgc tagtttgtta 52 | 2221 tcagaatcgc agatccggct tcagccggtt tgccggctga aagcgctatt tcttccagaa 53 | 2281 ttgccatgat tttttcccca cgggaggcgt cactggctcc cgtgttgtcg gcagctttga 54 | 2341 ttcgataagc agcatcgcct gtttcaggct gtctatgtgt gactgttgag ctgtaacaag 55 | 2401 ttgtctcagg tgttcaattt catgttctag ttgctttgtt ttactggttt cacctgttct 56 | 2461 attaggtgtt acatgctgtt catctgttac attgtcgatc tgttcatggt gaacagcttt 57 | 2521 gaatgcacca aaaactcgta aaagctctga tgtatctatc ttttttacac cgttttcatc 58 | 2581 tgtgcatatg gacagttttc cctttgatat gtaacggtga acagttgttc tacttttgtt 59 | 2641 tgttagtctt gatgcttcac tgatagatac aagagccata agaacctcag atccttccgt 60 | 2701 atttagccag tatgttctct agtgtggttc gttgtttttg cgtgagccat gagaacgaac 61 | 2761 cattgagatc atacttactt tgcatgtcac tcaaaaattt tgcctcaaaa ctggtgagct 62 | 2821 gaatttttgc agttaaagca tcgtgtagtg tttttcttag tccgttatgt aggtaggaat 63 | 2881 ctgatgtaat ggttgttggt attttgtcac cattcatttt tatctggttg ttctcaagtt 64 | 2941 cggttacgag atccatttgt 
ctatctagtt caacttggaa aatcaacgta tcagtcgggc 65 | 3001 ggcctcgctt atcaaccacc aatttcatat tgctgtaagt gtttaaatct ttacttattg 66 | 3061 gtttcaaaac ccattggtta agccttttaa actcatggta gttattttca agcattaaca 67 | 3121 tgaacttaaa ttcatcaagg ctaatctcta tatttgcctt gtgagttttc ttttgtgtta 68 | 3181 gttcttttaa taaccactca taaatcctca tagagtattt gttttcaaaa gacttaacat 69 | 3241 gttccagatt atattttatg aattttttta actggaaaag ataaggcaat atctcttcac 70 | 3301 taaaaactaa ttctaatttt tcgcttgaga acttggcata gtttgtccac tggaaaatct 71 | 3361 caaagccttt aaccaaagga ttcctgattt ccacagttct cgtcatcagc tctctggttg 72 | 3421 ctttagctaa tacaccataa gcattttccc tactgatgtt catcatctga gcgtattggt 73 | 3481 tataagtgaa cgataccgtc cgttctttcc ttgtagggtt ttcaatcgtg gggttgagta 74 | 3541 gtgccacaca gcataaaatt agcttggttt catgctccgt taagtcatag cgactaatcg 75 | 3601 ctagttcatt tgctttgaaa acaactaatt cagacataca tctcaattgg tctaggtgat 76 | 3661 tttaatcact ataccaattg agatgggcta gtcaatgata attactagtc cttttcccgg 77 | 3721 gtgatctggg tatctgtaaa ttctgctaga cctttgctgg aaaacttgta aattctgcta 78 | 3781 gaccctctgt aaattccgct agacctttgt gtgttttttt tgtttatatt caagtggtta 79 | 3841 taatttatag aataaagaaa gaataaaaaa agataaaaag aatagatccc agccctgtgt 80 | 3901 ataactcact actttagtca gttccgcagt attacaaaag gatgtcgcaa acgctgtttg 81 | 3961 ctcctctaca aaacagacct taaaacccta aaggcttaag tagcaccctc gcaagctcgg 82 | 4021 gcaaatcgct gaatattcct tttgtctccg accatcaggc acctgagtcg ctgtcttttt 83 | 4081 cgtgacattc agttcgctgc gctcacggct ctggcagtga atgggggtaa atggcactac 84 | 4141 aggcgccttt tatggattca tgcaaggaaa ctacccataa tacaagaaaa gcccgtcacg 85 | 4201 ggcttctcag ggcgttttat ggcgggtctg ctatgtggtg ctatctgact ttttgctgtt 86 | 4261 cagcagttcc tgccctctga ttttccagtc tgaccacttc ggattatccc gtgacaggtc 87 | 4321 attcagactg gctaatgcac ccagtaaggc agcggtatca tcaacaggct tacccgtctt 88 | 4381 actgtcccta gtgcttggat tctcaccaat aaaaaacgcc cggcggcaac cgagcgttct 89 | 4441 gaacaaatcc agatggagtt ctgaggtcat tactggatct atcaacagga gtccaagcga 90 | 4501 gctcgatatc aaattacgcc ccgccctgcc actcatcgca 
gtactgttgt aattcattaa 91 | 4561 gcattctgcc gacatggaag ccatcacaaa cggcatgatg aacctgaatc gccagcggca 92 | 4621 tcagcacctt gtcgccttgc gtataatatt tgcccatggt gaaaacgggg gcgaagaagt 93 | 4681 tgtccatatt ggccacgttt aaatcaaaac tggtgaaact cacccaggga ttggctgaga 94 | 4741 cgaaaaacat attctcaata aaccctttag ggaaataggc caggttttca ccgtaacacg 95 | 4801 ccacatcttg cgaatatatg tgtagaaact gccggaaatc gtcgtggtat tcactccaga 96 | 4861 gcgatgaaaa cgtttcagtt tgctcatgga aaacggtgta acaagggtga acactatccc 97 | 4921 atatcaccag ctcaccgtct ttcattgcca tacgaaattc cggatgagca ttcatcaggc 98 | 4981 gggcaagaat gtgaataaag gccggataaa acttgtgctt atttttcttt acggtcttta 99 | 5041 aaaaggccgt aatatccagc tgaacggtct ggttataggt acattgagca actgactgaa 100 | 5101 atgcctcaaa atgttcttta cgatgccatt gggatatatc aacggtggta tatccagtga 101 | 5161 tttttttctc cattttagct tccttagctc ctgaaaatct cgataactca aaaaatacgc 102 | 5221 ccggtagtga tcttatttca ttatggtgaa agttggaacc tcttacgtgc cgatcaacgt 103 | 5281 ctcattttcg ccagatatc 104 | // -------------------------------------------------------------------------------- /src/examples/jbei/pBbE0c-RFP.linear.seq: -------------------------------------------------------------------------------- 1 | <seq:seq xmlns:seq="http://jbei.org/sequence" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://jbei.org/sequence seq.xsd"> 2 | <seq:name>pBbE0c-RFP</seq:name> 3 | <seq:circular>false</seq:circular> 4 | <seq:sequence>cagctagctcagtcctaggtactgtgctagctactagtgaaagaggagaaatactagatggcttcctccgaagacgttatcaaagagttcatgcgtttcaaagttcgtatggaaggttccgttaacggtcacgagttcgaaatcgaaggtgaaggtgaaggtcgtccgtacgaaggtacccagaccgctaaactgaaagttaccaaaggtggtccgctgccgttcgcttgggacatcctgtccccgcagttccagtacggttccaaagcttacgttaaacacccggctgacatcccggactacctgaaactgtccttcccggaaggtttcaaatgggaacgtgttatgaacttcgaagacggtggtgttgttaccgttacccaggactcctccctgcaagacggtgagttcatctacaaagttaaactgcgtggtaccaactt</seq:sequence> 5 | <seq:features> 6 | <seq:feature> 7 | <seq:label>CmR</seq:label> 8 | 
<seq:complement>true</seq:complement> 9 | <seq:type>gene</seq:type> 10 | <seq:location> 11 | <seq:genbankStart>2011</seq:genbankStart> 12 | <seq:end>2670</seq:end> 13 | </seq:location> 14 | <seq:attribute name="gene" quoted="false">CmR</seq:attribute> 15 | <seq:attribute name="note" quoted="false">GENE [ZFP-GG destination LacUV5 p15A CmR]</seq:attribute> 16 | <seq:attribute name="note" quoted="false">[ZFP-GG destination LacUV5 p15A CmR]</seq:attribute> 17 | <seq:attribute name="vntifkey" quoted="false">22</seq:attribute> 18 | <seq:seqHash>a72c2c4a8028bbc263350f91b94173fa147118ac20adba8609a0034dde50951e</seq:seqHash> 19 | </seq:feature> 20 | <seq:feature> 21 | <seq:label>RFP cassette</seq:label> 22 | <seq:complement>false</seq:complement> 23 | <seq:type>gene</seq:type> 24 | <seq:location> 25 | <seq:genbankStart>2812</seq:genbankStart> 26 | <seq:end>915</seq:end> 27 | </seq:location> 28 | <seq:attribute name="gene" quoted="false">RFP cassette</seq:attribute> 29 | <seq:attribute name="note" quoted="false">GENE GENE [pBbE0k-RFP]</seq:attribute> 30 | <seq:attribute name="note" quoted="false">GENE [pBbE0k-RFP]</seq:attribute> 31 | <seq:seqHash>a446e0456108a6eeb366b663b88a6a495022bbfff001131568b1727f9a6c1f55</seq:seqHash> 32 | </seq:feature> 33 | <seq:feature> 34 | <seq:label>colE1 origin</seq:label> 35 | <seq:complement>true</seq:complement> 36 | <seq:type>rep_origin</seq:type> 37 | <seq:location> 38 | <seq:genbankStart>1202</seq:genbankStart> 39 | <seq:end>1884</seq:end> 40 | </seq:location> 41 | <seq:attribute name="gene" quoted="false">ColE1 Origin</seq:attribute> 42 | <seq:attribute name="note" quoted="false">REP_ORIGIN REP_ORIGIN [ColE1]</seq:attribute> 43 | <seq:attribute name="note" quoted="false">REP_ORIGIN [ColE1]</seq:attribute> 44 | <seq:attribute name="vntifkey" quoted="false">33</seq:attribute> 45 | <seq:seqHash>e220c676221ffaf8c2588ea26c928d7e822e4e530ac1787a512e2c1e1302f0d5</seq:seqHash> 46 | </seq:feature> 47 | <seq:feature> 48 | <seq:label>T0</seq:label> 
49 | <seq:complement>false</seq:complement> 50 | <seq:type>terminator</seq:type> 51 | <seq:location> 52 | <seq:genbankStart>1890</seq:genbankStart> 53 | <seq:end>1995</seq:end> 54 | </seq:location> 55 | <seq:attribute name="gene" quoted="false">Terminator</seq:attribute> 56 | <seq:attribute name="note" quoted="false">TERMINATOR TERMINATOR [p15A KanR]</seq:attribute> 57 | <seq:attribute name="note" quoted="false">TERMINATOR [p15A KanR]</seq:attribute> 58 | <seq:attribute name="vntifkey" quoted="false">43</seq:attribute> 59 | <seq:seqHash>b50863e4d0992b22d225efde18e8fc2a545ed248720a09767491724fb57b8cdb</seq:seqHash> 60 | </seq:feature> 61 | </seq:features> 62 | </seq:seq> -------------------------------------------------------------------------------- /src/examples/jbei/pBbE0c-RFP.seq: -------------------------------------------------------------------------------- 1 | <seq:seq xmlns:seq="http://jbei.org/sequence" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://jbei.org/sequence seq.xsd"> 2 | <seq:name>pBbE0c-RFP</seq:name> 3 | <seq:circular>true</seq:circular> 4 | 
<seq:sequence>cagctagctcagtcctaggtactgtgctagctactagtgaaagaggagaaatactagatggcttcctccgaagacgttatcaaagagttcatgcgtttcaaagttcgtatggaaggttccgttaacggtcacgagttcgaaatcgaaggtgaaggtgaaggtcgtccgtacgaaggtacccagaccgctaaactgaaagttaccaaaggtggtccgctgccgttcgcttgggacatcctgtccccgcagttccagtacggttccaaagcttacgttaaacacccggctgacatcccggactacctgaaactgtccttcccggaaggtttcaaatgggaacgtgttatgaacttcgaagacggtggtgttgttaccgttacccaggactcctccctgcaagacggtgagttcatctacaaagttaaactgcgtggtaccaacttcccgtccgacggtccggttatgcagaaaaaaaccatgggttgggaagcttccaccgaacgtatgtacccggaagacggtgctctgaaaggtgaaatcaaaatgcgtctgaaactgaaagacggtggtcactacgacgctgaagttaaaaccacctacatggctaaaaaaccggttcagctgccgggtgcttacaaaaccgacatcaaactggacatcacctcccacaacgaagactacaccatcgttgaacagtacgaacgtgctgaaggtcgtcactccaccggtgcttaataacgctgatagtgctagtgtagatcgctactagagccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttatatactagaagcggccgggatcctaactcgagtaaggatctccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttatacctagggcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccataggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaaggacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgactagtgcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacagg
agtccaagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaacaagggtgaacactatcccatatcaccagctcaccgtctttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgccgatcaacgtctcattttcgccagatatcgaattcatgagatctttga</seq:sequence> 5 | <seq:features> 6 | <seq:feature> 7 | <seq:label>CmR</seq:label> 8 | <seq:complement>true</seq:complement> 9 | <seq:type>gene</seq:type> 10 | <seq:location> 11 | <seq:genbankStart>2011</seq:genbankStart> 12 | <seq:end>2670</seq:end> 13 | </seq:location> 14 | <seq:attribute name="gene" quoted="false">CmR</seq:attribute> 15 | <seq:attribute name="note" quoted="false">GENE [ZFP-GG destination LacUV5 p15A CmR]</seq:attribute> 16 | <seq:attribute name="note" quoted="false">[ZFP-GG destination LacUV5 p15A CmR]</seq:attribute> 17 | <seq:attribute name="vntifkey" quoted="false">22</seq:attribute> 18 | <seq:seqHash>a72c2c4a8028bbc263350f91b94173fa147118ac20adba8609a0034dde50951e</seq:seqHash> 19 | </seq:feature> 20 | <seq:feature> 21 | <seq:label>RFP cassette</seq:label> 22 | <seq:complement>false</seq:complement> 23 | <seq:type>gene</seq:type> 24 | <seq:location> 25 | <seq:genbankStart>2812</seq:genbankStart> 26 | <seq:end>915</seq:end> 27 | </seq:location> 28 | <seq:attribute name="gene" quoted="false">RFP cassette</seq:attribute> 29 | <seq:attribute name="note" quoted="false">GENE GENE [pBbE0k-RFP]</seq:attribute> 30 | <seq:attribute 
name="note" quoted="false">GENE [pBbE0k-RFP]</seq:attribute> 31 | <seq:seqHash>a446e0456108a6eeb366b663b88a6a495022bbfff001131568b1727f9a6c1f55</seq:seqHash> 32 | </seq:feature> 33 | <seq:feature> 34 | <seq:label>colE1 origin</seq:label> 35 | <seq:complement>true</seq:complement> 36 | <seq:type>rep_origin</seq:type> 37 | <seq:location> 38 | <seq:genbankStart>1202</seq:genbankStart> 39 | <seq:end>1884</seq:end> 40 | </seq:location> 41 | <seq:attribute name="gene" quoted="false">ColE1 Origin</seq:attribute> 42 | <seq:attribute name="note" quoted="false">REP_ORIGIN REP_ORIGIN [ColE1]</seq:attribute> 43 | <seq:attribute name="note" quoted="false">REP_ORIGIN [ColE1]</seq:attribute> 44 | <seq:attribute name="vntifkey" quoted="false">33</seq:attribute> 45 | <seq:seqHash>e220c676221ffaf8c2588ea26c928d7e822e4e530ac1787a512e2c1e1302f0d5</seq:seqHash> 46 | </seq:feature> 47 | <seq:feature> 48 | <seq:label>T0</seq:label> 49 | <seq:complement>false</seq:complement> 50 | <seq:type>terminator</seq:type> 51 | <seq:location> 52 | <seq:genbankStart>1890</seq:genbankStart> 53 | <seq:end>1995</seq:end> 54 | </seq:location> 55 | <seq:attribute name="gene" quoted="false">Terminator</seq:attribute> 56 | <seq:attribute name="note" quoted="false">TERMINATOR TERMINATOR [p15A KanR]</seq:attribute> 57 | <seq:attribute name="note" quoted="false">TERMINATOR [p15A KanR]</seq:attribute> 58 | <seq:attribute name="vntifkey" quoted="false">43</seq:attribute> 59 | <seq:seqHash>b50863e4d0992b22d225efde18e8fc2a545ed248720a09767491724fb57b8cdb</seq:seqHash> 60 | </seq:feature> 61 | </seq:features> 62 | </seq:seq> -------------------------------------------------------------------------------- /src/examples/sbol/v1/example.xml: -------------------------------------------------------------------------------- 1 | <?xml version=“1.0”?> 2 | <rdf:RDF 3 | xmlns:rdf=“http://www.w3.org/1999/02/22-rdf-syntax-ns#” 4 | xmlns:s=“http://sbols.org/v1#” 5 | xmlns:so=“http://purl.obolibrary.org/obo/” 6 | 
xmlns:d=“http://sbols.org/data#”> 7 | 8 | <s:DnaComponent rdf:about=“http://sbols.org/data#BBa_T9002”> 9 | <s:displayId>BBa_T9002</s:displayId> 10 | <s:name>T9002</s:name> 11 | <s:description>GFP Producer Controlled by 3OC6HSL Receiver Device</s:description> 12 | <s:dnaSequence> 13 | 14 | <s:DnaSequence rdf:about=“http://sbols.org/data#partseq_5591”> 15 | <s:nucleotides>tcc</s:nucleotides> 16 | </s:DnaSequence> 17 | 18 | </s:dnaSequence> 19 | <s:annotation> 20 | 21 | <s:SequenceAnnotation rdf:about=" http://sbols.org/data#a_1565164"> 22 | <s:bioStart>1</s:bioStart> 23 | <s:bioEnd>19</s:bioEnd> 24 | <s:strand>+</s:strand> 25 | <s:subComponent> 26 | 27 | <s:DnaComponent rdf:about=" http://sbols.org/data#f_1565164"> 28 | <rdf:type rdf:resource="http://purl.obolibrary.org/obo/SO_0000409"/> 29 | <s:displayId>f_1565164</s:displayId> 30 | <s:name>TetR 1</s:name> 31 | </s:DnaComponent> 32 | 33 | </s:subComponent> 34 | </s:SequenceAnnotation> 35 | 36 | </s:annotation> 37 | </s:DnaComponent> 38 | </rdf:RDF> -------------------------------------------------------------------------------- /src/examples/sbol/v1/j5.SBOL.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8" ?> 2 | <rdf:RDF xmlns="http://sbols.org/v1#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:so="http://purl.obolibrary.org/obo/"> 3 | <DnaComponent rdf:about="http://j5.jbei.org/dc#d23e01cc-9028-419a-ac01-50c1e71662d8"> 4 | <displayId>d23e01cc-9028-419a-ac01-50c1e71662d8</displayId> 5 | <name>signal_pep</name> 6 | <dnaSequence> 7 | <DnaSequence rdf:about="http://j5.jbei.org/ds#4b627502-268d-4c1e-a53a-5fc2962eeaef"> 8 | <nucleotides>ggcagcaaggtctacggcaaggaacagtttttgcggatgcgccagagcatgttccccgatcgc</nucleotides> 9 | </DnaSequence> 10 | </dnaSequence> 11 | <annotation> 12 | <SequenceAnnotation rdf:about="http://j5.jbei.org/sa#84ca57d7-1bbe-4399-9563-efb4f6d0e0e1"> 13 | 
<bioStart>1</bioStart> 14 | <bioEnd>63</bioEnd> 15 | <strand>+</strand> 16 | <subComponent> 17 | <DnaComponent rdf:about="http://j5.jbei.org/dc#995ed0f9-6613-4107-8a14-f7837d563f5d"> 18 | <rdf:type rdf:resource="http://purl.obolibrary.org/obo/SO_0000316" /> 19 | <displayId>995ed0f9-6613-4107-8a14-f7837d563f5d</displayId> 20 | <name>signal_peptide</name> 21 | </DnaComponent> 22 | </subComponent> 23 | </SequenceAnnotation> 24 | </annotation> 25 | </DnaComponent> 26 | </rdf:RDF> -------------------------------------------------------------------------------- /src/examples/sbol/v1/pBbE0c-RFP.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8" standalone="yes"?> 2 | <ns2:RDF xmlns="http://sbols.org/v1#" xmlns:ns2="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> 3 | <DnaComponent ns2:about="public-registry.jbei.org/entry/dc#JPUB_000267"> 4 | <displayId>JPUB_000267</displayId> 5 | <name>pBbE0c-RFP</name> 6 | <description>BglBrick vector based on the work of Rachel Krupa w/constitutive RFP cassette</description> 7 | <dnaSequence> 8 | <DnaSequence ns2:about="public-registry.jbei.org/entry/ds#8be1ae049adc3b2e8031951dcc007d75a531d4bb"> 9 | 
<nucleotides>cagctagctcagtcctaggtactgtgctagctactagtgaaagaggagaaatactagatggcttcctccgaagacgttatcaaagagttcatgcgtttcaaagttcgtatggaaggttccgttaacggtcacgagttcgaaatcgaaggtgaaggtgaaggtcgtccgtacgaaggtacccagaccgctaaactgaaagttaccaaaggtggtccgctgccgttcgcttgggacatcctgtccccgcagttccagtacggttccaaagcttacgttaaacacccggctgacatcccggactacctgaaactgtccttcccggaaggtttcaaatgggaacgtgttatgaacttcgaagacggtggtgttgttaccgttacccaggactcctccctgcaagacggtgagttcatctacaaagttaaactgcgtggtaccaacttcccgtccgacggtccggttatgcagaaaaaaaccatgggttgggaagcttccaccgaacgtatgtacccggaagacggtgctctgaaaggtgaaatcaaaatgcgtctgaaactgaaagacggtggtcactacgacgctgaagttaaaaccacctacatggctaaaaaaccggttcagctgccgggtgcttacaaaaccgacatcaaactggacatcacctcccacaacgaagactacaccatcgttgaacagtacgaacgtgctgaaggtcgtcactccaccggtgcttaataacgctgatagtgctagtgtagatcgctactagagccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttatatactagaagcggccgggatcctaactcgagtaaggatctccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttatacctagggcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccataggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaaggacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgactagtgcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacagga
gtccaagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaacaagggtgaacactatcccatatcaccagctcaccgtctttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgccgatcaacgtctcattttcgccagatatcgaattcatgagatctttga</nucleotides> 10 | </DnaSequence> 11 | </dnaSequence> 12 | <annotation> 13 | <SequenceAnnotation ns2:about="public-registry.jbei.org/entry/sa#ae7eb7e3-be41-4a14-a50d-818de29f9378"> 14 | <bioStart>1202</bioStart> 15 | <bioEnd>1884</bioEnd> 16 | <strand>-</strand> 17 | <subComponent> 18 | <DnaComponent ns2:about="public-registry.jbei.org/entry/dc#18302dcf-fae9-40c8-a37a-f0e45dd77a64"> 19 | <ns2:type ns2:resource="http://purl.obolibrary.org/obo/SO_0000296" /> 20 | <displayId>18302dcf-fae9-40c8-a37a-f0e45dd77a64</displayId> 21 | <name>colE1 origin</name> 22 | </DnaComponent> 23 | </subComponent> 24 | </SequenceAnnotation> 25 | </annotation> 26 | <annotation> 27 | <SequenceAnnotation ns2:about="public-registry.jbei.org/entry/sa#47e6a5b6-bcc7-4a25-9a29-07aca67a2eba"> 28 | <bioStart>1890</bioStart> 29 | <bioEnd>1995</bioEnd> 30 | <strand>+</strand> 31 | <subComponent> 32 | <DnaComponent ns2:about="public-registry.jbei.org/entry/dc#63c61f00-ee52-471e-9b74-6d0232cfacce"> 33 | <ns2:type ns2:resource="http://purl.obolibrary.org/obo/SO_0000141" /> 34 | <displayId>63c61f00-ee52-471e-9b74-6d0232cfacce</displayId> 35 | <name>T0</name> 36 | </DnaComponent> 37 | 
</subComponent> 38 | </SequenceAnnotation> 39 | </annotation> 40 | <annotation> 41 | <SequenceAnnotation ns2:about="public-registry.jbei.org/entry/sa#0d708947-f8f4-4667-b8c3-5838b0e37b14"> 42 | <bioStart>2011</bioStart> 43 | <bioEnd>2670</bioEnd> 44 | <strand>-</strand> 45 | <subComponent> 46 | <DnaComponent ns2:about="public-registry.jbei.org/entry/dc#bcc90571-1643-4f5e-b37b-a9cdc12a7830"> 47 | <ns2:type ns2:resource="http://purl.obolibrary.org/obo/SO_0000704" /> 48 | <displayId>bcc90571-1643-4f5e-b37b-a9cdc12a7830</displayId> 49 | <name>CmR</name> 50 | </DnaComponent> 51 | </subComponent> 52 | </SequenceAnnotation> 53 | </annotation> 54 | <annotation> 55 | <SequenceAnnotation ns2:about="public-registry.jbei.org/entry/sa#b0da5ddd-a6d0-4642-99b2-4c330e2de794"> 56 | <strand>+</strand> 57 | <subComponent> 58 | <DnaComponent ns2:about="public-registry.jbei.org/entry/dc#22ceacc2-5844-4a5a-b164-57885ae0b8db"> 59 | <ns2:type ns2:resource="http://purl.obolibrary.org/obo/SO_0000704" /> 60 | <displayId>22ceacc2-5844-4a5a-b164-57885ae0b8db</displayId> 61 | <name>RFP cassette</name> 62 | <dnaSequence> 63 | <DnaSequence ns2:about="public-registry.jbei.org/entry/ds#bc89befb-850e-4f6f-a5d9-14dff3bd75dc"> 64 | 
<nucleotides>ttgacagctagctcagtcctaggtactgtgctagctactagtgaaagaggagaaatactagatggcttcctccgaagacgttatcaaagagttcatgcgtttcaaagttcgtatggaaggttccgttaacggtcacgagttcgaaatcgaaggtgaaggtgaaggtcgtccgtacgaaggtacccagaccgctaaactgaaagttaccaaaggtggtccgctgccgttcgcttgggacatcctgtccccgcagttccagtacggttccaaagcttacgttaaacacccggctgacatcccggactacctgaaactgtccttcccggaaggtttcaaatgggaacgtgttatgaacttcgaagacggtggtgttgttaccgttacccaggactcctccctgcaagacggtgagttcatctacaaagttaaactgcgtggtaccaacttcccgtccgacggtccggttatgcagaaaaaaaccatgggttgggaagcttccaccgaacgtatgtacccggaagacggtgctctgaaaggtgaaatcaaaatgcgtctgaaactgaaagacggtggtcactacgacgctgaagttaaaaccacctacatggctaaaaaaccggttcagctgccgggtgcttacaaaaccgacatcaaactggacatcacctcccacaacgaagactacaccatcgttgaacagtacgaacgtgctgaaggtcgtcactccaccggtgcttaataacgctgatagtgctagtgtagatcgctactagagccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttatatactagaagcggccg</nucleotides> 65 | </DnaSequence> 66 | </dnaSequence> 67 | </DnaComponent> 68 | </subComponent> 69 | </SequenceAnnotation> 70 | </annotation> 71 | </DnaComponent> 72 | </ns2:RDF> 73 | -------------------------------------------------------------------------------- /src/examples/sbol/v1/signal_peptide_SBOL.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8" ?> 2 | <rdf:RDF xmlns="http://sbols.org/v1#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:so="http://purl.obolibrary.org/obo/"> 3 | <DnaComponent rdf:about="http://j5.jbei.org/dc#97A98D05-96EF-489F-B338-0B30B1AB59BC"> 4 | <displayId>signal_pep</displayId> 5 | <dnaSequence> 6 | <DnaSequence rdf:about="http://j5.jbei.org/ds#172ECA24-1557-4870-939E-D7C699B09170"> 7 | <nucleotides>ggcagcaaggtctacggcaaggaacagtttttgcggatgcgccagagcatgttccccgatcgc</nucleotides> 8 | </DnaSequence> 9 | </dnaSequence> 10 | <annotation> 11 | <SequenceAnnotation 
rdf:about="http://j5.jbei.org/sa#7B75111D-08BC-4FB9-AA59-03EDFEFC314E"> 12 | <bioStart>1</bioStart> 13 | <bioEnd>63</bioEnd> 14 | <strand>+</strand> 15 | <subComponent> 16 | <DnaComponent rdf:about="http://j5.jbei.org/dc#43FB4AC0-217C-49D1-9E65-01F5332177F8"> 17 | <rdf:type rdf:resource="http://purl.obolibrary.org/obo/SO_0000316" /> 18 | <displayId>signal_peptide</displayId> 19 | </DnaComponent> 20 | </subComponent> 21 | </SequenceAnnotation> 22 | </annotation> 23 | </DnaComponent> 24 | </rdf:RDF> 25 | -------------------------------------------------------------------------------- /src/examples/sbol/v2/A1.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:sbol="http://sbols.org/v2#" xmlns:xsd="http://www.w3.org/2001/XMLSchema#dateTime/" xmlns:synbiohub="http://synbiohub.org#" xmlns:sbh="http://wiki.synbiohub.org/wiki/Terms/synbiohub#" xmlns:sybio="http://www.sybio.ncl.ac.uk#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:ncbi="http://www.ncbi.nlm.nih.gov#" xmlns:igem="http://wiki.synbiohub.org/wiki/Terms/igem#" xmlns:genbank="http://www.ncbi.nlm.nih.gov/genbank#" xmlns:gbconv="http://sbols.org/genBankConversion#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:obo="http://purl.obolibrary.org/obo/"> 3 | <sbol:ComponentDefinition rdf:about="https://synbiohub.cidarlab.org/public/Demo/A1/1"> 4 | <sbol:persistentIdentity rdf:resource="https://synbiohub.cidarlab.org/public/Demo/A1"/> 5 | <sbol:displayId>A1</sbol:displayId> 6 | <sbol:version>1</sbol:version> 7 | <prov:wasDerivedFrom rdf:resource="https://github.com/CIDARLAB/cello/blob/master/resources/UCF/Eco1C1G1T0.UCF.json"/> 8 | <prov:wasGeneratedBy rdf:resource="https://synbiohub.cidarlab.org/public/Demo/cello2sbol/1"/> 9 | <dcterms:title>A1</dcterms:title> 10 | 
<dcterms:created>2016-04-01T04:00:00.000Z</dcterms:created> 11 | <sbh:ownedBy rdf:resource="https://synbiohub.cidarlab.org/user/prash"/> 12 | <sbh:topLevel rdf:resource="https://synbiohub.cidarlab.org/public/Demo/A1/1"/> 13 | <sbol:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#DnaRegion"/> 14 | <sbol:role rdf:resource="http://identifiers.org/so/SO:0000139"/> 15 | <sbol:sequence rdf:resource="https://synbiohub.cidarlab.org/public/Demo/A1_sequence/1"/> 16 | </sbol:ComponentDefinition> 17 | <sbol:Sequence rdf:about="https://synbiohub.cidarlab.org/public/Demo/A1_sequence/1"> 18 | <sbol:persistentIdentity rdf:resource="https://synbiohub.cidarlab.org/public/Demo/A1_sequence"/> 19 | <sbol:displayId>A1_sequence</sbol:displayId> 20 | <sbol:version>1</sbol:version> 21 | <prov:wasDerivedFrom rdf:resource="https://github.com/CIDARLAB/cello/blob/master/resources/UCF/Eco1C1G1T0.UCF.json"/> 22 | <prov:wasGeneratedBy rdf:resource="https://synbiohub.cidarlab.org/public/Demo/cello2sbol/1"/> 23 | <dcterms:title>A1_sequence</dcterms:title> 24 | <sbh:ownedBy rdf:resource="https://synbiohub.cidarlab.org/user/prash"/> 25 | <sbh:topLevel rdf:resource="https://synbiohub.cidarlab.org/public/Demo/A1_sequence/1"/> 26 | <sbol:elements>AATGTTCCCTAATAATCAGCAAAGAGGTTACTAG</sbol:elements> 27 | <sbol:encoding rdf:resource="http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html"/> 28 | </sbol:Sequence> 29 | </rdf:RDF> -------------------------------------------------------------------------------- /src/examples/sbol/v2/BBa_I0462_orig.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" ?> 2 | <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:sbol="http://sbols.org/v2#" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:prov="http://www.w3.org/ns/prov#"> 3 | <sbol:ComponentDefinition rdf:about="http://partsregistry.org/Part:BBa_B0034"> 4 | <sbol:persistentIdentity 
rdf:resource="http://partsregistry.org/Part:BBa_B0034"/> 5 | <sbol:displayId>BBa_B0034</sbol:displayId> 6 | <dcterms:title>B0034</dcterms:title> 7 | <sbol:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#DnaRegion"/> 8 | <sbol:type rdf:resource="http://identifiers.org/so/SO:0000987"/> 9 | <sbol:role rdf:resource="http://identifiers.org/so/SO:0000139"/> 10 | </sbol:ComponentDefinition> 11 | <sbol:ComponentDefinition rdf:about="http://partsregistry.org/Part:BBa_C0062"> 12 | <sbol:persistentIdentity rdf:resource="http://partsregistry.org/Part:BBa_C0062"/> 13 | <sbol:displayId>BBa_C0062</sbol:displayId> 14 | <dcterms:title>luxR</dcterms:title> 15 | <sbol:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#DnaRegion"/> 16 | <sbol:type rdf:resource="http://identifiers.org/so/SO:0000987"/> 17 | <sbol:role rdf:resource="http://identifiers.org/so/SO:0000316"/> 18 | </sbol:ComponentDefinition> 19 | <sbol:ComponentDefinition rdf:about="http://partsregistry.org/Part:BBa_B0015"> 20 | <sbol:persistentIdentity rdf:resource="http://partsregistry.org/Part:BBa_B0015"/> 21 | <sbol:displayId>BBa_B0015</sbol:displayId> 22 | <dcterms:title>B0015</dcterms:title> 23 | <sbol:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#DnaRegion"/> 24 | <sbol:type rdf:resource="http://identifiers.org/so/SO:0000987"/> 25 | <sbol:role rdf:resource="http://identifiers.org/so/SO:0000141"/> 26 | </sbol:ComponentDefinition> 27 | <sbol:ComponentDefinition rdf:about="http://partsregistry.org/Part:BBa_I0462"> 28 | <sbol:persistentIdentity rdf:resource="http://partsregistry.org/Part:BBa_I0462"/> 29 | <sbol:displayId>BBa_I0462</sbol:displayId> 30 | <dcterms:title>I0462</dcterms:title> 31 | <dcterms:description>LuxR protein generator</dcterms:description> 32 | <sbol:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#DnaRegion"/> 33 | <sbol:type rdf:resource="http://identifiers.org/so/SO:0000987"/> 34 | <sbol:role 
rdf:resource="http://identifiers.org/so/SO:0000804"/> 35 | <sbol:component> 36 | <sbol:Component rdf:about="http://partsregistry.org/Part:BBa_I0462/component2"> 37 | <sbol:persistentIdentity rdf:resource="http://partsregistry.org/Part:BBa_I0462/component2"/> 38 | <sbol:displayId>component2</sbol:displayId> 39 | <sbol:access rdf:resource="http://sbols.org/v2#public"/> 40 | <sbol:definition rdf:resource="http://partsregistry.org/Part:BBa_B0015"/> 41 | </sbol:Component> 42 | </sbol:component> 43 | <sbol:component> 44 | <sbol:Component rdf:about="http://partsregistry.org/Part:BBa_I0462/component1"> 45 | <sbol:persistentIdentity rdf:resource="http://partsregistry.org/Part:BBa_I0462/component1"/> 46 | <sbol:displayId>component1</sbol:displayId> 47 | <sbol:access rdf:resource="http://sbols.org/v2#public"/> 48 | <sbol:definition rdf:resource="http://partsregistry.org/Part:BBa_C0062"/> 49 | </sbol:Component> 50 | </sbol:component> 51 | <sbol:component> 52 | <sbol:Component rdf:about="http://partsregistry.org/Part:BBa_I0462/component0"> 53 | <sbol:persistentIdentity rdf:resource="http://partsregistry.org/Part:BBa_I0462/component0"/> 54 | <sbol:displayId>component0</sbol:displayId> 55 | <sbol:access rdf:resource="http://sbols.org/v2#public"/> 56 | <sbol:definition rdf:resource="http://partsregistry.org/Part:BBa_B0034"/> 57 | </sbol:Component> 58 | </sbol:component> 59 | <sbol:sequenceAnnotation> 60 | <sbol:SequenceAnnotation rdf:about="http://sbols.org/anot#3456789"> 61 | <sbol:persistentIdentity rdf:resource="http://sbols.org/anot#3456789"/> 62 | <sbol:displayId>annotation3</sbol:displayId> 63 | <sbol:location> 64 | <sbol:Range rdf:about="http://sbols.org/anot#3456789/range"> 65 | <sbol:persistentIdentity rdf:resource="http://sbols.org/anot#3456789/range"/> 66 | <sbol:displayId>range</sbol:displayId> 67 | <sbol:start>808</sbol:start> 68 | <sbol:end>936</sbol:end> 69 | <sbol:orientation rdf:resource="http://sbols.org/v2#inline"/> 70 | </sbol:Range> 71 | </sbol:location> 72 | 
<sbol:component rdf:resource="http://partsregistry.org/Part:BBa_I0462/component2"/> 73 | </sbol:SequenceAnnotation> 74 | </sbol:sequenceAnnotation> 75 | <sbol:sequenceAnnotation> 76 | <sbol:SequenceAnnotation rdf:about="http://sbols.org/anot#1234567"> 77 | <sbol:persistentIdentity rdf:resource="http://sbols.org/anot#1234567"/> 78 | <sbol:displayId>annotation1</sbol:displayId> 79 | <sbol:location> 80 | <sbol:Range rdf:about="http://sbols.org/anot#1234567/range"> 81 | <sbol:persistentIdentity rdf:resource="http://sbols.org/anot#1234567/range"/> 82 | <sbol:displayId>range</sbol:displayId> 83 | <sbol:start>1</sbol:start> 84 | <sbol:end>12</sbol:end> 85 | <sbol:orientation rdf:resource="http://sbols.org/v2#inline"/> 86 | </sbol:Range> 87 | </sbol:location> 88 | <sbol:component rdf:resource="http://partsregistry.org/Part:BBa_I0462/component0"/> 89 | </sbol:SequenceAnnotation> 90 | </sbol:sequenceAnnotation> 91 | <sbol:sequenceAnnotation> 92 | <sbol:SequenceAnnotation rdf:about="http://sbols.org/anot#2345678"> 93 | <sbol:persistentIdentity rdf:resource="http://sbols.org/anot#2345678"/> 94 | <sbol:displayId>annotation2</sbol:displayId> 95 | <sbol:location> 96 | <sbol:Range rdf:about="http://sbols.org/anot#2345678/range"> 97 | <sbol:persistentIdentity rdf:resource="http://sbols.org/anot#2345678/range"/> 98 | <sbol:displayId>range</sbol:displayId> 99 | <sbol:start>19</sbol:start> 100 | <sbol:end>774</sbol:end> 101 | <sbol:orientation rdf:resource="http://sbols.org/v2#inline"/> 102 | </sbol:Range> 103 | </sbol:location> 104 | <sbol:component rdf:resource="http://partsregistry.org/Part:BBa_I0462/component1"/> 105 | </sbol:SequenceAnnotation> 106 | </sbol:sequenceAnnotation> 107 | <sbol:sequence rdf:resource="http://sbols.org/seq#d23749adb3a7e0e2f09168cb7267a6113b238973"/> 108 | </sbol:ComponentDefinition> 109 | <sbol:Sequence rdf:about="http://sbols.org/seq#d23749adb3a7e0e2f09168cb7267a6113b238973"> 110 | <sbol:persistentIdentity 
rdf:resource="http://sbols.org/seq#d23749adb3a7e0e2f09168cb7267a6113b238973"/> 111 | <sbol:elements>aaagaggagaaatactagatgaaaaacataaatgccgacgacacatacagaataattaataaaattaaagcttgtagaagcaataatgatattaatcaatgcttatctgatatgactaaaatggtacattgtgaatattatttactcgcgatcatttatcctcattctatggttaaatctgatatttcaatcctagataattaccctaaaaaatggaggcaatattatgatgacgctaatttaataaaatatgatcctatagtagattattctaactccaatcattcaccaattaattggaatatatttgaaaacaatgctgtaaataaaaaatctccaaatgtaattaaagaagcgaaaacatcaggtcttatcactgggtttagtttccctattcatacggctaacaatggcttcggaatgcttagttttgcacattcagaaaaagacaactatatagatagtttatttttacatgcgtgtatgaacataccattaattgttccttctctagttgataattatcgaaaaataaatatagcaaataataaatcaaacaacgatttaaccaaaagagaaaaagaatgtttagcgtgggcatgcgaaggaaaaagctcttgggatatttcaaaaatattaggttgcagtgagcgtactgtcactttccatttaaccaatgcgcaaatgaaactcaatacaacaaaccgctgccaaagtatttctaaagcaattttaacaggagcaattgattgcccatactttaaaaattaataacactgatagtgctagtgtagatcactactagagccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttata</sbol:elements> 112 | <sbol:encoding rdf:resource="http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html"/> 113 | </sbol:Sequence> 114 | </rdf:RDF> 115 | -------------------------------------------------------------------------------- /src/examples/sbol/v2/CreateAndRemoveModel.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" ?> 2 | <rdf:RDF xmlns:grn="urn:bbn.com:tasbe:grn/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:sbol="http://sbols.org/v2#" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:annot="http://myannotation.org/" xmlns:prov="http://www.w3.org/ns/prov#"> 3 | <sbol:Collection rdf:about="http://www.async.ece.utah.edu/someSequence1/1.0"> 4 | <sbol:persistentIdentity rdf:resource="http://www.async.ece.utah.edu/someSequence1"/> 5 | <sbol:displayId>someSequence1</sbol:displayId> 6 | <sbol:version>1.0</sbol:version> 7 | </sbol:Collection> 8 | <sbol:Collection 
rdf:about="http://www.async.ece.utah.edu/someSequence2/1.0"> 9 | <sbol:persistentIdentity rdf:resource="http://www.async.ece.utah.edu/someSequence2"/> 10 | <sbol:displayId>someSequence2</sbol:displayId> 11 | <sbol:version>1.0</sbol:version> 12 | </sbol:Collection> 13 | <sbol:Collection rdf:about="http://www.async.ece.utah.edu/someSequence3/1.0"> 14 | <sbol:persistentIdentity rdf:resource="http://www.async.ece.utah.edu/someSequence3"/> 15 | <sbol:displayId>someSequence3</sbol:displayId> 16 | <sbol:version>1.0</sbol:version> 17 | </sbol:Collection> 18 | <sbol:Sequence rdf:about="http://www.async.ece.utah.edu/someSequence/1.0"> 19 | <sbol:persistentIdentity rdf:resource="http://www.async.ece.utah.edu/someSequence"/> 20 | <sbol:displayId>someSequence</sbol:displayId> 21 | <sbol:version>1.0</sbol:version> 22 | <sbol:elements>ACGTURYSWKMBDHVN-.</sbol:elements> 23 | <sbol:encoding rdf:resource="http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html"/> 24 | </sbol:Sequence> 25 | </rdf:RDF> 26 | -------------------------------------------------------------------------------- /src/examples/sbol/v2/CutExample.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" ?> 2 | <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:sbol="http://sbols.org/v2#" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:prov="http://www.w3.org/ns/prov#"> 3 | <sbol:ComponentDefinition rdf:about="http://partsregistry.org/cd/BBa_J23119"> 4 | <sbol:persistentIdentity rdf:resource="http://partsregistry.org/cd/BBa_J23119"/> 5 | <sbol:displayId>BBa_J23119</sbol:displayId> 6 | <prov:wasDerivedFrom rdf:resource="http://partsregistry.org/Part:BBa_J23119"/> 7 | <dcterms:title>J23119 promoter</dcterms:title> 8 | <dcterms:description>Constitutive promoter</dcterms:description> 9 | <sbol:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#DnaRegion"/> 10 | <sbol:role rdf:resource="http://identifiers.org/so/SO:0000167"/> 11 | 
<sbol:role rdf:resource="http://identifiers.org/so/SO:0000613"/> 12 | <sbol:sequenceAnnotation> 13 | <sbol:SequenceAnnotation rdf:about="http://partsregistry.org/cd/BBa_J23119/cutat12"> 14 | <sbol:persistentIdentity rdf:resource="http://partsregistry.org/cd/BBa_J23119/cutat12"/> 15 | <sbol:displayId>cutat12</sbol:displayId> 16 | <sbol:location> 17 | <sbol:Cut rdf:about="http://partsregistry.org/cd/BBa_J23119/cutat12/cut"> 18 | <sbol:persistentIdentity rdf:resource="http://partsregistry.org/cd/BBa_J23119/cutat12/cut"/> 19 | <sbol:displayId>cut</sbol:displayId> 20 | <sbol:at>12</sbol:at> 21 | <sbol:orientation rdf:resource="http://sbols.org/v2#inline"/> 22 | </sbol:Cut> 23 | </sbol:location> 24 | </sbol:SequenceAnnotation> 25 | </sbol:sequenceAnnotation> 26 | <sbol:sequenceAnnotation> 27 | <sbol:SequenceAnnotation rdf:about="http://partsregistry.org/cd/BBa_J23119/cutat10"> 28 | <sbol:persistentIdentity rdf:resource="http://partsregistry.org/cd/BBa_J23119/cutat10"/> 29 | <sbol:displayId>cutat10</sbol:displayId> 30 | <sbol:location> 31 | <sbol:Cut rdf:about="http://partsregistry.org/cd/BBa_J23119/cutat10/cut"> 32 | <sbol:persistentIdentity rdf:resource="http://partsregistry.org/cd/BBa_J23119/cutat10/cut"/> 33 | <sbol:displayId>cut</sbol:displayId> 34 | <sbol:at>10</sbol:at> 35 | <sbol:orientation rdf:resource="http://sbols.org/v2#inline"/> 36 | </sbol:Cut> 37 | </sbol:location> 38 | </sbol:SequenceAnnotation> 39 | </sbol:sequenceAnnotation> 40 | <sbol:sequence rdf:resource="http://partsregistry.org/seq/BBa_J23119"/> 41 | </sbol:ComponentDefinition> 42 | <sbol:Sequence rdf:about="http://partsregistry.org/seq/BBa_J23119"> 43 | <sbol:persistentIdentity rdf:resource="http://partsregistry.org/seq/BBa_J23119"/> 44 | <sbol:displayId>BBa_J23119</sbol:displayId> 45 | <prov:wasDerivedFrom rdf:resource="http://parts.igem.org/Part:BBa_J23119:Design"/> 46 | <sbol:elements>ttgacagctagctcagtcctaggtataatgctagc</sbol:elements> 47 | <sbol:encoding 
rdf:resource="http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html"/> 48 | </sbol:Sequence> 49 | </rdf:RDF> 50 | -------------------------------------------------------------------------------- /src/examples/sbol/v2/SequenceOutput.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" ?> 2 | <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:sbol="http://sbols.org/v2#" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:prov="http://www.w3.org/ns/prov#"> 3 | <sbol:Sequence rdf:about="http://partsregistry.org/seq/BBa_J23119"> 4 | <sbol:persistentIdentity rdf:resource="http://partsregistry.org/seq/BBa_J23119"/> 5 | <sbol:displayId>BBa_J23119</sbol:displayId> 6 | <prov:wasDerivedFrom rdf:resource="http://parts.igem.org/Part:BBa_J23119:Design"/> 7 | <sbol:elements>ttgacagctagctcagtcctaggtataatgctagc</sbol:elements> 8 | <sbol:encoding rdf:resource="http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html"/> 9 | </sbol:Sequence> 10 | </rdf:RDF> 11 | -------------------------------------------------------------------------------- /src/examples/sbol/v2/SimpleComponentDefinitionExample.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" ?> 2 | <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:sbol="http://sbols.org/v2#" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:prov="http://www.w3.org/ns/prov#"> 3 | <sbol:ComponentDefinition rdf:about="http://partsregistry.org/cd/BBa_J23119"> 4 | <sbol:persistentIdentity rdf:resource="http://partsregistry.org/cd/BBa_J23119"/> 5 | <sbol:displayId>BBa_J23119</sbol:displayId> 6 | <prov:wasDerivedFrom rdf:resource="http://partsregistry.org/Part:BBa_J23119"/> 7 | <dcterms:title>J23119 promoter</dcterms:title> 8 | <dcterms:description>Constitutive promoter</dcterms:description> 9 | <sbol:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#DnaRegion"/> 10 | <sbol:type 
rdf:resource="http://identifiers.org/chebi/CHEBI:4705"/> 11 | <sbol:role rdf:resource="http://identifiers.org/so/SO:0000167"/> 12 | <sbol:role rdf:resource="http://identifiers.org/so/SO:0000613"/> 13 | <sbol:sequence rdf:resource="http://partsregistry.org/seq/BBa_J23119"/> 14 | </sbol:ComponentDefinition> 15 | <sbol:Sequence rdf:about="http://partsregistry.org/seq/BBa_J23119"> 16 | <sbol:persistentIdentity rdf:resource="http://partsregistry.org/seq/BBa_J23119"/> 17 | <sbol:displayId>BBa_J23119</sbol:displayId> 18 | <prov:wasDerivedFrom rdf:resource="http://parts.igem.org/Part:BBa_J23119:Design"/> 19 | <sbol:elements>ttgacagctagctcagtcctaggtataatgctagc</sbol:elements> 20 | <sbol:encoding rdf:resource="http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html"/> 21 | </sbol:Sequence> 22 | </rdf:RDF> 23 | -------------------------------------------------------------------------------- /src/examples/sbol/v2/multipleSequences.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" ?> 2 | <rdf:RDF xmlns:grn="urn:bbn.com:tasbe:grn/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:sbol="http://sbols.org/v2#" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:annot="http://myannotation.org/" xmlns:prov="http://www.w3.org/ns/prov#"> 3 | <sbol:Sequence rdf:about="http://www.async.ece.utah.edu/pLacSeq/1.0"> 4 | <sbol:persistentIdentity rdf:resource="http://www.async.ece.utah.edu/pLacSeq"/> 5 | <sbol:displayId>pLacSeq</sbol:displayId> 6 | <sbol:version>1.0</sbol:version> 7 | <sbol:elements>ACGTURYSWKMBDHVN-.</sbol:elements> 8 | <sbol:encoding rdf:resource="http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html"/> 9 | </sbol:Sequence> 10 | <sbol:Sequence rdf:about="http://www.async.ece.utah.edu/pLactetRSeq/1.0"> 11 | <sbol:persistentIdentity rdf:resource="http://www.async.ece.utah.edu/pLactetRSeq"/> 12 | <sbol:displayId>pLactetRSeq</sbol:displayId> 13 | <sbol:version>1.0</sbol:version> 14 | 
<sbol:elements>ACGTURYSWKMBDHVN-.</sbol:elements> 15 | <sbol:encoding rdf:resource="http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html"/> 16 | </sbol:Sequence> 17 | <sbol:Sequence rdf:about="http://www.async.ece.utah.edu/tetRSeq/1.0"> 18 | <sbol:persistentIdentity rdf:resource="http://www.async.ece.utah.edu/tetRSeq"/> 19 | <sbol:displayId>tetRSeq</sbol:displayId> 20 | <sbol:version>1.0</sbol:version> 21 | <sbol:elements>ACGTURYSWKMBDHVN-.</sbol:elements> 22 | <sbol:encoding rdf:resource="http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html"/> 23 | </sbol:Sequence> 24 | </rdf:RDF> 25 | -------------------------------------------------------------------------------- /src/examples/sbol/v2/singleSequence.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" ?> 2 | <rdf:RDF xmlns:grn="urn:bbn.com:tasbe:grn/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:sbol="http://sbols.org/v2#" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:annot="http://myannotation.org/" xmlns:prov="http://www.w3.org/ns/prov#"> 3 | <sbol:Sequence rdf:about="http://www.async.ece.utah.edu/pLacSeq/1.0"> 4 | <sbol:persistentIdentity rdf:resource="http://www.async.ece.utah.edu/pLacSeq"/> 5 | <sbol:displayId>pLacSeq</sbol:displayId> 6 | <sbol:version>1.0</sbol:version> 7 | <sbol:elements>ACGTURYSWKMBDHVN-.</sbol:elements> 8 | <sbol:encoding rdf:resource="http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html"/> 9 | </sbol:Sequence> 10 | </rdf:RDF> 11 | -------------------------------------------------------------------------------- /src/examples/seqbuilder/Pombe_ch3_annotate.sbd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lattice-Automation/seqparse/8f05b12d1a01ae80fb5b1c97a8dee6e496c53a89/src/examples/seqbuilder/Pombe_ch3_annotate.sbd -------------------------------------------------------------------------------- /src/examples/seqbuilder/pFA6a nmt41 3xflag AID kanMX6 
/**
 * Get a remote sequence from NCBI or the iGEM registry.
 *
 * The caller does not name the target registry, so it is inferred from the accession:
 * IDs starting with "BB" are treated as BioBricks and requested from iGEM, everything
 * else is requested from NCBI's nuccore database as a GenBank flat file.
 *
 * @param accession accession ID of the sequence; surrounding whitespace is ignored
 * @param options parse options; `cors` forces the cors-anywhere proxy for iGEM requests
 * @returns the first sequence parsed out of the response body
 * @throws Error if the request fails, the response status is not OK, or the body is empty;
 *   errors thrown by parseFile propagate unchanged
 */
export default async (accession: string, options?: ParseOptions): Promise<Seq> => {
  // Normalize once so the registry check and the URL agree on the same ID, and encode it so
  // that an unexpected character cannot alter the query string.
  const id = accession.trim();
  const encodedId = encodeURIComponent(id);

  let url = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=${encodedId}&rettype=gbwithparts&retmode=text`;
  if (id.startsWith("BB")) {
    // it's a BioBrick... target the iGEM repo
    const igemUrl = `http://parts.igem.org/cgi/xml/part.cgi?part=${encodedId}`;
    const inBrowser = typeof window !== "undefined" && typeof process === "undefined";

    // use the proxy to get around a no-CORS setting on iGEM webserver, pending fix on their side
    url = inBrowser || options?.cors ? `https://cors-anywhere.herokuapp.com/${igemUrl}` : igemUrl;
  }

  // Request the file from the webserver
  let body = "";
  let response: Response;
  try {
    response = await fetch(url);
    body = await response.text();
  } catch (err) {
    throw new Error(`Failed to get part: accession=${accession} url=${url} err=${err}`);
  }
  if (!response.ok || !body.length) {
    throw new Error(
      `Failed to get part, no body returned: accession=${accession} url=${url} status=${response.status}`,
    );
  }

  // parseFile is synchronous; the accession lookup only ever returns the first sequence
  return parseFile(body)[0];
};

/**
 * Returns whether the passed ID looks like an accession in iGEM or NCBI.
 *
 * This is a heuristic: anything starting with "BB" counts as a BioBrick, and any string
 * shorter than 14 characters made up only of letters, digits, "_", "-" and "." counts as
 * an NCBI accession. The input is deliberately not trimmed, so file contents that end in
 * a newline are never mistaken for an accession.
 */
export const isAccession = (accession: string): boolean => {
  if (accession.startsWith("BB")) {
    return true; // biobrick
  }
  return accession.length < 14 && /^[a-z0-9_\-.]+$/i.test(accession);
};
Inferred from the seq's symbols */ 13 | type: "dna" | "rna" | "aa" | "unknown"; 14 | } 15 | 16 | /** Annotation is a single feature/annotation parsed from a sequence file. */ 17 | export interface Annotation { 18 | /** color of the annotation if set */ 19 | color?: string; 20 | /** 1 if forward, 0 if no direction, -1 if in reverse direction */ 21 | direction?: number; 22 | /** end of the annotation, 0-based */ 23 | end: number; 24 | /** name of the annotation */ 25 | name: string; 26 | /** start of the annotation, 0-based */ 27 | start: number; 28 | /** type field if set on the annotation */ 29 | type?: string; 30 | } 31 | 32 | /** Options to parse sequence files. */ 33 | export interface ParseOptions { 34 | /** 35 | * Whether to use cors-anywhere to circumvent iGEM's web server having a bad configuration. 36 | */ 37 | cors?: boolean; 38 | 39 | /** name of the source file */ 40 | fileName?: string; 41 | 42 | /** 43 | * Source of the file (ArrayBuffer). This is necessary for SnapGene. 44 | * 45 | * Eg after a read from FileReader.readAsArrayBuffer() in a browser: 46 | * https://developer.mozilla.org/en-US/docs/Web/API/FileReader/readAsArrayBuffer 47 | */ 48 | source?: ArrayBuffer; 49 | } 50 | 51 | /* Parse a sequence file. Or download a sequence with an Accession ID. 
/**
 * parseFile converts the contents of a sequence file to an array of Seq.
 *
 * The file format is detected from the file's contents and, when provided, the extension of
 * `opts.fileName`. Formats are tried in this order: JBEI, FASTA, GenBank, SnapGene,
 * SeqBuilder, BioBrick, Benchling, SBOL and, last, a bare DNA text file.
 *
 * @param file contents of the sequence file
 * @param opts parse options; `fileName` assists format detection and naming, and the whole
 *   object is handed to the SnapGene parser
 * @returns the parsed sequences, each with its annotations sorted by start and then end
 * @throws Error if `file` is empty or its format is not recognized
 */
export default (file: string, opts?: ParseOptions): Seq[] => {
  const fileName = opts?.fileName || "";
  const sourceName = fileName.split(sep).pop() || fileName;

  if (!file) {
    throw Error("cannot parse null or empty string");
  }

  // this is a check for an edge case, where the user uploads some kind
  // of file that's full of bps but doesn't fit into a defined type
  //
  // a file without any newline is treated as a single line rather than as an empty one
  const firstNewline = file.indexOf("\n");
  const firstLine = firstNewline === -1 ? file : file.substring(0, firstNewline);
  // the global flag matters: without it only the first non-DNA character is removed
  const dnaCharLength = firstLine.replace(/[^atcg]/gi, "").length;
  const dnaOnlyFile = firstLine.length > 0 && dnaCharLength / firstLine.length > 0.8; // is it >80% dna?

  // name of the file up to its first ".", or the whole name if it has no extension
  const extensionStart = sourceName.indexOf(".");
  const name = (extensionStart === -1 ? sourceName : sourceName.substring(0, extensionStart)) || "Untitled";

  // another edge case check for whether the seq is a JSON seq from Benchling
  // just a heuristic that says 1) yes it can be parsed 2) it contains a list of
  // fields that are common to Benchling files
  let isBenchling = false;
  try {
    const benchlingJSON = JSON.parse(file); // will err out if not JSON
    if (["bases", "annotations", "primers"].every(k => typeof benchlingJSON[k] !== "undefined")) {
      isBenchling = true;
    }
  } catch (ex) {
    // expected: most sequence files are not JSON
  }

  const prefix = file.substring(0, 200);
  let seqs: Seq[];
  switch (true) {
    // JBEI
    case prefix.includes(':seq="http://jbei.org/sequence"'):
    case file.startsWith("<seq:seq"):
      seqs = parseJbei(file);
      break;

    // FASTA
    case file.startsWith(">"):
    case file.startsWith(";"):
    case fileName.endsWith(".seq"):
    case fileName.endsWith(".fa"):
    case fileName.endsWith(".fas"):
    case fileName.endsWith(".fasta"):
      seqs = parseFasta(file, fileName);
      break;

    // Genbank
    case file.includes("LOCUS") && file.includes("ORIGIN"):
    case fileName.endsWith(".gb"):
    case fileName.endsWith(".gbk"):
    case fileName.endsWith(".genbank"):
    case fileName.endsWith(".ape"):
      seqs = parseGenbank(file, fileName);
      break;

    // SnapGene
    case fileName.endsWith(".dna"):
      seqs = parseSnapgene(opts);
      break;

    // SeqBuilder
    case prefix.includes("Written by SeqBuilder"):
    case fileName.endsWith(".sbd"):
      seqs = parseSeqBuilder(file, fileName);
      break;

    // BioBrick XML
    case prefix.includes("Parts from the iGEM"):
    case prefix.includes("<part_list>"):
      seqs = parseBioBrick(file);
      break;

    // Benchling JSON
    case isBenchling:
      seqs = parseBenchling(file);
      break;

    // SBOL
    case prefix.includes("RDF"):
      seqs = parseSbol(file, fileName);
      break;

    // a DNA text file without an official formatting
    case dnaOnlyFile: {
      const { seq } = complement(file);
      seqs = [{ annotations: [], name, seq, type: guessType(seq) }];
      break;
    }

    default:
      // only echo the start of the file: the full contents can be many megabytes
      throw Error(`${fileName} File type not recognized: ${prefix}`);
  }

  // bit of clean up to: only return the fields in a Seq and reorder to match expectations.
  return seqs.map(p => ({
    annotations: p.annotations
      .sort((a, b) => a.start - b.start || a.end - b.end)
      .map(a => ({
        color: a.color,
        direction: a.direction,
        end: a.end,
        name: a.name,
        start: a.start,
        type: a.type,
      })),
    name: p.name,
    seq: p.seq,
    type: p.type,
  }));
};
/**
 * Parse a Benchling JSON export into Seq[].
 *
 * Benchling's format is plain JSON whose fields map almost one-to-one onto Seq: `bases`
 * holds the sequence, `annotations` the features, and `name` (or `_id`) the name.
 *
 * @param text JSON text of the Benchling part
 * @returns a single-element array holding the parsed part
 * @throws Error if the cleaned sequence is empty
 */
export default (text: string): Seq[] => {
  const { _id, annotations, bases, name } = JSON.parse(text);

  // clean the bases before anything else so an empty part is rejected early
  const cleaned = complement(bases).seq;
  if (cleaned.length < 1) {
    throw new Error("Invalid Benchling part: empty sequence");
  }

  // keep every annotation field, adding a direction derived from the strand
  const withDirection = annotations.map(annotation => ({
    ...annotation,
    direction: parseDirection(annotation.strand),
  }));

  const parsed = {
    annotations: withDirection,
    name: name || _id,
    seq: cleaned,
    type: guessType(cleaned),
  };
  return [parsed];
};
/**
 * Parse a FASTA file into Seq[]. All parsed sequences are assumed to be linear and carry
 * no annotations. Three layouts are handled:
 *
 * 1. standard FASTA, where each record starts with a ">" header line (multi-record files
 *    yield one Seq per record; records missing a name or a sequence are dropped)
 * 2. old-style FASTA, where the header and comment lines start with ";"
 * 3. a headerless file holding nothing but the sequence, named after the file
 *
 * @param text contents of the FASTA file
 * @param fileName name of the source file, used as a fallback for the sequence name
 * @returns the parsed sequences
 */
export default (text: string, fileName: string): Seq[] => {
  const trimmed = text.trim();

  // name derived from the file name without its extension. lastIndexOf returns -1 (which is
  // truthy) when there is no ".", so it has to be compared explicitly rather than `||`-ed
  const nameFromFile = (): string => {
    const extensionStart = fileName.lastIndexOf(".");
    return (extensionStart > 0 ? fileName.substring(0, extensionStart) : fileName) || "Untitled";
  };

  if (trimmed.startsWith(">")) {
    return text
      .split(">") // split up if it's a multi-seq FASTA file
      .map(record => {
        // everything after the header line is sequence; strip newlines and other whitespace.
        // a record without any newline has a header but no sequence
        const headerEnd = record.indexOf("\n");
        const seq = headerEnd === -1 ? "" : record.substring(headerEnd).replace(/\s/g, "");

        // the first line contains the name, though there's lots of variability around
        // the information on this line...
        // >MCHU - Calmodulin - Human, rabbit, bovine, rat, and chicken
        const name = record
          .substring(0, record.search(/\n|\|/))
          .replace(/\//g, "")
          .trim(); // drops the "\r" left behind by Windows line endings

        return {
          annotations: [],
          name,
          seq,
          type: guessType(seq),
        };
      })
      .filter(p => p.name && p.seq);
  }

  if (trimmed.startsWith(";")) {
    // it's an old-school style FASTA that's punctuated with semi-colons
    // ;my|NAME
    // ;my comment
    // actGacgata
    const headerEnd = trimmed.search(/\n|\|/);
    const header = headerEnd === -1 ? trimmed : trimmed.substring(0, headerEnd);
    // the leading ";" marks the line as a header, it isn't part of the name
    const name = header.replace(/^;+/, "").replace(/\//g, "").trim() || nameFromFile();

    // the sequence is everything after the last ";" line
    const newlineBeforeSeq = trimmed.indexOf("\n", trimmed.lastIndexOf(";"));
    const seq = newlineBeforeSeq === -1 ? "" : trimmed.substring(newlineBeforeSeq).replace(/\s/g, "");
    if (!seq) {
      return []; // consistent with the ">" branch: records without a sequence are dropped
    }

    return [
      {
        annotations: [],
        name,
        seq,
        type: guessType(seq),
      },
    ];
  }

  // assume that it's a no name FASTA. Ie it's just a file with dna and no header
  // try and get the name from the fileName
  const seq = text.replace(/\s/g, "");
  return [
    {
      annotations: [],
      name: nameFromFile(),
      seq,
      type: guessType(seq),
    },
  ];
};
/**
 * takes in a string representation of a GenBank file and outputs our
 * part representation of it. there is significant variability to the
 * format, so the parsing below is deliberately lenient.
 *
 * an official example can be found at:
 * https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html
 *
 * A file may hold several records separated by "//"; one part is returned per record.
 *
 * @param fileInput contents of the GenBank file
 * @param fileName name of the source file, used as a fallback for the part name
 * @returns one object per record with its annotations, name, primers (always empty), seq and type
 */
export default (fileInput: string, fileName: string) =>
  fileInput
    .split(/\/\/\s/g)
    .filter(f => f.length > 5)
    .map(file => {
      // the first row contains the name of the part and its creation date
      // LOCUS       SCU49845     5028 bp    DNA             PLN       21-JUN-1999
      const HEADER_ROW = file.substring(file.indexOf("LOCUS"), file.search(/\\n|\n/));
      const [, name] = HEADER_ROW.split(/\s{2,}/g).filter(h => h);

      // trying to avoid giving a stupid name like Exported which Snapgene has by default
      // also, if there is not name in header, the seq length will be used as name, which should
      // be corrected (Number.parseInt to check for this case) https://stackoverflow.com/a/175787/7541747
      //
      // a header without a second column leaves `name` undefined; treat that as "no name"
      // and fall through to the same fallbacks instead of crashing on `.trim()` below
      let parsedName = name || "";
      if (
        !parsedName || // nothing usable in the header
        (parsedName === "Exported" && file.includes("SnapGene")) || // stupid Snapgene name
        Number.parseInt(parsedName, 10) // it thinks seq-length is the name
      ) {
        // first try and get the name from ACCESSION
        let accessionName = false;
        const accessionStart = file.indexOf("ACCESSION");
        if (accessionStart !== -1) {
          // the ACCESSION line may be the last line of the record, with no trailing newline
          const accessionLineEnd = file.indexOf("\n", accessionStart);
          // this will be undefined if there is no accession value on the line
          const accession = file
            .substring(accessionStart, accessionLineEnd === -1 ? file.length : accessionLineEnd)
            .replace(".", "")
            .split(/\s{2,}/)
            .filter(a => a !== "ACCESSION")
            .pop();
          if (accession) {
            parsedName = accession;
            accessionName = true;
          }
        }

        // otherwise, revert to trying to get the part name from the file name
        if (!accessionName && fileName) {
          parsedName = fileName
            .substring(0, Math.max(fileName.search(/\n|\||\./), fileName.lastIndexOf(".")))
            .replace(/\/\s/g, "");
        } else if (!accessionName) {
          parsedName = "Unnamed"; // give up
        }
      }

      // the part sequence is contained in and after the line that begins with ORIGIN
      // do this before annotations so we can calc seqlength
      //
      // ORIGIN
      //    1 gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg
      //   61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct
      const SEQ_ROWS = file.substring(file.lastIndexOf("ORIGIN") + "ORIGIN".length, file.length);
      let seq = SEQ_ROWS.replace(/[^gatc]/gi, "");
      ({ seq } = complement(seq)); // seq and compSeq

      // the features are translated into annotations
      // region is FEATURES thru ORIGIN
      // FEATURES             Location/Qualifiers
      //      source          1..5028
      //                      /organism="Saccharomyces cerevisiae"
      //                      /db_xref="taxon:4932"
      //                      /chromosome="IX"
      //                      /map="9"
      //
      // in the example above, source is the annotation "type" and name is "taxon:4932"
      // because "db_xref" is a recognized name type
      // the name depends on whether the tag type is in the recognized list of types
      const annotations: Annotation[] = [];
      const primers = [];
      // `includes` rather than a truthiness check on indexOf: indexOf is -1 (truthy) when
      // FEATURES is missing, which used to turn the record's header rows into annotations
      if (file.includes("FEATURES")) {
        const FEATURES_LINE = file.indexOf("FEATURES");
        const FEATURES_NEW_LINE = file.indexOf("\n", FEATURES_LINE);
        let ORIGIN_LINE = file.lastIndexOf("ORIGIN");

        // some files have a contig file line that needs to parsed out/ shouldn't be included in
        // the features parsing
        if (file.includes("CONTIG")) {
          ORIGIN_LINE = Math.min(ORIGIN_LINE, file.indexOf("CONTIG"));
        }
        const FEATURES_ROWS = file
          .substring(FEATURES_NEW_LINE, ORIGIN_LINE)
          .split(/\n/)
          .filter(r => r);

        FEATURES_ROWS.forEach(r => {
          // in the example above, the following converts it to ['source', '1..5028']
          const currLine = r.split(/\s{2,}/g).filter(l => l);
          if (currLine.length > 1) {
            // it's the beginning of a new feature/annotation
            const [type, rangeString] = currLine;
            const rangeRegex = /\d+/g; // stateful: each exec() call returns the next number
            const direction = r.includes("complement") ? -1 : 1;

            // using the example above, this parses 1..5028 into 1 and 5028
            let [start, end] = [0, 0];
            const startSearch = rangeRegex.exec(rangeString);

            if (startSearch) {
              // the - 1 is because genbank is 1-based while we're 0. the subtraction has to
              // happen before the modulo, hence the parentheses
              start = (+startSearch[0] - 1) % seq.length;
              // single bp annotations are a thing in Genbank:
              // https://github.com/Lattice-Automation/seqviz/issues/117
              end = (start + 1) % seq.length;
              const endSearch = rangeRegex.exec(rangeString);
              if (endSearch) {
                end = +endSearch[0] % seq.length;
              }
            }

            if (type !== "source") {
              // create a new annotation around the properties in this line (type and range)
              annotations.push({
                direction,
                end,
                // set in next block
                name: "",
                start,
                type,
              });
            }
          } else if (currLine.length === 1) {
            // it's a continuation of a prior feature/annotation
            // any updates (to name or color) to the last annotation should affect
            // the last annotation that's in the array
            if (currLine[0].startsWith("/")) {
              let [tag] = currLine;
              tag = tag.replace(/[/"]/g, ""); // get rid of quotation marks and forward slashes
              // should now look like ['organism', 'Saccharomyces cerevisiae']
              const [tagName, tagValue] = tag.split(/=/);

              // the two values that can be extracted are name or color
              const lastAnn = annotations.length - 1;
              if (tagNameSet.has(tagName.toLowerCase())) {
                // the key is something we recognize as an annotation name
                if (lastAnn >= 0 && !annotations[lastAnn].name) {
                  // a qualifier written without "=value" has no value to use as a name
                  annotations[lastAnn].name = (tagValue || "").trim();
                }
              } else if (tagColorSet.has(tagName)) {
                // the key is something we recognize as an annotation color
                if (lastAnn > -1) {
                  annotations[lastAnn].color = tagValue;
                }
              }
            }
          }
        });
      }

      return {
        annotations: annotations,
        name: parsedName.trim() || fileName,
        primers: primers,
        seq: seq,
        type: guessType(seq),
      };
    });
"cagctagctcagtcctaggtactgtgctagctactagtgaaagaggagaaatactagatggcttcctccgaagacgttatcaaagagttcatgcgtttcaaagttcgtatggaaggttccgttaacggtcacgagttcgaaatcgaaggtgaaggtgaaggtcgtccgtacgaaggtacccagaccgctaaactgaaagttaccaaaggtggtccgctgccgttcgcttgggacatcctgtccccgcagttccagtacggttccaaagcttacgttaaacacccggctgacatcccggactacctgaaactgtccttcccggaaggtttcaaatgggaacgtgttatgaacttcgaagacggtggtgttgttaccgttacccaggactcctccctgcaagacggtgagttcatctacaaagttaaactgcgtggtaccaacttcccgtccgacggtccggttatgcagaaaaaaaccatgggttgggaagcttccaccgaacgtatgtacccggaagacggtgctctgaaaggtgaaatcaaaatgcgtctgaaactgaaagacggtggtcactacgacgctgaagttaaaaccacctacatggctaaaaaaccggttcagctgccgggtgcttacaaaaccgacatcaaactggacatcacctcccacaacgaagactacaccatcgttgaacagtacgaacgtgctgaaggtcgtcactccaccggtgcttaataacgctgatagtgctagtgtagatcgctactagagccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttatatactagaagcggccgggatcctaactcgagtaaggatctccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttatacctagggcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccataggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaaggacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgactagtgcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacaggagtccaagcgagc
/**
 * Converts a JBEI file to a Seq
 *
 * https://j5.jbei.org/j5manual/pages/94.html
 *
 * @param JBEI XML contents of the JBEI file
 * @returns a single-element array with the parsed sequence, or an empty array when the
 *   file has no root `seq` element or no usable sequence
 */
export default (JBEI: string): Seq[] => {
  // weird edge case with directed quotation characters
  const fileString = JBEI.replace(/“|”/g, '"');

  // parse
  const parsedJbei = new XMLParser({
    removeNSPrefix: true,
  }).parse(fileString);

  // destructure the parameters from JBEI
  const { seq } = parsedJbei;
  if (!seq) return []; // not a JBEI document: same outcome as a missing sequence
  const { features, name, sequence } = seq;

  // attempt to get the name out of the JBEI. the XML parser turns numeric-looking text into
  // numbers, so coerce back to a string to honor Seq's `name: string`
  let parsedName = "Unnamed";
  if (name) {
    parsedName = String(name);
  }

  // attempt to get the sequence. fail if it's not findable
  const { seq: parsedSeq } = complement(sequence); // seq and compSeq
  if (!parsedSeq) return [];

  // attempt to parse the JBEI annotations into our version of annotations
  const annotations: Annotation[] = [];
  if (features && features.feature) {
    // a lone <feature> is parsed as an object rather than a one-element array
    const featureList = Array.isArray(features.feature) ? features.feature : [features.feature];

    featureList.forEach(feature => {
      if (!feature) return;

      // renamed so it doesn't shadow the imported `complement` helper
      const { complement: isComplement, label, location, type } = feature;
      if (location && location.genbankStart && location.end) {
        annotations.push({
          direction: isComplement ? -1 : 1,
          // JBEI is 1-based and end-inclusive, so only the start needs shifting
          end: +location.end || 0,
          name: label !== undefined && label !== null && label !== "" ? String(label) : "Untitled",
          start: +location.genbankStart - 1 || 0,
          type: type || "N/A",
        });
      }
    });
  }

  return [
    {
      annotations: annotations,
      name: parsedName,
      seq: parsedSeq,
      type: guessType(parsedSeq),
    },
  ];
};
/**
 * Converts an SBOL file to our Seq format.
 *
 * SBOL v2.0 schema definition can be found at: http://sbolstandard.org/wp-content/uploads/2016/06/SBOL-data-model-2.2.1.pdf
 * differs from SBOL v1.0 in that the ComponentDefinitions are like the root parts,
 * and the sequence and annotations are separated (they're no longer defined relationally
 * by nesting but, instead, by id) we only care about components that have sequence information
 *
 * @param sbol raw XML text of the SBOL v2 document
 * @param fileName used to build a fallback name for components without a displayId
 * @throws Error when the document cannot be parsed or yields no sequence
 */
export default (sbol: string, fileName: string): Seq[] => {
  // weird edge case with directed quotation characters
  const fileString = sbol.replace(/“|”/g, '"');

  // these tags are forced to arrays so the code below can index/iterate them unconditionally
  const parsedSBOL = new XMLParser({
    ignoreAttributes: false,
    isArray: name =>
      ["Sequence", "ComponentDefinition", "SequenceAnnotation", "sequenceAnnotation", "elements"].includes(name),
    removeNSPrefix: true,
  }).parse(fileString);

  try {
    const seqList = parseSBOL2(parsedSBOL, fileName);

    if (seqList.length) {
      return seqList;
    } else {
      throw new Error("No Sequence info found");
    }
  } catch (err) {
    throw new Error(`Failed to parse SBOL v2 file: ${err}`);
  }
};

/**
 * Walks the parsed RDF document and builds one Seq per ComponentDefinition that references a
 * Sequence. When no component produces a Seq, falls back to the first Sequence element alone.
 *
 * @param parsedSBOL object returned by the XML parser for the whole document
 * @param fileName used for the `${fileName}_${index}` fallback name
 * @returns the (possibly empty) list of parsed sequences
 * @throws Error when there is no RDF root, or neither ComponentDefinition nor Sequence exists
 */
const parseSBOL2 = (parsedSBOL, fileName: string): Seq[] => {
  const { RDF } = parsedSBOL;
  if (!RDF) {
    throw new Error("No root RDF document");
  }

  // check if anything is defined, fail if not
  const { ComponentDefinition, Sequence } = RDF;
  if (!ComponentDefinition && !Sequence) {
    throw new Error("Failed to parse SBOL v2: No ComponentDefinition or Sequence");
  }

  // a document may hold ComponentDefinitions without any Sequence element
  const sequences = Sequence || [];

  // look a Sequence element up by id, or take the first one when no id is given
  const getSeq = (seqID?: string) => {
    const seqElement = seqID
      ? sequences.find(
          s =>
            // persistentIdentity is an attribute-only element, i.e. an object and not an array
            (s.persistentIdentity && s.persistentIdentity["@_resource"] === seqID) || s["@_about"] === seqID
        )
      : sequences[0];

    if (seqElement && seqElement.elements) {
      const { seq } = complement(seqElement.elements[0] || "");
      return {
        annotations: [],
        name: seqElement.displayId,
        seq,
        type: guessType(seq),
      };
    }
    return null;
  };

  // if it's a collection of ComponentDefinitions, parse each to a part
  const seqList: Seq[] = [];
  (ComponentDefinition || []).forEach((c, i) => {
    // we're only making parts out of those with seq info
    if (!c.sequence) {
      return;
    }

    const { displayId, sequence, sequenceAnnotation } = c;
    const name = displayId || `${fileName}_${i + 1}`;

    const annotations: Annotation[] = [];
    (sequenceAnnotation || []).forEach(wrapper => {
      // skip annotations that are only references or that carry no location
      const ann = wrapper?.SequenceAnnotation?.[0];
      const range = ann?.location?.Range;
      if (range) {
        annotations.push({
          end: range.end - 1,
          name: ann.displayId,
          start: range.start - 1,
        });
      }
    });

    const seq = getSeq(sequence["@_resource"]);

    if (seq) {
      seqList.push({
        annotations,
        name,
        seq: seq.seq,
        type: seq.type,
      });
    }
  });

  // if no component produced a part, just try and get the sequence from the first Sequence alone
  if (!seqList.length) {
    const seq = getSeq();
    if (seq) {
      seqList.push(seq);
    }
  }
  return seqList;
};
// a list of recognized types that would constitute an annotation name
const tagNameList = ["gene", "product", "note", "db_xref", "protein_id", "label", "lab_host"];

// a list of tags that could represent colors
const tagColorList = ["ApEinfo_fwdcolor", "ApEinfo_revcolor", "loom_color"];

// the part sequence comes after the line that specifies the seqbuilder version number
const SEQBUILDER_HEADER = /.*?written by seqbuilder .*?[0-9.]+[^actg]+/i;

/**
 * takes in a string representation of a SeqBuilder file and outputs our
 * part representation of it. an example of a SeqBuilder file can be found
 * at imports/io/examples/seqbuilder, though there may be variations to the
 * format
 *
 * @param fileInput text of the file; records are separated by "//" followed by whitespace
 * @param fileName fallback used for the part name when the record does not provide one
 * @returns one `{ annotations, name, seq, type }` object per non-empty record
 * @throws Error when a record has no "written by seqbuilder" line or no sequence after it
 */
export default (fileInput: string, fileName: string) =>
  fileInput
    .split(/\/\/\s/g)
    // a trailing "//" terminator leaves an empty chunk behind; there is nothing to parse in it
    .filter(file => file.trim())
    .map(file => {
      // +++++SEQUENCE+++++//
      const header = file.match(SEQBUILDER_HEADER);
      if (!header) {
        throw new Error('Failed to parse SeqBuilder file: no "written by seqbuilder" line found');
      }
      const baseRuns = file.substring((header.index || 0) + header[0].length).match(/[actgyrwskmdvhbxn]+/gim);
      if (!baseRuns) {
        throw new Error("Failed to parse SeqBuilder file: no sequence found after the header");
      }
      const { seq } = complement(baseRuns[0]); // seq and compSeq

      // there may be a genbank-like header row after the sequence
      // LOCUS       SCU49845     5028 bp    DNA             PLN       21-JUN-1999
      let parsedName = fileName.length > 0 ? fileName : "Unnamed";

      const locusIndex = file.indexOf("LOCUS");
      if (locusIndex !== -1) {
        // cut the row at the first line break *after* LOCUS, not the first one in the file
        const fromLocus = file.substring(locusIndex);
        const lineEnd = fromLocus.search(/\\n|\n/);
        const HEADER_ROW = lineEnd === -1 ? fromLocus : fromLocus.substring(0, lineEnd);
        const [, name] = HEADER_ROW.split(/\s{2,}/g).filter(h => h);
        // keep the file-name fallback when the row has no second column
        if (name) {
          parsedName = name;
        }
      }

      // Name setting logic ported from GenBank parser
      if (
        (parsedName === "Exported" && file.includes("SnapGene")) || // stupid Snapgene name
        Number.parseInt(parsedName, 10) // it thinks seq-length is the name
      ) {
        // first try and get the name from ACCESSION
        let accessionName = false;
        if (file.includes("ACCESSION")) {
          // this will be undefined if there is no value after ACCESSION
          const accession = file
            .substring(file.indexOf("ACCESSION"), file.indexOf("\n", file.indexOf("ACCESSION")))
            .replace(".", "")
            .split(/\s{2,}/)
            .filter(a => a !== "ACCESSION")
            .pop();
          if (accession) {
            parsedName = accession;
            accessionName = true;
          }
        }

        // otherwise, revert to trying to get the part name from the file name
        if (!accessionName && fileName) {
          parsedName = fileName
            .substring(0, Math.max(fileName.search(/\n|\||\./), fileName.lastIndexOf(".")))
            .replace(/\/\s/g, "");
        } else if (!accessionName) {
          parsedName = "Unnamed"; // give up
        }
      }

      // +++++ANNOTATIONS+++++//
      // the features are translated into annotations
      // region is FEATURES thru ORIGIN
      // FEATURES             Location/Qualifiers
      //      source          1..5028
      //                      /organism="Saccharomyces cerevisiae"
      //                      /db_xref="taxon:4932"
      //                      /chromosome="IX"
      //                      /map="9"
      //
      // in the example above, source is the annotation "type" and name is "taxon:4932"
      // because "db_xref" is a recognized name type
      // the name depends on whether the tag type is in the recognized list of types
      const annotations: Annotation[] = [];
      // includes(), not a truthiness test on indexOf(): -1 is truthy and 0 is falsy
      if (file.includes("FEATURES")) {
        const FEATURES_LINE = file.indexOf("FEATURES");
        const FEATURES_NEW_LINE = file.indexOf("\n", FEATURES_LINE);
        let ORIGIN_LINE = file.lastIndexOf("ORIGIN");
        if (ORIGIN_LINE === -1) {
          // no ORIGIN marker: the features run to the end of the record
          ORIGIN_LINE = file.length;
        }

        // some files have a contig file line that needs to parsed out/ shouldn't be included in
        // the features parsing
        if (file.includes("CONTIG")) {
          ORIGIN_LINE = Math.min(ORIGIN_LINE, file.indexOf("CONTIG"));
        }
        const FEATURES_ROWS =
          FEATURES_NEW_LINE === -1
            ? []
            : file
                .substring(FEATURES_NEW_LINE, ORIGIN_LINE)
                .split(/\n/)
                .filter(r => r);

        FEATURES_ROWS.forEach(r => {
          // in the example above, the following converts it to ['source', '1..5028']
          const currLine = r.split(/\s{2,}/g).filter(l => l);
          if (currLine.length > 1) {
            // it's the beginning of a new feature/annotation
            const [type, rangeString] = currLine;
            const rangeRegex = /\d+/g;
            const direction = r.includes("complement") ? -1 : 1;

            // using the example above, this parses 1..5028 into 1 and 5028
            let [start, end] = [0, 0];
            const startSearch = rangeRegex.exec(rangeString);

            if (startSearch) {
              // the - 1 is because genbank is 1-based while we're 0; the modulo wraps
              // indices that run past the end of the sequence, same as for `end` below
              start = (+startSearch[0] - 1) % seq.length;
              const endSearch = rangeRegex.exec(rangeString);
              if (endSearch) {
                end = +endSearch[0] % seq.length;
              }
            }

            if (type !== "source") {
              // source would just be an annotation for the entire sequence so remove
              // create a new annotation around the properties in this line (type and range)
              annotations.push({
                direction,
                end,
                name: "",
                start,
                type,
              });
            }
          } else if (currLine.length === 1) {
            // it's a continuation of a prior feature/annotation
            // any updates (to name or color) should affect the last annotation in the array
            const lastAnn = annotations[annotations.length - 1];
            if (!lastAnn) return;

            // get rid of quotation marks and forward slashes
            // should now look like ['organism', 'Saccharomyces cerevisiae']
            const [tagName, tagValue] = currLine[0].replace(/[/"]/g, "").split(/=/);

            // the two values that can be extracted are name or color
            if (tagNameList.includes(tagName)) {
              // only name an annotation that has no name yet; a qualifier without "=" has no value
              if (!lastAnn.name && tagValue) {
                lastAnn.name = tagValue.trim();
              }
            } else if (tagColorList.includes(tagName)) {
              lastAnn.color = tagValue;
            }
          }
        });
      }

      return {
        annotations: annotations,
        name: parsedName.trim() || fileName,
        seq: seq,
        type: guessType(seq),
      };
    });
/**
 * Parse a SnapGene file to Seq[]
 *
 * this is adapted from https://github.com/TeselaGen/ve-sequence-parsers/blob/master/src/parsers/snapgeneToJson.js
 * which was adapted from https://github.com/IsaacLuo/SnapGeneFileReader/blob/master/snapgene_reader/snapgene_reader.py
 *
 * @param options must carry `source` (the file bytes); `fileName` becomes the sequence name
 * @returns a single-element array with the parsed sequence and its feature annotations
 * @throws Error when there is no source, the SnapGene header is wrong, or the sequence block is malformed
 */
export default (options?: ParseOptions): Seq[] => {
  if (!options || !options.source) {
    throw new Error("Failed to parse SnapGene file. No valid file input");
  }

  const fileName = options.fileName || "";
  const seq = {
    annotations: [] as Annotation[],
    circular: false,
    name: "",
    seq: "",
    type: "unknown",
  };

  const buffer = Buffer.from(options.source);

  // Accumulate an offset from the start as we read through the file
  let offset = 0;

  // Read the next `size` bytes and advance the offset
  const read = (size: number) => {
    const start = offset;
    offset += size;
    return buffer.subarray(start, offset);
  };

  // Read from buffer and decode as string
  const readEnc = (size: number, fmt: BufferEncoding) => read(size).toString(fmt);

  // Read the first byte
  read(1);

  // Read document properties
  const length = read(4).readUInt32BE();
  const title = readEnc(8, "ascii");
  if (length !== 14 || title !== "SnapGene") {
    throw new Error(`Wrong format for a SnapGene file: length=${length} title=${title}`);
  }

  read(2); // isDNA
  read(2); // exportVersion
  read(2); // importVersion

  // READ THE WHOLE FILE, BLOCK BY BLOCK, UNTIL THE END
  while (offset < buffer.length) {
    // block type table
    // 0: dna sequence
    // 1: compressed DNA
    // 2: unknown
    // 3: unknown
    // 5: primers
    // 6: notes
    // 7: history tree
    // 8: additional sequence properties segment
    // 9: file Description
    // 10: features
    // 11: history node
    // 13: unknown
    // 16: alignable sequence
    // 17: alignable sequence
    // 18: sequence trace
    // 19: Uracil Positions
    // 20: custom DNA colors

    // each block is one type byte followed by a 4-byte big-endian payload size
    const blockType = read(1)[0];
    const blockSize = read(4).readUInt32BE();
    if (blockType === 0) {
      // Read the sequence and its properties
      read(1); // isCircular (read but not used; `circular` stays false)

      const size = blockSize - 1;
      if (size < 0) throw new Error("Failed parsing SnapGene: < 0 length sequence");
      seq.seq = readEnc(size, "ascii");
    } else if (blockType === 10) {
      // Read all the features
      const xml = readEnc(blockSize, "utf8") as string;
      const b = new XMLParser({
        attributeNamePrefix: "",
        ignoreAttributes: false,
        isArray: name => name === "Q" || name === "Segment",
        removeNSPrefix: true,
      }).parse(xml);

      // a lone <Feature> is parsed to an object and an empty <Features/> has none at all
      const rawFeatures = b.Features ? b.Features.Feature : undefined;
      let featureList = [];
      if (Array.isArray(rawFeatures)) {
        featureList = rawFeatures;
      } else if (rawFeatures) {
        featureList = [rawFeatures];
      }

      featureList.forEach(feature => {
        // a feature can be made of several segments; the annotation spans all of them
        const ranges = (feature.Segment || [])
          .filter(segment => segment && segment.range)
          .map(segment => String(segment.range).split("-"));
        if (!ranges.length) return;

        const minStart = Math.min(...ranges.map(([start]) => +start));
        const maxEnd = Math.max(0, ...ranges.map(([, end]) => +end));

        // create an Annotation
        seq.annotations.push({
          direction: parseDirection(
            {
              "0": "NONE",
              "1": 1,
              "2": -1,
              "3": "BIDIRECTIONAL",
              undefined: "NONE",
            }[feature.directionality]
          ),
          end: maxEnd - 1,
          name: feature.name,
          start: minStart - 1,
          type: feature.type,
        });
      });
    } else {
      // UNKNOWN: WE IGNORE THE WHOLE BLOCK
      read(blockSize);
    }
  }

  return [
    {
      ...seq,
      // SnapGene uses the filename as the sequence name
      name: fileName.split(sep).pop()?.replace(".dna", "") || fileName,
      type: guessType(seq.seq),
    },
  ];
};
// from http://arep.med.harvard.edu/labgc/adnan/projects/Utilities/revcomp.html
// complement of every recognized (IUPAC) nucleotide symbol, upper and lower case
const comp = {
  A: "T",
  B: "V",
  C: "G",
  D: "H",
  G: "C",
  H: "D",
  K: "M",
  M: "K",
  N: "N",
  R: "Y",
  S: "S",
  T: "A",
  U: "A",
  V: "B",
  W: "W",
  X: "X",
  Y: "R",
  a: "t",
  b: "v",
  c: "g",
  d: "h",
  g: "c",
  h: "d",
  k: "m",
  m: "k",
  n: "n",
  r: "y",
  s: "s",
  t: "a",
  u: "a",
  v: "b",
  w: "w",
  x: "x",
  y: "r",
};

/**
 * Return the filtered sequence and its complement. Characters without an entry in `comp`
 * are dropped from both. An empty/missing input returns empty strings for both.
 */
export const complement = (origSeq: string): { compSeq: string; seq: string } => {
  if (!origSeq) {
    return { compSeq: "", seq: "" };
  }

  // filter out unrecognized basepairs and build up the complement
  const kept: string[] = [];
  const complemented: string[] = [];
  for (let i = 0, origLength = origSeq.length; i < origLength; i += 1) {
    const match = comp[origSeq[i]];
    if (match) {
      kept.push(origSeq[i]);
      complemented.push(match);
    }
  }
  return { compSeq: complemented.join(""), seq: kept.join("") };
};

/**
 * Return the reverse complement of a DNA sequence
 */
export const reverseComplement = (inputSeq: string): string => {
  const { compSeq } = complement(inputSeq);
  return compSeq.split("").reverse().join("");
};

/** Return the first element of an array, or undefined when the input is not an array. */
export const firstElement = (arr: any) => {
  if (!Array.isArray(arr)) return undefined;
  return arr[0];
};

// direction spellings, stored upper-cased; inputs are normalized before the lookup
const fwd = new Set(["FWD", "FORWARD", "FOR", "TOP", "1"]);
const rev = new Set(["REV", "REVERSE", "BOTTOM", "-1"]);

/**
 * Parse the user defined direction, estimate the direction of the element.
 * Matching ignores case and surrounding whitespace; anything unrecognized is 0.
 *
 * ```js
 * parseDirection("FWD") => 1
 * parseDirection("Forward") => 1
 * parseDirection(-1) => -1
 * parseDirection("test") => 0
 * ```
 */
export const parseDirection = (direction: number | string | undefined): -1 | 0 | 1 => {
  if (!direction) {
    return 0;
  }
  const key = String(direction).trim().toUpperCase();
  if (fwd.has(key)) {
    return 1;
  }
  if (rev.has(key)) {
    return -1;
  }
  return 0;
};

/**
 * mapping the 64 standard codons to amino acids
 * no synth AA's
 *
 * adapted from: "https://github.com/keithwhor/NtSeq/blob/master/lib/nt.js
 */
const codon2AA = {
  AAA: "K",
  AAC: "N",
  AAG: "K",
  AAT: "N",
  ACA: "T",
  ACC: "T",
  ACG: "T",
  ACT: "T",
  AGA: "R",
  AGC: "S",
  AGG: "R",
  AGT: "S",
  ATA: "I",
  ATC: "I",
  ATG: "M",
  ATT: "I",
  CAA: "Q",
  CAC: "H",
  CAG: "Q",
  CAT: "H",
  CCA: "P",
  CCC: "P",
  CCG: "P",
  CCT: "P",
  CGA: "R",
  CGC: "R",
  CGG: "R",
  CGT: "R",
  CTA: "L",
  CTC: "L",
  CTG: "L",
  CTT: "L",
  GAA: "E",
  GAC: "D",
  GAG: "E",
  GAT: "D",
  GCA: "A",
  GCC: "A",
  GCG: "A",
  GCT: "A",
  GGA: "G",
  GGC: "G",
  GGG: "G",
  GGT: "G",
  GTA: "V",
  GTC: "V",
  GTG: "V",
  GTT: "V",
  TAA: "*",
  TAC: "Y",
  TAG: "*",
  TAT: "Y",
  TCA: "S",
  TCC: "S",
  TCG: "S",
  TCT: "S",
  TGA: "*",
  TGC: "C",
  TGG: "W",
  TGT: "C",
  TTA: "L",
  TTC: "F",
  TTG: "L",
  TTT: "F",
};

// every distinct one-letter amino acid code (plus "*" for stop), used as a character class
const aminoAcids = Array.from(new Set(Object.values(codon2AA)).values()).join("");
const aminoAcidRegex = new RegExp(`^[${aminoAcids}]+$`, "i");

/** Infer the type of a sequence. This only allows a couple wildcard characters so may be overly strict. */
export const guessType = (seq: string): "dna" | "rna" | "aa" | "unknown" => {
  // order matters: a sequence without t/u matches the dna pattern first
  if (/^[atgcn.]+$/i.test(seq)) {
    return "dna";
  } else if (/^[augcn.]+$/i.test(seq)) {
    return "rna";
  } else if (aminoAcidRegex.test(seq)) {
    return "aa";
  }
  return "unknown";
};
package.name, 19 | type: "umd", 20 | }, 21 | path: path.join(__dirname, "dist"), 22 | publicPath: "/dist/", 23 | umdNamedDefine: true, 24 | // https://stackoverflow.com/questions/64639839/typescript-webpack-library-generates-referenceerror-self-is-not-defined 25 | globalObject: 'this', 26 | }, 27 | module: { 28 | rules: [ 29 | { test: /\.(t|j)sx?$/, loader: "ts-loader", exclude: /node_modules/ }, 30 | ], 31 | }, 32 | externals: [nodeExternals({ modulesDir: path.join(__dirname, "node_modules") })], 33 | resolve: { 34 | extensions: [".ts"], 35 | fallback: { 36 | buffer: require.resolve("buffer"), 37 | fs: false, 38 | net: false, 39 | tls: false, 40 | path: require.resolve("path-browserify"), 41 | stream: require.resolve("stream-browserify"), 42 | timers: require.resolve("timers-browserify"), 43 | url: require.resolve("url"), 44 | }, 45 | }, 46 | }; 47 | 48 | const cliBuild = { 49 | ...webBuild, 50 | entry: path.join(__dirname, "src", "cli.ts"), 51 | target: "node", 52 | mode: "none", 53 | devtool: "source-map", 54 | optimization: { 55 | minimize: false, 56 | }, 57 | output: { 58 | filename: "cli.js", 59 | path: path.join(__dirname, "dist"), 60 | publicPath: "/dist/", 61 | umdNamedDefine: true, 62 | chunkFormat: 'commonjs' 63 | }, 64 | module: { 65 | rules: [ 66 | { test: /\.(t|j)sx?$/, loader: "ts-loader", exclude: /node_modules/ }, 67 | ], 68 | }, 69 | // externals: [], 70 | resolve: { 71 | extensions: [".ts"], 72 | fallback: { 73 | buffer: require.resolve("buffer"), 74 | fs: false, 75 | net: false, 76 | tls: false, 77 | path: require.resolve("path-browserify"), 78 | stream: require.resolve("stream-browserify"), 79 | timers: require.resolve("timers-browserify"), 80 | url: require.resolve("url"), 81 | }, 82 | }, 83 | plugins: [ 84 | new webpack.BannerPlugin({ 85 | banner: '#!/usr/bin/env node', 86 | raw: true, 87 | }), 88 | ], 89 | // experiments: { 90 | // outputModule: true, 91 | // }, 92 | }; 93 | 94 | module.exports = [webBuild, cliBuild]; 95 | 
--------------------------------------------------------------------------------